Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- | net/core/dev.c | 265
1 file changed, 163 insertions(+), 102 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index dd4f114a7cbf..8e7d0cb540cd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -107,6 +107,7 @@
 #include <net/pkt_cls.h>
 #include <net/checksum.h>
 #include <net/xfrm.h>
+#include <net/tcx.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -154,7 +155,6 @@
 #include "dev.h"
 #include "net-sysfs.h"
 
-
 static DEFINE_SPINLOCK(ptype_lock);
 struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 struct list_head ptype_all __read_mostly;	/* Taps */
@@ -3882,69 +3882,198 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
 EXPORT_SYMBOL(dev_loopback_xmit);
 
 #ifdef CONFIG_NET_EGRESS
-static struct sk_buff *
-sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
+static struct netdev_queue *
+netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb)
+{
+	int qm = skb_get_queue_mapping(skb);
+
+	return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm));
+}
+
+static bool netdev_xmit_txqueue_skipped(void)
 {
+	return __this_cpu_read(softnet_data.xmit.skip_txqueue);
+}
+
+void netdev_xmit_skip_txqueue(bool skip)
+{
+	__this_cpu_write(softnet_data.xmit.skip_txqueue, skip);
+}
+EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
+#endif /* CONFIG_NET_EGRESS */
+
+#ifdef CONFIG_NET_XGRESS
+static int tc_run(struct tcx_entry *entry, struct sk_buff *skb)
+{
+	int ret = TC_ACT_UNSPEC;
 #ifdef CONFIG_NET_CLS_ACT
-	struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
-	struct tcf_result cl_res;
+	struct mini_Qdisc *miniq = rcu_dereference_bh(entry->miniq);
+	struct tcf_result res;
 
 	if (!miniq)
-		return skb;
+		return ret;
 
-	/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
 	tc_skb_cb(skb)->mru = 0;
 	tc_skb_cb(skb)->post_ct = false;
-	mini_qdisc_bstats_cpu_update(miniq, skb);
 
-	switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
+	mini_qdisc_bstats_cpu_update(miniq, skb);
+	ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false);
+	/* Only tcf related quirks below. */
+	switch (ret) {
+	case TC_ACT_SHOT:
+		mini_qdisc_qstats_cpu_drop(miniq);
+		break;
 	case TC_ACT_OK:
 	case TC_ACT_RECLASSIFY:
-		skb->tc_index = TC_H_MIN(cl_res.classid);
+		skb->tc_index = TC_H_MIN(res.classid);
 		break;
+	}
+#endif /* CONFIG_NET_CLS_ACT */
+	return ret;
+}
+
+static DEFINE_STATIC_KEY_FALSE(tcx_needed_key);
+
+void tcx_inc(void)
+{
+	static_branch_inc(&tcx_needed_key);
+}
+
+void tcx_dec(void)
+{
+	static_branch_dec(&tcx_needed_key);
+}
+
+static __always_inline enum tcx_action_base
+tcx_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb,
+	const bool needs_mac)
+{
+	const struct bpf_mprog_fp *fp;
+	const struct bpf_prog *prog;
+	int ret = TCX_NEXT;
+
+	if (needs_mac)
+		__skb_push(skb, skb->mac_len);
+	bpf_mprog_foreach_prog(entry, fp, prog) {
+		bpf_compute_data_pointers(skb);
+		ret = bpf_prog_run(prog, skb);
+		if (ret != TCX_NEXT)
+			break;
+	}
+	if (needs_mac)
+		__skb_pull(skb, skb->mac_len);
+	return tcx_action_code(skb, ret);
+}
+
+static __always_inline struct sk_buff *
+sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
+		   struct net_device *orig_dev, bool *another)
+{
+	struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress);
+	int sch_ret;
+
+	if (!entry)
+		return skb;
+	if (*pt_prev) {
+		*ret = deliver_skb(skb, *pt_prev, orig_dev);
+		*pt_prev = NULL;
+	}
+
+	qdisc_skb_cb(skb)->pkt_len = skb->len;
+	tcx_set_ingress(skb, true);
+
+	if (static_branch_unlikely(&tcx_needed_key)) {
+		sch_ret = tcx_run(entry, skb, true);
+		if (sch_ret != TC_ACT_UNSPEC)
+			goto ingress_verdict;
+	}
+	sch_ret = tc_run(tcx_entry(entry), skb);
+ingress_verdict:
+	switch (sch_ret) {
+	case TC_ACT_REDIRECT:
+		/* skb_mac_header check was done by BPF, so we can safely
+		 * push the L2 header back before redirecting to another
+		 * netdev.
+		 */
+		__skb_push(skb, skb->mac_len);
+		if (skb_do_redirect(skb) == -EAGAIN) {
+			__skb_pull(skb, skb->mac_len);
+			*another = true;
+			break;
+		}
+		*ret = NET_RX_SUCCESS;
+		return NULL;
 	case TC_ACT_SHOT:
-		mini_qdisc_qstats_cpu_drop(miniq);
-		*ret = NET_XMIT_DROP;
-		kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
+		kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
+		*ret = NET_RX_DROP;
 		return NULL;
+	/* used by tc_run */
 	case TC_ACT_STOLEN:
 	case TC_ACT_QUEUED:
 	case TC_ACT_TRAP:
-		*ret = NET_XMIT_SUCCESS;
 		consume_skb(skb);
+		fallthrough;
+	case TC_ACT_CONSUMED:
+		*ret = NET_RX_SUCCESS;
 		return NULL;
+	}
+
+	return skb;
+}
+
+static __always_inline struct sk_buff *
+sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
+{
+	struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress);
+	int sch_ret;
+
+	if (!entry)
+		return skb;
+
+	/* qdisc_skb_cb(skb)->pkt_len & tcx_set_ingress() was
+	 * already set by the caller.
+	 */
+	if (static_branch_unlikely(&tcx_needed_key)) {
+		sch_ret = tcx_run(entry, skb, false);
+		if (sch_ret != TC_ACT_UNSPEC)
+			goto egress_verdict;
+	}
+	sch_ret = tc_run(tcx_entry(entry), skb);
+egress_verdict:
+	switch (sch_ret) {
 	case TC_ACT_REDIRECT:
 		/* No need to push/pop skb's mac_header here on egress! */
 		skb_do_redirect(skb);
 		*ret = NET_XMIT_SUCCESS;
 		return NULL;
-	default:
-		break;
+	case TC_ACT_SHOT:
+		kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
+		*ret = NET_XMIT_DROP;
+		return NULL;
+	/* used by tc_run */
+	case TC_ACT_STOLEN:
+	case TC_ACT_QUEUED:
+	case TC_ACT_TRAP:
+		*ret = NET_XMIT_SUCCESS;
+		return NULL;
 	}
-#endif /* CONFIG_NET_CLS_ACT */
 
 	return skb;
 }
-
-static struct netdev_queue *
-netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb)
-{
-	int qm = skb_get_queue_mapping(skb);
-
-	return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm));
-}
-
-static bool netdev_xmit_txqueue_skipped(void)
+#else
+static __always_inline struct sk_buff *
+sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
+		   struct net_device *orig_dev, bool *another)
 {
-	return __this_cpu_read(softnet_data.xmit.skip_txqueue);
+	return skb;
 }
 
-void netdev_xmit_skip_txqueue(bool skip)
+static __always_inline struct sk_buff *
+sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 {
-	__this_cpu_write(softnet_data.xmit.skip_txqueue, skip);
+	return skb;
 }
-EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
-#endif /* CONFIG_NET_EGRESS */
+#endif /* CONFIG_NET_XGRESS */
 
 #ifdef CONFIG_XPS
 static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
@@ -4128,9 +4257,7 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
 	skb_update_prio(skb);
 
 	qdisc_pkt_len_init(skb);
-#ifdef CONFIG_NET_CLS_ACT
-	skb->tc_at_ingress = 0;
-#endif
+	tcx_set_ingress(skb, false);
 #ifdef CONFIG_NET_EGRESS
 	if (static_branch_unlikely(&egress_needed_key)) {
 		if (nf_hook_egress_active()) {
@@ -5064,72 +5191,6 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev,
 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
 #endif
 
-static inline struct sk_buff *
-sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
-		   struct net_device *orig_dev, bool *another)
-{
-#ifdef CONFIG_NET_CLS_ACT
-	struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
-	struct tcf_result cl_res;
-
-	/* If there's at least one ingress present somewhere (so
-	 * we get here via enabled static key), remaining devices
-	 * that are not configured with an ingress qdisc will bail
-	 * out here.
-	 */
-	if (!miniq)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-
-	qdisc_skb_cb(skb)->pkt_len = skb->len;
-	tc_skb_cb(skb)->mru = 0;
-	tc_skb_cb(skb)->post_ct = false;
-	skb->tc_at_ingress = 1;
-	mini_qdisc_bstats_cpu_update(miniq, skb);
-
-	switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
-	case TC_ACT_OK:
-	case TC_ACT_RECLASSIFY:
-		skb->tc_index = TC_H_MIN(cl_res.classid);
-		break;
-	case TC_ACT_SHOT:
-		mini_qdisc_qstats_cpu_drop(miniq);
-		kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
-		*ret = NET_RX_DROP;
-		return NULL;
-	case TC_ACT_STOLEN:
-	case TC_ACT_QUEUED:
-	case TC_ACT_TRAP:
-		consume_skb(skb);
-		*ret = NET_RX_SUCCESS;
-		return NULL;
-	case TC_ACT_REDIRECT:
-		/* skb_mac_header check was done by cls/act_bpf, so
-		 * we can safely push the L2 header back before
-		 * redirecting to another netdev
-		 */
-		__skb_push(skb, skb->mac_len);
-		if (skb_do_redirect(skb) == -EAGAIN) {
-			__skb_pull(skb, skb->mac_len);
-			*another = true;
-			break;
-		}
-		*ret = NET_RX_SUCCESS;
-		return NULL;
-	case TC_ACT_CONSUMED:
-		*ret = NET_RX_SUCCESS;
-		return NULL;
-	default:
-		break;
-	}
-#endif /* CONFIG_NET_CLS_ACT */
-	return skb;
-}
-
 /**
  * netdev_is_rx_handler_busy - check if receive handler is registered
  * @dev: device to check
@@ -10835,7 +10896,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
 
 		/* Shutdown queueing discipline. */
 		dev_shutdown(dev);
-
+		dev_tcx_uninstall(dev);
 		dev_xdp_uninstall(dev);
 		bpf_dev_bound_netdev_unregister(dev);
 
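Not part of the diff above, but as context for the hooks it rewires: sch_handle_ingress() and sch_handle_egress() now first walk the per-device bpf_mprog array via tcx_run() and only fall back to the legacy clsact/mini-Qdisc path in tc_run() when no BPF program returned a verdict. A minimal sketch of a BPF program that would sit in that tcx layer follows; it assumes uapi headers from a kernel that already defines enum tcx_action_base (TCX_NEXT, TCX_PASS, TCX_DROP) and a libbpf recent enough to know the "tcx/ingress" section name. The program name is purely illustrative.

// SPDX-License-Identifier: GPL-2.0
/* Illustrative pass-through tcx ingress program. Returning TCX_NEXT
 * tells tcx_run() to continue with the next program in the bpf_mprog
 * chain (and eventually the tc_run() fallback); TCX_PASS would end the
 * walk and accept the packet, TCX_DROP would drop it.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("tcx/ingress")
int tcx_next_noop(struct __sk_buff *skb)
{
	return TCX_NEXT;
}

char LICENSE[] SEC("license") = "GPL";

User space would attach such a program through a tcx BPF link on an ifindex (for example libbpf's bpf_program__attach_tcx()) rather than by installing a clsact qdisc, which is what enables the ordered multi-program attachment this commit plumbs into the core ingress/egress path.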