diff options
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/Kconfig | 11 | ||||
-rw-r--r-- | net/ipv6/Makefile | 3 | ||||
-rw-r--r-- | net/ipv6/addrconf.c | 65 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 16 | ||||
-rw-r--r-- | net/ipv6/exthdrs.c | 158 | ||||
-rw-r--r-- | net/ipv6/ioam6.c | 910 | ||||
-rw-r--r-- | net/ipv6/ioam6_iptunnel.c | 274 | ||||
-rw-r--r-- | net/ipv6/ip6_fib.c | 6 | ||||
-rw-r--r-- | net/ipv6/ip6_gre.c | 19 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 80 | ||||
-rw-r--r-- | net/ipv6/ip6_tunnel.c | 21 | ||||
-rw-r--r-- | net/ipv6/ip6_vti.c | 21 | ||||
-rw-r--r-- | net/ipv6/ip6mr.c | 3 | ||||
-rw-r--r-- | net/ipv6/ipv6_sockglue.c | 18 | ||||
-rw-r--r-- | net/ipv6/mcast.c | 20 | ||||
-rw-r--r-- | net/ipv6/ndisc.c | 17 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6table_filter.c | 23 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6table_mangle.c | 22 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6table_nat.c | 16 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6table_raw.c | 24 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6table_security.c | 22 | ||||
-rw-r--r-- | net/ipv6/route.c | 50 | ||||
-rw-r--r-- | net/ipv6/seg6_iptunnel.c | 74 | ||||
-rw-r--r-- | net/ipv6/seg6_local.c | 110 | ||||
-rw-r--r-- | net/ipv6/sit.c | 40 | ||||
-rw-r--r-- | net/ipv6/sysctl_net_ipv6.c | 19 | ||||
-rw-r--r-- | net/ipv6/udp.c | 2 |
27 files changed, 1731 insertions, 313 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 747f56e0c636..e504204bca92 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -328,4 +328,15 @@ config IPV6_RPL_LWTUNNEL If unsure, say N. +config IPV6_IOAM6_LWTUNNEL + bool "IPv6: IOAM Pre-allocated Trace insertion support" + depends on IPV6 + select LWTUNNEL + help + Support for the inline insertion of IOAM Pre-allocated + Trace Header (only on locally generated packets), using + the lightweight tunnels mechanism. + + If unsure, say N. + endif # IPV6 diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index cf7b47bdb9b3..1bc7e143217b 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -10,7 +10,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ - udp_offload.o seg6.o fib6_notifier.o rpl.o + udp_offload.o seg6.o fib6_notifier.o rpl.o ioam6.o ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o @@ -27,6 +27,7 @@ ipv6-$(CONFIG_NETLABEL) += calipso.o ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o ipv6-$(CONFIG_IPV6_RPL_LWTUNNEL) += rpl_iptunnel.o +ipv6-$(CONFIG_IPV6_IOAM6_LWTUNNEL) += ioam6_iptunnel.o ipv6-objs += $(ipv6-y) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3bf685fe64b9..17756f3ed33b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -89,6 +89,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/export.h> +#include <linux/ioam6.h> #define INFINITY_LIFE_TIME 0xFFFFFFFF @@ -237,6 +238,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, .disable_policy = 0, .rpl_seg_enabled = 0, + .ioam6_enabled = 0, + .ioam6_id = IOAM6_DEFAULT_IF_ID, + .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -293,6 +297,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64, .disable_policy = 0, .rpl_seg_enabled = 0, + .ioam6_enabled = 0, + .ioam6_id = IOAM6_DEFAULT_IF_ID, + .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, }; /* Check if link is ready: is it up and is a valid qdisc available */ @@ -387,6 +394,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ndev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; ndev->cnf.mtu6 = dev->mtu; + ndev->ra_mtu = 0; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); if (!ndev->nd_parms) { kfree(ndev); @@ -694,8 +702,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, errout: if (in6_dev) in6_dev_put(in6_dev); - if (dev) - dev_put(dev); + dev_put(dev); return err; } @@ -1080,7 +1087,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg, goto out; } - ifa = kzalloc(sizeof(*ifa), gfp_flags); + ifa = kzalloc(sizeof(*ifa), gfp_flags | __GFP_ACCOUNT); if (!ifa) { err = -ENOBUFS; goto out; @@ -3843,6 +3850,7 @@ restart: } idev->tstamp = jiffies; + idev->ra_mtu = 0; /* Last: Shot the device (if unregistered) */ if (unregister) { @@ -5211,8 +5219,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, .netnsid = -1, .type = type, }; - struct net *net = sock_net(skb->sk); - struct net *tgt_net = net; + struct net *tgt_net = sock_net(skb->sk); int idx, s_idx, s_ip_idx; int h, s_h; struct net_device *dev; @@ -5351,7 +5358,7 @@ static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb, static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { - struct net *net = sock_net(in_skb->sk); + struct net *tgt_net = sock_net(in_skb->sk); struct inet6_fill_args fillargs = { .portid = NETLINK_CB(in_skb).portid, .seq = nlh->nlmsg_seq, @@ -5359,7 +5366,6 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, .flags = 0, .netnsid = -1, }; - struct net *tgt_net = net; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *addr = NULL, *peer; @@ -5412,8 +5418,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, errout_ifa: in6_ifa_put(ifa); errout: - if (dev) - dev_put(dev); + dev_put(dev); if (fillargs.netnsid >= 0) put_net(tgt_net); @@ -5526,6 +5531,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy; array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass; array[DEVCONF_RPL_SEG_ENABLED] = cnf->rpl_seg_enabled; + array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled; + array[DEVCONF_IOAM6_ID] = cnf->ioam6_id; + array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide; } static inline size_t inet6_ifla6_size(void) @@ -5537,6 +5545,7 @@ static inline size_t inet6_ifla6_size(void) + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */ + nla_total_size(sizeof(struct in6_addr)) /* IFLA_INET6_TOKEN */ + nla_total_size(1) /* IFLA_INET6_ADDR_GEN_MODE */ + + nla_total_size(4) /* IFLA_INET6_RA_MTU */ + 0; } @@ -5645,6 +5654,10 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev, if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode)) goto nla_put_failure; + if (idev->ra_mtu && + nla_put_u32(skb, IFLA_INET6_RA_MTU, idev->ra_mtu)) + goto nla_put_failure; + return 0; nla_put_failure: @@ -5761,6 +5774,9 @@ update_lft: static const struct nla_policy inet6_af_policy[IFLA_INET6_MAX + 1] = { [IFLA_INET6_ADDR_GEN_MODE] = { .type = NLA_U8 }, [IFLA_INET6_TOKEN] = { .len = sizeof(struct in6_addr) }, + [IFLA_INET6_RA_MTU] = { .type = NLA_REJECT, + .reject_message = + "IFLA_INET6_RA_MTU can not be set" }, }; static int check_addr_gen_mode(int mode) @@ -5784,7 +5800,8 @@ static int check_stable_privacy(struct inet6_dev *idev, struct net *net, } static int inet6_validate_link_af(const struct net_device *dev, - const struct nlattr *nla) + const struct nlattr *nla, + struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_INET6_MAX + 1]; struct inet6_dev *idev = NULL; @@ -5797,7 +5814,7 @@ static int inet6_validate_link_af(const struct net_device *dev, } err = nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, - inet6_af_policy, NULL); + inet6_af_policy, extack); if (err) return err; @@ -6540,6 +6557,7 @@ static int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write, static int minus_one = -1; static const int two_five_five = 255; +static u32 ioam6_if_id_max = U16_MAX; static const struct ctl_table addrconf_sysctl[] = { { @@ -6933,6 +6951,31 @@ static const struct ctl_table addrconf_sysctl[] = { .proc_handler = proc_dointvec, }, { + .procname = "ioam6_enabled", + .data = &ipv6_devconf.ioam6_enabled, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = (void *)SYSCTL_ZERO, + .extra2 = (void *)SYSCTL_ONE, + }, + { + .procname = "ioam6_id", + .data = &ipv6_devconf.ioam6_id, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = (void *)SYSCTL_ZERO, + .extra2 = (void *)&ioam6_if_id_max, + }, + { + .procname = "ioam6_id_wide", + .data = &ipv6_devconf.ioam6_id_wide, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_douintvec, + }, + { /* sentinel */ } }; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 2389ff702f51..b5878bb8e419 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -62,6 +62,7 @@ #include <net/rpl.h> #include <net/compat.h> #include <net/xfrm.h> +#include <net/ioam6.h> #include <linux/uaccess.h> #include <linux/mroute6.h> @@ -454,7 +455,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) * changes context in a wrong way it will be caught. */ err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, - BPF_CGROUP_INET6_BIND, &flags); + CGROUP_INET6_BIND, &flags); if (err) return err; @@ -531,7 +532,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, if (np->sndflow) sin->sin6_flowinfo = np->flow_label; BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin, - BPF_CGROUP_INET6_GETPEERNAME, + CGROUP_INET6_GETPEERNAME, NULL); } else { if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) @@ -540,7 +541,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin6_addr = sk->sk_v6_rcv_saddr; sin->sin6_port = inet->inet_sport; BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin, - BPF_CGROUP_INET6_GETSOCKNAME, + CGROUP_INET6_GETSOCKNAME, NULL); } sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, @@ -961,6 +962,9 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.fib_notify_on_flag_change = 0; atomic_set(&net->ipv6.fib6_sernum, 1); + net->ipv6.sysctl.ioam6_id = IOAM6_DEFAULT_ID; + net->ipv6.sysctl.ioam6_id_wide = IOAM6_DEFAULT_ID_WIDE; + err = ipv6_init_mibs(net); if (err) return err; @@ -1191,6 +1195,10 @@ static int __init inet6_init(void) if (err) goto rpl_fail; + err = ioam6_init(); + if (err) + goto ioam6_fail; + err = igmp6_late_init(); if (err) goto igmp6_late_err; @@ -1213,6 +1221,8 @@ sysctl_fail: igmp6_late_cleanup(); #endif igmp6_late_err: + ioam6_exit(); +ioam6_fail: rpl_exit(); rpl_fail: seg6_exit(); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 26882e165c9e..3a871a09f962 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -49,22 +49,12 @@ #include <net/seg6_hmac.h> #endif #include <net/rpl.h> +#include <linux/ioam6.h> +#include <net/ioam6.h> +#include <net/dst_metadata.h> #include <linux/uaccess.h> -/* - * Parsing tlv encoded headers. - * - * Parsing function "func" returns true, if parsing succeed - * and false, if it failed. - * It MUST NOT touch skb->h. - */ - -struct tlvtype_proc { - int type; - bool (*func)(struct sk_buff *skb, int offset); -}; - /********************* Generic functions *********************/ @@ -109,16 +99,23 @@ drop: return false; } +static bool ipv6_hop_ra(struct sk_buff *skb, int optoff); +static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff); +static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff); +static bool ipv6_hop_calipso(struct sk_buff *skb, int optoff); +#if IS_ENABLED(CONFIG_IPV6_MIP6) +static bool ipv6_dest_hao(struct sk_buff *skb, int optoff); +#endif + /* Parse tlv encoded option header (hop-by-hop or destination) */ -static bool ip6_parse_tlv(const struct tlvtype_proc *procs, +static bool ip6_parse_tlv(bool hopbyhop, struct sk_buff *skb, int max_count) { int len = (skb_transport_header(skb)[1] + 1) << 3; const unsigned char *nh = skb_network_header(skb); int off = skb_network_header_len(skb); - const struct tlvtype_proc *curr; bool disallow_unknowns = false; int tlv_count = 0; int padlen = 0; @@ -173,20 +170,45 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, if (tlv_count > max_count) goto bad; - for (curr = procs; curr->type >= 0; curr++) { - if (curr->type == nh[off]) { - /* type specific length/alignment - checks will be performed in the - func(). */ - if (curr->func(skb, off) == false) + if (hopbyhop) { + switch (nh[off]) { + case IPV6_TLV_ROUTERALERT: + if (!ipv6_hop_ra(skb, off)) + return false; + break; + case IPV6_TLV_IOAM: + if (!ipv6_hop_ioam(skb, off)) + return false; + break; + case IPV6_TLV_JUMBO: + if (!ipv6_hop_jumbo(skb, off)) + return false; + break; + case IPV6_TLV_CALIPSO: + if (!ipv6_hop_calipso(skb, off)) + return false; + break; + default: + if (!ip6_tlvopt_unknown(skb, off, + disallow_unknowns)) + return false; + break; + } + } else { + switch (nh[off]) { +#if IS_ENABLED(CONFIG_IPV6_MIP6) + case IPV6_TLV_HAO: + if (!ipv6_dest_hao(skb, off)) + return false; + break; +#endif + default: + if (!ip6_tlvopt_unknown(skb, off, + disallow_unknowns)) return false; break; } } - if (curr->type < 0 && - !ip6_tlvopt_unknown(skb, off, disallow_unknowns)) - return false; - padlen = 0; } off += optlen; @@ -264,16 +286,6 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) } #endif -static const struct tlvtype_proc tlvprocdestopt_lst[] = { -#if IS_ENABLED(CONFIG_IPV6_MIP6) - { - .type = IPV6_TLV_HAO, - .func = ipv6_dest_hao, - }, -#endif - {-1, NULL} -}; - static int ipv6_destopt_rcv(struct sk_buff *skb) { struct inet6_dev *idev = __in6_dev_get(skb->dev); @@ -304,8 +316,7 @@ fail_and_free: dstbuf = opt->dst1; #endif - if (ip6_parse_tlv(tlvprocdestopt_lst, skb, - net->ipv6.sysctl.max_dst_opts_cnt)) { + if (ip6_parse_tlv(false, skb, net->ipv6.sysctl.max_dst_opts_cnt)) { skb->transport_header += extlen; opt = IP6CB(skb); #if IS_ENABLED(CONFIG_IPV6_MIP6) @@ -928,6 +939,60 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) return false; } +/* IOAM */ + +static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) +{ + struct ioam6_trace_hdr *trace; + struct ioam6_namespace *ns; + struct ioam6_hdr *hdr; + + /* Bad alignment (must be 4n-aligned) */ + if (optoff & 3) + goto drop; + + /* Ignore if IOAM is not enabled on ingress */ + if (!__in6_dev_get(skb->dev)->cnf.ioam6_enabled) + goto ignore; + + /* Truncated Option header */ + hdr = (struct ioam6_hdr *)(skb_network_header(skb) + optoff); + if (hdr->opt_len < 2) + goto drop; + + switch (hdr->type) { + case IOAM6_TYPE_PREALLOC: + /* Truncated Pre-allocated Trace header */ + if (hdr->opt_len < 2 + sizeof(*trace)) + goto drop; + + /* Malformed Pre-allocated Trace header */ + trace = (struct ioam6_trace_hdr *)((u8 *)hdr + sizeof(*hdr)); + if (hdr->opt_len < 2 + sizeof(*trace) + trace->remlen * 4) + goto drop; + + /* Ignore if the IOAM namespace is unknown */ + ns = ioam6_namespace(ipv6_skb_net(skb), trace->namespace_id); + if (!ns) + goto ignore; + + if (!skb_valid_dst(skb)) + ip6_route_input(skb); + + ioam6_fill_trace_data(skb, ns, trace); + break; + default: + break; + } + +ignore: + return true; + +drop: + kfree_skb(skb); + return false; +} + /* Jumbo payload */ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) @@ -994,22 +1059,6 @@ drop: return false; } -static const struct tlvtype_proc tlvprochopopt_lst[] = { - { - .type = IPV6_TLV_ROUTERALERT, - .func = ipv6_hop_ra, - }, - { - .type = IPV6_TLV_JUMBO, - .func = ipv6_hop_jumbo, - }, - { - .type = IPV6_TLV_CALIPSO, - .func = ipv6_hop_calipso, - }, - { -1, } -}; - int ipv6_parse_hopopts(struct sk_buff *skb) { struct inet6_skb_parm *opt = IP6CB(skb); @@ -1035,8 +1084,7 @@ fail_and_free: goto fail_and_free; opt->flags |= IP6SKB_HOPBYHOP; - if (ip6_parse_tlv(tlvprochopopt_lst, skb, - net->ipv6.sysctl.max_hbh_opts_cnt)) { + if (ip6_parse_tlv(true, skb, net->ipv6.sysctl.max_hbh_opts_cnt)) { skb->transport_header += extlen; opt = IP6CB(skb); opt->nhoff = sizeof(struct ipv6hdr); diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c new file mode 100644 index 000000000000..5e8961004832 --- /dev/null +++ b/net/ipv6/ioam6.c @@ -0,0 +1,910 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * IPv6 IOAM implementation + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ + +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/net.h> +#include <linux/ioam6.h> +#include <linux/ioam6_genl.h> +#include <linux/rhashtable.h> + +#include <net/addrconf.h> +#include <net/genetlink.h> +#include <net/ioam6.h> + +static void ioam6_ns_release(struct ioam6_namespace *ns) +{ + kfree_rcu(ns, rcu); +} + +static void ioam6_sc_release(struct ioam6_schema *sc) +{ + kfree_rcu(sc, rcu); +} + +static void ioam6_free_ns(void *ptr, void *arg) +{ + struct ioam6_namespace *ns = (struct ioam6_namespace *)ptr; + + if (ns) + ioam6_ns_release(ns); +} + +static void ioam6_free_sc(void *ptr, void *arg) +{ + struct ioam6_schema *sc = (struct ioam6_schema *)ptr; + + if (sc) + ioam6_sc_release(sc); +} + +static int ioam6_ns_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct ioam6_namespace *ns = obj; + + return (ns->id != *(__be16 *)arg->key); +} + +static int ioam6_sc_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct ioam6_schema *sc = obj; + + return (sc->id != *(u32 *)arg->key); +} + +static const struct rhashtable_params rht_ns_params = { + .key_len = sizeof(__be16), + .key_offset = offsetof(struct ioam6_namespace, id), + .head_offset = offsetof(struct ioam6_namespace, head), + .automatic_shrinking = true, + .obj_cmpfn = ioam6_ns_cmpfn, +}; + +static const struct rhashtable_params rht_sc_params = { + .key_len = sizeof(u32), + .key_offset = offsetof(struct ioam6_schema, id), + .head_offset = offsetof(struct ioam6_schema, head), + .automatic_shrinking = true, + .obj_cmpfn = ioam6_sc_cmpfn, +}; + +static struct genl_family ioam6_genl_family; + +static const struct nla_policy ioam6_genl_policy_addns[] = { + [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 }, + [IOAM6_ATTR_NS_DATA] = { .type = NLA_U32 }, + [IOAM6_ATTR_NS_DATA_WIDE] = { .type = NLA_U64 }, +}; + +static const struct nla_policy ioam6_genl_policy_delns[] = { + [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 }, +}; + +static const struct nla_policy ioam6_genl_policy_addsc[] = { + [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 }, + [IOAM6_ATTR_SC_DATA] = { .type = NLA_BINARY, + .len = IOAM6_MAX_SCHEMA_DATA_LEN }, +}; + +static const struct nla_policy ioam6_genl_policy_delsc[] = { + [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 }, +}; + +static const struct nla_policy ioam6_genl_policy_ns_sc[] = { + [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 }, + [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 }, + [IOAM6_ATTR_SC_NONE] = { .type = NLA_FLAG }, +}; + +static int ioam6_genl_addns(struct sk_buff *skb, struct genl_info *info) +{ + struct ioam6_pernet_data *nsdata; + struct ioam6_namespace *ns; + u64 data64; + u32 data32; + __be16 id; + int err; + + if (!info->attrs[IOAM6_ATTR_NS_ID]) + return -EINVAL; + + id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID])); + nsdata = ioam6_pernet(genl_info_net(info)); + + mutex_lock(&nsdata->lock); + + ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params); + if (ns) { + err = -EEXIST; + goto out_unlock; + } + + ns = kzalloc(sizeof(*ns), GFP_KERNEL); + if (!ns) { + err = -ENOMEM; + goto out_unlock; + } + + ns->id = id; + + if (!info->attrs[IOAM6_ATTR_NS_DATA]) + data32 = IOAM6_U32_UNAVAILABLE; + else + data32 = nla_get_u32(info->attrs[IOAM6_ATTR_NS_DATA]); + + if (!info->attrs[IOAM6_ATTR_NS_DATA_WIDE]) + data64 = IOAM6_U64_UNAVAILABLE; + else + data64 = nla_get_u64(info->attrs[IOAM6_ATTR_NS_DATA_WIDE]); + + ns->data = cpu_to_be32(data32); + ns->data_wide = cpu_to_be64(data64); + + err = rhashtable_lookup_insert_fast(&nsdata->namespaces, &ns->head, + rht_ns_params); + if (err) + kfree(ns); + +out_unlock: + mutex_unlock(&nsdata->lock); + return err; +} + +static int ioam6_genl_delns(struct sk_buff *skb, struct genl_info *info) +{ + struct ioam6_pernet_data *nsdata; + struct ioam6_namespace *ns; + struct ioam6_schema *sc; + __be16 id; + int err; + + if (!info->attrs[IOAM6_ATTR_NS_ID]) + return -EINVAL; + + id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID])); + nsdata = ioam6_pernet(genl_info_net(info)); + + mutex_lock(&nsdata->lock); + + ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params); + if (!ns) { + err = -ENOENT; + goto out_unlock; + } + + sc = rcu_dereference_protected(ns->schema, + lockdep_is_held(&nsdata->lock)); + + err = rhashtable_remove_fast(&nsdata->namespaces, &ns->head, + rht_ns_params); + if (err) + goto out_unlock; + + if (sc) + rcu_assign_pointer(sc->ns, NULL); + + ioam6_ns_release(ns); + +out_unlock: + mutex_unlock(&nsdata->lock); + return err; +} + +static int __ioam6_genl_dumpns_element(struct ioam6_namespace *ns, + u32 portid, + u32 seq, + u32 flags, + struct sk_buff *skb, + u8 cmd) +{ + struct ioam6_schema *sc; + u64 data64; + u32 data32; + void *hdr; + + hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd); + if (!hdr) + return -ENOMEM; + + data32 = be32_to_cpu(ns->data); + data64 = be64_to_cpu(ns->data_wide); + + if (nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id)) || + (data32 != IOAM6_U32_UNAVAILABLE && + nla_put_u32(skb, IOAM6_ATTR_NS_DATA, data32)) || + (data64 != IOAM6_U64_UNAVAILABLE && + nla_put_u64_64bit(skb, IOAM6_ATTR_NS_DATA_WIDE, + data64, IOAM6_ATTR_PAD))) + goto nla_put_failure; + + rcu_read_lock(); + + sc = rcu_dereference(ns->schema); + if (sc && nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id)) { + rcu_read_unlock(); + goto nla_put_failure; + } + + rcu_read_unlock(); + + genlmsg_end(skb, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ioam6_genl_dumpns_start(struct netlink_callback *cb) +{ + struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk)); + struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; + + if (!iter) { + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + cb->args[0] = (long)iter; + } + + rhashtable_walk_enter(&nsdata->namespaces, iter); + + return 0; +} + +static int ioam6_genl_dumpns_done(struct netlink_callback *cb) +{ + struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; + + rhashtable_walk_exit(iter); + kfree(iter); + + return 0; +} + +static int ioam6_genl_dumpns(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct rhashtable_iter *iter; + struct ioam6_namespace *ns; + int err; + + iter = (struct rhashtable_iter *)cb->args[0]; + rhashtable_walk_start(iter); + + for (;;) { + ns = rhashtable_walk_next(iter); + + if (IS_ERR(ns)) { + if (PTR_ERR(ns) == -EAGAIN) + continue; + err = PTR_ERR(ns); + goto done; + } else if (!ns) { + break; + } + + err = __ioam6_genl_dumpns_element(ns, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + skb, + IOAM6_CMD_DUMP_NAMESPACES); + if (err) + goto done; + } + + err = skb->len; + +done: + rhashtable_walk_stop(iter); + return err; +} + +static int ioam6_genl_addsc(struct sk_buff *skb, struct genl_info *info) +{ + struct ioam6_pernet_data *nsdata; + int len, len_aligned, err; + struct ioam6_schema *sc; + u32 id; + + if (!info->attrs[IOAM6_ATTR_SC_ID] || !info->attrs[IOAM6_ATTR_SC_DATA]) + return -EINVAL; + + id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]); + nsdata = ioam6_pernet(genl_info_net(info)); + + mutex_lock(&nsdata->lock); + + sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params); + if (sc) { + err = -EEXIST; + goto out_unlock; + } + + len = nla_len(info->attrs[IOAM6_ATTR_SC_DATA]); + len_aligned = ALIGN(len, 4); + + sc = kzalloc(sizeof(*sc) + len_aligned, GFP_KERNEL); + if (!sc) { + err = -ENOMEM; + goto out_unlock; + } + + sc->id = id; + sc->len = len_aligned; + sc->hdr = cpu_to_be32(sc->id | ((u8)(sc->len / 4) << 24)); + nla_memcpy(sc->data, info->attrs[IOAM6_ATTR_SC_DATA], len); + + err = rhashtable_lookup_insert_fast(&nsdata->schemas, &sc->head, + rht_sc_params); + if (err) + goto free_sc; + +out_unlock: + mutex_unlock(&nsdata->lock); + return err; +free_sc: + kfree(sc); + goto out_unlock; +} + +static int ioam6_genl_delsc(struct sk_buff *skb, struct genl_info *info) +{ + struct ioam6_pernet_data *nsdata; + struct ioam6_namespace *ns; + struct ioam6_schema *sc; + int err; + u32 id; + + if (!info->attrs[IOAM6_ATTR_SC_ID]) + return -EINVAL; + + id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]); + nsdata = ioam6_pernet(genl_info_net(info)); + + mutex_lock(&nsdata->lock); + + sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params); + if (!sc) { + err = -ENOENT; + goto out_unlock; + } + + ns = rcu_dereference_protected(sc->ns, lockdep_is_held(&nsdata->lock)); + + err = rhashtable_remove_fast(&nsdata->schemas, &sc->head, + rht_sc_params); + if (err) + goto out_unlock; + + if (ns) + rcu_assign_pointer(ns->schema, NULL); + + ioam6_sc_release(sc); + +out_unlock: + mutex_unlock(&nsdata->lock); + return err; +} + +static int __ioam6_genl_dumpsc_element(struct ioam6_schema *sc, + u32 portid, u32 seq, u32 flags, + struct sk_buff *skb, u8 cmd) +{ + struct ioam6_namespace *ns; + void *hdr; + + hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd); + if (!hdr) + return -ENOMEM; + + if (nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id) || + nla_put(skb, IOAM6_ATTR_SC_DATA, sc->len, sc->data)) + goto nla_put_failure; + + rcu_read_lock(); + + ns = rcu_dereference(sc->ns); + if (ns && nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id))) { + rcu_read_unlock(); + goto nla_put_failure; + } + + rcu_read_unlock(); + + genlmsg_end(skb, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ioam6_genl_dumpsc_start(struct netlink_callback *cb) +{ + struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk)); + struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; + + if (!iter) { + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + cb->args[0] = (long)iter; + } + + rhashtable_walk_enter(&nsdata->schemas, iter); + + return 0; +} + +static int ioam6_genl_dumpsc_done(struct netlink_callback *cb) +{ + struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0]; + + rhashtable_walk_exit(iter); + kfree(iter); + + return 0; +} + +static int ioam6_genl_dumpsc(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct rhashtable_iter *iter; + struct ioam6_schema *sc; + int err; + + iter = (struct rhashtable_iter *)cb->args[0]; + rhashtable_walk_start(iter); + + for (;;) { + sc = rhashtable_walk_next(iter); + + if (IS_ERR(sc)) { + if (PTR_ERR(sc) == -EAGAIN) + continue; + err = PTR_ERR(sc); + goto done; + } else if (!sc) { + break; + } + + err = __ioam6_genl_dumpsc_element(sc, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + skb, + IOAM6_CMD_DUMP_SCHEMAS); + if (err) + goto done; + } + + err = skb->len; + +done: + rhashtable_walk_stop(iter); + return err; +} + +static int ioam6_genl_ns_set_schema(struct sk_buff *skb, struct genl_info *info) +{ + struct ioam6_namespace *ns, *ns_ref; + struct ioam6_schema *sc, *sc_ref; + struct ioam6_pernet_data *nsdata; + __be16 ns_id; + u32 sc_id; + int err; + + if (!info->attrs[IOAM6_ATTR_NS_ID] || + (!info->attrs[IOAM6_ATTR_SC_ID] && + !info->attrs[IOAM6_ATTR_SC_NONE])) + return -EINVAL; + + ns_id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID])); + nsdata = ioam6_pernet(genl_info_net(info)); + + mutex_lock(&nsdata->lock); + + ns = rhashtable_lookup_fast(&nsdata->namespaces, &ns_id, rht_ns_params); + if (!ns) { + err = -ENOENT; + goto out_unlock; + } + + if (info->attrs[IOAM6_ATTR_SC_NONE]) { + sc = NULL; + } else { + sc_id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]); + sc = rhashtable_lookup_fast(&nsdata->schemas, &sc_id, + rht_sc_params); + if (!sc) { + err = -ENOENT; + goto out_unlock; + } + } + + sc_ref = rcu_dereference_protected(ns->schema, + lockdep_is_held(&nsdata->lock)); + if (sc_ref) + rcu_assign_pointer(sc_ref->ns, NULL); + rcu_assign_pointer(ns->schema, sc); + + if (sc) { + ns_ref = rcu_dereference_protected(sc->ns, + lockdep_is_held(&nsdata->lock)); + if (ns_ref) + rcu_assign_pointer(ns_ref->schema, NULL); + rcu_assign_pointer(sc->ns, ns); + } + + err = 0; + +out_unlock: + mutex_unlock(&nsdata->lock); + return err; +} + +static const struct genl_ops ioam6_genl_ops[] = { + { + .cmd = IOAM6_CMD_ADD_NAMESPACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = ioam6_genl_addns, + .flags = GENL_ADMIN_PERM, + .policy = ioam6_genl_policy_addns, + .maxattr = ARRAY_SIZE(ioam6_genl_policy_addns) - 1, + }, + { + .cmd = IOAM6_CMD_DEL_NAMESPACE, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = ioam6_genl_delns, + .flags = GENL_ADMIN_PERM, + .policy = ioam6_genl_policy_delns, + .maxattr = ARRAY_SIZE(ioam6_genl_policy_delns) - 1, + }, + { + .cmd = IOAM6_CMD_DUMP_NAMESPACES, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .start = ioam6_genl_dumpns_start, + .dumpit = ioam6_genl_dumpns, + .done = ioam6_genl_dumpns_done, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = IOAM6_CMD_ADD_SCHEMA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = ioam6_genl_addsc, + .flags = GENL_ADMIN_PERM, + .policy = ioam6_genl_policy_addsc, + .maxattr = ARRAY_SIZE(ioam6_genl_policy_addsc) - 1, + }, + { + .cmd = IOAM6_CMD_DEL_SCHEMA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = ioam6_genl_delsc, + .flags = GENL_ADMIN_PERM, + .policy = ioam6_genl_policy_delsc, + .maxattr = ARRAY_SIZE(ioam6_genl_policy_delsc) - 1, + }, + { + .cmd = IOAM6_CMD_DUMP_SCHEMAS, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .start = ioam6_genl_dumpsc_start, + .dumpit = ioam6_genl_dumpsc, + .done = ioam6_genl_dumpsc_done, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = IOAM6_CMD_NS_SET_SCHEMA, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = ioam6_genl_ns_set_schema, + .flags = GENL_ADMIN_PERM, + .policy = ioam6_genl_policy_ns_sc, + .maxattr = ARRAY_SIZE(ioam6_genl_policy_ns_sc) - 1, + }, +}; + +static struct genl_family ioam6_genl_family __ro_after_init = { + .name = IOAM6_GENL_NAME, + .version = IOAM6_GENL_VERSION, + .netnsok = true, + .parallel_ops = true, + .ops = ioam6_genl_ops, + .n_ops = ARRAY_SIZE(ioam6_genl_ops), + .module = THIS_MODULE, +}; + +struct ioam6_namespace *ioam6_namespace(struct net *net, __be16 id) +{ + struct ioam6_pernet_data *nsdata = ioam6_pernet(net); + + return rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params); +} + +static void __ioam6_fill_trace_data(struct sk_buff *skb, + struct ioam6_namespace *ns, + struct ioam6_trace_hdr *trace, + struct ioam6_schema *sc, + u8 sclen) +{ + struct __kernel_sock_timeval ts; + u64 raw64; + u32 raw32; + u16 raw16; + u8 *data; + u8 byte; + + data = trace->data + trace->remlen * 4 - trace->nodelen * 4 - sclen * 4; + + /* hop_lim and node_id */ + if (trace->type.bit0) { + byte = ipv6_hdr(skb)->hop_limit; + if (skb->dev) + byte--; + + raw32 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id; + + *(__be32 *)data = cpu_to_be32((byte << 24) | raw32); + data += sizeof(__be32); + } + + /* ingress_if_id and egress_if_id */ + if (trace->type.bit1) { + if (!skb->dev) + raw16 = IOAM6_U16_UNAVAILABLE; + else + raw16 = (__force u16)__in6_dev_get(skb->dev)->cnf.ioam6_id; + + *(__be16 *)data = cpu_to_be16(raw16); + data += sizeof(__be16); + + if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) + raw16 = IOAM6_U16_UNAVAILABLE; + else + raw16 = (__force u16)__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id; + + *(__be16 *)data = cpu_to_be16(raw16); + data += sizeof(__be16); + } + + /* timestamp seconds */ + if (trace->type.bit2) { + if (!skb->dev) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + } else { + if (!skb->tstamp) + __net_timestamp(skb); + + skb_get_new_timestamp(skb, &ts); + *(__be32 *)data = cpu_to_be32((u32)ts.tv_sec); + } + data += sizeof(__be32); + } + + /* timestamp subseconds */ + if (trace->type.bit3) { + if (!skb->dev) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + } else { + if (!skb->tstamp) + __net_timestamp(skb); + + if (!trace->type.bit2) + skb_get_new_timestamp(skb, &ts); + + *(__be32 *)data = cpu_to_be32((u32)ts.tv_usec); + } + data += sizeof(__be32); + } + + /* transit delay */ + if (trace->type.bit4) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* namespace data */ + if (trace->type.bit5) { + *(__be32 *)data = ns->data; + data += sizeof(__be32); + } + + /* queue depth */ + if (trace->type.bit6) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* checksum complement */ + if (trace->type.bit7) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* hop_lim and node_id (wide) */ + if (trace->type.bit8) { + byte = ipv6_hdr(skb)->hop_limit; + if (skb->dev) + byte--; + + raw64 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id_wide; + + *(__be64 *)data = cpu_to_be64(((u64)byte << 56) | raw64); + data += sizeof(__be64); + } + + /* ingress_if_id and egress_if_id (wide) */ + if (trace->type.bit9) { + if (!skb->dev) + raw32 = IOAM6_U32_UNAVAILABLE; + else + raw32 = __in6_dev_get(skb->dev)->cnf.ioam6_id_wide; + + *(__be32 *)data = cpu_to_be32(raw32); + data += sizeof(__be32); + + if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) + raw32 = IOAM6_U32_UNAVAILABLE; + else + raw32 = __in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide; + + *(__be32 *)data = cpu_to_be32(raw32); + data += sizeof(__be32); + } + + /* namespace data (wide) */ + if (trace->type.bit10) { + *(__be64 *)data = ns->data_wide; + data += sizeof(__be64); + } + + /* buffer occupancy */ + if (trace->type.bit11) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); + data += sizeof(__be32); + } + + /* opaque state snapshot */ + if (trace->type.bit22) { + if (!sc) { + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE >> 8); + } else { + *(__be32 *)data = sc->hdr; + data += sizeof(__be32); + + memcpy(data, sc->data, sc->len); + } + } +} + +/* called with rcu_read_lock() */ +void ioam6_fill_trace_data(struct sk_buff *skb, + struct ioam6_namespace *ns, + struct ioam6_trace_hdr *trace) +{ + struct ioam6_schema *sc; + u8 sclen = 0; + + /* Skip if Overflow flag is set OR + * if an unknown type (bit 12-21) is set + */ + if (trace->overflow || + trace->type.bit12 | trace->type.bit13 | trace->type.bit14 | + trace->type.bit15 | trace->type.bit16 | trace->type.bit17 | + trace->type.bit18 | trace->type.bit19 | trace->type.bit20 | + trace->type.bit21) { + return; + } + + /* NodeLen does not include Opaque State Snapshot length. We need to + * take it into account if the corresponding bit is set (bit 22) and + * if the current IOAM namespace has an active schema attached to it + */ + sc = rcu_dereference(ns->schema); + if (trace->type.bit22) { + sclen = sizeof_field(struct ioam6_schema, hdr) / 4; + + if (sc) + sclen += sc->len / 4; + } + + /* If there is no space remaining, we set the Overflow flag and we + * skip without filling the trace + */ + if (!trace->remlen || trace->remlen < trace->nodelen + sclen) { + trace->overflow = 1; + return; + } + + __ioam6_fill_trace_data(skb, ns, trace, sc, sclen); + trace->remlen -= trace->nodelen + sclen; +} + +static int __net_init ioam6_net_init(struct net *net) +{ + struct ioam6_pernet_data *nsdata; + int err = -ENOMEM; + + nsdata = kzalloc(sizeof(*nsdata), GFP_KERNEL); + if (!nsdata) + goto out; + + mutex_init(&nsdata->lock); + net->ipv6.ioam6_data = nsdata; + + err = rhashtable_init(&nsdata->namespaces, &rht_ns_params); + if (err) + goto free_nsdata; + + err = rhashtable_init(&nsdata->schemas, &rht_sc_params); + if (err) + goto free_rht_ns; + +out: + return err; +free_rht_ns: + rhashtable_destroy(&nsdata->namespaces); +free_nsdata: + kfree(nsdata); + net->ipv6.ioam6_data = NULL; + goto out; +} + +static void __net_exit ioam6_net_exit(struct net *net) +{ + struct ioam6_pernet_data *nsdata = ioam6_pernet(net); + + rhashtable_free_and_destroy(&nsdata->namespaces, ioam6_free_ns, NULL); + rhashtable_free_and_destroy(&nsdata->schemas, ioam6_free_sc, NULL); + + kfree(nsdata); +} + +static struct pernet_operations ioam6_net_ops = { + .init = ioam6_net_init, + .exit = ioam6_net_exit, +}; + +int __init ioam6_init(void) +{ + int err = register_pernet_subsys(&ioam6_net_ops); + if (err) + goto out; + + err = genl_register_family(&ioam6_genl_family); + if (err) + goto out_unregister_pernet_subsys; + +#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL + err = ioam6_iptunnel_init(); + if (err) + goto out_unregister_genl; +#endif + + pr_info("In-situ OAM (IOAM) with IPv6\n"); + +out: + return err; +#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL +out_unregister_genl: + genl_unregister_family(&ioam6_genl_family); +#endif +out_unregister_pernet_subsys: + unregister_pernet_subsys(&ioam6_net_ops); + goto out; +} + +void ioam6_exit(void) +{ +#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL + ioam6_iptunnel_exit(); +#endif + genl_unregister_family(&ioam6_genl_family); + unregister_pernet_subsys(&ioam6_net_ops); +} diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c new file mode 100644 index 000000000000..f9ee04541c17 --- /dev/null +++ b/net/ipv6/ioam6_iptunnel.c @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * IPv6 IOAM Lightweight Tunnel implementation + * + * Author: + * Justin Iurman <justin.iurman@uliege.be> + */ + +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/net.h> +#include <linux/netlink.h> +#include <linux/in6.h> +#include <linux/ioam6.h> +#include <linux/ioam6_iptunnel.h> +#include <net/dst.h> +#include <net/sock.h> +#include <net/lwtunnel.h> +#include <net/ioam6.h> + +#define IOAM6_MASK_SHORT_FIELDS 0xff100000 +#define IOAM6_MASK_WIDE_FIELDS 0xe00000 + +struct ioam6_lwt_encap { + struct ipv6_hopopt_hdr eh; + u8 pad[2]; /* 2-octet padding for 4n-alignment */ + struct ioam6_hdr ioamh; + struct ioam6_trace_hdr traceh; +} __packed; + +struct ioam6_lwt { + struct ioam6_lwt_encap tuninfo; +}; + +static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt) +{ + return (struct ioam6_lwt *)lwt->data; +} + +static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state *lwt) +{ + return &ioam6_lwt_state(lwt)->tuninfo; +} + +static struct ioam6_trace_hdr *ioam6_trace(struct lwtunnel_state *lwt) +{ + return &(ioam6_lwt_state(lwt)->tuninfo.traceh); +} + +static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = { + [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)), +}; + +static int nla_put_ioam6_trace(struct sk_buff *skb, int attrtype, + struct ioam6_trace_hdr *trace) +{ + struct ioam6_trace_hdr *data; + struct nlattr *nla; + int len; + + len = sizeof(*trace); + + nla = nla_reserve(skb, attrtype, len); + if (!nla) + return -EMSGSIZE; + + data = nla_data(nla); + memcpy(data, trace, len); + + return 0; +} + +static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace) +{ + u32 fields; + + if (!trace->type_be32 || !trace->remlen || + trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4) + return false; + + trace->nodelen = 0; + fields = be32_to_cpu(trace->type_be32); + + trace->nodelen += hweight32(fields & IOAM6_MASK_SHORT_FIELDS) + * (sizeof(__be32) / 4); + trace->nodelen += hweight32(fields & IOAM6_MASK_WIDE_FIELDS) + * (sizeof(__be64) / 4); + + return true; +} + +static int ioam6_build_state(struct net *net, struct nlattr *nla, + unsigned int family, const void *cfg, + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1]; + struct ioam6_lwt_encap *tuninfo; + struct ioam6_trace_hdr *trace; + struct lwtunnel_state *s; + int len_aligned; + int len, err; + + if (family != AF_INET6) + return -EINVAL; + + err = nla_parse_nested(tb, IOAM6_IPTUNNEL_MAX, nla, + ioam6_iptunnel_policy, extack); + if (err < 0) + return err; + + if (!tb[IOAM6_IPTUNNEL_TRACE]) { + NL_SET_ERR_MSG(extack, "missing trace"); + return -EINVAL; + } + + trace = nla_data(tb[IOAM6_IPTUNNEL_TRACE]); + if (!ioam6_validate_trace_hdr(trace)) { + NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_TRACE], + "invalid trace validation"); + return -EINVAL; + } + + len = sizeof(*tuninfo) + trace->remlen * 4; + len_aligned = ALIGN(len, 8); + + s = lwtunnel_state_alloc(len_aligned); + if (!s) + return -ENOMEM; + + tuninfo = ioam6_lwt_info(s); + tuninfo->eh.hdrlen = (len_aligned >> 3) - 1; + tuninfo->pad[0] = IPV6_TLV_PADN; + tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC; + tuninfo->ioamh.opt_type = IPV6_TLV_IOAM; + tuninfo->ioamh.opt_len = sizeof(tuninfo->ioamh) - 2 + sizeof(*trace) + + trace->remlen * 4; + + memcpy(&tuninfo->traceh, trace, sizeof(*trace)); + + len = len_aligned - len; + if (len == 1) { + tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PAD1; + } else if (len > 0) { + tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN; + tuninfo->traceh.data[trace->remlen * 4 + 1] = len - 2; + } + + s->type = LWTUNNEL_ENCAP_IOAM6; + s->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; + + *ts = s; + + return 0; +} + +static int ioam6_do_inline(struct sk_buff *skb, struct ioam6_lwt_encap *tuninfo) +{ + struct ioam6_trace_hdr *trace; + struct ipv6hdr *oldhdr, *hdr; + struct ioam6_namespace *ns; + int hdrlen, err; + + hdrlen = (tuninfo->eh.hdrlen + 1) << 3; + + err = skb_cow_head(skb, hdrlen + skb->mac_len); + if (unlikely(err)) + return err; + + oldhdr = ipv6_hdr(skb); + skb_pull(skb, sizeof(*oldhdr)); + skb_postpull_rcsum(skb, skb_network_header(skb), sizeof(*oldhdr)); + + skb_push(skb, sizeof(*oldhdr) + hdrlen); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + + hdr = ipv6_hdr(skb); + memmove(hdr, oldhdr, sizeof(*oldhdr)); + tuninfo->eh.nexthdr = hdr->nexthdr; + + skb_set_transport_header(skb, sizeof(*hdr)); + skb_postpush_rcsum(skb, hdr, sizeof(*hdr) + hdrlen); + + memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen); + + hdr->nexthdr = NEXTHDR_HOP; + hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr)); + + trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb) + + sizeof(struct ipv6_hopopt_hdr) + 2 + + sizeof(struct ioam6_hdr)); + + ns = ioam6_namespace(dev_net(skb_dst(skb)->dev), trace->namespace_id); + if (ns) + ioam6_fill_trace_data(skb, ns, trace); + + return 0; +} + +static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct lwtunnel_state *lwt = skb_dst(skb)->lwtstate; + int err = -EINVAL; + + if (skb->protocol != htons(ETH_P_IPV6)) + goto drop; + + /* Only for packets we send and + * that do not contain a Hop-by-Hop yet + */ + if (skb->dev || ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) + goto out; + + err = ioam6_do_inline(skb, ioam6_lwt_info(lwt)); + if (unlikely(err)) + goto drop; + + err = skb_cow_head(skb, LL_RESERVED_SPACE(skb_dst(skb)->dev)); + if (unlikely(err)) + goto drop; + +out: + return lwt->orig_output(net, sk, skb); + +drop: + kfree_skb(skb); + return err; +} + +static int ioam6_fill_encap_info(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate); + + if (nla_put_ioam6_trace(skb, IOAM6_IPTUNNEL_TRACE, trace)) + return -EMSGSIZE; + + return 0; +} + +static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate) +{ + struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate); + + return nla_total_size(sizeof(*trace)); +} + +static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + struct ioam6_trace_hdr *a_hdr = ioam6_trace(a); + struct ioam6_trace_hdr *b_hdr = ioam6_trace(b); + + return (a_hdr->namespace_id != b_hdr->namespace_id); +} + +static const struct lwtunnel_encap_ops ioam6_iptun_ops = { + .build_state = ioam6_build_state, + .output = ioam6_output, + .fill_encap = ioam6_fill_encap_info, + .get_encap_size = ioam6_encap_nlsize, + .cmp_encap = ioam6_encap_cmp, + .owner = THIS_MODULE, +}; + +int __init ioam6_iptunnel_init(void) +{ + return lwtunnel_encap_add_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6); +} + +void ioam6_iptunnel_exit(void) +{ + lwtunnel_encap_del_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6); +} diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 2d650dc24349..1bec5b22f80d 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1341,7 +1341,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt, struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node, lockdep_is_held(&rt->fib6_table->tb6_lock)); - /* paired with smp_rmb() in rt6_get_cookie_safe() */ + /* paired with smp_rmb() in fib6_get_cookie_safe() */ smp_wmb(); while (fn) { fn->fn_sernum = sernum; @@ -2449,8 +2449,8 @@ int __init fib6_init(void) int ret = -ENOMEM; fib6_node_kmem = kmem_cache_create("fib6_nodes", - sizeof(struct fib6_node), - 0, SLAB_HWCACHE_ALIGN, + sizeof(struct fib6_node), 0, + SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL); if (!fib6_node_kmem) goto out; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index bc224f917bbd..7baf41d160f5 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -629,6 +629,8 @@ drop: static int gre_handle_offloads(struct sk_buff *skb, bool csum) { + if (csum && skb_checksum_start(skb) < skb->data) + return -EINVAL; return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } @@ -1244,8 +1246,9 @@ static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u, memcpy(u->name, p->name, sizeof(u->name)); } -static int ip6gre_tunnel_ioctl(struct net_device *dev, - struct ifreq *ifr, int cmd) +static int ip6gre_tunnel_siocdevprivate(struct net_device *dev, + struct ifreq *ifr, void __user *data, + int cmd) { int err = 0; struct ip6_tnl_parm2 p; @@ -1259,7 +1262,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, switch (cmd) { case SIOCGETTUNNEL: if (dev == ign->fb_tunnel_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + if (copy_from_user(&p, data, sizeof(p))) { err = -EFAULT; break; } @@ -1270,7 +1273,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, } memset(&p, 0, sizeof(p)); ip6gre_tnl_parm_to_user(&p, &t->parms); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; break; @@ -1281,7 +1284,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, goto done; err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) goto done; err = -EINVAL; @@ -1318,7 +1321,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, memset(&p, 0, sizeof(p)); ip6gre_tnl_parm_to_user(&p, &t->parms); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; } else err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); @@ -1331,7 +1334,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, if (dev == ign->fb_tunnel_dev) { err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) goto done; err = -ENOENT; ip6gre_tnl_parm_from_user(&p1, &p); @@ -1398,7 +1401,7 @@ static const struct net_device_ops ip6gre_netdev_ops = { .ndo_init = ip6gre_tunnel_init, .ndo_uninit = ip6gre_tunnel_uninit, .ndo_start_xmit = ip6gre_tunnel_xmit, - .ndo_do_ioctl = ip6gre_tunnel_ioctl, + .ndo_siocdevprivate = ip6gre_tunnel_siocdevprivate, .ndo_change_mtu = ip6_tnl_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip6_tnl_get_iflink, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8e6ca9ad6812..12f985f43bcc 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -60,46 +60,29 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; + struct inet6_dev *idev = ip6_dst_idev(dst); unsigned int hh_len = LL_RESERVED_SPACE(dev); - int delta = hh_len - skb_headroom(skb); - const struct in6_addr *nexthop; + const struct in6_addr *daddr, *nexthop; + struct ipv6hdr *hdr; struct neighbour *neigh; int ret; /* Be paranoid, rather than too clever. */ - if (unlikely(delta > 0) && dev->header_ops) { - /* pskb_expand_head() might crash, if skb is shared */ - if (skb_shared(skb)) { - struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); - - if (likely(nskb)) { - if (skb->sk) - skb_set_owner_w(nskb, skb->sk); - consume_skb(skb); - } else { - kfree_skb(skb); - } - skb = nskb; - } - if (skb && - pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { - kfree_skb(skb); - skb = NULL; - } + if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) { + skb = skb_expand_head(skb, hh_len); if (!skb) { - IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); return -ENOMEM; } } - if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { - struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); - + hdr = ipv6_hdr(skb); + daddr = &hdr->daddr; + if (ipv6_addr_is_multicast(daddr)) { if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) && ((mroute6_is_socket(net, skb) && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || - ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, - &ipv6_hdr(skb)->saddr))) { + ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); /* Do not check for IFF_ALLMULTI; multicast routing @@ -110,7 +93,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * net, sk, newskb, NULL, newskb->dev, dev_loopback_xmit); - if (ipv6_hdr(skb)->hop_limit == 0) { + if (hdr->hop_limit == 0) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); @@ -119,9 +102,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * } IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len); - - if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <= - IPV6_ADDR_SCOPE_NODELOCAL && + if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL && !(dev->flags & IFF_LOOPBACK)) { kfree_skb(skb); return 0; @@ -136,10 +117,10 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * } rcu_read_lock_bh(); - nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); - neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); + nexthop = rt6_nexthop((struct rt6_info *)dst, daddr); + neigh = __ipv6_neigh_lookup_noref(dev, nexthop); if (unlikely(!neigh)) - neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); + neigh = __neigh_create(&nd_tbl, nexthop, dev, false); if (!IS_ERR(neigh)) { sock_confirm_neigh(skb, neigh); ret = neigh_output(neigh, skb, false); @@ -148,7 +129,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * } rcu_read_unlock_bh(); - IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; } @@ -268,6 +249,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); + struct net_device *dev = dst->dev; + struct inet6_dev *idev = ip6_dst_idev(dst); unsigned int head_room; struct ipv6hdr *hdr; u8 proto = fl6->flowi6_proto; @@ -275,22 +258,16 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, int hlimit = -1; u32 mtu; - head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); + head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev); if (opt) head_room += opt->opt_nflen + opt->opt_flen; - if (unlikely(skb_headroom(skb) < head_room)) { - struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); - if (!skb2) { - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTDISCARDS); - kfree_skb(skb); + if (unlikely(head_room > skb_headroom(skb))) { + skb = skb_expand_head(skb, head_room); + if (!skb) { + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); return -ENOBUFS; } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - consume_skb(skb); - skb = skb2; } if (opt) { @@ -332,8 +309,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, mtu = dst_mtu(dst); if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { - IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUT, skb->len); + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); /* if egress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -346,17 +322,17 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, * we promote our socket to non const */ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, - net, (struct sock *)sk, skb, NULL, dst->dev, + net, (struct sock *)sk, skb, NULL, dev, dst_output); } - skb->dev = dst->dev; + skb->dev = dev; /* ipv6_local_error() does not require socket lock, * we promote our socket to non const */ ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu); - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } @@ -608,7 +584,7 @@ int ip6_forward(struct sk_buff *skb) } } - mtu = ip6_dst_mtu_forward(dst); + mtu = ip6_dst_mtu_maybe_forward(dst, true); if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 322698d9fcf4..20a67efda47f 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1581,9 +1581,10 @@ ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p) } /** - * ip6_tnl_ioctl - configure ipv6 tunnels from userspace + * ip6_tnl_siocdevprivate - configure ipv6 tunnels from userspace * @dev: virtual device associated with tunnel - * @ifr: parameters passed from userspace + * @ifr: unused + * @data: parameters passed from userspace * @cmd: command to be performed * * Description: @@ -1609,7 +1610,8 @@ ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p) **/ static int -ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +ip6_tnl_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd) { int err = 0; struct ip6_tnl_parm p; @@ -1623,7 +1625,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6n->fb_tnl_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + if (copy_from_user(&p, data, sizeof(p))) { err = -EFAULT; break; } @@ -1635,9 +1637,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) memset(&p, 0, sizeof(p)); } ip6_tnl_parm_to_user(&p, &t->parms); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) { + if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; - } break; case SIOCADDTUNNEL: case SIOCCHGTUNNEL: @@ -1645,7 +1646,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) break; err = -EINVAL; if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP && @@ -1669,7 +1670,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (!IS_ERR(t)) { err = 0; ip6_tnl_parm_to_user(&p, &t->parms); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; } else { @@ -1683,7 +1684,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (dev == ip6n->fb_tnl_dev) { err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) break; err = -ENOENT; ip6_tnl_parm_from_user(&p1, &p); @@ -1802,7 +1803,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { .ndo_init = ip6_tnl_dev_init, .ndo_uninit = ip6_tnl_dev_uninit, .ndo_start_xmit = ip6_tnl_start_xmit, - .ndo_do_ioctl = ip6_tnl_ioctl, + .ndo_siocdevprivate = ip6_tnl_siocdevprivate, .ndo_change_mtu = ip6_tnl_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip6_tnl_get_iflink, diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 2d048e21abbb..1d8e3ffa225d 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -771,13 +771,14 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p) } /** - * vti6_ioctl - configure vti6 tunnels from userspace + * vti6_siocdevprivate - configure vti6 tunnels from userspace * @dev: virtual device associated with tunnel - * @ifr: parameters passed from userspace + * @ifr: unused + * @data: parameters passed from userspace * @cmd: command to be performed * * Description: - * vti6_ioctl() is used for managing vti6 tunnels + * vti6_siocdevprivate() is used for managing vti6 tunnels * from userspace. * * The possible commands are the following: @@ -798,7 +799,7 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p) * %-ENODEV if attempting to change or delete a nonexisting device **/ static int -vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +vti6_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd) { int err = 0; struct ip6_tnl_parm2 p; @@ -810,7 +811,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6n->fb_tnl_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + if (copy_from_user(&p, data, sizeof(p))) { err = -EFAULT; break; } @@ -822,7 +823,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (!t) t = netdev_priv(dev); vti6_parm_to_user(&p, &t->parms); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; break; case SIOCADDTUNNEL: @@ -831,7 +832,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) break; err = -EINVAL; if (p.proto != IPPROTO_IPV6 && p.proto != 0) @@ -852,7 +853,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (t) { err = 0; vti6_parm_to_user(&p, &t->parms); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + if (copy_to_user(data, &p, sizeof(p))) err = -EFAULT; } else @@ -865,7 +866,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (dev == ip6n->fb_tnl_dev) { err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) break; err = -ENOENT; vti6_parm_from_user(&p1, &p); @@ -890,7 +891,7 @@ static const struct net_device_ops vti6_netdev_ops = { .ndo_init = vti6_dev_init, .ndo_uninit = vti6_dev_uninit, .ndo_start_xmit = vti6_tnl_xmit, - .ndo_do_ioctl = vti6_ioctl, + .ndo_siocdevprivate = vti6_siocdevprivate, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip6_tnl_get_iflink, }; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 06b0d2c329b9..36ed9efb8825 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -559,8 +559,7 @@ static int pim6_rcv(struct sk_buff *skb) read_lock(&mrt_lock); if (reg_vif_num >= 0) reg_dev = mrt->vif_table[reg_vif_num].dev; - if (reg_dev) - dev_hold(reg_dev); + dev_hold(reg_dev); read_unlock(&mrt_lock); if (!reg_dev) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index a6804a7e34c1..e4bdb09c5586 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -225,7 +225,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) goto out_free_gsf; - ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist); + ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist_flex); out_free_gsf: kfree(gsf); return ret; @@ -234,7 +234,7 @@ out_free_gsf: static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) { - const int size0 = offsetof(struct compat_group_filter, gf_slist); + const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter *gf32; void *p; int ret; @@ -249,7 +249,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, if (!p) return -ENOMEM; - gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ + gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */ ret = -EFAULT; if (copy_from_sockptr(gf32, optval, optlen)) goto out_free_p; @@ -261,14 +261,14 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, goto out_free_p; ret = -EINVAL; - if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) + if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen) goto out_free_p; ret = ip6_mc_msfilter(sk, &(struct group_filter){ .gf_interface = gf32->gf_interface, .gf_group = gf32->gf_group, .gf_fmode = gf32->gf_fmode, - .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist); + .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist_flex); out_free_p: kfree(p); @@ -1048,7 +1048,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, static int ipv6_get_msfilter(struct sock *sk, void __user *optval, int __user *optlen, int len) { - const int size0 = offsetof(struct group_filter, gf_slist); + const int size0 = offsetof(struct group_filter, gf_slist_flex); struct group_filter __user *p = optval; struct group_filter gsf; int num; @@ -1062,7 +1062,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval, return -EADDRNOTAVAIL; num = gsf.gf_numsrc; lock_sock(sk); - err = ip6_mc_msfget(sk, &gsf, p->gf_slist); + err = ip6_mc_msfget(sk, &gsf, p->gf_slist_flex); if (!err) { if (num > gsf.gf_numsrc) num = gsf.gf_numsrc; @@ -1077,7 +1077,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval, static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval, int __user *optlen) { - const int size0 = offsetof(struct compat_group_filter, gf_slist); + const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter __user *p = optval; struct compat_group_filter gf32; struct group_filter gf; @@ -1100,7 +1100,7 @@ static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval, return -EADDRNOTAVAIL; lock_sock(sk); - err = ip6_mc_msfget(sk, &gf, p->gf_slist); + err = ip6_mc_msfget(sk, &gf, p->gf_slist_flex); release_sock(sk); if (err) return err; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 54ec163fbafa..cd951faa2fac 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -447,7 +447,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (psl) count += psl->sl_max; - newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_KERNEL); + newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count), + GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; @@ -457,7 +458,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (psl) { for (i = 0; i < psl->sl_count; i++) newpsl->sl_addr[i] = psl->sl_addr[i]; - atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psl, sl_addr, psl->sl_max), + &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } psl = newpsl; @@ -525,8 +527,9 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, goto done; } if (gsf->gf_numsrc) { - newpsl = sock_kmalloc(sk, IP6_SFLSIZE(gsf->gf_numsrc), - GFP_KERNEL); + newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, + gsf->gf_numsrc), + GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; @@ -543,7 +546,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, newpsl->sl_count, newpsl->sl_addr, 0); if (err) { mutex_unlock(&idev->mc_lock); - sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max)); + sock_kfree_s(sk, newpsl, struct_size(newpsl, sl_addr, + newpsl->sl_max)); goto done; } mutex_unlock(&idev->mc_lock); @@ -559,7 +563,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, if (psl) { ip6_mc_del_src(idev, group, pmc->sfmode, psl->sl_count, psl->sl_addr, 0); - atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psl, sl_addr, psl->sl_max), + &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } else { ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); @@ -2607,7 +2612,8 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, psl->sl_count, psl->sl_addr, 0); RCU_INIT_POINTER(iml->sflist, NULL); - atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psl, sl_addr, psl->sl_max), + &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index c467c6419893..4b098521a44c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1391,12 +1391,6 @@ skip_defrtr: } } - /* - * Send a notify if RA changed managed/otherconf flags or timer settings - */ - if (send_ifinfo_notify) - inet6_ifinfo_notify(RTM_NEWLINK, in6_dev); - skip_linkparms: /* @@ -1496,6 +1490,11 @@ skip_routeinfo: memcpy(&n, ((u8 *)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu)); mtu = ntohl(n); + if (in6_dev->ra_mtu != mtu) { + in6_dev->ra_mtu = mtu; + send_ifinfo_notify = true; + } + if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) { ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu); } else if (in6_dev->cnf.mtu6 != mtu) { @@ -1519,6 +1518,12 @@ skip_routeinfo: ND_PRINTK(2, warn, "RA: invalid RA options\n"); } out: + /* Send a notify if RA changed managed/otherconf flags or + * timer settings or ra_mtu value + */ + if (send_ifinfo_notify) + inet6_ifinfo_notify(RTM_NEWLINK, in6_dev); + fib6_info_release(rt); if (neigh) neigh_release(neigh); diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index bb784ea7bbd3..727ee8097012 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -19,15 +19,12 @@ MODULE_DESCRIPTION("ip6tables filter table"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT)) -static int __net_init ip6table_filter_table_init(struct net *net); - static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_FILTER, - .table_init = ip6table_filter_table_init, }; /* The work comes in here from netfilter.c. */ @@ -44,7 +41,7 @@ static struct nf_hook_ops *filter_ops __read_mostly; static bool forward = true; module_param(forward, bool, 0000); -static int __net_init ip6table_filter_table_init(struct net *net) +static int ip6table_filter_table_init(struct net *net) { struct ip6t_replace *repl; int err; @@ -63,7 +60,7 @@ static int __net_init ip6table_filter_table_init(struct net *net) static int __net_init ip6table_filter_net_init(struct net *net) { - if (net == &init_net || !forward) + if (!forward) return ip6table_filter_table_init(net); return 0; @@ -87,15 +84,24 @@ static struct pernet_operations ip6table_filter_net_ops = { static int __init ip6table_filter_init(void) { - int ret; + int ret = xt_register_template(&packet_filter, + ip6table_filter_table_init); + + if (ret < 0) + return ret; filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook); - if (IS_ERR(filter_ops)) + if (IS_ERR(filter_ops)) { + xt_unregister_template(&packet_filter); return PTR_ERR(filter_ops); + } ret = register_pernet_subsys(&ip6table_filter_net_ops); - if (ret < 0) + if (ret < 0) { + xt_unregister_template(&packet_filter); kfree(filter_ops); + return ret; + } return ret; } @@ -103,6 +109,7 @@ static int __init ip6table_filter_init(void) static void __exit ip6table_filter_fini(void) { unregister_pernet_subsys(&ip6table_filter_net_ops); + xt_unregister_template(&packet_filter); kfree(filter_ops); } diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index c76cffd63041..9b518ce37d6a 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -20,15 +20,12 @@ MODULE_DESCRIPTION("ip6tables mangle table"); (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) -static int __net_init ip6table_mangle_table_init(struct net *net); - static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_MANGLE, - .table_init = ip6table_mangle_table_init, }; static unsigned int @@ -76,7 +73,7 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb, } static struct nf_hook_ops *mangle_ops __read_mostly; -static int __net_init ip6table_mangle_table_init(struct net *net) +static int ip6table_mangle_table_init(struct net *net) { struct ip6t_replace *repl; int ret; @@ -106,29 +103,32 @@ static struct pernet_operations ip6table_mangle_net_ops = { static int __init ip6table_mangle_init(void) { - int ret; + int ret = xt_register_template(&packet_mangler, + ip6table_mangle_table_init); + + if (ret < 0) + return ret; mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook); - if (IS_ERR(mangle_ops)) + if (IS_ERR(mangle_ops)) { + xt_unregister_template(&packet_mangler); return PTR_ERR(mangle_ops); + } ret = register_pernet_subsys(&ip6table_mangle_net_ops); if (ret < 0) { + xt_unregister_template(&packet_mangler); kfree(mangle_ops); return ret; } - ret = ip6table_mangle_table_init(&init_net); - if (ret) { - unregister_pernet_subsys(&ip6table_mangle_net_ops); - kfree(mangle_ops); - } return ret; } static void __exit ip6table_mangle_fini(void) { unregister_pernet_subsys(&ip6table_mangle_net_ops); + xt_unregister_template(&packet_mangler); kfree(mangle_ops); } diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index b0292251e655..921c1723a01e 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -19,8 +19,6 @@ struct ip6table_nat_pernet { struct nf_hook_ops *nf_nat_ops; }; -static int __net_init ip6table_nat_table_init(struct net *net); - static unsigned int ip6table_nat_net_id __read_mostly; static const struct xt_table nf_nat_ipv6_table = { @@ -31,7 +29,6 @@ static const struct xt_table nf_nat_ipv6_table = { (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, .af = NFPROTO_IPV6, - .table_init = ip6table_nat_table_init, }; static unsigned int ip6table_nat_do_chain(void *priv, @@ -115,7 +112,7 @@ static void ip6t_nat_unregister_lookups(struct net *net) kfree(ops); } -static int __net_init ip6table_nat_table_init(struct net *net) +static int ip6table_nat_table_init(struct net *net) { struct ip6t_replace *repl; int ret; @@ -157,20 +154,23 @@ static struct pernet_operations ip6table_nat_net_ops = { static int __init ip6table_nat_init(void) { - int ret = register_pernet_subsys(&ip6table_nat_net_ops); + int ret = xt_register_template(&nf_nat_ipv6_table, + ip6table_nat_table_init); - if (ret) + if (ret < 0) return ret; - ret = ip6table_nat_table_init(&init_net); + ret = register_pernet_subsys(&ip6table_nat_net_ops); if (ret) - unregister_pernet_subsys(&ip6table_nat_net_ops); + xt_unregister_template(&nf_nat_ipv6_table); + return ret; } static void __exit ip6table_nat_exit(void) { unregister_pernet_subsys(&ip6table_nat_net_ops); + xt_unregister_template(&nf_nat_ipv6_table); } module_init(ip6table_nat_init); diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index f63c106c521e..4f2a04af71d3 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -11,8 +11,6 @@ #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) -static int __net_init ip6table_raw_table_init(struct net *net); - static bool raw_before_defrag __read_mostly; MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag"); module_param(raw_before_defrag, bool, 0000); @@ -23,7 +21,6 @@ static const struct xt_table packet_raw = { .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_RAW, - .table_init = ip6table_raw_table_init, }; static const struct xt_table packet_raw_before_defrag = { @@ -32,7 +29,6 @@ static const struct xt_table packet_raw_before_defrag = { .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG, - .table_init = ip6table_raw_table_init, }; /* The work comes in here from netfilter.c. */ @@ -45,7 +41,7 @@ ip6table_raw_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops *rawtable_ops __read_mostly; -static int __net_init ip6table_raw_table_init(struct net *net) +static int ip6table_raw_table_init(struct net *net) { struct ip6t_replace *repl; const struct xt_table *table = &packet_raw; @@ -79,37 +75,39 @@ static struct pernet_operations ip6table_raw_net_ops = { static int __init ip6table_raw_init(void) { - int ret; const struct xt_table *table = &packet_raw; + int ret; if (raw_before_defrag) { table = &packet_raw_before_defrag; - pr_info("Enabling raw table before defrag\n"); } + ret = xt_register_template(table, ip6table_raw_table_init); + if (ret < 0) + return ret; + /* Register hooks */ rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook); - if (IS_ERR(rawtable_ops)) + if (IS_ERR(rawtable_ops)) { + xt_unregister_template(table); return PTR_ERR(rawtable_ops); + } ret = register_pernet_subsys(&ip6table_raw_net_ops); if (ret < 0) { kfree(rawtable_ops); + xt_unregister_template(table); return ret; } - ret = ip6table_raw_table_init(&init_net); - if (ret) { - unregister_pernet_subsys(&ip6table_raw_net_ops); - kfree(rawtable_ops); - } return ret; } static void __exit ip6table_raw_fini(void) { unregister_pernet_subsys(&ip6table_raw_net_ops); + xt_unregister_template(&packet_raw); kfree(rawtable_ops); } diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 8dc335cf450b..931674034d8b 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -24,15 +24,12 @@ MODULE_DESCRIPTION("ip6tables security table, for MAC rules"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) -static int __net_init ip6table_security_table_init(struct net *net); - static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_SECURITY, - .table_init = ip6table_security_table_init, }; static unsigned int @@ -44,7 +41,7 @@ ip6table_security_hook(void *priv, struct sk_buff *skb, static struct nf_hook_ops *sectbl_ops __read_mostly; -static int __net_init ip6table_security_table_init(struct net *net) +static int ip6table_security_table_init(struct net *net) { struct ip6t_replace *repl; int ret; @@ -74,29 +71,32 @@ static struct pernet_operations ip6table_security_net_ops = { static int __init ip6table_security_init(void) { - int ret; + int ret = xt_register_template(&security_table, + ip6table_security_table_init); + + if (ret < 0) + return ret; sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook); - if (IS_ERR(sectbl_ops)) + if (IS_ERR(sectbl_ops)) { + xt_unregister_template(&security_table); return PTR_ERR(sectbl_ops); + } ret = register_pernet_subsys(&ip6table_security_net_ops); if (ret < 0) { kfree(sectbl_ops); + xt_unregister_template(&security_table); return ret; } - ret = ip6table_security_table_init(&init_net); - if (ret) { - unregister_pernet_subsys(&ip6table_security_net_ops); - kfree(sectbl_ops); - } return ret; } static void __exit ip6table_security_fini(void) { unregister_pernet_subsys(&ip6table_security_net_ops); + xt_unregister_template(&security_table); kfree(sectbl_ops); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b6ddf23d3833..dbc224023977 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -41,6 +41,7 @@ #include <linux/nsproxy.h> #include <linux/slab.h> #include <linux/jhash.h> +#include <linux/siphash.h> #include <net/net_namespace.h> #include <net/snmp.h> #include <net/ipv6.h> @@ -1484,17 +1485,24 @@ static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket) static u32 rt6_exception_hash(const struct in6_addr *dst, const struct in6_addr *src) { - static u32 seed __read_mostly; - u32 val; + static siphash_key_t rt6_exception_key __read_mostly; + struct { + struct in6_addr dst; + struct in6_addr src; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .dst = *dst, + }; + u64 val; - net_get_random_once(&seed, sizeof(seed)); - val = jhash2((const u32 *)dst, sizeof(*dst)/sizeof(u32), seed); + net_get_random_once(&rt6_exception_key, sizeof(rt6_exception_key)); #ifdef CONFIG_IPV6_SUBTREES if (src) - val = jhash2((const u32 *)src, sizeof(*src)/sizeof(u32), val); + combined.src = *src; #endif - return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT); + val = siphash(&combined, sizeof(combined), &rt6_exception_key); + + return hash_64(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT); } /* Helper function to find the cached rt in the hash table @@ -1649,6 +1657,7 @@ static int rt6_insert_exception(struct rt6_info *nrt, struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; struct fib6_nh *nh = res->nh; + int max_depth; int err = 0; spin_lock_bh(&rt6_exception_lock); @@ -1703,7 +1712,9 @@ static int rt6_insert_exception(struct rt6_info *nrt, bucket->depth++; net->ipv6.rt6_stats->fib_rt_cache++; - if (bucket->depth > FIB6_MAX_DEPTH) + /* Randomize max depth to avoid some side channels attacks. */ + max_depth = FIB6_MAX_DEPTH + prandom_u32_max(FIB6_MAX_DEPTH); + while (bucket->depth > max_depth) rt6_exception_remove_oldest(bucket); out: @@ -3201,25 +3212,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst) { - struct inet6_dev *idev; - unsigned int mtu; - - mtu = dst_metric_raw(dst, RTAX_MTU); - if (mtu) - goto out; - - mtu = IPV6_MIN_MTU; - - rcu_read_lock(); - idev = __in6_dev_get(dst->dev); - if (idev) - mtu = idev->cnf.mtu6; - rcu_read_unlock(); - -out: - mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); - - return mtu - lwtunnel_headroom(dst->lwtstate, mtu); + return ip6_dst_mtu_maybe_forward(dst, false); } EXPORT_INDIRECT_CALLABLE(ip6_mtu); @@ -3644,8 +3637,7 @@ out: if (err) { lwtstate_put(fib6_nh->fib_nh_lws); fib6_nh->fib_nh_lws = NULL; - if (dev) - dev_put(dev); + dev_put(dev); } return err; @@ -6638,7 +6630,7 @@ int __init ip6_route_init(void) ret = -ENOMEM; ip6_dst_ops_template.kmem_cachep = kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, - SLAB_HWCACHE_ALIGN, NULL); + SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL); if (!ip6_dst_ops_template.kmem_cachep) goto out; diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 897fa59c47de..1bf5f5ae75ac 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -26,6 +26,7 @@ #ifdef CONFIG_IPV6_SEG6_HMAC #include <net/seg6_hmac.h> #endif +#include <linux/netfilter.h> static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) { @@ -295,11 +296,19 @@ static int seg6_do_srh(struct sk_buff *skb) ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + nf_reset_ct(skb); return 0; } -static int seg6_input(struct sk_buff *skb) +static int seg6_input_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + return dst_input(skb); +} + +static int seg6_input_core(struct net *net, struct sock *sk, + struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); struct dst_entry *dst = NULL; @@ -337,10 +346,41 @@ static int seg6_input(struct sk_buff *skb) if (unlikely(err)) return err; - return dst_input(skb); + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + dev_net(skb->dev), NULL, skb, NULL, + skb_dst(skb)->dev, seg6_input_finish); + + return seg6_input_finish(dev_net(skb->dev), NULL, skb); } -static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) +static int seg6_input_nf(struct sk_buff *skb) +{ + struct net_device *dev = skb_dst(skb)->dev; + struct net *net = dev_net(skb->dev); + + switch (skb->protocol) { + case htons(ETH_P_IP): + return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL, + skb, NULL, dev, seg6_input_core); + case htons(ETH_P_IPV6): + return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL, + skb, NULL, dev, seg6_input_core); + } + + return -EINVAL; +} + +static int seg6_input(struct sk_buff *skb) +{ + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return seg6_input_nf(skb); + + return seg6_input_core(dev_net(skb->dev), NULL, skb); +} + +static int seg6_output_core(struct net *net, struct sock *sk, + struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); struct dst_entry *dst = NULL; @@ -387,12 +427,40 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (unlikely(err)) goto drop; + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, + NULL, skb_dst(skb)->dev, dst_output); + return dst_output(net, sk, skb); drop: kfree_skb(skb); return err; } +static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct net_device *dev = skb_dst(skb)->dev; + + switch (skb->protocol) { + case htons(ETH_P_IP): + return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, + NULL, dev, seg6_output_core); + case htons(ETH_P_IPV6): + return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb, + NULL, dev, seg6_output_core); + } + + return -EINVAL; +} + +static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return seg6_output_nf(net, sk, skb); + + return seg6_output_core(net, sk, skb); +} + static int seg6_build_state(struct net *net, struct nlattr *nla, unsigned int family, const void *cfg, struct lwtunnel_state **ts, diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 60bf3b877957..2dc40b3f373e 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -30,6 +30,7 @@ #include <net/seg6_local.h> #include <linux/etherdevice.h> #include <linux/bpf.h> +#include <linux/netfilter.h> #define SEG6_F_ATTR(i) BIT(i) @@ -413,12 +414,33 @@ drop: return -EINVAL; } +static int input_action_end_dx6_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + struct dst_entry *orig_dst = skb_dst(skb); + struct in6_addr *nhaddr = NULL; + struct seg6_local_lwt *slwt; + + slwt = seg6_local_lwtunnel(orig_dst->lwtstate); + + /* The inner packet is not associated to any local interface, + * so we do not call netif_rx(). + * + * If slwt->nh6 is set to ::, then lookup the nexthop for the + * inner packet's DA. Otherwise, use the specified nexthop. + */ + if (!ipv6_addr_any(&slwt->nh6)) + nhaddr = &slwt->nh6; + + seg6_lookup_nexthop(skb, nhaddr, 0); + + return dst_input(skb); +} + /* decapsulate and forward to specified nexthop */ static int input_action_end_dx6(struct sk_buff *skb, struct seg6_local_lwt *slwt) { - struct in6_addr *nhaddr = NULL; - /* this function accepts IPv6 encapsulated packets, with either * an SRH with SL=0, or no SRH. */ @@ -429,40 +451,30 @@ static int input_action_end_dx6(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto drop; - /* The inner packet is not associated to any local interface, - * so we do not call netif_rx(). - * - * If slwt->nh6 is set to ::, then lookup the nexthop for the - * inner packet's DA. Otherwise, use the specified nexthop. - */ - - if (!ipv6_addr_any(&slwt->nh6)) - nhaddr = &slwt->nh6; - skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + nf_reset_ct(skb); - seg6_lookup_nexthop(skb, nhaddr, 0); + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, + dev_net(skb->dev), NULL, skb, NULL, + skb_dst(skb)->dev, input_action_end_dx6_finish); - return dst_input(skb); + return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb); drop: kfree_skb(skb); return -EINVAL; } -static int input_action_end_dx4(struct sk_buff *skb, - struct seg6_local_lwt *slwt) +static int input_action_end_dx4_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) { + struct dst_entry *orig_dst = skb_dst(skb); + struct seg6_local_lwt *slwt; struct iphdr *iph; __be32 nhaddr; int err; - if (!decap_and_validate(skb, IPPROTO_IPIP)) - goto drop; - - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - goto drop; - - skb->protocol = htons(ETH_P_IP); + slwt = seg6_local_lwtunnel(orig_dst->lwtstate); iph = ip_hdr(skb); @@ -470,14 +482,34 @@ static int input_action_end_dx4(struct sk_buff *skb, skb_dst_drop(skb); - skb_set_transport_header(skb, sizeof(struct iphdr)); - err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev); - if (err) - goto drop; + if (err) { + kfree_skb(skb); + return -EINVAL; + } return dst_input(skb); +} + +static int input_action_end_dx4(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + if (!decap_and_validate(skb, IPPROTO_IPIP)) + goto drop; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto drop; + + skb->protocol = htons(ETH_P_IP); + skb_set_transport_header(skb, sizeof(struct iphdr)); + nf_reset_ct(skb); + + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, + dev_net(skb->dev), NULL, skb, NULL, + skb_dst(skb)->dev, input_action_end_dx4_finish); + return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb); drop: kfree_skb(skb); return -EINVAL; @@ -645,6 +677,7 @@ static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb, skb_dst_drop(skb); skb_set_transport_header(skb, hdrlen); + nf_reset_ct(skb); return end_dt_vrf_rcv(skb, family, vrf); @@ -1078,7 +1111,8 @@ static void seg6_local_update_counters(struct seg6_local_lwt *slwt, u64_stats_update_end(&pcounters->syncp); } -static int seg6_local_input(struct sk_buff *skb) +static int seg6_local_input_core(struct net *net, struct sock *sk, + struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); struct seg6_action_desc *desc; @@ -1086,11 +1120,6 @@ static int seg6_local_input(struct sk_buff *skb) unsigned int len = skb->len; int rc; - if (skb->protocol != htons(ETH_P_IPV6)) { - kfree_skb(skb); - return -EINVAL; - } - slwt = seg6_local_lwtunnel(orig_dst->lwtstate); desc = slwt->desc; @@ -1104,6 +1133,21 @@ static int seg6_local_input(struct sk_buff *skb) return rc; } +static int seg6_local_input(struct sk_buff *skb) +{ + if (skb->protocol != htons(ETH_P_IPV6)) { + kfree_skb(skb); + return -EINVAL; + } + + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, + seg6_local_input_core); + + return seg6_local_input_core(dev_net(skb->dev), NULL, skb); +} + static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = { [SEG6_LOCAL_ACTION] = { .type = NLA_U32 }, [SEG6_LOCAL_SRH] = { .type = NLA_BINARY }, diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index df5bea818410..ef0c7a7c18e2 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -299,9 +299,8 @@ __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) } -static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr) +static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __user *a) { - struct ip_tunnel_prl __user *a = ifr->ifr_ifru.ifru_data; struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_prl kprl, *kp; struct ip_tunnel_prl_entry *prl; @@ -321,7 +320,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr) * we try harder to allocate. */ kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ? - kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) : + kcalloc(cmax, sizeof(*kp), GFP_KERNEL_ACCOUNT | __GFP_NOWARN) : NULL; rcu_read_lock(); @@ -334,7 +333,8 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr) * For root users, retry allocating enough memory for * the answer. */ - kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC); + kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC | __GFP_ACCOUNT | + __GFP_NOWARN); if (!kp) { ret = -ENOMEM; goto out; @@ -453,8 +453,8 @@ out: return err; } -static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ifreq *ifr, - int cmd) +static int ipip6_tunnel_prl_ctl(struct net_device *dev, + struct ip_tunnel_prl __user *data, int cmd) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_prl prl; @@ -465,7 +465,7 @@ static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ifreq *ifr, if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) return -EINVAL; - if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl))) + if (copy_from_user(&prl, data, sizeof(prl))) return -EFAULT; switch (cmd) { @@ -1197,14 +1197,14 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, } static int -ipip6_tunnel_get6rd(struct net_device *dev, struct ifreq *ifr) +ipip6_tunnel_get6rd(struct net_device *dev, struct ip_tunnel_parm __user *data) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_6rd ip6rd; struct ip_tunnel_parm p; if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + if (copy_from_user(&p, data, sizeof(p))) return -EFAULT; t = ipip6_tunnel_locate(t->net, &p, 0); } @@ -1215,13 +1215,14 @@ ipip6_tunnel_get6rd(struct net_device *dev, struct ifreq *ifr) ip6rd.relay_prefix = t->ip6rd.relay_prefix; ip6rd.prefixlen = t->ip6rd.prefixlen; ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen; - if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd, sizeof(ip6rd))) + if (copy_to_user(data, &ip6rd, sizeof(ip6rd))) return -EFAULT; return 0; } static int -ipip6_tunnel_6rdctl(struct net_device *dev, struct ifreq *ifr, int cmd) +ipip6_tunnel_6rdctl(struct net_device *dev, struct ip_tunnel_6rd __user *data, + int cmd) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_6rd ip6rd; @@ -1229,7 +1230,7 @@ ipip6_tunnel_6rdctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN)) return -EPERM; - if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data, sizeof(ip6rd))) + if (copy_from_user(&ip6rd, data, sizeof(ip6rd))) return -EFAULT; if (cmd != SIOCDEL6RD) { @@ -1368,27 +1369,28 @@ ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) } static int -ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +ipip6_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr, + void __user *data, int cmd) { switch (cmd) { case SIOCGETTUNNEL: case SIOCADDTUNNEL: case SIOCCHGTUNNEL: case SIOCDELTUNNEL: - return ip_tunnel_ioctl(dev, ifr, cmd); + return ip_tunnel_siocdevprivate(dev, ifr, data, cmd); case SIOCGETPRL: - return ipip6_tunnel_get_prl(dev, ifr); + return ipip6_tunnel_get_prl(dev, data); case SIOCADDPRL: case SIOCDELPRL: case SIOCCHGPRL: - return ipip6_tunnel_prl_ctl(dev, ifr, cmd); + return ipip6_tunnel_prl_ctl(dev, data, cmd); #ifdef CONFIG_IPV6_SIT_6RD case SIOCGET6RD: - return ipip6_tunnel_get6rd(dev, ifr); + return ipip6_tunnel_get6rd(dev, data); case SIOCADD6RD: case SIOCCHG6RD: case SIOCDEL6RD: - return ipip6_tunnel_6rdctl(dev, ifr, cmd); + return ipip6_tunnel_6rdctl(dev, data, cmd); #endif default: return -EINVAL; @@ -1399,7 +1401,7 @@ static const struct net_device_ops ipip6_netdev_ops = { .ndo_init = ipip6_tunnel_init, .ndo_uninit = ipip6_tunnel_uninit, .ndo_start_xmit = sit_tunnel_xmit, - .ndo_do_ioctl = ipip6_tunnel_ioctl, + .ndo_siocdevprivate = ipip6_tunnel_siocdevprivate, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = ip_tunnel_get_iflink, .ndo_tunnel_ctl = ipip6_tunnel_ctl, diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index d7cf26f730d7..d53dd142bf87 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -21,6 +21,7 @@ #ifdef CONFIG_NETLABEL #include <net/calipso.h> #endif +#include <linux/ioam6.h> static int two = 2; static int three = 3; @@ -28,6 +29,8 @@ static int flowlabel_reflect_max = 0x7; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; static u32 rt6_multipath_hash_fields_all_mask = FIB_MULTIPATH_HASH_FIELD_ALL_MASK; +static u32 ioam6_id_max = IOAM6_DEFAULT_ID; +static u64 ioam6_id_wide_max = IOAM6_DEFAULT_ID_WIDE; static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) @@ -196,6 +199,22 @@ static struct ctl_table ipv6_table_template[] = { .extra1 = SYSCTL_ZERO, .extra2 = &two, }, + { + .procname = "ioam6_id", + .data = &init_net.ipv6.sysctl.ioam6_id, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra2 = &ioam6_id_max, + }, + { + .procname = "ioam6_id_wide", + .data = &init_net.ipv6.sysctl.ioam6_id_wide, + .maxlen = sizeof(u64), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + .extra2 = &ioam6_id_wide_max, + }, { } }; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c5e15e94bb00..ea53847b5b7e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1475,7 +1475,7 @@ do_udp_sendmsg: fl6.saddr = np->saddr; fl6.fl6_sport = inet->inet_sport; - if (cgroup_bpf_enabled(BPF_CGROUP_UDP6_SENDMSG) && !connected) { + if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) { err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, (struct sockaddr *)sin6, &fl6.saddr); if (err) |