diff options
author | David S. Miller <davem@davemloft.net> | 2012-11-30 12:01:30 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-11-30 12:01:30 -0500 |
commit | e7165030db8e932a9a968f7015cd3b2e984f8e7c (patch) | |
tree | ab46a0baf25f72b7001bb4673ba47534b81a0d2d | |
parent | bb728820fe7c42fdb838ab2745fb5fe6b18b5ffa (diff) | |
parent | 92eb1d477145b2e7780b5002e856f70b8c3d74da (diff) | |
download | linux-stable-e7165030db8e932a9a968f7015cd3b2e984f8e7c.tar.gz linux-stable-e7165030db8e932a9a968f7015cd3b2e984f8e7c.tar.bz2 linux-stable-e7165030db8e932a9a968f7015cd3b2e984f8e7c.zip |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jesse/openvswitch
Conflicts:
net/ipv6/exthdrs_core.c
Jesse Gross says:
====================
This series of improvements for 3.8/net-next contains four components:
* Support for modifying IPv6 headers
* Support for matching and setting skb->mark for better integration with
things like iptables
* Ability to recognize the EtherType for RARP packets
* Two small performance enhancements
The movement of ipv6_find_hdr() into exthdrs_core.c causes two small merge
conflicts. I left it as is but can do the merge if you want. The conflicts
are:
* ipv6_find_hdr() and ipv6_find_tlv() were both moved to the bottom of
exthdrs_core.c. Both should stay.
* A new use of ipv6_find_hdr() was added to net/netfilter/ipvs/ip_vs_core.c
after this patch. The IPVS user has two instances of the old constant
name IP6T_FH_F_FRAG which has been renamed to IP6_FH_F_FRAG.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/netfilter_ipv6/ip6_tables.h | 9 | ||||
-rw-r--r-- | include/linux/openvswitch.h | 1 | ||||
-rw-r--r-- | include/net/ipv6.h | 10 | ||||
-rw-r--r-- | net/ipv6/exthdrs_core.c | 124 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6_tables.c | 103 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_core.c | 4 | ||||
-rw-r--r-- | net/netfilter/xt_HMARK.c | 8 | ||||
-rw-r--r-- | net/openvswitch/actions.c | 97 | ||||
-rw-r--r-- | net/openvswitch/datapath.c | 27 | ||||
-rw-r--r-- | net/openvswitch/flow.c | 28 | ||||
-rw-r--r-- | net/openvswitch/flow.h | 8 | ||||
-rw-r--r-- | net/openvswitch/vport-netdev.c | 14 | ||||
-rw-r--r-- | net/openvswitch/vport-netdev.h | 3 | ||||
-rw-r--r-- | net/openvswitch/vport.c | 5 |
14 files changed, 307 insertions, 134 deletions
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 5f84c6229dc6..610208b18c05 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -47,15 +47,6 @@ ip6t_ext_hdr(u8 nexthdr) (nexthdr == IPPROTO_DSTOPTS); } -enum { - IP6T_FH_F_FRAG = (1 << 0), - IP6T_FH_F_AUTH = (1 << 1), -}; - -/* find specified header and get offset to it */ -extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, - int target, unsigned short *fragoff, int *fragflg); - #ifdef CONFIG_COMPAT #include <net/compat.h> diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index eb1efa54fe84..d42e174bd0c8 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -243,6 +243,7 @@ enum ovs_key_attr { OVS_KEY_ATTR_ICMPV6, /* struct ovs_key_icmpv6 */ OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */ OVS_KEY_ATTR_ND, /* struct ovs_key_nd */ + OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */ __OVS_KEY_ATTR_MAX }; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 979bf6c13141..acbd8e034310 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -630,6 +630,16 @@ extern int ipv6_skip_exthdr(const struct sk_buff *, int start, extern bool ipv6_ext_hdr(u8 nexthdr); +enum { + IP6_FH_F_FRAG = (1 << 0), + IP6_FH_F_AUTH = (1 << 1), + IP6_FH_F_SKIP_RH = (1 << 2), +}; + +/* find specified header and get offset to it */ +extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, + int target, unsigned short *fragoff, int *fragflg); + extern int ipv6_find_tlv(struct sk_buff *skb, int offset, int type); extern struct in6_addr *fl6_update_dst(struct flowi6 *fl6, diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index e7d756e19d1d..c5e83fae4df4 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -155,3 +155,127 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) return -1; } EXPORT_SYMBOL_GPL(ipv6_find_tlv); + +/* + * find the offset to specified header or the protocol number of last header + * if target < 0. "last header" is transport protocol header, ESP, or + * "No next header". + * + * Note that *offset is used as input/output parameter. an if it is not zero, + * then it must be a valid offset to an inner IPv6 header. This can be used + * to explore inner IPv6 header, eg. ICMPv6 error messages. + * + * If target header is found, its offset is set in *offset and return protocol + * number. Otherwise, return -1. + * + * If the first fragment doesn't contain the final protocol header or + * NEXTHDR_NONE it is considered invalid. + * + * Note that non-1st fragment is special case that "the protocol number + * of last header" is "next header" field in Fragment header. In this case, + * *offset is meaningless and fragment offset is stored in *fragoff if fragoff + * isn't NULL. + * + * if flags is not NULL and it's a fragment, then the frag flag + * IP6_FH_F_FRAG will be set. If it's an AH header, the + * IP6_FH_F_AUTH flag is set and target < 0, then this function will + * stop at the AH header. If IP6_FH_F_SKIP_RH flag was passed, then this + * function will skip all those routing headers, where segements_left was 0. + */ +int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, + int target, unsigned short *fragoff, int *flags) +{ + unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); + u8 nexthdr = ipv6_hdr(skb)->nexthdr; + unsigned int len; + bool found; + + if (fragoff) + *fragoff = 0; + + if (*offset) { + struct ipv6hdr _ip6, *ip6; + + ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6); + if (!ip6 || (ip6->version != 6)) { + printk(KERN_ERR "IPv6 header not found\n"); + return -EBADMSG; + } + start = *offset + sizeof(struct ipv6hdr); + nexthdr = ip6->nexthdr; + } + len = skb->len - start; + + do { + struct ipv6_opt_hdr _hdr, *hp; + unsigned int hdrlen; + found = (nexthdr == target); + + if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) { + if (target < 0) + break; + return -ENOENT; + } + + hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); + if (hp == NULL) + return -EBADMSG; + + if (nexthdr == NEXTHDR_ROUTING) { + struct ipv6_rt_hdr _rh, *rh; + + rh = skb_header_pointer(skb, start, sizeof(_rh), + &_rh); + if (rh == NULL) + return -EBADMSG; + + if (flags && (*flags & IP6_FH_F_SKIP_RH) && + rh->segments_left == 0) + found = false; + } + + if (nexthdr == NEXTHDR_FRAGMENT) { + unsigned short _frag_off; + __be16 *fp; + + if (flags) /* Indicate that this is a fragment */ + *flags |= IP6_FH_F_FRAG; + fp = skb_header_pointer(skb, + start+offsetof(struct frag_hdr, + frag_off), + sizeof(_frag_off), + &_frag_off); + if (fp == NULL) + return -EBADMSG; + + _frag_off = ntohs(*fp) & ~0x7; + if (_frag_off) { + if (target < 0 && + ((!ipv6_ext_hdr(hp->nexthdr)) || + hp->nexthdr == NEXTHDR_NONE)) { + if (fragoff) + *fragoff = _frag_off; + return hp->nexthdr; + } + return -ENOENT; + } + hdrlen = 8; + } else if (nexthdr == NEXTHDR_AUTH) { + if (flags && (*flags & IP6_FH_F_AUTH) && (target < 0)) + break; + hdrlen = (hp->hdrlen + 2) << 2; + } else + hdrlen = ipv6_optlen(hp); + + if (!found) { + nexthdr = hp->nexthdr; + len -= hdrlen; + start += hdrlen; + } + } while (!found); + + *offset = start; + return nexthdr; +} +EXPORT_SYMBOL(ipv6_find_hdr); + diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 74cadd0719a5..125a90d6a795 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2271,112 +2271,9 @@ static void __exit ip6_tables_fini(void) unregister_pernet_subsys(&ip6_tables_net_ops); } -/* - * find the offset to specified header or the protocol number of last header - * if target < 0. "last header" is transport protocol header, ESP, or - * "No next header". - * - * Note that *offset is used as input/output parameter. an if it is not zero, - * then it must be a valid offset to an inner IPv6 header. This can be used - * to explore inner IPv6 header, eg. ICMPv6 error messages. - * - * If target header is found, its offset is set in *offset and return protocol - * number. Otherwise, return -1. - * - * If the first fragment doesn't contain the final protocol header or - * NEXTHDR_NONE it is considered invalid. - * - * Note that non-1st fragment is special case that "the protocol number - * of last header" is "next header" field in Fragment header. In this case, - * *offset is meaningless and fragment offset is stored in *fragoff if fragoff - * isn't NULL. - * - * if flags is not NULL and it's a fragment, then the frag flag IP6T_FH_F_FRAG - * will be set. If it's an AH header, the IP6T_FH_F_AUTH flag is set and - * target < 0, then this function will stop at the AH header. - */ -int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, - int target, unsigned short *fragoff, int *flags) -{ - unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); - u8 nexthdr = ipv6_hdr(skb)->nexthdr; - unsigned int len; - - if (fragoff) - *fragoff = 0; - - if (*offset) { - struct ipv6hdr _ip6, *ip6; - - ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6); - if (!ip6 || (ip6->version != 6)) { - printk(KERN_ERR "IPv6 header not found\n"); - return -EBADMSG; - } - start = *offset + sizeof(struct ipv6hdr); - nexthdr = ip6->nexthdr; - } - len = skb->len - start; - - while (nexthdr != target) { - struct ipv6_opt_hdr _hdr, *hp; - unsigned int hdrlen; - - if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) { - if (target < 0) - break; - return -ENOENT; - } - - hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); - if (hp == NULL) - return -EBADMSG; - if (nexthdr == NEXTHDR_FRAGMENT) { - unsigned short _frag_off; - __be16 *fp; - - if (flags) /* Indicate that this is a fragment */ - *flags |= IP6T_FH_F_FRAG; - fp = skb_header_pointer(skb, - start+offsetof(struct frag_hdr, - frag_off), - sizeof(_frag_off), - &_frag_off); - if (fp == NULL) - return -EBADMSG; - - _frag_off = ntohs(*fp) & ~0x7; - if (_frag_off) { - if (target < 0 && - ((!ipv6_ext_hdr(hp->nexthdr)) || - hp->nexthdr == NEXTHDR_NONE)) { - if (fragoff) - *fragoff = _frag_off; - return hp->nexthdr; - } - return -ENOENT; - } - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) { - if (flags && (*flags & IP6T_FH_F_AUTH) && (target < 0)) - break; - hdrlen = (hp->hdrlen + 2) << 2; - } else - hdrlen = ipv6_optlen(hp); - - nexthdr = hp->nexthdr; - len -= hdrlen; - start += hdrlen; - } - - *offset = start; - return nexthdr; -} - EXPORT_SYMBOL(ip6t_register_table); EXPORT_SYMBOL(ip6t_unregister_table); EXPORT_SYMBOL(ip6t_do_table); -EXPORT_SYMBOL(ipv6_find_hdr); module_init(ip6_tables_init); module_exit(ip6_tables_fini); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index fb45640dc1fb..47edf5a40a59 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -942,7 +942,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, /* Fragment header that is before ICMP header tells us that: * it's not an error message since they can't be fragmented. */ - if (ipvsh->flags & IP6T_FH_F_FRAG) + if (ipvsh->flags & IP6_FH_F_FRAG) return NF_DROP; IP_VS_DBG(8, "Outgoing ICMPv6 (%d,%d) %pI6c->%pI6c\n", @@ -1475,7 +1475,7 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, /* Fragment header that is before ICMP header tells us that: * it's not an error message since they can't be fragmented. */ - if (iph->flags & IP6T_FH_F_FRAG) + if (iph->flags & IP6_FH_F_FRAG) return NF_DROP; IP_VS_DBG(8, "Incoming ICMPv6 (%d,%d) %pI6c->%pI6c\n", diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c index 1686ca1b53a1..73b73f687c58 100644 --- a/net/netfilter/xt_HMARK.c +++ b/net/netfilter/xt_HMARK.c @@ -167,7 +167,7 @@ hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t, const struct xt_hmark_info *info) { struct ipv6hdr *ip6, _ip6; - int flag = IP6T_FH_F_AUTH; + int flag = IP6_FH_F_AUTH; unsigned int nhoff = 0; u16 fragoff = 0; int nexthdr; @@ -177,7 +177,7 @@ hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t, if (nexthdr < 0) return 0; /* No need to check for icmp errors on fragments */ - if ((flag & IP6T_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6)) + if ((flag & IP6_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6)) goto noicmp; /* Use inner header in case of ICMP errors */ if (get_inner6_hdr(skb, &nhoff)) { @@ -185,7 +185,7 @@ hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t, if (ip6 == NULL) return -1; /* If AH present, use SPI like in ESP. */ - flag = IP6T_FH_F_AUTH; + flag = IP6_FH_F_AUTH; nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag); if (nexthdr < 0) return -1; @@ -201,7 +201,7 @@ noicmp: if (t->proto == IPPROTO_ICMPV6) return 0; - if (flag & IP6T_FH_F_FRAG) + if (flag & IP6_FH_F_FRAG) return 0; hmark_set_tuple_ports(skb, nhoff, t, info); diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 08114478cb85..ac2defeeba83 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -28,6 +28,7 @@ #include <linux/if_arp.h> #include <linux/if_vlan.h> #include <net/ip.h> +#include <net/ipv6.h> #include <net/checksum.h> #include <net/dsfield.h> @@ -162,6 +163,53 @@ static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh, *addr = new_addr; } +static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, + __be32 addr[4], const __be32 new_addr[4]) +{ + int transport_len = skb->len - skb_transport_offset(skb); + + if (l4_proto == IPPROTO_TCP) { + if (likely(transport_len >= sizeof(struct tcphdr))) + inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb, + addr, new_addr, 1); + } else if (l4_proto == IPPROTO_UDP) { + if (likely(transport_len >= sizeof(struct udphdr))) { + struct udphdr *uh = udp_hdr(skb); + + if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { + inet_proto_csum_replace16(&uh->check, skb, + addr, new_addr, 1); + if (!uh->check) + uh->check = CSUM_MANGLED_0; + } + } + } +} + +static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, + __be32 addr[4], const __be32 new_addr[4], + bool recalculate_csum) +{ + if (recalculate_csum) + update_ipv6_checksum(skb, l4_proto, addr, new_addr); + + skb->rxhash = 0; + memcpy(addr, new_addr, sizeof(__be32[4])); +} + +static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc) +{ + nh->priority = tc >> 4; + nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4); +} + +static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl) +{ + nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16; + nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8; + nh->flow_lbl[2] = fl & 0x000000FF; +} + static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl) { csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8)); @@ -195,6 +243,47 @@ static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key) return 0; } +static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key) +{ + struct ipv6hdr *nh; + int err; + __be32 *saddr; + __be32 *daddr; + + err = make_writable(skb, skb_network_offset(skb) + + sizeof(struct ipv6hdr)); + if (unlikely(err)) + return err; + + nh = ipv6_hdr(skb); + saddr = (__be32 *)&nh->saddr; + daddr = (__be32 *)&nh->daddr; + + if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) + set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr, + ipv6_key->ipv6_src, true); + + if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) { + unsigned int offset = 0; + int flags = IP6_FH_F_SKIP_RH; + bool recalc_csum = true; + + if (ipv6_ext_hdr(nh->nexthdr)) + recalc_csum = ipv6_find_hdr(skb, &offset, + NEXTHDR_ROUTING, NULL, + &flags) != NEXTHDR_ROUTING; + + set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr, + ipv6_key->ipv6_dst, recalc_csum); + } + + set_ipv6_tc(nh, ipv6_key->ipv6_tclass); + set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label)); + nh->hop_limit = ipv6_key->ipv6_hlimit; + + return 0; +} + /* Must follow make_writable() since that can move the skb data. */ static void set_tp_port(struct sk_buff *skb, __be16 *port, __be16 new_port, __sum16 *check) @@ -339,6 +428,10 @@ static int execute_set_action(struct sk_buff *skb, skb->priority = nla_get_u32(nested_attr); break; + case OVS_KEY_ATTR_SKB_MARK: + skb->mark = nla_get_u32(nested_attr); + break; + case OVS_KEY_ATTR_ETHERNET: err = set_eth_addr(skb, nla_data(nested_attr)); break; @@ -347,6 +440,10 @@ static int execute_set_action(struct sk_buff *skb, err = set_ipv4(skb, nla_data(nested_attr)); break; + case OVS_KEY_ATTR_IPV6: + err = set_ipv6(skb, nla_data(nested_attr)); + break; + case OVS_KEY_ATTR_TCP: err = set_tcp(skb, nla_data(nested_attr)); break; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 4c4b62ccc7d7..f996db343247 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -208,7 +208,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) int error; int key_len; - stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id()); + stats = this_cpu_ptr(dp->stats_percpu); /* Extract flow from 'skb' into 'key'. */ error = ovs_flow_extract(skb, p->port_no, &key, &key_len); @@ -282,7 +282,7 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, return 0; err: - stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id()); + stats = this_cpu_ptr(dp->stats_percpu); u64_stats_update_begin(&stats->sync); stats->n_lost++; @@ -479,8 +479,10 @@ static int validate_set(const struct nlattr *a, switch (key_type) { const struct ovs_key_ipv4 *ipv4_key; + const struct ovs_key_ipv6 *ipv6_key; case OVS_KEY_ATTR_PRIORITY: + case OVS_KEY_ATTR_SKB_MARK: case OVS_KEY_ATTR_ETHERNET: break; @@ -500,6 +502,25 @@ static int validate_set(const struct nlattr *a, break; + case OVS_KEY_ATTR_IPV6: + if (flow_key->eth.type != htons(ETH_P_IPV6)) + return -EINVAL; + + if (!flow_key->ip.proto) + return -EINVAL; + + ipv6_key = nla_data(ovs_key); + if (ipv6_key->ipv6_proto != flow_key->ip.proto) + return -EINVAL; + + if (ipv6_key->ipv6_frag != flow_key->ip.frag) + return -EINVAL; + + if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) + return -EINVAL; + + break; + case OVS_KEY_ATTR_TCP: if (flow_key->ip.proto != IPPROTO_TCP) return -EINVAL; @@ -675,6 +696,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) goto err_flow_free; err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority, + &flow->key.phy.skb_mark, &flow->key.phy.in_port, a[OVS_PACKET_ATTR_KEY]); if (err) @@ -694,6 +716,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) OVS_CB(packet)->flow = flow; packet->priority = flow->key.phy.priority; + packet->mark = flow->key.phy.skb_mark; rcu_read_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 733cbf49ed1f..c3294cebc4f2 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -604,6 +604,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, key->phy.priority = skb->priority; key->phy.in_port = in_port; + key->phy.skb_mark = skb->mark; skb_reset_mac_header(skb); @@ -689,7 +690,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, } } - } else if (key->eth.type == htons(ETH_P_ARP) && arphdr_ok(skb)) { + } else if ((key->eth.type == htons(ETH_P_ARP) || + key->eth.type == htons(ETH_P_RARP)) && arphdr_ok(skb)) { struct arp_eth_header *arp; arp = (struct arp_eth_header *)skb_network_header(skb); @@ -802,6 +804,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_ENCAP] = -1, [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), + [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), [OVS_KEY_ATTR_VLAN] = sizeof(__be16), [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), @@ -987,6 +990,10 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, } else { swkey->phy.in_port = DP_MAX_PORTS; } + if (attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) { + swkey->phy.skb_mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); + attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); + } /* Data attributes. */ if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET))) @@ -1086,7 +1093,8 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, if (err) return err; } - } else if (swkey->eth.type == htons(ETH_P_ARP)) { + } else if (swkey->eth.type == htons(ETH_P_ARP) || + swkey->eth.type == htons(ETH_P_RARP)) { const struct ovs_key_arp *arp_key; if (!(attrs & (1 << OVS_KEY_ATTR_ARP))) @@ -1113,6 +1121,8 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, /** * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. + * @priority: receives the skb priority + * @mark: receives the skb mark * @in_port: receives the extracted input port. * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute * sequence. @@ -1122,7 +1132,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, * get the metadata, that is, the parts of the flow key that cannot be * extracted from the packet itself. */ -int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, +int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port, const struct nlattr *attr) { const struct nlattr *nla; @@ -1130,6 +1140,7 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, *in_port = DP_MAX_PORTS; *priority = 0; + *mark = 0; nla_for_each_nested(nla, attr, rem) { int type = nla_type(nla); @@ -1148,6 +1159,10 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, return -EINVAL; *in_port = nla_get_u32(nla); break; + + case OVS_KEY_ATTR_SKB_MARK: + *mark = nla_get_u32(nla); + break; } } } @@ -1169,6 +1184,10 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) goto nla_put_failure; + if (swkey->phy.skb_mark && + nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, swkey->phy.skb_mark)) + goto nla_put_failure; + nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); if (!nla) goto nla_put_failure; @@ -1222,7 +1241,8 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) ipv6_key->ipv6_tclass = swkey->ip.tos; ipv6_key->ipv6_hlimit = swkey->ip.ttl; ipv6_key->ipv6_frag = swkey->ip.frag; - } else if (swkey->eth.type == htons(ETH_P_ARP)) { + } else if (swkey->eth.type == htons(ETH_P_ARP) || + swkey->eth.type == htons(ETH_P_RARP)) { struct ovs_key_arp *arp_key; nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 14a324eb017b..a7bb60ff3b5b 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -43,6 +43,7 @@ struct sw_flow_actions { struct sw_flow_key { struct { u32 priority; /* Packet QoS priority. */ + u32 skb_mark; /* SKB mark. */ u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ } phy; struct { @@ -144,6 +145,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); * ------ --- ------ ----- * OVS_KEY_ATTR_PRIORITY 4 -- 4 8 * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 + * OVS_KEY_ATTR_SKB_MARK 4 -- 4 8 * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype) * OVS_KEY_ATTR_8021Q 4 -- 4 8 @@ -153,14 +155,14 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); * OVS_KEY_ATTR_ICMPV6 2 2 4 8 * OVS_KEY_ATTR_ND 28 -- 4 32 * ------------------------------------------------- - * total 144 + * total 152 */ -#define FLOW_BUFSIZE 144 +#define FLOW_BUFSIZE 152 int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, const struct nlattr *); -int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, +int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port, const struct nlattr *); #define MAX_ACTIONS_BUFSIZE (16 * 1024) diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index a9033481fa5e..a9327e2e48ce 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -114,6 +114,15 @@ error: return ERR_PTR(err); } +static void free_port_rcu(struct rcu_head *rcu) +{ + struct netdev_vport *netdev_vport = container_of(rcu, + struct netdev_vport, rcu); + + dev_put(netdev_vport->dev); + ovs_vport_free(vport_from_priv(netdev_vport)); +} + static void netdev_destroy(struct vport *vport) { struct netdev_vport *netdev_vport = netdev_vport_priv(vport); @@ -122,10 +131,7 @@ static void netdev_destroy(struct vport *vport) netdev_rx_handler_unregister(netdev_vport->dev); dev_set_promiscuity(netdev_vport->dev, -1); - synchronize_rcu(); - - dev_put(netdev_vport->dev); - ovs_vport_free(vport); + call_rcu(&netdev_vport->rcu, free_port_rcu); } const char *ovs_netdev_get_name(const struct vport *vport) diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h index f7072a25c604..6478079b3417 100644 --- a/net/openvswitch/vport-netdev.h +++ b/net/openvswitch/vport-netdev.h @@ -20,12 +20,15 @@ #define VPORT_NETDEV_H 1 #include <linux/netdevice.h> +#include <linux/rcupdate.h> #include "vport.h" struct vport *ovs_netdev_get_vport(struct net_device *dev); struct netdev_vport { + struct rcu_head rcu; + struct net_device *dev; }; diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 03779e8a2622..70af0bedbac4 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -333,8 +333,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) { struct vport_percpu_stats *stats; - stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id()); - + stats = this_cpu_ptr(vport->percpu_stats); u64_stats_update_begin(&stats->sync); stats->rx_packets++; stats->rx_bytes += skb->len; @@ -359,7 +358,7 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) if (likely(sent)) { struct vport_percpu_stats *stats; - stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id()); + stats = this_cpu_ptr(vport->percpu_stats); u64_stats_update_begin(&stats->sync); stats->tx_packets++; |