summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2012-11-30 12:01:30 -0500
committerDavid S. Miller <davem@davemloft.net>2012-11-30 12:01:30 -0500
commite7165030db8e932a9a968f7015cd3b2e984f8e7c (patch)
treeab46a0baf25f72b7001bb4673ba47534b81a0d2d
parentbb728820fe7c42fdb838ab2745fb5fe6b18b5ffa (diff)
parent92eb1d477145b2e7780b5002e856f70b8c3d74da (diff)
downloadlinux-stable-e7165030db8e932a9a968f7015cd3b2e984f8e7c.tar.gz
linux-stable-e7165030db8e932a9a968f7015cd3b2e984f8e7c.tar.bz2
linux-stable-e7165030db8e932a9a968f7015cd3b2e984f8e7c.zip
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jesse/openvswitch
Conflicts: net/ipv6/exthdrs_core.c Jesse Gross says: ==================== This series of improvements for 3.8/net-next contains four components: * Support for modifying IPv6 headers * Support for matching and setting skb->mark for better integration with things like iptables * Ability to recognize the EtherType for RARP packets * Two small performance enhancements The movement of ipv6_find_hdr() into exthdrs_core.c causes two small merge conflicts. I left it as is but can do the merge if you want. The conflicts are: * ipv6_find_hdr() and ipv6_find_tlv() were both moved to the bottom of exthdrs_core.c. Both should stay. * A new use of ipv6_find_hdr() was added to net/netfilter/ipvs/ip_vs_core.c after this patch. The IPVS user has two instances of the old constant name IP6T_FH_F_FRAG which has been renamed to IP6_FH_F_FRAG. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/netfilter_ipv6/ip6_tables.h9
-rw-r--r--include/linux/openvswitch.h1
-rw-r--r--include/net/ipv6.h10
-rw-r--r--net/ipv6/exthdrs_core.c124
-rw-r--r--net/ipv6/netfilter/ip6_tables.c103
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c4
-rw-r--r--net/netfilter/xt_HMARK.c8
-rw-r--r--net/openvswitch/actions.c97
-rw-r--r--net/openvswitch/datapath.c27
-rw-r--r--net/openvswitch/flow.c28
-rw-r--r--net/openvswitch/flow.h8
-rw-r--r--net/openvswitch/vport-netdev.c14
-rw-r--r--net/openvswitch/vport-netdev.h3
-rw-r--r--net/openvswitch/vport.c5
14 files changed, 307 insertions, 134 deletions
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 5f84c6229dc6..610208b18c05 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -47,15 +47,6 @@ ip6t_ext_hdr(u8 nexthdr)
(nexthdr == IPPROTO_DSTOPTS);
}
-enum {
- IP6T_FH_F_FRAG = (1 << 0),
- IP6T_FH_F_AUTH = (1 << 1),
-};
-
-/* find specified header and get offset to it */
-extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
- int target, unsigned short *fragoff, int *fragflg);
-
#ifdef CONFIG_COMPAT
#include <net/compat.h>
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index eb1efa54fe84..d42e174bd0c8 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -243,6 +243,7 @@ enum ovs_key_attr {
OVS_KEY_ATTR_ICMPV6, /* struct ovs_key_icmpv6 */
OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */
OVS_KEY_ATTR_ND, /* struct ovs_key_nd */
+ OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */
__OVS_KEY_ATTR_MAX
};
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 979bf6c13141..acbd8e034310 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -630,6 +630,16 @@ extern int ipv6_skip_exthdr(const struct sk_buff *, int start,
extern bool ipv6_ext_hdr(u8 nexthdr);
+enum {
+ IP6_FH_F_FRAG = (1 << 0),
+ IP6_FH_F_AUTH = (1 << 1),
+ IP6_FH_F_SKIP_RH = (1 << 2),
+};
+
+/* find specified header and get offset to it */
+extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
+ int target, unsigned short *fragoff, int *fragflg);
+
extern int ipv6_find_tlv(struct sk_buff *skb, int offset, int type);
extern struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index e7d756e19d1d..c5e83fae4df4 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -155,3 +155,127 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
return -1;
}
EXPORT_SYMBOL_GPL(ipv6_find_tlv);
+
+/*
+ * find the offset to specified header or the protocol number of last header
+ * if target < 0. "last header" is transport protocol header, ESP, or
+ * "No next header".
+ *
+ * Note that *offset is used as input/output parameter. an if it is not zero,
+ * then it must be a valid offset to an inner IPv6 header. This can be used
+ * to explore inner IPv6 header, eg. ICMPv6 error messages.
+ *
+ * If target header is found, its offset is set in *offset and return protocol
+ * number. Otherwise, return -1.
+ *
+ * If the first fragment doesn't contain the final protocol header or
+ * NEXTHDR_NONE it is considered invalid.
+ *
+ * Note that non-1st fragment is special case that "the protocol number
+ * of last header" is "next header" field in Fragment header. In this case,
+ * *offset is meaningless and fragment offset is stored in *fragoff if fragoff
+ * isn't NULL.
+ *
+ * if flags is not NULL and it's a fragment, then the frag flag
+ * IP6_FH_F_FRAG will be set. If it's an AH header, the
+ * IP6_FH_F_AUTH flag is set and target < 0, then this function will
+ * stop at the AH header. If IP6_FH_F_SKIP_RH flag was passed, then this
+ * function will skip all those routing headers, where segements_left was 0.
+ */
+int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
+ int target, unsigned short *fragoff, int *flags)
+{
+ unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
+ u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+ unsigned int len;
+ bool found;
+
+ if (fragoff)
+ *fragoff = 0;
+
+ if (*offset) {
+ struct ipv6hdr _ip6, *ip6;
+
+ ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6);
+ if (!ip6 || (ip6->version != 6)) {
+ printk(KERN_ERR "IPv6 header not found\n");
+ return -EBADMSG;
+ }
+ start = *offset + sizeof(struct ipv6hdr);
+ nexthdr = ip6->nexthdr;
+ }
+ len = skb->len - start;
+
+ do {
+ struct ipv6_opt_hdr _hdr, *hp;
+ unsigned int hdrlen;
+ found = (nexthdr == target);
+
+ if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
+ if (target < 0)
+ break;
+ return -ENOENT;
+ }
+
+ hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
+ if (hp == NULL)
+ return -EBADMSG;
+
+ if (nexthdr == NEXTHDR_ROUTING) {
+ struct ipv6_rt_hdr _rh, *rh;
+
+ rh = skb_header_pointer(skb, start, sizeof(_rh),
+ &_rh);
+ if (rh == NULL)
+ return -EBADMSG;
+
+ if (flags && (*flags & IP6_FH_F_SKIP_RH) &&
+ rh->segments_left == 0)
+ found = false;
+ }
+
+ if (nexthdr == NEXTHDR_FRAGMENT) {
+ unsigned short _frag_off;
+ __be16 *fp;
+
+ if (flags) /* Indicate that this is a fragment */
+ *flags |= IP6_FH_F_FRAG;
+ fp = skb_header_pointer(skb,
+ start+offsetof(struct frag_hdr,
+ frag_off),
+ sizeof(_frag_off),
+ &_frag_off);
+ if (fp == NULL)
+ return -EBADMSG;
+
+ _frag_off = ntohs(*fp) & ~0x7;
+ if (_frag_off) {
+ if (target < 0 &&
+ ((!ipv6_ext_hdr(hp->nexthdr)) ||
+ hp->nexthdr == NEXTHDR_NONE)) {
+ if (fragoff)
+ *fragoff = _frag_off;
+ return hp->nexthdr;
+ }
+ return -ENOENT;
+ }
+ hdrlen = 8;
+ } else if (nexthdr == NEXTHDR_AUTH) {
+ if (flags && (*flags & IP6_FH_F_AUTH) && (target < 0))
+ break;
+ hdrlen = (hp->hdrlen + 2) << 2;
+ } else
+ hdrlen = ipv6_optlen(hp);
+
+ if (!found) {
+ nexthdr = hp->nexthdr;
+ len -= hdrlen;
+ start += hdrlen;
+ }
+ } while (!found);
+
+ *offset = start;
+ return nexthdr;
+}
+EXPORT_SYMBOL(ipv6_find_hdr);
+
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 74cadd0719a5..125a90d6a795 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -2271,112 +2271,9 @@ static void __exit ip6_tables_fini(void)
unregister_pernet_subsys(&ip6_tables_net_ops);
}
-/*
- * find the offset to specified header or the protocol number of last header
- * if target < 0. "last header" is transport protocol header, ESP, or
- * "No next header".
- *
- * Note that *offset is used as input/output parameter. an if it is not zero,
- * then it must be a valid offset to an inner IPv6 header. This can be used
- * to explore inner IPv6 header, eg. ICMPv6 error messages.
- *
- * If target header is found, its offset is set in *offset and return protocol
- * number. Otherwise, return -1.
- *
- * If the first fragment doesn't contain the final protocol header or
- * NEXTHDR_NONE it is considered invalid.
- *
- * Note that non-1st fragment is special case that "the protocol number
- * of last header" is "next header" field in Fragment header. In this case,
- * *offset is meaningless and fragment offset is stored in *fragoff if fragoff
- * isn't NULL.
- *
- * if flags is not NULL and it's a fragment, then the frag flag IP6T_FH_F_FRAG
- * will be set. If it's an AH header, the IP6T_FH_F_AUTH flag is set and
- * target < 0, then this function will stop at the AH header.
- */
-int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
- int target, unsigned short *fragoff, int *flags)
-{
- unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
- u8 nexthdr = ipv6_hdr(skb)->nexthdr;
- unsigned int len;
-
- if (fragoff)
- *fragoff = 0;
-
- if (*offset) {
- struct ipv6hdr _ip6, *ip6;
-
- ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6);
- if (!ip6 || (ip6->version != 6)) {
- printk(KERN_ERR "IPv6 header not found\n");
- return -EBADMSG;
- }
- start = *offset + sizeof(struct ipv6hdr);
- nexthdr = ip6->nexthdr;
- }
- len = skb->len - start;
-
- while (nexthdr != target) {
- struct ipv6_opt_hdr _hdr, *hp;
- unsigned int hdrlen;
-
- if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
- if (target < 0)
- break;
- return -ENOENT;
- }
-
- hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
- if (hp == NULL)
- return -EBADMSG;
- if (nexthdr == NEXTHDR_FRAGMENT) {
- unsigned short _frag_off;
- __be16 *fp;
-
- if (flags) /* Indicate that this is a fragment */
- *flags |= IP6T_FH_F_FRAG;
- fp = skb_header_pointer(skb,
- start+offsetof(struct frag_hdr,
- frag_off),
- sizeof(_frag_off),
- &_frag_off);
- if (fp == NULL)
- return -EBADMSG;
-
- _frag_off = ntohs(*fp) & ~0x7;
- if (_frag_off) {
- if (target < 0 &&
- ((!ipv6_ext_hdr(hp->nexthdr)) ||
- hp->nexthdr == NEXTHDR_NONE)) {
- if (fragoff)
- *fragoff = _frag_off;
- return hp->nexthdr;
- }
- return -ENOENT;
- }
- hdrlen = 8;
- } else if (nexthdr == NEXTHDR_AUTH) {
- if (flags && (*flags & IP6T_FH_F_AUTH) && (target < 0))
- break;
- hdrlen = (hp->hdrlen + 2) << 2;
- } else
- hdrlen = ipv6_optlen(hp);
-
- nexthdr = hp->nexthdr;
- len -= hdrlen;
- start += hdrlen;
- }
-
- *offset = start;
- return nexthdr;
-}
-
EXPORT_SYMBOL(ip6t_register_table);
EXPORT_SYMBOL(ip6t_unregister_table);
EXPORT_SYMBOL(ip6t_do_table);
-EXPORT_SYMBOL(ipv6_find_hdr);
module_init(ip6_tables_init);
module_exit(ip6_tables_fini);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index fb45640dc1fb..47edf5a40a59 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -942,7 +942,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
/* Fragment header that is before ICMP header tells us that:
* it's not an error message since they can't be fragmented.
*/
- if (ipvsh->flags & IP6T_FH_F_FRAG)
+ if (ipvsh->flags & IP6_FH_F_FRAG)
return NF_DROP;
IP_VS_DBG(8, "Outgoing ICMPv6 (%d,%d) %pI6c->%pI6c\n",
@@ -1475,7 +1475,7 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
/* Fragment header that is before ICMP header tells us that:
* it's not an error message since they can't be fragmented.
*/
- if (iph->flags & IP6T_FH_F_FRAG)
+ if (iph->flags & IP6_FH_F_FRAG)
return NF_DROP;
IP_VS_DBG(8, "Incoming ICMPv6 (%d,%d) %pI6c->%pI6c\n",
diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c
index 1686ca1b53a1..73b73f687c58 100644
--- a/net/netfilter/xt_HMARK.c
+++ b/net/netfilter/xt_HMARK.c
@@ -167,7 +167,7 @@ hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
const struct xt_hmark_info *info)
{
struct ipv6hdr *ip6, _ip6;
- int flag = IP6T_FH_F_AUTH;
+ int flag = IP6_FH_F_AUTH;
unsigned int nhoff = 0;
u16 fragoff = 0;
int nexthdr;
@@ -177,7 +177,7 @@ hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
if (nexthdr < 0)
return 0;
/* No need to check for icmp errors on fragments */
- if ((flag & IP6T_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6))
+ if ((flag & IP6_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6))
goto noicmp;
/* Use inner header in case of ICMP errors */
if (get_inner6_hdr(skb, &nhoff)) {
@@ -185,7 +185,7 @@ hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
if (ip6 == NULL)
return -1;
/* If AH present, use SPI like in ESP. */
- flag = IP6T_FH_F_AUTH;
+ flag = IP6_FH_F_AUTH;
nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag);
if (nexthdr < 0)
return -1;
@@ -201,7 +201,7 @@ noicmp:
if (t->proto == IPPROTO_ICMPV6)
return 0;
- if (flag & IP6T_FH_F_FRAG)
+ if (flag & IP6_FH_F_FRAG)
return 0;
hmark_set_tuple_ports(skb, nhoff, t, info);
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 08114478cb85..ac2defeeba83 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -28,6 +28,7 @@
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <net/ip.h>
+#include <net/ipv6.h>
#include <net/checksum.h>
#include <net/dsfield.h>
@@ -162,6 +163,53 @@ static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
*addr = new_addr;
}
+static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
+ __be32 addr[4], const __be32 new_addr[4])
+{
+ int transport_len = skb->len - skb_transport_offset(skb);
+
+ if (l4_proto == IPPROTO_TCP) {
+ if (likely(transport_len >= sizeof(struct tcphdr)))
+ inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
+ addr, new_addr, 1);
+ } else if (l4_proto == IPPROTO_UDP) {
+ if (likely(transport_len >= sizeof(struct udphdr))) {
+ struct udphdr *uh = udp_hdr(skb);
+
+ if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace16(&uh->check, skb,
+ addr, new_addr, 1);
+ if (!uh->check)
+ uh->check = CSUM_MANGLED_0;
+ }
+ }
+ }
+}
+
+static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
+ __be32 addr[4], const __be32 new_addr[4],
+ bool recalculate_csum)
+{
+ if (recalculate_csum)
+ update_ipv6_checksum(skb, l4_proto, addr, new_addr);
+
+ skb->rxhash = 0;
+ memcpy(addr, new_addr, sizeof(__be32[4]));
+}
+
+static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc)
+{
+ nh->priority = tc >> 4;
+ nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4);
+}
+
+static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl)
+{
+ nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16;
+ nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8;
+ nh->flow_lbl[2] = fl & 0x000000FF;
+}
+
static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
{
csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
@@ -195,6 +243,47 @@ static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
return 0;
}
+static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key)
+{
+ struct ipv6hdr *nh;
+ int err;
+ __be32 *saddr;
+ __be32 *daddr;
+
+ err = make_writable(skb, skb_network_offset(skb) +
+ sizeof(struct ipv6hdr));
+ if (unlikely(err))
+ return err;
+
+ nh = ipv6_hdr(skb);
+ saddr = (__be32 *)&nh->saddr;
+ daddr = (__be32 *)&nh->daddr;
+
+ if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src)))
+ set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
+ ipv6_key->ipv6_src, true);
+
+ if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
+ unsigned int offset = 0;
+ int flags = IP6_FH_F_SKIP_RH;
+ bool recalc_csum = true;
+
+ if (ipv6_ext_hdr(nh->nexthdr))
+ recalc_csum = ipv6_find_hdr(skb, &offset,
+ NEXTHDR_ROUTING, NULL,
+ &flags) != NEXTHDR_ROUTING;
+
+ set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
+ ipv6_key->ipv6_dst, recalc_csum);
+ }
+
+ set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
+ set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
+ nh->hop_limit = ipv6_key->ipv6_hlimit;
+
+ return 0;
+}
+
/* Must follow make_writable() since that can move the skb data. */
static void set_tp_port(struct sk_buff *skb, __be16 *port,
__be16 new_port, __sum16 *check)
@@ -339,6 +428,10 @@ static int execute_set_action(struct sk_buff *skb,
skb->priority = nla_get_u32(nested_attr);
break;
+ case OVS_KEY_ATTR_SKB_MARK:
+ skb->mark = nla_get_u32(nested_attr);
+ break;
+
case OVS_KEY_ATTR_ETHERNET:
err = set_eth_addr(skb, nla_data(nested_attr));
break;
@@ -347,6 +440,10 @@ static int execute_set_action(struct sk_buff *skb,
err = set_ipv4(skb, nla_data(nested_attr));
break;
+ case OVS_KEY_ATTR_IPV6:
+ err = set_ipv6(skb, nla_data(nested_attr));
+ break;
+
case OVS_KEY_ATTR_TCP:
err = set_tcp(skb, nla_data(nested_attr));
break;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 4c4b62ccc7d7..f996db343247 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -208,7 +208,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
int error;
int key_len;
- stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+ stats = this_cpu_ptr(dp->stats_percpu);
/* Extract flow from 'skb' into 'key'. */
error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
@@ -282,7 +282,7 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
return 0;
err:
- stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+ stats = this_cpu_ptr(dp->stats_percpu);
u64_stats_update_begin(&stats->sync);
stats->n_lost++;
@@ -479,8 +479,10 @@ static int validate_set(const struct nlattr *a,
switch (key_type) {
const struct ovs_key_ipv4 *ipv4_key;
+ const struct ovs_key_ipv6 *ipv6_key;
case OVS_KEY_ATTR_PRIORITY:
+ case OVS_KEY_ATTR_SKB_MARK:
case OVS_KEY_ATTR_ETHERNET:
break;
@@ -500,6 +502,25 @@ static int validate_set(const struct nlattr *a,
break;
+ case OVS_KEY_ATTR_IPV6:
+ if (flow_key->eth.type != htons(ETH_P_IPV6))
+ return -EINVAL;
+
+ if (!flow_key->ip.proto)
+ return -EINVAL;
+
+ ipv6_key = nla_data(ovs_key);
+ if (ipv6_key->ipv6_proto != flow_key->ip.proto)
+ return -EINVAL;
+
+ if (ipv6_key->ipv6_frag != flow_key->ip.frag)
+ return -EINVAL;
+
+ if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
+ return -EINVAL;
+
+ break;
+
case OVS_KEY_ATTR_TCP:
if (flow_key->ip.proto != IPPROTO_TCP)
return -EINVAL;
@@ -675,6 +696,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
goto err_flow_free;
err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority,
+ &flow->key.phy.skb_mark,
&flow->key.phy.in_port,
a[OVS_PACKET_ATTR_KEY]);
if (err)
@@ -694,6 +716,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
OVS_CB(packet)->flow = flow;
packet->priority = flow->key.phy.priority;
+ packet->mark = flow->key.phy.skb_mark;
rcu_read_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 733cbf49ed1f..c3294cebc4f2 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -604,6 +604,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
key->phy.priority = skb->priority;
key->phy.in_port = in_port;
+ key->phy.skb_mark = skb->mark;
skb_reset_mac_header(skb);
@@ -689,7 +690,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
}
}
- } else if (key->eth.type == htons(ETH_P_ARP) && arphdr_ok(skb)) {
+ } else if ((key->eth.type == htons(ETH_P_ARP) ||
+ key->eth.type == htons(ETH_P_RARP)) && arphdr_ok(skb)) {
struct arp_eth_header *arp;
arp = (struct arp_eth_header *)skb_network_header(skb);
@@ -802,6 +804,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_ENCAP] = -1,
[OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
[OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
+ [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32),
[OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
[OVS_KEY_ATTR_VLAN] = sizeof(__be16),
[OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
@@ -987,6 +990,10 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
} else {
swkey->phy.in_port = DP_MAX_PORTS;
}
+ if (attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
+ swkey->phy.skb_mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
+ attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
+ }
/* Data attributes. */
if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET)))
@@ -1086,7 +1093,8 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
if (err)
return err;
}
- } else if (swkey->eth.type == htons(ETH_P_ARP)) {
+ } else if (swkey->eth.type == htons(ETH_P_ARP) ||
+ swkey->eth.type == htons(ETH_P_RARP)) {
const struct ovs_key_arp *arp_key;
if (!(attrs & (1 << OVS_KEY_ATTR_ARP)))
@@ -1113,6 +1121,8 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
/**
* ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key.
+ * @priority: receives the skb priority
+ * @mark: receives the skb mark
* @in_port: receives the extracted input port.
* @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
* sequence.
@@ -1122,7 +1132,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
* get the metadata, that is, the parts of the flow key that cannot be
* extracted from the packet itself.
*/
-int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
+int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port,
const struct nlattr *attr)
{
const struct nlattr *nla;
@@ -1130,6 +1140,7 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
*in_port = DP_MAX_PORTS;
*priority = 0;
+ *mark = 0;
nla_for_each_nested(nla, attr, rem) {
int type = nla_type(nla);
@@ -1148,6 +1159,10 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
return -EINVAL;
*in_port = nla_get_u32(nla);
break;
+
+ case OVS_KEY_ATTR_SKB_MARK:
+ *mark = nla_get_u32(nla);
+ break;
}
}
}
@@ -1169,6 +1184,10 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port))
goto nla_put_failure;
+ if (swkey->phy.skb_mark &&
+ nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, swkey->phy.skb_mark))
+ goto nla_put_failure;
+
nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
if (!nla)
goto nla_put_failure;
@@ -1222,7 +1241,8 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
ipv6_key->ipv6_tclass = swkey->ip.tos;
ipv6_key->ipv6_hlimit = swkey->ip.ttl;
ipv6_key->ipv6_frag = swkey->ip.frag;
- } else if (swkey->eth.type == htons(ETH_P_ARP)) {
+ } else if (swkey->eth.type == htons(ETH_P_ARP) ||
+ swkey->eth.type == htons(ETH_P_RARP)) {
struct ovs_key_arp *arp_key;
nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 14a324eb017b..a7bb60ff3b5b 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -43,6 +43,7 @@ struct sw_flow_actions {
struct sw_flow_key {
struct {
u32 priority; /* Packet QoS priority. */
+ u32 skb_mark; /* SKB mark. */
u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
} phy;
struct {
@@ -144,6 +145,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies);
* ------ --- ------ -----
* OVS_KEY_ATTR_PRIORITY 4 -- 4 8
* OVS_KEY_ATTR_IN_PORT 4 -- 4 8
+ * OVS_KEY_ATTR_SKB_MARK 4 -- 4 8
* OVS_KEY_ATTR_ETHERNET 12 -- 4 16
* OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype)
* OVS_KEY_ATTR_8021Q 4 -- 4 8
@@ -153,14 +155,14 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies);
* OVS_KEY_ATTR_ICMPV6 2 2 4 8
* OVS_KEY_ATTR_ND 28 -- 4 32
* -------------------------------------------------
- * total 144
+ * total 152
*/
-#define FLOW_BUFSIZE 144
+#define FLOW_BUFSIZE 152
int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
const struct nlattr *);
-int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
+int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port,
const struct nlattr *);
#define MAX_ACTIONS_BUFSIZE (16 * 1024)
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index a9033481fa5e..a9327e2e48ce 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -114,6 +114,15 @@ error:
return ERR_PTR(err);
}
+static void free_port_rcu(struct rcu_head *rcu)
+{
+ struct netdev_vport *netdev_vport = container_of(rcu,
+ struct netdev_vport, rcu);
+
+ dev_put(netdev_vport->dev);
+ ovs_vport_free(vport_from_priv(netdev_vport));
+}
+
static void netdev_destroy(struct vport *vport)
{
struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
@@ -122,10 +131,7 @@ static void netdev_destroy(struct vport *vport)
netdev_rx_handler_unregister(netdev_vport->dev);
dev_set_promiscuity(netdev_vport->dev, -1);
- synchronize_rcu();
-
- dev_put(netdev_vport->dev);
- ovs_vport_free(vport);
+ call_rcu(&netdev_vport->rcu, free_port_rcu);
}
const char *ovs_netdev_get_name(const struct vport *vport)
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
index f7072a25c604..6478079b3417 100644
--- a/net/openvswitch/vport-netdev.h
+++ b/net/openvswitch/vport-netdev.h
@@ -20,12 +20,15 @@
#define VPORT_NETDEV_H 1
#include <linux/netdevice.h>
+#include <linux/rcupdate.h>
#include "vport.h"
struct vport *ovs_netdev_get_vport(struct net_device *dev);
struct netdev_vport {
+ struct rcu_head rcu;
+
struct net_device *dev;
};
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 03779e8a2622..70af0bedbac4 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -333,8 +333,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
{
struct vport_percpu_stats *stats;
- stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
-
+ stats = this_cpu_ptr(vport->percpu_stats);
u64_stats_update_begin(&stats->sync);
stats->rx_packets++;
stats->rx_bytes += skb->len;
@@ -359,7 +358,7 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
if (likely(sent)) {
struct vport_percpu_stats *stats;
- stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
+ stats = this_cpu_ptr(vport->percpu_stats);
u64_stats_update_begin(&stats->sync);
stats->tx_packets++;