summaryrefslogtreecommitdiffstats
path: root/net/netfilter/ipvs
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter/ipvs')
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c131
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c88
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c134
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c215
9 files changed, 472 insertions, 116 deletions
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index bfd4365a8d73..4515056ef1c2 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -358,7 +358,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
struct tcphdr *th;
__u32 seq;
- if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
+ if (skb_ensure_writable(skb, tcp_offset + sizeof(*th)))
return 0;
th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
@@ -435,7 +435,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
struct tcphdr *th;
__u32 seq;
- if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
+ if (skb_ensure_writable(skb, tcp_offset + sizeof(*th)))
return 0;
th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 7138556b206b..46f06f92ab8f 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -34,6 +34,8 @@
#include <net/tcp.h>
#include <net/udp.h>
#include <net/icmp.h> /* for icmp_send */
+#include <net/gue.h>
+#include <net/gre.h>
#include <net/route.h>
#include <net/ip6_checksum.h>
#include <net/netns/generic.h> /* net_generic() */
@@ -892,7 +894,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol ||
IPPROTO_SCTP == protocol)
offset += 2 * sizeof(__u16);
- if (!skb_make_writable(skb, offset))
+ if (skb_ensure_writable(skb, offset))
goto out;
#ifdef CONFIG_IP_VS_IPV6
@@ -1282,7 +1284,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet");
- if (!skb_make_writable(skb, iph->len))
+ if (skb_ensure_writable(skb, iph->len))
goto drop;
/* mangle the packet */
@@ -1574,6 +1576,73 @@ ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
return 1;
}
+/* Check the UDP tunnel and return its header length */
+static int ipvs_udp_decap(struct netns_ipvs *ipvs, struct sk_buff *skb,
+ unsigned int offset, __u16 af,
+ const union nf_inet_addr *daddr, __u8 *proto)
+{
+ struct udphdr _udph, *udph;
+ struct ip_vs_dest *dest;
+
+ udph = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
+ if (!udph)
+ goto unk;
+ offset += sizeof(struct udphdr);
+ dest = ip_vs_find_tunnel(ipvs, af, daddr, udph->dest);
+ if (!dest)
+ goto unk;
+ if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ struct guehdr _gueh, *gueh;
+
+ gueh = skb_header_pointer(skb, offset, sizeof(_gueh), &_gueh);
+ if (!gueh)
+ goto unk;
+ if (gueh->control != 0 || gueh->version != 0)
+ goto unk;
+ /* Later we can support also IPPROTO_IPV6 */
+ if (gueh->proto_ctype != IPPROTO_IPIP)
+ goto unk;
+ *proto = gueh->proto_ctype;
+ return sizeof(struct udphdr) + sizeof(struct guehdr) +
+ (gueh->hlen << 2);
+ }
+
+unk:
+ return 0;
+}
+
+/* Check the GRE tunnel and return its header length */
+static int ipvs_gre_decap(struct netns_ipvs *ipvs, struct sk_buff *skb,
+ unsigned int offset, __u16 af,
+ const union nf_inet_addr *daddr, __u8 *proto)
+{
+ struct gre_base_hdr _greh, *greh;
+ struct ip_vs_dest *dest;
+
+ greh = skb_header_pointer(skb, offset, sizeof(_greh), &_greh);
+ if (!greh)
+ goto unk;
+ dest = ip_vs_find_tunnel(ipvs, af, daddr, 0);
+ if (!dest)
+ goto unk;
+ if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ __be16 type;
+
+ /* Only support version 0 and C (csum) */
+ if ((greh->flags & ~GRE_CSUM) != 0)
+ goto unk;
+ type = greh->protocol;
+ /* Later we can support also IPPROTO_IPV6 */
+ if (type != htons(ETH_P_IP))
+ goto unk;
+ *proto = IPPROTO_IPIP;
+ return gre_calc_hlen(gre_flags_to_tnl_flags(greh->flags));
+ }
+
+unk:
+ return 0;
+}
+
/*
* Handle ICMP messages in the outside-to-inside direction (incoming).
* Find any that might be relevant, check against existing connections,
@@ -1593,6 +1662,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
struct ip_vs_proto_data *pd;
unsigned int offset, offset2, ihl, verdict;
bool ipip, new_cp = false;
+ union nf_inet_addr *raddr;
*related = 1;
@@ -1631,20 +1701,56 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
+ raddr = (union nf_inet_addr *)&cih->daddr;
/* Special case for errors for IPIP packets */
ipip = false;
if (cih->protocol == IPPROTO_IPIP) {
+ struct ip_vs_dest *dest;
+
if (unlikely(cih->frag_off & htons(IP_OFFSET)))
return NF_ACCEPT;
/* Error for our IPIP must arrive at LOCAL_IN */
if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL))
return NF_ACCEPT;
+ dest = ip_vs_find_tunnel(ipvs, AF_INET, raddr, 0);
+ /* Only for known tunnel */
+ if (!dest || dest->tun_type != IP_VS_CONN_F_TUNNEL_TYPE_IPIP)
+ return NF_ACCEPT;
offset += cih->ihl * 4;
cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
ipip = true;
+ } else if ((cih->protocol == IPPROTO_UDP || /* Can be UDP encap */
+ cih->protocol == IPPROTO_GRE) && /* Can be GRE encap */
+ /* Error for our tunnel must arrive at LOCAL_IN */
+ (skb_rtable(skb)->rt_flags & RTCF_LOCAL)) {
+ __u8 iproto;
+ int ulen;
+
+ /* Non-first fragment has no UDP header */
+ if (unlikely(cih->frag_off & htons(IP_OFFSET)))
+ return NF_ACCEPT;
+ offset2 = offset + cih->ihl * 4;
+ if (cih->protocol == IPPROTO_UDP)
+ ulen = ipvs_udp_decap(ipvs, skb, offset2, AF_INET,
+ raddr, &iproto);
+ else
+ ulen = ipvs_gre_decap(ipvs, skb, offset2, AF_INET,
+ raddr, &iproto);
+ if (ulen > 0) {
+ /* Skip IP and UDP/GRE tunnel headers */
+ offset = offset2 + ulen;
+ /* Now we should be at the original IP header */
+ cih = skb_header_pointer(skb, offset, sizeof(_ciph),
+ &_ciph);
+ if (cih && cih->version == 4 && cih->ihl >= 5 &&
+ iproto == IPPROTO_IPIP)
+ ipip = true;
+ else
+ return NF_ACCEPT;
+ }
}
pd = ip_vs_proto_data_get(ipvs, cih->protocol);
@@ -2245,7 +2351,6 @@ static const struct nf_hook_ops ip_vs_ops[] = {
static int __net_init __ip_vs_init(struct net *net)
{
struct netns_ipvs *ipvs;
- int ret;
ipvs = net_generic(net, ip_vs_net_id);
if (ipvs == NULL)
@@ -2277,17 +2382,11 @@ static int __net_init __ip_vs_init(struct net *net)
if (ip_vs_sync_net_init(ipvs) < 0)
goto sync_fail;
- ret = nf_register_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
- if (ret < 0)
- goto hook_fail;
-
return 0;
/*
* Error handling
*/
-hook_fail:
- ip_vs_sync_net_cleanup(ipvs);
sync_fail:
ip_vs_conn_net_cleanup(ipvs);
conn_fail:
@@ -2317,6 +2416,19 @@ static void __net_exit __ip_vs_cleanup(struct net *net)
net->ipvs = NULL;
}
+static int __net_init __ip_vs_dev_init(struct net *net)
+{
+ int ret;
+
+ ret = nf_register_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+ if (ret < 0)
+ goto hook_fail;
+ return 0;
+
+hook_fail:
+ return ret;
+}
+
static void __net_exit __ip_vs_dev_cleanup(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
@@ -2336,6 +2448,7 @@ static struct pernet_operations ipvs_core_ops = {
};
static struct pernet_operations ipvs_core_dev_ops = {
+ .init = __ip_vs_dev_init,
.exit = __ip_vs_dev_cleanup,
};
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 776c87ed4813..07e0967bf129 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -510,15 +510,37 @@ static inline unsigned int ip_vs_rs_hashkey(int af,
static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
{
unsigned int hash;
+ __be16 port;
if (dest->in_rs_table)
return;
+ switch (IP_VS_DFWD_METHOD(dest)) {
+ case IP_VS_CONN_F_MASQ:
+ port = dest->port;
+ break;
+ case IP_VS_CONN_F_TUNNEL:
+ switch (dest->tun_type) {
+ case IP_VS_CONN_F_TUNNEL_TYPE_GUE:
+ port = dest->tun_port;
+ break;
+ case IP_VS_CONN_F_TUNNEL_TYPE_IPIP:
+ case IP_VS_CONN_F_TUNNEL_TYPE_GRE:
+ port = 0;
+ break;
+ default:
+ return;
+ }
+ break;
+ default:
+ return;
+ }
+
/*
* Hash by proto,addr,port,
* which are the parameters of the real service.
*/
- hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
+ hash = ip_vs_rs_hashkey(dest->af, &dest->addr, port);
hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
dest->in_rs_table = 1;
@@ -550,7 +572,8 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
if (dest->port == dport &&
dest->af == af &&
ip_vs_addr_equal(af, &dest->addr, daddr) &&
- (dest->protocol == protocol || dest->vfwmark)) {
+ (dest->protocol == protocol || dest->vfwmark) &&
+ IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) {
/* HIT */
return true;
}
@@ -580,7 +603,37 @@ struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af,
if (dest->port == dport &&
dest->af == af &&
ip_vs_addr_equal(af, &dest->addr, daddr) &&
- (dest->protocol == protocol || dest->vfwmark)) {
+ (dest->protocol == protocol || dest->vfwmark) &&
+ IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) {
+ /* HIT */
+ return dest;
+ }
+ }
+
+ return NULL;
+}
+
+/* Find real service record by <af,addr,tun_port>.
+ * In case of multiple records with the same <af,addr,tun_port>, only
+ * the first found record is returned.
+ *
+ * To be called under RCU lock.
+ */
+struct ip_vs_dest *ip_vs_find_tunnel(struct netns_ipvs *ipvs, int af,
+ const union nf_inet_addr *daddr,
+ __be16 tun_port)
+{
+ struct ip_vs_dest *dest;
+ unsigned int hash;
+
+ /* Check for "full" addressed entries */
+ hash = ip_vs_rs_hashkey(af, daddr, tun_port);
+
+ hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
+ if (dest->tun_port == tun_port &&
+ dest->af == af &&
+ ip_vs_addr_equal(af, &dest->addr, daddr) &&
+ IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_TUNNEL) {
/* HIT */
return dest;
}
@@ -826,24 +879,29 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
conn_flags |= IP_VS_CONN_F_INACTIVE;
+ /* Need to rehash? */
+ if ((udest->conn_flags & IP_VS_CONN_F_FWD_MASK) !=
+ IP_VS_DFWD_METHOD(dest) ||
+ udest->tun_type != dest->tun_type ||
+ udest->tun_port != dest->tun_port)
+ ip_vs_rs_unhash(dest);
+
/* set the tunnel info */
dest->tun_type = udest->tun_type;
dest->tun_port = udest->tun_port;
+ dest->tun_flags = udest->tun_flags;
/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
conn_flags |= IP_VS_CONN_F_NOOUTPUT;
} else {
- /*
- * Put the real service in rs_table if not present.
- * For now only for NAT!
- */
- ip_vs_rs_hash(ipvs, dest);
/* FTP-NAT requires conntrack for mangling */
if (svc->port == FTPPORT)
ip_vs_register_conntrack(svc);
}
atomic_set(&dest->conn_flags, conn_flags);
+ /* Put the real service in rs_table if not present. */
+ ip_vs_rs_hash(ipvs, dest);
/* bind the service */
old_svc = rcu_dereference_protected(dest->svc, 1);
@@ -2396,9 +2454,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
cfg.syncid = dm->syncid;
ret = start_sync_thread(ipvs, &cfg, dm->state);
} else {
- mutex_lock(&ipvs->sync_mutex);
ret = stop_sync_thread(ipvs, dm->state);
- mutex_unlock(&ipvs->sync_mutex);
}
goto out_dec;
}
@@ -2906,6 +2962,7 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
[IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 },
[IPVS_DEST_ATTR_TUN_TYPE] = { .type = NLA_U8 },
[IPVS_DEST_ATTR_TUN_PORT] = { .type = NLA_U16 },
+ [IPVS_DEST_ATTR_TUN_FLAGS] = { .type = NLA_U16 },
};
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
@@ -3212,6 +3269,8 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
dest->tun_type) ||
nla_put_be16(skb, IPVS_DEST_ATTR_TUN_PORT,
dest->tun_port) ||
+ nla_put_u16(skb, IPVS_DEST_ATTR_TUN_FLAGS,
+ dest->tun_flags) ||
nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
@@ -3332,7 +3391,8 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
/* If a full entry was requested, check for the additional fields */
if (full_entry) {
struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
- *nla_l_thresh, *nla_tun_type, *nla_tun_port;
+ *nla_l_thresh, *nla_tun_type, *nla_tun_port,
+ *nla_tun_flags;
nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
@@ -3340,6 +3400,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
nla_tun_type = attrs[IPVS_DEST_ATTR_TUN_TYPE];
nla_tun_port = attrs[IPVS_DEST_ATTR_TUN_PORT];
+ nla_tun_flags = attrs[IPVS_DEST_ATTR_TUN_FLAGS];
if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
return -EINVAL;
@@ -3355,6 +3416,9 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
if (nla_tun_port)
udest->tun_port = nla_get_be16(nla_tun_port);
+
+ if (nla_tun_flags)
+ udest->tun_flags = nla_get_u16(nla_tun_flags);
}
return 0;
@@ -3515,10 +3579,8 @@ static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
if (!attrs[IPVS_DAEMON_ATTR_STATE])
return -EINVAL;
- mutex_lock(&ipvs->sync_mutex);
ret = stop_sync_thread(ipvs,
nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
- mutex_unlock(&ipvs->sync_mutex);
return ret;
}
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index c244b2545e24..cf925906f59b 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -267,7 +267,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
return 1;
/* Linear packets are much easier to deal with. */
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, skb->len))
return 0;
if (cp->app_data == (void *) IP_VS_FTP_PASV) {
@@ -433,7 +433,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
return 1;
/* Linear packets are much easier to deal with. */
- if (!skb_make_writable(skb, skb->len))
+ if (skb_ensure_writable(skb, skb->len))
return 0;
data = data_start = ip_vs_ftp_data_ptr(skb, ipvsh);
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index b58ddb7dffd1..a0921adc31a9 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -101,7 +101,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
#endif
/* csum_check requires unshared skb */
- if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
+ if (skb_ensure_writable(skb, sctphoff + sizeof(*sctph)))
return 0;
if (unlikely(cp->app != NULL)) {
@@ -148,7 +148,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
#endif
/* csum_check requires unshared skb */
- if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
+ if (skb_ensure_writable(skb, sctphoff + sizeof(*sctph)))
return 0;
if (unlikely(cp->app != NULL)) {
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 915ac8206076..000d961b97e4 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -159,7 +159,7 @@ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
oldlen = skb->len - tcphoff;
/* csum_check requires unshared skb */
- if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
+ if (skb_ensure_writable(skb, tcphoff + sizeof(*tcph)))
return 0;
if (unlikely(cp->app != NULL)) {
@@ -237,7 +237,7 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
oldlen = skb->len - tcphoff;
/* csum_check requires unshared skb */
- if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
+ if (skb_ensure_writable(skb, tcphoff + sizeof(*tcph)))
return 0;
if (unlikely(cp->app != NULL)) {
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 379140075e95..153d89647c87 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -148,7 +148,7 @@ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
oldlen = skb->len - udphoff;
/* csum_check requires unshared skb */
- if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
+ if (skb_ensure_writable(skb, udphoff + sizeof(*udph)))
return 0;
if (unlikely(cp->app != NULL)) {
@@ -231,7 +231,7 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
oldlen = skb->len - udphoff;
/* csum_check requires unshared skb */
- if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
+ if (skb_ensure_writable(skb, udphoff + sizeof(*udph)))
return 0;
if (unlikely(cp->app != NULL)) {
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 2526be6b3d90..a4a78c4b06de 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -195,6 +195,7 @@ union ip_vs_sync_conn {
#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
struct ip_vs_sync_thread_data {
+ struct task_struct *task;
struct netns_ipvs *ipvs;
struct socket *sock;
char *buf;
@@ -374,8 +375,11 @@ static inline void sb_queue_tail(struct netns_ipvs *ipvs,
max(IPVS_SYNC_SEND_DELAY, 1));
ms->sync_queue_len++;
list_add_tail(&sb->list, &ms->sync_queue);
- if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE)
- wake_up_process(ms->master_thread);
+ if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE) {
+ int id = (int)(ms - ipvs->ms);
+
+ wake_up_process(ipvs->master_tinfo[id].task);
+ }
} else
ip_vs_sync_buff_release(sb);
spin_unlock(&ipvs->sync_lock);
@@ -1636,8 +1640,10 @@ static void master_wakeup_work_handler(struct work_struct *work)
spin_lock_bh(&ipvs->sync_lock);
if (ms->sync_queue_len &&
ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) {
+ int id = (int)(ms - ipvs->ms);
+
ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE;
- wake_up_process(ms->master_thread);
+ wake_up_process(ipvs->master_tinfo[id].task);
}
spin_unlock_bh(&ipvs->sync_lock);
}
@@ -1703,10 +1709,6 @@ done:
if (sb)
ip_vs_sync_buff_release(sb);
- /* release the sending multicast socket */
- sock_release(tinfo->sock);
- kfree(tinfo);
-
return 0;
}
@@ -1740,11 +1742,6 @@ static int sync_thread_backup(void *data)
}
}
- /* release the sending multicast socket */
- sock_release(tinfo->sock);
- kfree(tinfo->buf);
- kfree(tinfo);
-
return 0;
}
@@ -1752,8 +1749,8 @@ static int sync_thread_backup(void *data)
int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
int state)
{
- struct ip_vs_sync_thread_data *tinfo = NULL;
- struct task_struct **array = NULL, *task;
+ struct ip_vs_sync_thread_data *ti = NULL, *tinfo;
+ struct task_struct *task;
struct net_device *dev;
char *name;
int (*threadfn)(void *data);
@@ -1822,7 +1819,7 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
threadfn = sync_thread_master;
} else if (state == IP_VS_STATE_BACKUP) {
result = -EEXIST;
- if (ipvs->backup_threads)
+ if (ipvs->backup_tinfo)
goto out_early;
ipvs->bcfg = *c;
@@ -1849,28 +1846,22 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
master_wakeup_work_handler);
ms->ipvs = ipvs;
}
- } else {
- array = kcalloc(count, sizeof(struct task_struct *),
- GFP_KERNEL);
- result = -ENOMEM;
- if (!array)
- goto out;
}
+ result = -ENOMEM;
+ ti = kcalloc(count, sizeof(struct ip_vs_sync_thread_data),
+ GFP_KERNEL);
+ if (!ti)
+ goto out;
for (id = 0; id < count; id++) {
- result = -ENOMEM;
- tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
- if (!tinfo)
- goto out;
+ tinfo = &ti[id];
tinfo->ipvs = ipvs;
- tinfo->sock = NULL;
if (state == IP_VS_STATE_BACKUP) {
+ result = -ENOMEM;
tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen,
GFP_KERNEL);
if (!tinfo->buf)
goto out;
- } else {
- tinfo->buf = NULL;
}
tinfo->id = id;
if (state == IP_VS_STATE_MASTER)
@@ -1885,17 +1876,15 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
result = PTR_ERR(task);
goto out;
}
- tinfo = NULL;
- if (state == IP_VS_STATE_MASTER)
- ipvs->ms[id].master_thread = task;
- else
- array[id] = task;
+ tinfo->task = task;
}
/* mark as active */
- if (state == IP_VS_STATE_BACKUP)
- ipvs->backup_threads = array;
+ if (state == IP_VS_STATE_MASTER)
+ ipvs->master_tinfo = ti;
+ else
+ ipvs->backup_tinfo = ti;
spin_lock_bh(&ipvs->sync_buff_lock);
ipvs->sync_state |= state;
spin_unlock_bh(&ipvs->sync_buff_lock);
@@ -1910,29 +1899,31 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
out:
/* We do not need RTNL lock anymore, release it here so that
- * sock_release below and in the kthreads can use rtnl_lock
- * to leave the mcast group.
+ * sock_release below can use rtnl_lock to leave the mcast group.
*/
rtnl_unlock();
- count = id;
- while (count-- > 0) {
- if (state == IP_VS_STATE_MASTER)
- kthread_stop(ipvs->ms[count].master_thread);
- else
- kthread_stop(array[count]);
+ id = min(id, count - 1);
+ if (ti) {
+ for (tinfo = ti + id; tinfo >= ti; tinfo--) {
+ if (tinfo->task)
+ kthread_stop(tinfo->task);
+ }
}
if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
kfree(ipvs->ms);
ipvs->ms = NULL;
}
mutex_unlock(&ipvs->sync_mutex);
- if (tinfo) {
- if (tinfo->sock)
- sock_release(tinfo->sock);
- kfree(tinfo->buf);
- kfree(tinfo);
+
+ /* No more mutexes, release socks */
+ if (ti) {
+ for (tinfo = ti + id; tinfo >= ti; tinfo--) {
+ if (tinfo->sock)
+ sock_release(tinfo->sock);
+ kfree(tinfo->buf);
+ }
+ kfree(ti);
}
- kfree(array);
return result;
out_early:
@@ -1944,15 +1935,18 @@ out_early:
int stop_sync_thread(struct netns_ipvs *ipvs, int state)
{
- struct task_struct **array;
+ struct ip_vs_sync_thread_data *ti, *tinfo;
int id;
int retc = -EINVAL;
IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
+ mutex_lock(&ipvs->sync_mutex);
if (state == IP_VS_STATE_MASTER) {
+ retc = -ESRCH;
if (!ipvs->ms)
- return -ESRCH;
+ goto err;
+ ti = ipvs->master_tinfo;
/*
* The lock synchronizes with sb_queue_tail(), so that we don't
@@ -1971,38 +1965,56 @@ int stop_sync_thread(struct netns_ipvs *ipvs, int state)
struct ipvs_master_sync_state *ms = &ipvs->ms[id];
int ret;
+ tinfo = &ti[id];
pr_info("stopping master sync thread %d ...\n",
- task_pid_nr(ms->master_thread));
+ task_pid_nr(tinfo->task));
cancel_delayed_work_sync(&ms->master_wakeup_work);
- ret = kthread_stop(ms->master_thread);
+ ret = kthread_stop(tinfo->task);
if (retc >= 0)
retc = ret;
}
kfree(ipvs->ms);
ipvs->ms = NULL;
+ ipvs->master_tinfo = NULL;
} else if (state == IP_VS_STATE_BACKUP) {
- if (!ipvs->backup_threads)
- return -ESRCH;
+ retc = -ESRCH;
+ if (!ipvs->backup_tinfo)
+ goto err;
+ ti = ipvs->backup_tinfo;
ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
- array = ipvs->backup_threads;
retc = 0;
for (id = ipvs->threads_mask; id >= 0; id--) {
int ret;
+ tinfo = &ti[id];
pr_info("stopping backup sync thread %d ...\n",
- task_pid_nr(array[id]));
- ret = kthread_stop(array[id]);
+ task_pid_nr(tinfo->task));
+ ret = kthread_stop(tinfo->task);
if (retc >= 0)
retc = ret;
}
- kfree(array);
- ipvs->backup_threads = NULL;
+ ipvs->backup_tinfo = NULL;
+ } else {
+ goto err;
}
+ id = ipvs->threads_mask;
+ mutex_unlock(&ipvs->sync_mutex);
+
+ /* No more mutexes, release socks */
+ for (tinfo = ti + id; tinfo >= ti; tinfo--) {
+ if (tinfo->sock)
+ sock_release(tinfo->sock);
+ kfree(tinfo->buf);
+ }
+ kfree(ti);
/* decrease the module use count */
ip_vs_use_count_dec();
+ return retc;
+err:
+ mutex_unlock(&ipvs->sync_mutex);
return retc;
}
@@ -2021,7 +2033,6 @@ void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs)
{
int retc;
- mutex_lock(&ipvs->sync_mutex);
retc = stop_sync_thread(ipvs, IP_VS_STATE_MASTER);
if (retc && retc != -ESRCH)
pr_err("Failed to stop Master Daemon\n");
@@ -2029,5 +2040,4 @@ void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs)
retc = stop_sync_thread(ipvs, IP_VS_STATE_BACKUP);
if (retc && retc != -ESRCH)
pr_err("Failed to stop Backup Daemon\n");
- mutex_unlock(&ipvs->sync_mutex);
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index e101eda05d55..9c464d24beec 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -29,6 +29,7 @@
#include <linux/tcp.h> /* for tcphdr */
#include <net/ip.h>
#include <net/gue.h>
+#include <net/gre.h>
#include <net/tcp.h> /* for csum_tcpudp_magic */
#include <net/udp.h>
#include <net/icmp.h> /* for icmp_send */
@@ -36,6 +37,7 @@
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/ip_tunnels.h>
+#include <net/ip6_checksum.h>
#include <net/addrconf.h>
#include <linux/icmpv6.h>
#include <linux/netfilter.h>
@@ -275,7 +277,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs,
}
/* don't propagate ttl change to cloned packets */
- if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+ if (skb_ensure_writable(skb, sizeof(struct ipv6hdr)))
return false;
ipv6_hdr(skb)->hop_limit--;
@@ -290,7 +292,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs,
}
/* don't propagate ttl change to cloned packets */
- if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
return false;
/* Decrease ttl */
@@ -381,8 +383,19 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
if (!dest)
goto err_put;
- if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
+ if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
mtu -= sizeof(struct udphdr) + sizeof(struct guehdr);
+ if ((dest->tun_flags &
+ IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL)
+ mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+ } else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ __be16 tflags = 0;
+
+ if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+ tflags |= TUNNEL_CSUM;
+ mtu -= gre_calc_hlen(tflags);
+ }
if (mtu < 68) {
IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
goto err_put;
@@ -536,8 +549,19 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
if (!dest)
goto err_put;
- if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
+ if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
mtu -= sizeof(struct udphdr) + sizeof(struct guehdr);
+ if ((dest->tun_flags &
+ IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL)
+ mtu -= GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+ } else if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ __be16 tflags = 0;
+
+ if (dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+ tflags |= TUNNEL_CSUM;
+ mtu -= gre_calc_hlen(tflags);
+ }
if (mtu < IPV6_MIN_MTU) {
IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
IPV6_MIN_MTU);
@@ -792,7 +816,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
}
/* copy-on-write the packet before mangling it */
- if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ if (skb_ensure_writable(skb, sizeof(struct iphdr)))
goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len))
@@ -881,7 +905,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
}
/* copy-on-write the packet before mangling it */
- if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+ if (skb_ensure_writable(skb, sizeof(struct ipv6hdr)))
goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len))
@@ -1002,17 +1026,56 @@ ipvs_gue_encap(struct net *net, struct sk_buff *skb,
__be16 sport = udp_flow_src_port(net, skb, 0, 0, false);
struct udphdr *udph; /* Our new UDP header */
struct guehdr *gueh; /* Our new GUE header */
+ size_t hdrlen, optlen = 0;
+ void *data;
+ bool need_priv = false;
+
+ if ((cp->dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ optlen += GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+ need_priv = true;
+ }
- skb_push(skb, sizeof(struct guehdr));
+ hdrlen = sizeof(struct guehdr) + optlen;
+
+ skb_push(skb, hdrlen);
gueh = (struct guehdr *)skb->data;
gueh->control = 0;
gueh->version = 0;
- gueh->hlen = 0;
+ gueh->hlen = optlen >> 2;
gueh->flags = 0;
gueh->proto_ctype = *next_protocol;
+ data = &gueh[1];
+
+ if (need_priv) {
+ __be32 *flags = data;
+ u16 csum_start = skb_checksum_start_offset(skb);
+ __be16 *pd;
+
+ gueh->flags |= GUE_FLAG_PRIV;
+ *flags = 0;
+ data += GUE_LEN_PRIV;
+
+ if (csum_start < hdrlen)
+ return -EINVAL;
+
+ csum_start -= hdrlen;
+ pd = data;
+ pd[0] = htons(csum_start);
+ pd[1] = htons(csum_start + skb->csum_offset);
+
+ if (!skb_is_gso(skb)) {
+ skb->ip_summed = CHECKSUM_NONE;
+ skb->encapsulation = 0;
+ }
+
+ *flags |= GUE_PFLAG_REMCSUM;
+ data += GUE_PLEN_REMCSUM;
+ }
+
skb_push(skb, sizeof(struct udphdr));
skb_reset_transport_header(skb);
@@ -1029,6 +1092,24 @@ ipvs_gue_encap(struct net *net, struct sk_buff *skb,
return 0;
}
+static void
+ipvs_gre_encap(struct net *net, struct sk_buff *skb,
+ struct ip_vs_conn *cp, __u8 *next_protocol)
+{
+ __be16 proto = *next_protocol == IPPROTO_IPIP ?
+ htons(ETH_P_IP) : htons(ETH_P_IPV6);
+ __be16 tflags = 0;
+ size_t hdrlen;
+
+ if (cp->dest->tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+ tflags |= TUNNEL_CSUM;
+
+ hdrlen = gre_calc_hlen(tflags);
+ gre_build_header(skb, hdrlen, tflags, proto, 0, 0);
+
+ *next_protocol = IPPROTO_GRE;
+}
+
/*
* IP Tunneling transmitter
*
@@ -1066,6 +1147,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
unsigned int max_headroom; /* The extra header space needed */
int ret, local;
int tun_type, gso_type;
+ int tun_flags;
EnterFunction(10);
@@ -1088,9 +1170,28 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
tun_type = cp->dest->tun_type;
+ tun_flags = cp->dest->tun_flags;
+
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ size_t gue_hdrlen, gue_optlen = 0;
+
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ gue_optlen += GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+ }
+ gue_hdrlen = sizeof(struct guehdr) + gue_optlen;
+
+ max_headroom += sizeof(struct udphdr) + gue_hdrlen;
+ } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ size_t gre_hdrlen;
+ __be16 tflags = 0;
- if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
- max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr);
+ if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+ tflags |= TUNNEL_CSUM;
+ gre_hdrlen = gre_calc_hlen(tflags);
+
+ max_headroom += gre_hdrlen;
+ }
/* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */
dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL;
@@ -1101,8 +1202,22 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error;
gso_type = __tun_gso_type_mask(AF_INET, cp->af);
- if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
- gso_type |= SKB_GSO_UDP_TUNNEL;
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
+ (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
+ gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+ else
+ gso_type |= SKB_GSO_UDP_TUNNEL;
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ gso_type |= SKB_GSO_TUNNEL_REMCSUM;
+ }
+ } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+ gso_type |= SKB_GSO_GRE_CSUM;
+ else
+ gso_type |= SKB_GSO_GRE;
+ }
if (iptunnel_handle_offloads(skb, gso_type))
goto tx_error;
@@ -1111,8 +1226,19 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb_set_inner_ipproto(skb, next_protocol);
- if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
- ipvs_gue_encap(net, skb, cp, &next_protocol);
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ bool check = false;
+
+ if (ipvs_gue_encap(net, skb, cp, &next_protocol))
+ goto tx_error;
+
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
+ (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
+ check = true;
+
+ udp_set_csum(!check, skb, saddr, cp->daddr.ip, skb->len);
+ } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE)
+ ipvs_gre_encap(net, skb, cp, &next_protocol);
skb_push(skb, sizeof(struct iphdr));
skb_reset_network_header(skb);
@@ -1170,6 +1296,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
unsigned int max_headroom; /* The extra header space needed */
int ret, local;
int tun_type, gso_type;
+ int tun_flags;
EnterFunction(10);
@@ -1193,9 +1320,28 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
tun_type = cp->dest->tun_type;
+ tun_flags = cp->dest->tun_flags;
+
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ size_t gue_hdrlen, gue_optlen = 0;
+
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ gue_optlen += GUE_PLEN_REMCSUM + GUE_LEN_PRIV;
+ }
+ gue_hdrlen = sizeof(struct guehdr) + gue_optlen;
+
+ max_headroom += sizeof(struct udphdr) + gue_hdrlen;
+ } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ size_t gre_hdrlen;
+ __be16 tflags = 0;
+
+ if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+ tflags |= TUNNEL_CSUM;
+ gre_hdrlen = gre_calc_hlen(tflags);
- if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
- max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr);
+ max_headroom += gre_hdrlen;
+ }
skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
&next_protocol, &payload_len,
@@ -1204,8 +1350,22 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error;
gso_type = __tun_gso_type_mask(AF_INET6, cp->af);
- if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
- gso_type |= SKB_GSO_UDP_TUNNEL;
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
+ (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
+ gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+ else
+ gso_type |= SKB_GSO_UDP_TUNNEL;
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ gso_type |= SKB_GSO_TUNNEL_REMCSUM;
+ }
+ } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE) {
+ if (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM)
+ gso_type |= SKB_GSO_GRE_CSUM;
+ else
+ gso_type |= SKB_GSO_GRE;
+ }
if (iptunnel_handle_offloads(skb, gso_type))
goto tx_error;
@@ -1214,8 +1374,19 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb_set_inner_ipproto(skb, next_protocol);
- if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
- ipvs_gue_encap(net, skb, cp, &next_protocol);
+ if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+ bool check = false;
+
+ if (ipvs_gue_encap(net, skb, cp, &next_protocol))
+ goto tx_error;
+
+ if ((tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_CSUM) ||
+ (tun_flags & IP_VS_TUNNEL_ENCAP_FLAG_REMCSUM))
+ check = true;
+
+ udp6_set_csum(!check, skb, &saddr, &cp->daddr.in6, skb->len);
+ } else if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GRE)
+ ipvs_gre_encap(net, skb, cp, &next_protocol);
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
@@ -1400,7 +1571,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
}
/* copy-on-write the packet before mangling it */
- if (!skb_make_writable(skb, offset))
+ if (skb_ensure_writable(skb, offset))
goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len))
@@ -1489,7 +1660,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
}
/* copy-on-write the packet before mangling it */
- if (!skb_make_writable(skb, offset))
+ if (skb_ensure_writable(skb, offset))
goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len))