diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/devinet.c | 3 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel.c | 29 | ||||
-rw-r--r-- | net/ipv4/netfilter/Kconfig | 5 | ||||
-rw-r--r-- | net/ipv4/netfilter/Makefile | 1 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_nat_h323.c | 5 | ||||
-rw-r--r-- | net/ipv4/netfilter/nft_reject_ipv4.c | 75 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 18 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 15 | ||||
-rw-r--r-- | net/ipv4/udp_offload.c | 17 |
10 files changed, 130 insertions, 40 deletions
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ac2dff3c2c1c..bdbf68bb2e2d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1443,7 +1443,8 @@ static size_t inet_nlmsg_size(void) + nla_total_size(4) /* IFA_LOCAL */ + nla_total_size(4) /* IFA_BROADCAST */ + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ - + nla_total_size(4); /* IFA_FLAGS */ + + nla_total_size(4) /* IFA_FLAGS */ + + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */ } static inline u32 cstamp_delta(unsigned long cstamp) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index bd28f386bd02..50228be5c17b 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -101,28 +101,22 @@ static void tunnel_dst_reset_all(struct ip_tunnel *t) __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL); } -static struct dst_entry *tunnel_dst_get(struct ip_tunnel *t) +static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie) { struct dst_entry *dst; rcu_read_lock(); dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst); - if (dst) + if (dst) { + if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) { + rcu_read_unlock(); + tunnel_dst_reset(t); + return NULL; + } dst_hold(dst); - rcu_read_unlock(); - return dst; -} - -static struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie) -{ - struct dst_entry *dst = tunnel_dst_get(t); - - if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { - tunnel_dst_reset(t); - return NULL; } - - return dst; + rcu_read_unlock(); + return (struct rtable *)dst; } /* Often modified stats are per cpu, other are shared (netdev->stats) */ @@ -584,7 +578,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi4 fl4; u8 tos, ttl; __be16 df; - struct rtable *rt = NULL; /* Route to the other host */ + struct rtable *rt; /* Route to the other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst; int err; @@ -657,8 +651,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link); - if (connected) - rt = (struct rtable *)tunnel_dst_check(tunnel, 0); + rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL; if (!rt) { rt = ip_route_output_key(tunnel->net, &fl4); diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 81c6910cfa92..a26ce035e3fa 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -61,6 +61,11 @@ config NFT_CHAIN_NAT_IPV4 packet transformations such as the source, destination address and source and destination ports. +config NFT_REJECT_IPV4 + depends on NF_TABLES_IPV4 + default NFT_REJECT + tristate + config NF_TABLES_ARP depends on NF_TABLES tristate "ARP nf_tables support" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index c16be9d58420..90b82405331e 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o +obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o # generic IP tables diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 9eea059dd621..574f7ebba0b6 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -229,7 +229,10 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, ret = nf_ct_expect_related(rtcp_exp); if (ret == 0) break; - else if (ret != -EBUSY) { + else if (ret == -EBUSY) { + nf_ct_unexpect_related(rtp_exp); + continue; + } else if (ret < 0) { nf_ct_unexpect_related(rtp_exp); nated_port = 0; break; diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c new file mode 100644 index 000000000000..e79718a382f2 --- /dev/null +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2013 Eric Leblond <eric@regit.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/icmp.h> +#include <net/netfilter/ipv4/nf_reject.h> +#include <net/netfilter/nft_reject.h> + +void nft_reject_ipv4_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_reject *priv = nft_expr_priv(expr); + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + nf_send_unreach(pkt->skb, priv->icmp_code); + break; + case NFT_REJECT_TCP_RST: + nf_send_reset(pkt->skb, pkt->ops->hooknum); + break; + } + + data[NFT_REG_VERDICT].verdict = NF_DROP; +} +EXPORT_SYMBOL_GPL(nft_reject_ipv4_eval); + +static struct nft_expr_type nft_reject_ipv4_type; +static const struct nft_expr_ops nft_reject_ipv4_ops = { + .type = &nft_reject_ipv4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), + .eval = nft_reject_ipv4_eval, + .init = nft_reject_init, + .dump = nft_reject_dump, +}; + +static struct nft_expr_type nft_reject_ipv4_type __read_mostly = { + .family = NFPROTO_IPV4, + .name = "reject", + .ops = &nft_reject_ipv4_ops, + .policy = nft_reject_policy, + .maxattr = NFTA_REJECT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_reject_ipv4_module_init(void) +{ + return nft_register_expr(&nft_reject_ipv4_type); +} + +static void __exit nft_reject_ipv4_module_exit(void) +{ + nft_unregister_expr(&nft_reject_ipv4_type); +} + +module_init(nft_reject_ipv4_module_init); +module_exit(nft_reject_ipv4_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "reject"); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4475b3bb494d..9f3a2db9109e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2229,7 +2229,7 @@ adjudge_to_death: /* This is a (useful) BSD violating of the RFC. There is a * problem with TCP as specified in that the other end could * keep a socket open forever with no application left this end. - * We use a 3 minute timeout (about the same as BSD) then kill + * We use a 1 minute timeout (about the same as BSD) then kill * our end. If they send after that then tough - BUT: long enough * that we won't make the old 4*rto = almost no time - whoops * reset mistake. diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 65cf90e063d5..227cba79fa6b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -671,6 +671,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) { struct tcp_sock *tp = tcp_sk(sk); long m = mrtt; /* RTT */ + u32 srtt = tp->srtt; /* The following amusing code comes from Jacobson's * article in SIGCOMM '88. Note that rtt and mdev @@ -688,11 +689,9 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) * does not matter how to _calculate_ it. Seems, it was trap * that VJ failed to avoid. 8) */ - if (m == 0) - m = 1; - if (tp->srtt != 0) { - m -= (tp->srtt >> 3); /* m is now error in rtt est */ - tp->srtt += m; /* rtt = 7/8 rtt + 1/8 new */ + if (srtt != 0) { + m -= (srtt >> 3); /* m is now error in rtt est */ + srtt += m; /* rtt = 7/8 rtt + 1/8 new */ if (m < 0) { m = -m; /* m is now abs(error) */ m -= (tp->mdev >> 2); /* similar update on mdev */ @@ -723,11 +722,12 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) } } else { /* no previous measure. */ - tp->srtt = m << 3; /* take the measured time to be rtt */ + srtt = m << 3; /* take the measured time to be rtt */ tp->mdev = m << 1; /* make sure rto = 3*rtt */ tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); tp->rtt_seq = tp->snd_nxt; } + tp->srtt = max(1U, srtt); } /* Set the sk_pacing_rate to allow proper sizing of TSO packets. @@ -746,8 +746,10 @@ static void tcp_update_pacing_rate(struct sock *sk) rate *= max(tp->snd_cwnd, tp->packets_out); - /* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3), - * be conservative and assume srtt = 1 (125 us instead of 1.25 ms) + /* Correction for small srtt and scheduling constraints. + * For small rtt, consider noise is too high, and use + * the minimal value (srtt = 1 -> 125 us for HZ=1000) + * * We probably need usec resolution in the future. * Note: This also takes care of possible srtt=0 case, * when tcp_rtt_estimator() was not yet called. diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 03d26b85eab8..3be16727f058 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -698,7 +698,8 @@ static void tcp_tsq_handler(struct sock *sk) if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) - tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC); + tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle, + 0, GFP_ATOMIC); } /* * One tasklet per cpu tries to send more skbs. @@ -1904,7 +1905,15 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (atomic_read(&sk->sk_wmem_alloc) > limit) { set_bit(TSQ_THROTTLED, &tp->tsq_flags); - break; + /* It is possible TX completion already happened + * before we set TSQ_THROTTLED, so we must + * test again the condition. + * We abuse smp_mb__after_clear_bit() because + * there is no smp_mb__after_set_bit() yet + */ + smp_mb__after_clear_bit(); + if (atomic_read(&sk->sk_wmem_alloc) > limit) + break; } limit = mss_now; @@ -1977,7 +1986,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) /* Schedule a loss probe in 2*RTT for SACK capable connections * in Open state, that are either limited by cwnd or application. */ - if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out || + if (sysctl_tcp_early_retrans < 3 || !tp->srtt || !tp->packets_out || !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) return false; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 25f5cee3a08a..88b4023ecfcf 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -17,6 +17,8 @@ static DEFINE_SPINLOCK(udp_offload_lock); static struct udp_offload_priv __rcu *udp_offload_base __read_mostly; +#define udp_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&udp_offload_lock)) + struct udp_offload_priv { struct udp_offload *offload; struct rcu_head rcu; @@ -100,8 +102,7 @@ out: int udp_add_offload(struct udp_offload *uo) { - struct udp_offload_priv __rcu **head = &udp_offload_base; - struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_KERNEL); + struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC); if (!new_offload) return -ENOMEM; @@ -109,8 +110,8 @@ int udp_add_offload(struct udp_offload *uo) new_offload->offload = uo; spin_lock(&udp_offload_lock); - rcu_assign_pointer(new_offload->next, rcu_dereference(*head)); - rcu_assign_pointer(*head, new_offload); + new_offload->next = udp_offload_base; + rcu_assign_pointer(udp_offload_base, new_offload); spin_unlock(&udp_offload_lock); return 0; @@ -130,12 +131,12 @@ void udp_del_offload(struct udp_offload *uo) spin_lock(&udp_offload_lock); - uo_priv = rcu_dereference(*head); + uo_priv = udp_deref_protected(*head); for (; uo_priv != NULL; - uo_priv = rcu_dereference(*head)) { - + uo_priv = udp_deref_protected(*head)) { if (uo_priv->offload == uo) { - rcu_assign_pointer(*head, rcu_dereference(uo_priv->next)); + rcu_assign_pointer(*head, + udp_deref_protected(uo_priv->next)); goto unlock; } head = &uo_priv->next; |