summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/arp.c3
-rw-r--r--net/ipv4/cipso_ipv4.c8
-rw-r--r--net/ipv4/devinet.c32
-rw-r--r--net/ipv4/fib_frontend.c114
-rw-r--r--net/ipv4/fib_lookup.h3
-rw-r--r--net/ipv4/fib_semantics.c47
-rw-r--r--net/ipv4/fib_trie.c23
-rw-r--r--net/ipv4/icmp.c2
-rw-r--r--net/ipv4/inet_connection_sock.c5
-rw-r--r--net/ipv4/inetpeer.c13
-rw-r--r--net/ipv4/ip_fragment.c31
-rw-r--r--net/ipv4/ip_options.c12
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/ipconfig.c2
-rw-r--r--net/ipv4/netfilter.c5
-rw-r--r--net/ipv4/netfilter/arp_tables.c4
-rw-r--r--net/ipv4/netfilter/ip_tables.c6
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c5
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c4
-rw-r--r--net/ipv4/raw.c3
-rw-r--r--net/ipv4/route.c27
-rw-r--r--net/ipv4/sysctl_net_ipv4.c3
-rw-r--r--net/ipv4/tcp_cubic.c9
-rw-r--r--net/ipv4/tcp_input.c22
-rw-r--r--net/ipv4/tcp_lp.c2
-rw-r--r--net/ipv4/tcp_output.c5
-rw-r--r--net/ipv4/tcp_yeah.c2
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv4/xfrm4_output.c8
-rw-r--r--net/ipv4/xfrm4_policy.c1
-rw-r--r--net/ipv4/xfrm4_state.c1
31 files changed, 264 insertions, 142 deletions
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 090d273d7865..1b74d3b64371 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -215,6 +215,9 @@ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
case ARPHRD_INFINIBAND:
ip_ib_mc_map(addr, dev->broadcast, haddr);
return 0;
+ case ARPHRD_IPGRE:
+ ip_ipgre_mc_map(addr, dev->broadcast, haddr);
+ return 0;
default:
if (dir) {
memcpy(haddr, dev->broadcast, dev->addr_len);
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 094e150c6260..a0af7ea87870 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -112,7 +112,7 @@ int cipso_v4_rbm_strictvalid = 1;
/* The maximum number of category ranges permitted in the ranged category tag
* (tag #5). You may note that the IETF draft states that the maximum number
* of category ranges is 7, but if the low end of the last category range is
- * zero then it is possibile to fit 8 category ranges because the zero should
+ * zero then it is possible to fit 8 category ranges because the zero should
* be omitted. */
#define CIPSO_V4_TAG_RNG_CAT_MAX 8
@@ -438,7 +438,7 @@ cache_add_failure:
*
* Description:
* Search the DOI definition list for a DOI definition with a DOI value that
- * matches @doi. The caller is responsibile for calling rcu_read_[un]lock().
+ * matches @doi. The caller is responsible for calling rcu_read_[un]lock().
* Returns a pointer to the DOI definition on success and NULL on failure.
*/
static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi)
@@ -1293,7 +1293,7 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
return ret_val;
/* This will send packets using the "optimized" format when
- * possibile as specified in section 3.4.2.6 of the
+ * possible as specified in section 3.4.2.6 of the
* CIPSO draft. */
if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10)
tag_len = 14;
@@ -1752,7 +1752,7 @@ validate_return:
}
/**
- * cipso_v4_error - Send the correct reponse for a bad packet
+ * cipso_v4_error - Send the correct response for a bad packet
* @skb: the packet
* @error: the error code
* @gateway: CIPSO gateway flag
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 6d85800daeb7..cd9ca0811cfa 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -64,6 +64,8 @@
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
+#include "fib_lookup.h"
+
static struct ipv4_devconf ipv4_devconf = {
.data = {
[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
@@ -151,6 +153,20 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
break;
}
}
+ if (!result) {
+ struct flowi4 fl4 = { .daddr = addr };
+ struct fib_result res = { 0 };
+ struct fib_table *local;
+
+ /* Fallback to FIB local table so that communication
+ * over loopback subnets work.
+ */
+ local = fib_get_table(net, RT_TABLE_LOCAL);
+ if (local &&
+ !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
+ res.type == RTN_LOCAL)
+ result = FIB_RES_DEV(res);
+ }
if (result && devref)
dev_hold(result);
rcu_read_unlock();
@@ -345,6 +361,17 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
}
}
+ /* On promotion all secondaries from subnet are changing
+ * the primary IP, we must remove all their routes silently
+ * and later to add them back with new prefsrc. Do this
+ * while all addresses are on the device list.
+ */
+ for (ifa = promote; ifa; ifa = ifa->ifa_next) {
+ if (ifa1->ifa_mask == ifa->ifa_mask &&
+ inet_ifa_match(ifa1->ifa_address, ifa))
+ fib_del_ifaddr(ifa, ifa1);
+ }
+
/* 2. Unlink it */
*ifap = ifa1->ifa_next;
@@ -364,6 +391,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
if (promote) {
+ struct in_ifaddr *next_sec = promote->ifa_next;
if (prev_prom) {
prev_prom->ifa_next = promote->ifa_next;
@@ -375,7 +403,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
blocking_notifier_call_chain(&inetaddr_chain,
NETDEV_UP, promote);
- for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
+ for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
if (ifa1->ifa_mask != ifa->ifa_mask ||
!inet_ifa_match(ifa1->ifa_address, ifa))
continue;
@@ -1652,7 +1680,7 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
return;
cnf->sysctl = NULL;
- unregister_sysctl_table(t->sysctl_header);
+ unregister_net_sysctl_table(t->sysctl_header);
kfree(t->dev_name);
kfree(t);
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index a373a259253c..451088330bbb 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -228,7 +228,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
if (res.type != RTN_LOCAL || !accept_local)
goto e_inval;
}
- *spec_dst = FIB_RES_PREFSRC(res);
+ *spec_dst = FIB_RES_PREFSRC(net, res);
fib_combine_itag(itag, &res);
dev_match = false;
@@ -258,7 +258,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
ret = 0;
if (fib_lookup(net, &fl4, &res) == 0) {
if (res.type == RTN_UNICAST) {
- *spec_dst = FIB_RES_PREFSRC(res);
+ *spec_dst = FIB_RES_PREFSRC(net, res);
ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
}
}
@@ -722,12 +722,17 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
}
}
-static void fib_del_ifaddr(struct in_ifaddr *ifa)
+/* Delete primary or secondary address.
+ * Optionally, on secondary address promotion consider the addresses
+ * from subnet iprim as deleted, even if they are in device list.
+ * In this case the secondary ifa can be in device list.
+ */
+void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
{
struct in_device *in_dev = ifa->ifa_dev;
struct net_device *dev = in_dev->dev;
struct in_ifaddr *ifa1;
- struct in_ifaddr *prim = ifa;
+ struct in_ifaddr *prim = ifa, *prim1 = NULL;
__be32 brd = ifa->ifa_address | ~ifa->ifa_mask;
__be32 any = ifa->ifa_address & ifa->ifa_mask;
#define LOCAL_OK 1
@@ -735,17 +740,26 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
#define BRD0_OK 4
#define BRD1_OK 8
unsigned ok = 0;
+ int subnet = 0; /* Primary network */
+ int gone = 1; /* Address is missing */
+ int same_prefsrc = 0; /* Another primary with same IP */
- if (!(ifa->ifa_flags & IFA_F_SECONDARY))
- fib_magic(RTM_DELROUTE,
- dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
- any, ifa->ifa_prefixlen, prim);
- else {
+ if (ifa->ifa_flags & IFA_F_SECONDARY) {
prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
if (prim == NULL) {
printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
return;
}
+ if (iprim && iprim != prim) {
+ printk(KERN_WARNING "fib_del_ifaddr: bug: iprim != prim\n");
+ return;
+ }
+ } else if (!ipv4_is_zeronet(any) &&
+ (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) {
+ fib_magic(RTM_DELROUTE,
+ dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+ any, ifa->ifa_prefixlen, prim);
+ subnet = 1;
}
/* Deletion is more complicated than add.
@@ -755,6 +769,49 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
*/
for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
+ if (ifa1 == ifa) {
+ /* promotion, keep the IP */
+ gone = 0;
+ continue;
+ }
+ /* Ignore IFAs from our subnet */
+ if (iprim && ifa1->ifa_mask == iprim->ifa_mask &&
+ inet_ifa_match(ifa1->ifa_address, iprim))
+ continue;
+
+ /* Ignore ifa1 if it uses different primary IP (prefsrc) */
+ if (ifa1->ifa_flags & IFA_F_SECONDARY) {
+ /* Another address from our subnet? */
+ if (ifa1->ifa_mask == prim->ifa_mask &&
+ inet_ifa_match(ifa1->ifa_address, prim))
+ prim1 = prim;
+ else {
+ /* We reached the secondaries, so
+ * same_prefsrc should be determined.
+ */
+ if (!same_prefsrc)
+ continue;
+ /* Search new prim1 if ifa1 is not
+ * using the current prim1
+ */
+ if (!prim1 ||
+ ifa1->ifa_mask != prim1->ifa_mask ||
+ !inet_ifa_match(ifa1->ifa_address, prim1))
+ prim1 = inet_ifa_byprefix(in_dev,
+ ifa1->ifa_address,
+ ifa1->ifa_mask);
+ if (!prim1)
+ continue;
+ if (prim1->ifa_local != prim->ifa_local)
+ continue;
+ }
+ } else {
+ if (prim->ifa_local != ifa1->ifa_local)
+ continue;
+ prim1 = ifa1;
+ if (prim != prim1)
+ same_prefsrc = 1;
+ }
if (ifa->ifa_local == ifa1->ifa_local)
ok |= LOCAL_OK;
if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
@@ -763,19 +820,37 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
ok |= BRD1_OK;
if (any == ifa1->ifa_broadcast)
ok |= BRD0_OK;
+ /* primary has network specific broadcasts */
+ if (prim1 == ifa1 && ifa1->ifa_prefixlen < 31) {
+ __be32 brd1 = ifa1->ifa_address | ~ifa1->ifa_mask;
+ __be32 any1 = ifa1->ifa_address & ifa1->ifa_mask;
+
+ if (!ipv4_is_zeronet(any1)) {
+ if (ifa->ifa_broadcast == brd1 ||
+ ifa->ifa_broadcast == any1)
+ ok |= BRD_OK;
+ if (brd == brd1 || brd == any1)
+ ok |= BRD1_OK;
+ if (any == brd1 || any == any1)
+ ok |= BRD0_OK;
+ }
+ }
}
if (!(ok & BRD_OK))
fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
- if (!(ok & BRD1_OK))
- fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
- if (!(ok & BRD0_OK))
- fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
+ if (subnet && ifa->ifa_prefixlen < 31) {
+ if (!(ok & BRD1_OK))
+ fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
+ if (!(ok & BRD0_OK))
+ fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
+ }
if (!(ok & LOCAL_OK)) {
fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
/* Check, that this local address finally disappeared. */
- if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
+ if (gone &&
+ inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
/* And the last, but not the least thing.
* We must flush stray FIB entries.
*
@@ -885,6 +960,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
{
struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
struct net_device *dev = ifa->ifa_dev->dev;
+ struct net *net = dev_net(dev);
switch (event) {
case NETDEV_UP:
@@ -892,12 +968,12 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
#ifdef CONFIG_IP_ROUTE_MULTIPATH
fib_sync_up(dev);
#endif
- fib_update_nh_saddrs(dev);
+ atomic_inc(&net->ipv4.dev_addr_genid);
rt_cache_flush(dev_net(dev), -1);
break;
case NETDEV_DOWN:
- fib_del_ifaddr(ifa);
- fib_update_nh_saddrs(dev);
+ fib_del_ifaddr(ifa, NULL);
+ atomic_inc(&net->ipv4.dev_addr_genid);
if (ifa->ifa_dev->ifa_list == NULL) {
/* Last address was deleted from this interface.
* Disable IP.
@@ -915,6 +991,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
{
struct net_device *dev = ptr;
struct in_device *in_dev = __in_dev_get_rtnl(dev);
+ struct net *net = dev_net(dev);
if (event == NETDEV_UNREGISTER) {
fib_disable_ip(dev, 2, -1);
@@ -932,6 +1009,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
#ifdef CONFIG_IP_ROUTE_MULTIPATH
fib_sync_up(dev);
#endif
+ atomic_inc(&net->ipv4.dev_addr_genid);
rt_cache_flush(dev_net(dev), -1);
break;
case NETDEV_DOWN:
@@ -990,6 +1068,7 @@ static void ip_fib_net_exit(struct net *net)
fib4_rules_exit(net);
#endif
+ rtnl_lock();
for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
struct fib_table *tb;
struct hlist_head *head;
@@ -1002,6 +1081,7 @@ static void ip_fib_net_exit(struct net *net)
fib_free_table(tb);
}
}
+ rtnl_unlock();
kfree(net->ipv4.fib_table_hash);
}
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 4ec323875a02..af0f14aba169 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -10,7 +10,6 @@ struct fib_alias {
struct fib_info *fa_info;
u8 fa_tos;
u8 fa_type;
- u8 fa_scope;
u8 fa_state;
struct rcu_head rcu;
};
@@ -29,7 +28,7 @@ extern void fib_release_info(struct fib_info *);
extern struct fib_info *fib_create_info(struct fib_config *cfg);
extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
- u32 tb_id, u8 type, u8 scope, __be32 dst,
+ u32 tb_id, u8 type, __be32 dst,
int dst_len, u8 tos, struct fib_info *fi,
unsigned int);
extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 622ac4c95026..641a5a2a9f9c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -222,7 +222,7 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
unsigned int mask = (fib_info_hash_size - 1);
unsigned int val = fi->fib_nhs;
- val ^= fi->fib_protocol;
+ val ^= (fi->fib_protocol << 8) | fi->fib_scope;
val ^= (__force u32)fi->fib_prefsrc;
val ^= fi->fib_priority;
for_nexthops(fi) {
@@ -248,10 +248,11 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
if (fi->fib_nhs != nfi->fib_nhs)
continue;
if (nfi->fib_protocol == fi->fib_protocol &&
+ nfi->fib_scope == fi->fib_scope &&
nfi->fib_prefsrc == fi->fib_prefsrc &&
nfi->fib_priority == fi->fib_priority &&
memcmp(nfi->fib_metrics, fi->fib_metrics,
- sizeof(fi->fib_metrics)) == 0 &&
+ sizeof(u32) * RTAX_MAX) == 0 &&
((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 &&
(nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
return fi;
@@ -328,7 +329,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
goto errout;
err = fib_dump_info(skb, info->pid, seq, event, tb_id,
- fa->fa_type, fa->fa_scope, key, dst_len,
+ fa->fa_type, key, dst_len,
fa->fa_tos, fa->fa_info, nlm_flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
@@ -695,6 +696,16 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
fib_info_hash_free(old_laddrhash, bytes);
}
+__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh)
+{
+ nh->nh_saddr = inet_select_addr(nh->nh_dev,
+ nh->nh_gw,
+ nh->nh_parent->fib_scope);
+ nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
+
+ return nh->nh_saddr;
+}
+
struct fib_info *fib_create_info(struct fib_config *cfg)
{
int err;
@@ -753,6 +764,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
fi->fib_net = hold_net(net);
fi->fib_protocol = cfg->fc_protocol;
+ fi->fib_scope = cfg->fc_scope;
fi->fib_flags = cfg->fc_flags;
fi->fib_priority = cfg->fc_priority;
fi->fib_prefsrc = cfg->fc_prefsrc;
@@ -854,10 +866,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
}
change_nexthops(fi) {
- nexthop_nh->nh_cfg_scope = cfg->fc_scope;
- nexthop_nh->nh_saddr = inet_select_addr(nexthop_nh->nh_dev,
- nexthop_nh->nh_gw,
- nexthop_nh->nh_cfg_scope);
+ fib_info_update_nh_saddr(net, nexthop_nh);
} endfor_nexthops(fi)
link_it:
@@ -906,7 +915,7 @@ failure:
}
int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
- u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
+ u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
struct fib_info *fi, unsigned int flags)
{
struct nlmsghdr *nlh;
@@ -928,7 +937,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
NLA_PUT_U32(skb, RTA_TABLE, tb_id);
rtm->rtm_type = type;
rtm->rtm_flags = fi->fib_flags;
- rtm->rtm_scope = scope;
+ rtm->rtm_scope = fi->fib_scope;
rtm->rtm_protocol = fi->fib_protocol;
if (rtm->rtm_dst_len)
@@ -1084,7 +1093,7 @@ void fib_select_default(struct fib_result *res)
list_for_each_entry_rcu(fa, fa_head, fa_list) {
struct fib_info *next_fi = fa->fa_info;
- if (fa->fa_scope != res->scope ||
+ if (next_fi->fib_scope != res->scope ||
fa->fa_type != RTN_UNICAST)
continue;
@@ -1128,24 +1137,6 @@ out:
return;
}
-void fib_update_nh_saddrs(struct net_device *dev)
-{
- struct hlist_head *head;
- struct hlist_node *node;
- struct fib_nh *nh;
- unsigned int hash;
-
- hash = fib_devindex_hashfn(dev->ifindex);
- head = &fib_info_devhash[hash];
- hlist_for_each_entry(nh, node, head, nh_hash) {
- if (nh->nh_dev != dev)
- continue;
- nh->nh_saddr = inet_select_addr(nh->nh_dev,
- nh->nh_gw,
- nh->nh_cfg_scope);
- }
-}
-
#ifdef CONFIG_IP_ROUTE_MULTIPATH
/*
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 3d28a35c2e1a..5fe9b8b41df3 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -12,7 +12,7 @@
*
* Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
*
- * This work is based on the LPC-trie which is originally descibed in:
+ * This work is based on the LPC-trie which is originally described in:
*
* An experimental study of compression methods for dynamic tries
* Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
@@ -1245,7 +1245,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
if (fa->fa_info->fib_priority != fi->fib_priority)
break;
if (fa->fa_type == cfg->fc_type &&
- fa->fa_scope == cfg->fc_scope &&
fa->fa_info == fi) {
fa_match = fa;
break;
@@ -1271,7 +1270,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
new_fa->fa_tos = fa->fa_tos;
new_fa->fa_info = fi;
new_fa->fa_type = cfg->fc_type;
- new_fa->fa_scope = cfg->fc_scope;
state = fa->fa_state;
new_fa->fa_state = state & ~FA_S_ACCESSED;
@@ -1308,7 +1306,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
new_fa->fa_info = fi;
new_fa->fa_tos = tos;
new_fa->fa_type = cfg->fc_type;
- new_fa->fa_scope = cfg->fc_scope;
new_fa->fa_state = 0;
/*
* Insert new entry to the list.
@@ -1362,15 +1359,15 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
continue;
- if (fa->fa_scope < flp->flowi4_scope)
+ if (fa->fa_info->fib_scope < flp->flowi4_scope)
continue;
fib_alias_accessed(fa);
err = fib_props[fa->fa_type].error;
if (err) {
#ifdef CONFIG_IP_FIB_TRIE_STATS
- t->stats.semantic_match_miss++;
+ t->stats.semantic_match_passed++;
#endif
- return 1;
+ return err;
}
if (fi->fib_flags & RTNH_F_DEAD)
continue;
@@ -1388,7 +1385,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
res->prefixlen = plen;
res->nh_sel = nhsel;
res->type = fa->fa_type;
- res->scope = fa->fa_scope;
+ res->scope = fa->fa_info->fib_scope;
res->fi = fi;
res->table = tb;
res->fa_head = &li->falh;
@@ -1664,7 +1661,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
(cfg->fc_scope == RT_SCOPE_NOWHERE ||
- fa->fa_scope == cfg->fc_scope) &&
+ fa->fa_info->fib_scope == cfg->fc_scope) &&
+ (!cfg->fc_prefsrc ||
+ fi->fib_prefsrc == cfg->fc_prefsrc) &&
(!cfg->fc_protocol ||
fi->fib_protocol == cfg->fc_protocol) &&
fib_nh_match(cfg, fi) == 0) {
@@ -1861,7 +1860,6 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
RTM_NEWROUTE,
tb->tb_id,
fa->fa_type,
- fa->fa_scope,
xkey,
plen,
fa->fa_tos,
@@ -1980,9 +1978,6 @@ struct fib_table *fib_trie_table(u32 id)
t = (struct trie *) tb->tb_data;
memset(t, 0, sizeof(*t));
- if (id == RT_TABLE_LOCAL)
- pr_info("IPv4 FIB: Using LC-trie version %s\n", VERSION);
-
return tb;
}
@@ -2382,7 +2377,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
seq_indent(seq, iter->depth+1);
seq_printf(seq, " /%d %s %s", li->plen,
rtn_scope(buf1, sizeof(buf1),
- fa->fa_scope),
+ fa->fa_info->fib_scope),
rtn_type(buf2, sizeof(buf2),
fa->fa_type));
if (fa->fa_tos)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index a91dc1611081..e5f8a71d3a2a 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -704,7 +704,7 @@ static void icmp_unreach(struct sk_buff *skb)
*/
/*
- * Check the other end isnt violating RFC 1122. Some routers send
+ * Check the other end isn't violating RFC 1122. Some routers send
* bogus responses to broadcast frames. If you see this message
* first check your netmask matches at both ends, if it does then
* get the other vendor to fix their kit.
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 6c0b7f4a3d7d..38f23e721b80 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -73,7 +73,7 @@ int inet_csk_bind_conflict(const struct sock *sk,
!sk2->sk_bound_dev_if ||
sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
if (!reuse || !sk2->sk_reuse ||
- ((1 << sk2->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) {
+ sk2->sk_state == TCP_LISTEN) {
const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
sk2_rcv_saddr == sk_rcv_saddr(sk))
@@ -122,8 +122,7 @@ again:
(tb->num_owners < smallest_size || smallest_size == -1)) {
smallest_size = tb->num_owners;
smallest_rover = rover;
- if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 &&
- !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) {
+ if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) {
spin_unlock(&head->lock);
snum = smallest_rover;
goto have_snum;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index dd1b20eca1a2..9df4e635fb5f 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -354,7 +354,8 @@ static void inetpeer_free_rcu(struct rcu_head *head)
}
/* May be called with local BH enabled. */
-static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
+static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
+ struct inet_peer __rcu **stack[PEER_MAXDEPTH])
{
int do_free;
@@ -368,7 +369,6 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
* We use refcnt=-1 to alert lockless readers this entry is deleted.
*/
if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
- struct inet_peer __rcu **stack[PEER_MAXDEPTH];
struct inet_peer __rcu ***stackptr, ***delp;
if (lookup(&p->daddr, stack, base) != p)
BUG();
@@ -422,7 +422,7 @@ static struct inet_peer_base *peer_to_base(struct inet_peer *p)
}
/* May be called with local BH enabled. */
-static int cleanup_once(unsigned long ttl)
+static int cleanup_once(unsigned long ttl, struct inet_peer __rcu **stack[PEER_MAXDEPTH])
{
struct inet_peer *p = NULL;
@@ -454,7 +454,7 @@ static int cleanup_once(unsigned long ttl)
* happen because of entry limits in route cache. */
return -1;
- unlink_from_pool(p, peer_to_base(p));
+ unlink_from_pool(p, peer_to_base(p), stack);
return 0;
}
@@ -524,7 +524,7 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
if (base->total >= inet_peer_threshold)
/* Remove one less-recently-used entry. */
- cleanup_once(0);
+ cleanup_once(0, stack);
return p;
}
@@ -540,6 +540,7 @@ static void peer_check_expire(unsigned long dummy)
{
unsigned long now = jiffies;
int ttl, total;
+ struct inet_peer __rcu **stack[PEER_MAXDEPTH];
total = compute_total();
if (total >= inet_peer_threshold)
@@ -548,7 +549,7 @@ static void peer_check_expire(unsigned long dummy)
ttl = inet_peer_maxttl
- (inet_peer_maxttl - inet_peer_minttl) / HZ *
total / inet_peer_threshold * HZ;
- while (!cleanup_once(ttl)) {
+ while (!cleanup_once(ttl, stack)) {
if (jiffies != now)
break;
}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index a1151b8adf3c..b1d282f11be7 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -223,31 +223,30 @@ static void ip_expire(unsigned long arg)
if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
struct sk_buff *head = qp->q.fragments;
+ const struct iphdr *iph;
+ int err;
rcu_read_lock();
head->dev = dev_get_by_index_rcu(net, qp->iif);
if (!head->dev)
goto out_rcu_unlock;
+ /* skb dst is stale, drop it, and perform route lookup again */
+ skb_dst_drop(head);
+ iph = ip_hdr(head);
+ err = ip_route_input_noref(head, iph->daddr, iph->saddr,
+ iph->tos, head->dev);
+ if (err)
+ goto out_rcu_unlock;
+
/*
- * Only search router table for the head fragment,
- * when defraging timeout at PRE_ROUTING HOOK.
+ * Only an end host needs to send an ICMP
+ * "Fragment Reassembly Timeout" message, per RFC792.
*/
- if (qp->user == IP_DEFRAG_CONNTRACK_IN && !skb_dst(head)) {
- const struct iphdr *iph = ip_hdr(head);
- int err = ip_route_input(head, iph->daddr, iph->saddr,
- iph->tos, head->dev);
- if (unlikely(err))
- goto out_rcu_unlock;
-
- /*
- * Only an end host needs to send an ICMP
- * "Fragment Reassembly Timeout" message, per RFC792.
- */
- if (skb_rtable(head)->rt_type != RTN_LOCAL)
- goto out_rcu_unlock;
+ if (qp->user == IP_DEFRAG_CONNTRACK_IN &&
+ skb_rtable(head)->rt_type != RTN_LOCAL)
+ goto out_rcu_unlock;
- }
/* Send an ICMP "Fragment Reassembly Timeout" message. */
icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 1906fa35860c..2391b24e8251 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -140,11 +140,11 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
} else {
dopt->ts_needtime = 0;
- if (soffset + 8 <= optlen) {
+ if (soffset + 7 <= optlen) {
__be32 addr;
- memcpy(&addr, sptr+soffset-1, 4);
- if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_LOCAL) {
+ memcpy(&addr, dptr+soffset-1, 4);
+ if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_UNICAST) {
dopt->ts_needtime = 1;
soffset += 8;
}
@@ -329,7 +329,7 @@ int ip_options_compile(struct net *net,
pp_ptr = optptr + 2;
goto error;
}
- if (skb) {
+ if (rt) {
memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
opt->is_changed = 1;
}
@@ -371,7 +371,7 @@ int ip_options_compile(struct net *net,
goto error;
}
opt->ts = optptr - iph;
- if (skb) {
+ if (rt) {
memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
timeptr = (__be32*)&optptr[optptr[2]+3];
}
@@ -603,7 +603,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
unsigned long orefdst;
int err;
- if (!opt->srr)
+ if (!opt->srr || !rt)
return 0;
if (skb->pkt_type != PACKET_HOST)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 67f241b97649..459c011b1d4a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -603,7 +603,7 @@ slow_path:
/* IF: it doesn't fit, use 'mtu' - the data space left */
if (len > mtu)
len = mtu;
- /* IF: we are not sending upto and including the packet end
+ /* IF: we are not sending up to and including the packet end
then align the next start on an eight byte boundary */
if (len < left) {
len &= ~7;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 2b097752426b..cbff2ecccf3d 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1444,7 +1444,7 @@ static int __init ip_auto_config(void)
root_server_addr = addr;
/*
- * Use defaults whereever applicable.
+ * Use defaults wherever applicable.
*/
if (ic_defaults() < 0)
return -1;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index f3c0b549b8e1..4614babdc45f 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -221,9 +221,10 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
return csum;
}
-static int nf_ip_route(struct dst_entry **dst, struct flowi *fl)
+static int nf_ip_route(struct net *net, struct dst_entry **dst,
+ struct flowi *fl, bool strict __always_unused)
{
- struct rtable *rt = ip_route_output_key(&init_net, &fl->u.ip4);
+ struct rtable *rt = ip_route_output_key(net, &fl->u.ip4);
if (IS_ERR(rt))
return PTR_ERR(rt);
*dst = &rt->dst;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 4b5d457c2d76..89bc7e66d598 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -76,7 +76,7 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
}
/*
- * Unfortunatly, _b and _mask are not aligned to an int (or long int)
+ * Unfortunately, _b and _mask are not aligned to an int (or long int)
* Some arches dont care, unrolling the loop is a win on them.
* For other arches, we only have a 16bit alignement.
*/
@@ -1874,7 +1874,7 @@ static int __init arp_tables_init(void)
if (ret < 0)
goto err1;
- /* Noone else will be downing sem now, so we won't sleep */
+ /* No one else will be downing sem now, so we won't sleep */
ret = xt_register_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg));
if (ret < 0)
goto err2;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index b09ed0d080f9..704915028009 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -387,7 +387,7 @@ ipt_do_table(struct sk_buff *skb,
verdict = (unsigned)(-v) - 1;
break;
}
- if (*stackptr == 0) {
+ if (*stackptr <= origptr) {
e = get_entry(table_base,
private->underflow[hook]);
pr_debug("Underflow (this is normal) "
@@ -427,10 +427,10 @@ ipt_do_table(struct sk_buff *skb,
/* Verdict */
break;
} while (!acpar.hotdrop);
- xt_info_rdunlock_bh();
pr_debug("Exiting %s; resetting sp from %u to %u\n",
__func__, *stackptr, origptr);
*stackptr = origptr;
+ xt_info_rdunlock_bh();
#ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT;
#else
@@ -2233,7 +2233,7 @@ static int __init ip_tables_init(void)
if (ret < 0)
goto err1;
- /* Noone else will be downing sem now, so we won't sleep */
+ /* No one else will be downing sem now, so we won't sleep */
ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
if (ret < 0)
goto err2;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 403ca57f6011..d609ac3cb9a4 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -664,8 +664,11 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
char buffer[PROC_WRITELEN+1];
unsigned long nodenum;
- if (copy_from_user(buffer, input, PROC_WRITELEN))
+ if (size > PROC_WRITELEN)
+ return -EIO;
+ if (copy_from_user(buffer, input, size))
return -EFAULT;
+ buffer[size] = 0;
if (*buffer == '+') {
nodenum = simple_strtoul(buffer+1, NULL, 10);
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 21bcf471b25a..9c71b2755ce3 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -521,7 +521,7 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
}
EXPORT_SYMBOL(nf_nat_protocol_register);
-/* Noone stores the protocol anywhere; simply delete it. */
+/* No one stores the protocol anywhere; simply delete it. */
void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
{
spin_lock_bh(&nf_nat_lock);
@@ -532,7 +532,7 @@ void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
}
EXPORT_SYMBOL(nf_nat_protocol_unregister);
-/* Noone using conntrack by the time this called. */
+/* No one using conntrack by the time this called. */
static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index e837ffd3edc3..bceaec42c37d 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -569,6 +569,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
+ rt = NULL;
goto done;
}
}
@@ -621,7 +622,7 @@ do_confirm:
static void raw_close(struct sock *sk, long timeout)
{
/*
- * Raw sockets may have direct kernel refereneces. Kill them.
+ * Raw sockets may have direct kernel references. Kill them.
*/
ip_ra_control(sk, 0, NULL);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 870b5182ddd8..99e6e4bb1c72 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -821,7 +821,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
}
/*
- * Pertubation of rt_genid by a small quantity [1..256]
+ * Perturbation of rt_genid by a small quantity [1..256]
* Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
* many times (2^24) without giving recent rt_genid.
* Jenkins hash is strong enough that litle changes of rt_genid are OK.
@@ -1191,7 +1191,7 @@ restart:
#endif
/*
* Since lookup is lockfree, we must make sure
- * previous writes to rt are comitted to memory
+ * previous writes to rt are committed to memory
* before making rt visible to other CPUS.
*/
rcu_assign_pointer(rt_hash_table[hash].chain, rt);
@@ -1593,8 +1593,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
rt->rt_peer_genid = rt_peer_genid();
}
check_peer_pmtu(dst, peer);
-
- inet_putpeer(peer);
}
}
@@ -1720,7 +1718,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
rcu_read_lock();
if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
- src = FIB_RES_PREFSRC(res);
+ src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
else
src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
RT_SCOPE_UNIVERSE);
@@ -1893,6 +1891,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
#ifdef CONFIG_IP_ROUTE_CLASSID
rth->dst.tclassid = itag;
#endif
+ rth->rt_route_iif = dev->ifindex;
rth->rt_iif = dev->ifindex;
rth->dst.dev = init_net.loopback_dev;
dev_hold(rth->dst.dev);
@@ -2028,6 +2027,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_key_src = saddr;
rth->rt_src = saddr;
rth->rt_gateway = daddr;
+ rth->rt_route_iif = in_dev->dev->ifindex;
rth->rt_iif = in_dev->dev->ifindex;
rth->dst.dev = (out_dev)->dev;
dev_hold(rth->dst.dev);
@@ -2204,6 +2204,7 @@ local_input:
#ifdef CONFIG_IP_ROUTE_CLASSID
rth->dst.tclassid = itag;
#endif
+ rth->rt_route_iif = dev->ifindex;
rth->rt_iif = dev->ifindex;
rth->dst.dev = net->loopback_dev;
dev_hold(rth->dst.dev);
@@ -2403,7 +2404,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
rth->rt_mark = oldflp4->flowi4_mark;
rth->rt_dst = fl4->daddr;
rth->rt_src = fl4->saddr;
- rth->rt_iif = 0;
+ rth->rt_route_iif = 0;
+ rth->rt_iif = oldflp4->flowi4_oif ? : dev_out->ifindex;
/* get references to the devices that are to be hold by the routing
cache entry */
rth->dst.dev = dev_out;
@@ -2617,7 +2619,7 @@ static struct rtable *ip_route_output_slow(struct net *net,
fib_select_default(&res);
if (!fl4.saddr)
- fl4.saddr = FIB_RES_PREFSRC(res);
+ fl4.saddr = FIB_RES_PREFSRC(net, res);
dev_out = FIB_RES_DEV(res);
fl4.flowi4_oif = dev_out->ifindex;
@@ -2688,6 +2690,12 @@ static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
{
}
+static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
+ unsigned long old)
+{
+ return NULL;
+}
+
static struct dst_ops ipv4_dst_blackhole_ops = {
.family = AF_INET,
.protocol = cpu_to_be16(ETH_P_IP),
@@ -2696,6 +2704,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
.default_mtu = ipv4_blackhole_default_mtu,
.default_advmss = ipv4_default_advmss,
.update_pmtu = ipv4_rt_blackhole_update_pmtu,
+ .cow_metrics = ipv4_rt_blackhole_cow_metrics,
};
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
@@ -2718,6 +2727,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_key_dst = ort->rt_key_dst;
rt->rt_key_src = ort->rt_key_src;
rt->rt_tos = ort->rt_tos;
+ rt->rt_route_iif = ort->rt_route_iif;
rt->rt_iif = ort->rt_iif;
rt->rt_oif = ort->rt_oif;
rt->rt_mark = ort->rt_mark;
@@ -2727,7 +2737,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_type = ort->rt_type;
rt->rt_dst = ort->rt_dst;
rt->rt_src = ort->rt_src;
- rt->rt_iif = ort->rt_iif;
rt->rt_gateway = ort->rt_gateway;
rt->rt_spec_dst = ort->rt_spec_dst;
rt->peer = ort->peer;
@@ -3221,6 +3230,8 @@ static __net_init int rt_genid_init(struct net *net)
{
get_random_bytes(&net->ipv4.rt_genid,
sizeof(net->ipv4.rt_genid));
+ get_random_bytes(&net->ipv4.dev_addr_genid,
+ sizeof(net->ipv4.dev_addr_genid));
return 0;
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 1a456652086b..321e6e84dbcc 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -311,7 +311,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_do_large_bitmap,
},
-#ifdef CONFIG_IP_MULTICAST
{
.procname = "igmp_max_memberships",
.data = &sysctl_igmp_max_memberships,
@@ -319,8 +318,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
-
-#endif
{
.procname = "igmp_max_msf",
.data = &sysctl_igmp_max_msf,
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 34340c9c95fa..f376b05cca81 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -93,6 +93,7 @@ struct bictcp {
u32 ack_cnt; /* number of acks */
u32 tcp_cwnd; /* estimated tcp cwnd */
#define ACK_RATIO_SHIFT 4
+#define ACK_RATIO_LIMIT (32u << ACK_RATIO_SHIFT)
u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */
u8 sample_cnt; /* number of samples to decide curr_rtt */
u8 found; /* the exit point is found? */
@@ -398,8 +399,12 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
u32 delay;
if (icsk->icsk_ca_state == TCP_CA_Open) {
- cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
- ca->delayed_ack += cnt;
+ u32 ratio = ca->delayed_ack;
+
+ ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT;
+ ratio += cnt;
+
+ ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT);
}
/* Some calls are for duplicates without timetamps */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index da782e7ab16d..bef9f04c22ba 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2659,7 +2659,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
#define DBGUNDO(x...) do { } while (0)
#endif
-static void tcp_undo_cwr(struct sock *sk, const int undo)
+static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2671,14 +2671,13 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
else
tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
- if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
+ if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) {
tp->snd_ssthresh = tp->prior_ssthresh;
TCP_ECN_withdraw_cwr(tp);
}
} else {
tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
}
- tcp_moderate_cwnd(tp);
tp->snd_cwnd_stamp = tcp_time_stamp;
}
@@ -2699,7 +2698,7 @@ static int tcp_try_undo_recovery(struct sock *sk)
* or our original transmission succeeded.
*/
DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
- tcp_undo_cwr(sk, 1);
+ tcp_undo_cwr(sk, true);
if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
mib_idx = LINUX_MIB_TCPLOSSUNDO;
else
@@ -2726,7 +2725,7 @@ static void tcp_try_undo_dsack(struct sock *sk)
if (tp->undo_marker && !tp->undo_retrans) {
DBGUNDO(sk, "D-SACK");
- tcp_undo_cwr(sk, 1);
+ tcp_undo_cwr(sk, true);
tp->undo_marker = 0;
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
}
@@ -2779,7 +2778,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
DBGUNDO(sk, "Hoe");
- tcp_undo_cwr(sk, 0);
+ tcp_undo_cwr(sk, false);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
/* So... Do not make Hoe's retransmit yet.
@@ -2808,7 +2807,7 @@ static int tcp_try_undo_loss(struct sock *sk)
DBGUNDO(sk, "partial loss");
tp->lost_out = 0;
- tcp_undo_cwr(sk, 1);
+ tcp_undo_cwr(sk, true);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
inet_csk(sk)->icsk_retransmits = 0;
tp->undo_marker = 0;
@@ -2822,8 +2821,11 @@ static int tcp_try_undo_loss(struct sock *sk)
static inline void tcp_complete_cwr(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ /* Do not moderate cwnd if it's already undone in cwr or recovery */
+ if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) {
+ tp->snd_cwnd = tp->snd_ssthresh;
+ tp->snd_cwnd_stamp = tcp_time_stamp;
+ }
tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
}
@@ -3494,7 +3496,7 @@ static void tcp_undo_spur_to_response(struct sock *sk, int flag)
if (flag & FLAG_ECE)
tcp_ratehalving_spur_to_response(sk);
else
- tcp_undo_cwr(sk, 1);
+ tcp_undo_cwr(sk, true);
}
/* F-RTO spurious RTO detection algorithm (RFC4138)
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 656d431c99ad..72f7218b03f5 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -12,7 +12,7 @@
* within cong_avoid.
* o Error correcting in remote HZ, therefore remote HZ will be keeped
* on checking and updating.
- * o Handling calculation of One-Way-Delay (OWD) within rtt_sample, sicne
+ * o Handling calculation of One-Way-Delay (OWD) within rtt_sample, since
* OWD have a similar meaning as RTT. Also correct the buggy formular.
* o Handle reaction for Early Congestion Indication (ECI) within
* pkts_acked, as mentioned within pseudo code.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index dfa5beb0c1c8..17388c7f49c4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -73,7 +73,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
tcp_advance_send_head(sk, skb);
tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
- /* Don't override Nagle indefinately with F-RTO */
+ /* Don't override Nagle indefinitely with F-RTO */
if (tp->frto_counter == 2)
tp->frto_counter = 3;
@@ -1003,7 +1003,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
int nlen;
u8 flags;
- BUG_ON(len > skb->len);
+ if (WARN_ON(len > skb->len))
+ return -EINVAL;
nsize = skb_headlen(skb) - len;
if (nsize < 0)
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index dc7f43179c9a..05c3b6f0e8e1 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -20,7 +20,7 @@
#define TCP_YEAH_DELTA 3 //log minimum fraction of cwnd to be removed on loss
#define TCP_YEAH_EPSILON 1 //log maximum fraction to be removed on early decongestion
#define TCP_YEAH_PHY 8 //lin maximum delta from base
-#define TCP_YEAH_RHO 16 //lin minumum number of consecutive rtt to consider competition on loss
+#define TCP_YEAH_RHO 16 //lin minimum number of consecutive rtt to consider competition on loss
#define TCP_YEAH_ZETA 50 //lin minimum number of state switchs to reset reno_count
#define TCP_SCALABLE_AI_CNT 100U
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 588f47af5faf..f87a8eb76f3b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -189,7 +189,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
* @sk: socket struct in question
* @snum: port number to look up
* @saddr_comp: AF-dependent comparison of bound local IP addresses
- * @hash2_nulladdr: AF-dependant hash value in secondary hash chains,
+ * @hash2_nulladdr: AF-dependent hash value in secondary hash chains,
* with NULL address
*/
int udp_lib_get_port(struct sock *sk, unsigned short snum,
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 571aa96a175c..2d51840e53a1 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -69,7 +69,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
}
EXPORT_SYMBOL(xfrm4_prepare_output);
-static int xfrm4_output_finish(struct sk_buff *skb)
+int xfrm4_output_finish(struct sk_buff *skb)
{
#ifdef CONFIG_NETFILTER
if (!skb_dst(skb)->xfrm) {
@@ -86,7 +86,11 @@ static int xfrm4_output_finish(struct sk_buff *skb)
int xfrm4_output(struct sk_buff *skb)
{
+ struct dst_entry *dst = skb_dst(skb);
+ struct xfrm_state *x = dst->xfrm;
+
return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb,
- NULL, skb_dst(skb)->dev, xfrm4_output_finish,
+ NULL, dst->dev,
+ x->outer_mode->afinfo->output_finish,
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 13e0e7f659ff..d20a05e970d8 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -74,6 +74,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
rt->rt_key_dst = fl4->daddr;
rt->rt_key_src = fl4->saddr;
rt->rt_tos = fl4->flowi4_tos;
+ rt->rt_route_iif = fl4->flowi4_iif;
rt->rt_iif = fl4->flowi4_iif;
rt->rt_oif = fl4->flowi4_oif;
rt->rt_mark = fl4->flowi4_mark;
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 1717c64628d1..805d63ef4340 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -78,6 +78,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = {
.init_tempsel = __xfrm4_init_tempsel,
.init_temprop = xfrm4_init_temprop,
.output = xfrm4_output,
+ .output_finish = xfrm4_output_finish,
.extract_input = xfrm4_extract_input,
.extract_output = xfrm4_extract_output,
.transport_finish = xfrm4_transport_finish,