summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/bpf/test_run.c4
-rw-r--r--net/bridge/br.c3
-rw-r--r--net/bridge/br_fdb.c11
-rw-r--r--net/bridge/br_if.c2
-rw-r--r--net/bridge/br_private.h2
-rw-r--r--net/bridge/netfilter/nf_conntrack_bridge.c6
-rw-r--r--net/core/link_watch.c5
-rw-r--r--net/core/page_pool.c10
-rw-r--r--net/dccp/dccp.h6
-rw-r--r--net/dsa/slave.c2
-rw-r--r--net/ieee802154/socket.c7
-rw-r--r--net/ipv4/igmp.c21
-rw-r--r--net/ipv4/tcp_bbr.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c9
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c10
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c11
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c10
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c16
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c11
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c16
-rw-r--r--net/netfilter/nf_conntrack_core.c71
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c1
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c1
-rw-r--r--net/netfilter/nf_conntrack_standalone.c16
-rw-r--r--net/netfilter/nf_flow_table_core.c11
-rw-r--r--net/netfilter/nfnetlink_hook.c24
-rw-r--r--net/openvswitch/flow.c13
-rw-r--r--net/sched/act_mirred.c3
-rw-r--r--net/smc/af_smc.c2
-rw-r--r--net/smc/smc_core.c4
-rw-r--r--net/smc/smc_core.h4
-rw-r--r--net/smc/smc_llc.c10
-rw-r--r--net/smc/smc_tx.c18
-rw-r--r--net/smc/smc_wr.c10
-rw-r--r--net/tipc/link.c6
-rw-r--r--net/vmw_vsock/virtio_transport.c7
39 files changed, 244 insertions, 130 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 1cc75c811e24..caa16bf30fb5 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -7,6 +7,7 @@
#include <linux/vmalloc.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
+#include <linux/rcupdate_trace.h>
#include <linux/sched/signal.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
@@ -951,7 +952,10 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog,
goto out;
}
}
+
+ rcu_read_lock_trace();
retval = bpf_prog_run_pin_on_cpu(prog, ctx);
+ rcu_read_unlock_trace();
if (copy_to_user(&uattr->test.retval, &retval, sizeof(u32))) {
err = -EFAULT;
diff --git a/net/bridge/br.c b/net/bridge/br.c
index bbab9984f24e..ef743f94254d 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -166,8 +166,7 @@ static int br_switchdev_event(struct notifier_block *unused,
case SWITCHDEV_FDB_ADD_TO_BRIDGE:
fdb_info = ptr;
err = br_fdb_external_learn_add(br, p, fdb_info->addr,
- fdb_info->vid,
- fdb_info->is_local, false);
+ fdb_info->vid, false);
if (err) {
err = notifier_from_errno(err);
break;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 835cec1e5a03..5dee30966ed3 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -1044,10 +1044,7 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
"FDB entry towards bridge must be permanent");
return -EINVAL;
}
-
- err = br_fdb_external_learn_add(br, p, addr, vid,
- ndm->ndm_state & NUD_PERMANENT,
- true);
+ err = br_fdb_external_learn_add(br, p, addr, vid, true);
} else {
spin_lock_bh(&br->hash_lock);
err = fdb_add_entry(br, p, addr, ndm, nlh_flags, vid, nfea_tb);
@@ -1275,7 +1272,7 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p)
}
int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
- const unsigned char *addr, u16 vid, bool is_local,
+ const unsigned char *addr, u16 vid,
bool swdev_notify)
{
struct net_bridge_fdb_entry *fdb;
@@ -1293,7 +1290,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
if (swdev_notify)
flags |= BIT(BR_FDB_ADDED_BY_USER);
- if (is_local)
+ if (!p)
flags |= BIT(BR_FDB_LOCAL);
fdb = fdb_create(br, p, addr, vid, flags);
@@ -1322,7 +1319,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
if (swdev_notify)
set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
- if (is_local)
+ if (!p)
set_bit(BR_FDB_LOCAL, &fdb->flags);
if (modified)
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 6e4a32354a13..14cd6ef96111 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -616,6 +616,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
err = dev_set_allmulti(dev, 1);
if (err) {
+ br_multicast_del_port(p);
kfree(p); /* kobject not yet init'd, manually free */
goto err1;
}
@@ -729,6 +730,7 @@ err4:
err3:
sysfs_remove_link(br->ifobj, p->dev->name);
err2:
+ br_multicast_del_port(p);
kobject_put(&p->kobj);
dev_set_allmulti(dev, -1);
err1:
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index aa64d8d63ca3..2b48b204205e 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -711,7 +711,7 @@ int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev,
int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p);
void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p);
int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
- const unsigned char *addr, u16 vid, bool is_local,
+ const unsigned char *addr, u16 vid,
bool swdev_notify);
int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
const unsigned char *addr, u16 vid,
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index 8d033a75a766..fdbed3158555 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -88,6 +88,12 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
skb = ip_fraglist_next(&iter);
}
+
+ if (!err)
+ return 0;
+
+ kfree_skb_list(iter.frag);
+
return err;
}
slow_path:
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 75431ca9300f..1a455847da54 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -158,7 +158,7 @@ static void linkwatch_do_dev(struct net_device *dev)
clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
rfc2863_policy(dev);
- if (dev->flags & IFF_UP && netif_device_present(dev)) {
+ if (dev->flags & IFF_UP) {
if (netif_carrier_ok(dev))
dev_activate(dev);
else
@@ -204,7 +204,8 @@ static void __linkwatch_run_queue(int urgent_only)
dev = list_first_entry(&wrk, struct net_device, link_watch_list);
list_del_init(&dev->link_watch_list);
- if (urgent_only && !linkwatch_urgent_event(dev)) {
+ if (!netif_device_present(dev) ||
+ (urgent_only && !linkwatch_urgent_event(dev))) {
list_add_tail(&dev->link_watch_list, &lweventlist);
continue;
}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 5e4eb45b139c..8ab7b402244c 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -634,7 +634,15 @@ bool page_pool_return_skb_page(struct page *page)
struct page_pool *pp;
page = compound_head(page);
- if (unlikely(page->pp_magic != PP_SIGNATURE))
+
+ /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
+ * in order to preserve any existing bits, such as bit 0 for the
+ * head page of compound page and bit 1 for pfmemalloc page, so
+ * mask those bits for freeing side when doing below checking,
+ * and page_is_pfmemalloc() is checked in __page_pool_put_page()
+ * to avoid recycling the pfmemalloc page.
+ */
+ if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
return false;
pp = page->pp;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 9cc9d1ee6cdb..c5c1d2b8045e 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -41,9 +41,9 @@ extern bool dccp_debug;
#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a)
#define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a)
#else
-#define dccp_pr_debug(format, a...)
-#define dccp_pr_debug_cat(format, a...)
-#define dccp_debug(format, a...)
+#define dccp_pr_debug(format, a...) do {} while (0)
+#define dccp_pr_debug_cat(format, a...) do {} while (0)
+#define dccp_debug(format, a...) do {} while (0)
#endif
extern struct inet_hashinfo dccp_hashinfo;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 532085da8d8f..23be8e01026b 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2291,8 +2291,8 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
static void
dsa_fdb_offload_notify(struct dsa_switchdev_event_work *switchdev_work)
{
+ struct switchdev_notifier_fdb_info info = {};
struct dsa_switch *ds = switchdev_work->ds;
- struct switchdev_notifier_fdb_info info;
struct dsa_port *dp;
if (!dsa_is_user_port(ds, switchdev_work->port))
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index a45a0401adc5..c25f7617770c 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -984,6 +984,11 @@ static const struct proto_ops ieee802154_dgram_ops = {
.sendpage = sock_no_sendpage,
};
+static void ieee802154_sock_destruct(struct sock *sk)
+{
+ skb_queue_purge(&sk->sk_receive_queue);
+}
+
/* Create a socket. Initialise the socket, blank the addresses
* set the state.
*/
@@ -1024,7 +1029,7 @@ static int ieee802154_create(struct net *net, struct socket *sock,
sock->ops = ops;
sock_init_data(sock, sk);
- /* FIXME: sk->sk_destruct */
+ sk->sk_destruct = ieee802154_sock_destruct;
sk->sk_family = PF_IEEE802154;
/* Checksums on by default */
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6b3c558a4f23..00576bae183d 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -803,10 +803,17 @@ static void igmp_gq_timer_expire(struct timer_list *t)
static void igmp_ifc_timer_expire(struct timer_list *t)
{
struct in_device *in_dev = from_timer(in_dev, t, mr_ifc_timer);
+ u32 mr_ifc_count;
igmpv3_send_cr(in_dev);
- if (in_dev->mr_ifc_count) {
- in_dev->mr_ifc_count--;
+restart:
+ mr_ifc_count = READ_ONCE(in_dev->mr_ifc_count);
+
+ if (mr_ifc_count) {
+ if (cmpxchg(&in_dev->mr_ifc_count,
+ mr_ifc_count,
+ mr_ifc_count - 1) != mr_ifc_count)
+ goto restart;
igmp_ifc_start_timer(in_dev,
unsolicited_report_interval(in_dev));
}
@@ -818,7 +825,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
struct net *net = dev_net(in_dev->dev);
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
return;
- in_dev->mr_ifc_count = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv);
igmp_ifc_start_timer(in_dev, 1);
}
@@ -957,7 +964,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
in_dev->mr_qri;
}
/* cancel the interface change timer */
- in_dev->mr_ifc_count = 0;
+ WRITE_ONCE(in_dev->mr_ifc_count, 0);
if (del_timer(&in_dev->mr_ifc_timer))
__in_dev_put(in_dev);
/* clear deleted report items */
@@ -1724,7 +1731,7 @@ void ip_mc_down(struct in_device *in_dev)
igmp_group_dropped(pmc);
#ifdef CONFIG_IP_MULTICAST
- in_dev->mr_ifc_count = 0;
+ WRITE_ONCE(in_dev->mr_ifc_count, 0);
if (del_timer(&in_dev->mr_ifc_timer))
__in_dev_put(in_dev);
in_dev->mr_gq_running = 0;
@@ -1941,7 +1948,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
pmc->sfmode = MCAST_INCLUDE;
#ifdef CONFIG_IP_MULTICAST
pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
- in_dev->mr_ifc_count = pmc->crcount;
+ WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
igmp_ifc_event(pmc->interface);
@@ -2120,7 +2127,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
/* else no filters; keep old mode for reports */
pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
- in_dev->mr_ifc_count = pmc->crcount;
+ WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
igmp_ifc_event(in_dev);
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 6ea3dc2e4219..6274462b86b4 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -1041,7 +1041,7 @@ static void bbr_init(struct sock *sk)
bbr->prior_cwnd = 0;
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
bbr->rtt_cnt = 0;
- bbr->next_rtt_delivered = 0;
+ bbr->next_rtt_delivered = tp->delivered;
bbr->prev_ca_state = TCP_CA_Open;
bbr->packet_conservation = 0;
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index d1bef23fd4f5..dd30c03d5a23 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -132,8 +132,11 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret)
return ret;
- if (ip > ip_to)
+ if (ip > ip_to) {
+ if (ip_to == 0)
+ return -IPSET_ERR_HASH_ELEM;
swap(ip, ip_to);
+ }
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
@@ -144,6 +147,10 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
+ /* 64bit division is not allowed on 32bit */
+ if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried) {
ip = ntohl(h->next.ip);
e.ip = htonl(ip);
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 18346d18aa16..153de3457423 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -121,6 +121,8 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK]));
e.mark &= h->markmask;
+ if (e.mark == 0 && e.ip == 0)
+ return -IPSET_ERR_HASH_ELEM;
if (adt == IPSET_TEST ||
!(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) {
@@ -133,8 +135,11 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret)
return ret;
- if (ip > ip_to)
+ if (ip > ip_to) {
+ if (e.mark == 0 && ip_to == 0)
+ return -IPSET_ERR_HASH_ELEM;
swap(ip, ip_to);
+ }
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
@@ -143,6 +148,9 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
ip_set_mask_from_to(ip, ip_to, cidr);
}
+ if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index e1ca11196515..7303138e46be 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -173,6 +173,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
+ if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index ab179e064597..334fb1ad0e86 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -180,6 +180,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
+ if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 8f075b44cf64..7df94f437f60 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -253,6 +253,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
+ if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
ip2_to = ip2_from;
if (tb[IPSET_ATTR_IP2_TO]) {
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index c1a11f041ac6..1422739d9aa2 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -140,7 +140,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net4_elem e = { .cidr = HOST_MASK };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0;
+ u32 ip = 0, ip_to = 0, ipn, n = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -188,6 +188,15 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ip + UINT_MAX == ip_to)
return -IPSET_ERR_HASH_RANGE;
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
+ n++;
+ } while (ipn++ < ip_to);
+
+ if (n > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried)
ip = ntohl(h->next.ip);
do {
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index ddd51c2e1cb3..9810f5bf63f5 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0;
+ u32 ip = 0, ip_to = 0, ipn, n = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -256,6 +256,14 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip, ip_to, e.cidr);
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
+ n++;
+ } while (ipn++ < ip_to);
+
+ if (n > IPSET_MAX_RANGE)
+ return -ERANGE;
if (retried)
ip = ntohl(h->next.ip);
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index 6532f0505e66..3d09eefe998a 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -168,7 +168,8 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
struct hash_netnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0;
- u32 ip2 = 0, ip2_from = 0, ip2_to = 0;
+ u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn;
+ u64 n = 0, m = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -244,6 +245,19 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
+ n++;
+ } while (ipn++ < ip_to);
+ ipn = ip2_from;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
+ m++;
+ } while (ipn++ < ip2_to);
+
+ if (n*m > IPSET_MAX_RANGE)
+ return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip[0]);
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index ec1564a1cb5a..09cf72eb37f8 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -158,7 +158,8 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 port, port_to, p = 0, ip = 0, ip_to = 0;
+ u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn;
+ u64 n = 0;
bool with_ports = false;
u8 cidr;
int ret;
@@ -235,6 +236,14 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr);
+ n++;
+ } while (ipn++ < ip_to);
+
+ if (n*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip);
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 0e91d1e82f1c..19bcdb3141f6 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -182,7 +182,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
struct hash_netportnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, p = 0, port, port_to;
- u32 ip2_from = 0, ip2_to = 0, ip2;
+ u32 ip2_from = 0, ip2_to = 0, ip2, ipn;
+ u64 n = 0, m = 0;
bool with_ports = false;
int ret;
@@ -284,6 +285,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
+ n++;
+ } while (ipn++ < ip_to);
+ ipn = ip2_from;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
+ m++;
+ } while (ipn++ < ip2_to);
+
+ if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip[0]);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 5c03e5106751..d31dbccbe7bd 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -66,22 +66,17 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash);
struct conntrack_gc_work {
struct delayed_work dwork;
- u32 last_bucket;
+ u32 next_bucket;
bool exiting;
bool early_drop;
- long next_gc_run;
};
static __read_mostly struct kmem_cache *nf_conntrack_cachep;
static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
-/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
-#define GC_MAX_BUCKETS_DIV 128u
-/* upper bound of full table scan */
-#define GC_MAX_SCAN_JIFFIES (16u * HZ)
-/* desired ratio of entries found to be expired */
-#define GC_EVICT_RATIO 50u
+#define GC_SCAN_INTERVAL (120u * HZ)
+#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10)
static struct conntrack_gc_work conntrack_gc_work;
@@ -1363,17 +1358,13 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
static void gc_worker(struct work_struct *work)
{
- unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
- unsigned int i, goal, buckets = 0, expired_count = 0;
- unsigned int nf_conntrack_max95 = 0;
+ unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION;
+ unsigned int i, hashsz, nf_conntrack_max95 = 0;
+ unsigned long next_run = GC_SCAN_INTERVAL;
struct conntrack_gc_work *gc_work;
- unsigned int ratio, scanned = 0;
- unsigned long next_run;
-
gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
- goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
- i = gc_work->last_bucket;
+ i = gc_work->next_bucket;
if (gc_work->early_drop)
nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
@@ -1381,15 +1372,15 @@ static void gc_worker(struct work_struct *work)
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
struct hlist_nulls_node *n;
- unsigned int hashsz;
struct nf_conn *tmp;
- i++;
rcu_read_lock();
nf_conntrack_get_ht(&ct_hash, &hashsz);
- if (i >= hashsz)
- i = 0;
+ if (i >= hashsz) {
+ rcu_read_unlock();
+ break;
+ }
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
struct nf_conntrack_net *cnet;
@@ -1397,7 +1388,6 @@ static void gc_worker(struct work_struct *work)
tmp = nf_ct_tuplehash_to_ctrack(h);
- scanned++;
if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
nf_ct_offload_timeout(tmp);
continue;
@@ -1405,7 +1395,6 @@ static void gc_worker(struct work_struct *work)
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
- expired_count++;
continue;
}
@@ -1438,7 +1427,14 @@ static void gc_worker(struct work_struct *work)
*/
rcu_read_unlock();
cond_resched();
- } while (++buckets < goal);
+ i++;
+
+ if (time_after(jiffies, end_time) && i < hashsz) {
+ gc_work->next_bucket = i;
+ next_run = 0;
+ break;
+ }
+ } while (i < hashsz);
if (gc_work->exiting)
return;
@@ -1449,40 +1445,17 @@ static void gc_worker(struct work_struct *work)
*
* This worker is only here to reap expired entries when system went
* idle after a busy period.
- *
- * The heuristics below are supposed to balance conflicting goals:
- *
- * 1. Minimize time until we notice a stale entry
- * 2. Maximize scan intervals to not waste cycles
- *
- * Normally, expire ratio will be close to 0.
- *
- * As soon as a sizeable fraction of the entries have expired
- * increase scan frequency.
*/
- ratio = scanned ? expired_count * 100 / scanned : 0;
- if (ratio > GC_EVICT_RATIO) {
- gc_work->next_gc_run = min_interval;
- } else {
- unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV;
-
- BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0);
-
- gc_work->next_gc_run += min_interval;
- if (gc_work->next_gc_run > max)
- gc_work->next_gc_run = max;
+ if (next_run) {
+ gc_work->early_drop = false;
+ gc_work->next_bucket = 0;
}
-
- next_run = gc_work->next_gc_run;
- gc_work->last_bucket = i;
- gc_work->early_drop = false;
queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
}
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker);
- gc_work->next_gc_run = HZ;
gc_work->exiting = false;
}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 3259416f2ea4..af5115e127cf 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1478,7 +1478,6 @@ void nf_conntrack_tcp_init_net(struct net *net)
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
tn->offload_timeout = 30 * HZ;
- tn->offload_pickup = 120 * HZ;
#endif
}
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 698fee49e732..f8e3c0d2602f 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -271,7 +271,6 @@ void nf_conntrack_udp_init_net(struct net *net)
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
un->offload_timeout = 30 * HZ;
- un->offload_pickup = 30 * HZ;
#endif
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 214d9f9e499b..e84b499b7bfa 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -575,7 +575,6 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
- NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP,
#endif
NF_SYSCTL_CT_PROTO_TCP_LOOSE,
NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
@@ -585,7 +584,6 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
- NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP,
#endif
NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
@@ -776,12 +774,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP] = {
- .procname = "nf_flowtable_tcp_pickup",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
#endif
[NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
.procname = "nf_conntrack_tcp_loose",
@@ -832,12 +824,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP] = {
- .procname = "nf_flowtable_udp_pickup",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
#endif
[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
.procname = "nf_conntrack_icmp_timeout",
@@ -1018,7 +1004,6 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
- table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP].data = &tn->offload_pickup;
#endif
}
@@ -1111,7 +1096,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
- table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP].data = &un->offload_pickup;
#endif
nf_conntrack_standalone_init_tcp_sysctl(net, table);
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 551976e4284c..8788b519255e 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -183,7 +183,7 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
const struct nf_conntrack_l4proto *l4proto;
struct net *net = nf_ct_net(ct);
int l4num = nf_ct_protonum(ct);
- unsigned int timeout;
+ s32 timeout;
l4proto = nf_ct_l4proto_find(l4num);
if (!l4proto)
@@ -192,15 +192,20 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
if (l4num == IPPROTO_TCP) {
struct nf_tcp_net *tn = nf_tcp_pernet(net);
- timeout = tn->offload_pickup;
+ timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
+ timeout -= tn->offload_timeout;
} else if (l4num == IPPROTO_UDP) {
struct nf_udp_net *tn = nf_udp_pernet(net);
- timeout = tn->offload_pickup;
+ timeout = tn->timeouts[UDP_CT_REPLIED];
+ timeout -= tn->offload_timeout;
} else {
return;
}
+ if (timeout < 0)
+ timeout = 0;
+
if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
ct->timeout = nfct_time_stamp + timeout;
}
diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c
index 202f57d17bab..f554e2ea32ee 100644
--- a/net/netfilter/nfnetlink_hook.c
+++ b/net/netfilter/nfnetlink_hook.c
@@ -89,11 +89,15 @@ static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb,
if (!nest2)
goto cancel_nest;
- ret = nla_put_string(nlskb, NFTA_CHAIN_TABLE, chain->table->name);
+ ret = nla_put_string(nlskb, NFNLA_CHAIN_TABLE, chain->table->name);
if (ret)
goto cancel_nest;
- ret = nla_put_string(nlskb, NFTA_CHAIN_NAME, chain->name);
+ ret = nla_put_string(nlskb, NFNLA_CHAIN_NAME, chain->name);
+ if (ret)
+ goto cancel_nest;
+
+ ret = nla_put_u8(nlskb, NFNLA_CHAIN_FAMILY, chain->table->family);
if (ret)
goto cancel_nest;
@@ -109,18 +113,19 @@ cancel_nest:
static int nfnl_hook_dump_one(struct sk_buff *nlskb,
const struct nfnl_dump_hook_data *ctx,
const struct nf_hook_ops *ops,
- unsigned int seq)
+ int family, unsigned int seq)
{
u16 event = nfnl_msg_type(NFNL_SUBSYS_HOOK, NFNL_MSG_HOOK_GET);
unsigned int portid = NETLINK_CB(nlskb).portid;
struct nlmsghdr *nlh;
int ret = -EMSGSIZE;
+ u32 hooknum;
#ifdef CONFIG_KALLSYMS
char sym[KSYM_SYMBOL_LEN];
char *module_name;
#endif
nlh = nfnl_msg_put(nlskb, portid, seq, event,
- NLM_F_MULTI, ops->pf, NFNETLINK_V0, 0);
+ NLM_F_MULTI, family, NFNETLINK_V0, 0);
if (!nlh)
goto nla_put_failure;
@@ -135,6 +140,7 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb,
if (module_name) {
char *end;
+ *module_name = '\0';
module_name += 2;
end = strchr(module_name, ']');
if (end) {
@@ -151,7 +157,12 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb,
goto nla_put_failure;
#endif
- ret = nla_put_be32(nlskb, NFNLA_HOOK_HOOKNUM, htonl(ops->hooknum));
+ if (ops->pf == NFPROTO_INET && ops->hooknum == NF_INET_INGRESS)
+ hooknum = NF_NETDEV_INGRESS;
+ else
+ hooknum = ops->hooknum;
+
+ ret = nla_put_be32(nlskb, NFNLA_HOOK_HOOKNUM, htonl(hooknum));
if (ret)
goto nla_put_failure;
@@ -259,7 +270,8 @@ static int nfnl_hook_dump(struct sk_buff *nlskb,
ops = nf_hook_entries_get_hook_ops(e);
for (; i < e->num_hook_entries; i++) {
- err = nfnl_hook_dump_one(nlskb, ctx, ops[i], cb->seq);
+ err = nfnl_hook_dump_one(nlskb, ctx, ops[i], family,
+ cb->nlh->nlmsg_seq);
if (err)
break;
}
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index e586424d8b04..9713035b89e3 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -293,14 +293,14 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
}
/**
- * Parse vlan tag from vlan header.
+ * parse_vlan_tag - Parse vlan tag from vlan header.
* @skb: skb containing frame to parse
* @key_vh: pointer to parsed vlan tag
* @untag_vlan: should the vlan header be removed from the frame
*
- * Returns ERROR on memory error.
- * Returns 0 if it encounters a non-vlan or incomplete packet.
- * Returns 1 after successfully parsing vlan tag.
+ * Return: ERROR on memory error.
+ * %0 if it encounters a non-vlan or incomplete packet.
+ * %1 after successfully parsing vlan tag.
*/
static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh,
bool untag_vlan)
@@ -532,6 +532,7 @@ static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
* L3 header
* @key: output flow key
*
+ * Return: %0 if successful, otherwise a negative errno value.
*/
static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
{
@@ -748,8 +749,6 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
*
* The caller must ensure that skb->len >= ETH_HLEN.
*
- * Returns 0 if successful, otherwise a negative errno value.
- *
* Initializes @skb header fields as follows:
*
* - skb->mac_header: the L2 header.
@@ -764,6 +763,8 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
*
* - skb->protocol: the type of the data starting at skb->network_header.
* Equals to key->eth.type.
+ *
+ * Return: %0 if successful, otherwise a negative errno value.
*/
static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
{
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 7153c67f641e..2ef4cd2c848b 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -273,6 +273,9 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
goto out;
}
+ /* All mirred/redirected skbs should clear previous ct info */
+ nf_reset_ct(skb2);
+
want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
expects_nh = want_ingress || !m_mac_header_xmit;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 898389611ae8..c038efc23ce3 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -795,7 +795,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
reason_code = SMC_CLC_DECL_NOSRVLINK;
goto connect_abort;
}
- smc->conn.lnk = link;
+ smc_switch_link_and_count(&smc->conn, link);
}
/* create send buffer and rmb */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index cd0d7c908b2a..c160ff50c053 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -917,8 +917,8 @@ static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
return rc;
}
-static void smc_switch_link_and_count(struct smc_connection *conn,
- struct smc_link *to_lnk)
+void smc_switch_link_and_count(struct smc_connection *conn,
+ struct smc_link *to_lnk)
{
atomic_dec(&conn->lnk->conn_cnt);
conn->lnk = to_lnk;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 6d6fd1397c87..c043ecdca5c4 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -97,6 +97,7 @@ struct smc_link {
unsigned long *wr_tx_mask; /* bit mask of used indexes */
u32 wr_tx_cnt; /* number of WR send buffers */
wait_queue_head_t wr_tx_wait; /* wait for free WR send buf */
+ atomic_t wr_tx_refcnt; /* tx refs to link */
struct smc_wr_buf *wr_rx_bufs; /* WR recv payload buffers */
struct ib_recv_wr *wr_rx_ibs; /* WR recv meta data */
@@ -109,6 +110,7 @@ struct smc_link {
struct ib_reg_wr wr_reg; /* WR register memory region */
wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */
+ atomic_t wr_reg_refcnt; /* reg refs to link */
enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */
u8 gid[SMC_GID_SIZE];/* gid matching used vlan id*/
@@ -444,6 +446,8 @@ void smc_core_exit(void);
int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
u8 link_idx, struct smc_init_info *ini);
void smcr_link_clear(struct smc_link *lnk, bool log);
+void smc_switch_link_and_count(struct smc_connection *conn,
+ struct smc_link *to_lnk);
int smcr_buf_map_lgr(struct smc_link *lnk);
int smcr_buf_reg_lgr(struct smc_link *lnk);
void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type);
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 273eaf1bfe49..2e7560eba981 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -888,6 +888,7 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
if (!rc)
goto out;
out_clear_lnk:
+ lnk_new->state = SMC_LNK_INACTIVE;
smcr_link_clear(lnk_new, false);
out_reject:
smc_llc_cli_add_link_reject(qentry);
@@ -1184,6 +1185,7 @@ int smc_llc_srv_add_link(struct smc_link *link)
goto out_err;
return 0;
out_err:
+ link_new->state = SMC_LNK_INACTIVE;
smcr_link_clear(link_new, false);
return rc;
}
@@ -1286,10 +1288,8 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
del_llc->reason = 0;
smc_llc_send_message(lnk, &qentry->msg); /* response */
- if (smc_link_downing(&lnk_del->state)) {
- if (smc_switch_conns(lgr, lnk_del, false))
- smc_wr_tx_wait_no_pending_sends(lnk_del);
- }
+ if (smc_link_downing(&lnk_del->state))
+ smc_switch_conns(lgr, lnk_del, false);
smcr_link_clear(lnk_del, true);
active_links = smc_llc_active_link_count(lgr);
@@ -1805,8 +1805,6 @@ void smc_llc_link_clear(struct smc_link *link, bool log)
link->smcibdev->ibdev->name, link->ibport);
complete(&link->llc_testlink_resp);
cancel_delayed_work_sync(&link->llc_testlink_wrk);
- smc_wr_wakeup_reg_wait(link);
- smc_wr_wakeup_tx_wait(link);
}
/* register a new rtoken at the remote peer (for all links) */
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 289025cd545a..c79361dfcdfb 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -496,7 +496,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn,
/* Wakeup sndbuf consumers from any context (IRQ or process)
* since there is more data to transmit; usable snd_wnd as max transmit
*/
-static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
+static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
struct smc_link *link = conn->lnk;
@@ -550,6 +550,22 @@ out_unlock:
return rc;
}
+static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
+{
+ struct smc_link *link = conn->lnk;
+ int rc = -ENOLINK;
+
+ if (!link)
+ return rc;
+
+ atomic_inc(&link->wr_tx_refcnt);
+ if (smc_link_usable(link))
+ rc = _smcr_tx_sndbuf_nonempty(conn);
+ if (atomic_dec_and_test(&link->wr_tx_refcnt))
+ wake_up_all(&link->wr_tx_wait);
+ return rc;
+}
+
static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index cbc73a7e4d59..a419e9af36b9 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -322,9 +322,12 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
if (rc)
return rc;
+ atomic_inc(&link->wr_reg_refcnt);
rc = wait_event_interruptible_timeout(link->wr_reg_wait,
(link->wr_reg_state != POSTED),
SMC_WR_REG_MR_WAIT_TIME);
+ if (atomic_dec_and_test(&link->wr_reg_refcnt))
+ wake_up_all(&link->wr_reg_wait);
if (!rc) {
/* timeout - terminate link */
smcr_link_down_cond_sched(link);
@@ -566,10 +569,15 @@ void smc_wr_free_link(struct smc_link *lnk)
return;
ibdev = lnk->smcibdev->ibdev;
+ smc_wr_wakeup_reg_wait(lnk);
+ smc_wr_wakeup_tx_wait(lnk);
+
if (smc_wr_tx_wait_no_pending_sends(lnk))
memset(lnk->wr_tx_mask, 0,
BITS_TO_LONGS(SMC_WR_BUF_CNT) *
sizeof(*lnk->wr_tx_mask));
+ wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt)));
+ wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt)));
if (lnk->wr_rx_dma_addr) {
ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
@@ -728,7 +736,9 @@ int smc_wr_create_link(struct smc_link *lnk)
memset(lnk->wr_tx_mask, 0,
BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));
init_waitqueue_head(&lnk->wr_tx_wait);
+ atomic_set(&lnk->wr_tx_refcnt, 0);
init_waitqueue_head(&lnk->wr_reg_wait);
+ atomic_set(&lnk->wr_reg_refcnt, 0);
return rc;
dma_unmap:
diff --git a/net/tipc/link.c b/net/tipc/link.c
index cf586840caeb..1b7a487c8841 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -913,7 +913,7 @@ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr)
skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0,
dnode, l->addr, dport, 0, 0);
if (!skb)
- return -ENOMEM;
+ return -ENOBUFS;
msg_set_dest_droppable(buf_msg(skb), true);
TIPC_SKB_CB(skb)->chain_imp = msg_importance(hdr);
skb_queue_tail(&l->wakeupq, skb);
@@ -1031,7 +1031,7 @@ void tipc_link_reset(struct tipc_link *l)
*
* Consumes the buffer chain.
* Messages at TIPC_SYSTEM_IMPORTANCE are always accepted
- * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS or -ENOMEM
+ * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS
*/
int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
struct sk_buff_head *xmitq)
@@ -1089,7 +1089,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
if (!_skb) {
kfree_skb(skb);
__skb_queue_purge(list);
- return -ENOMEM;
+ return -ENOBUFS;
}
__skb_queue_tail(transmq, skb);
tipc_link_set_skb_retransmit_time(skb, l);
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index e0c2c992ad9c..4f7c99dfd16c 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -357,11 +357,14 @@ static void virtio_vsock_event_fill(struct virtio_vsock *vsock)
static void virtio_vsock_reset_sock(struct sock *sk)
{
- lock_sock(sk);
+ /* vmci_transport.c doesn't take sk_lock here either. At least we're
+ * under vsock_table_lock so the sock cannot disappear while we're
+ * executing.
+ */
+
sk->sk_state = TCP_CLOSE;
sk->sk_err = ECONNRESET;
sk_error_report(sk);
- release_sock(sk);
}
static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock)