Diffstat (limited to 'net')
-rw-r--r-- net/9p/client.c 2
-rw-r--r-- net/9p/trans_virtio.c 2
-rw-r--r-- net/batman-adv/bat_v_elp.c 6
-rw-r--r-- net/batman-adv/fragmentation.c 2
-rw-r--r-- net/bluetooth/6lowpan.c 2
-rw-r--r-- net/bluetooth/a2mp.c 2
-rw-r--r-- net/bluetooth/smp.c 2
-rw-r--r-- net/bpf/test_run.c 21
-rw-r--r-- net/bridge/br_private.h 7
-rw-r--r-- net/bridge/br_vlan.c 3
-rw-r--r-- net/can/raw.c 15
-rw-r--r-- net/ceph/messenger.c 18
-rw-r--r-- net/core/dev.c 78
-rw-r--r-- net/core/filter.c 32
-rw-r--r-- net/core/flow_dissector.c 4
-rw-r--r-- net/core/netpoll.c 3
-rw-r--r-- net/core/rtnetlink.c 15
-rw-r--r-- net/core/skbuff.c 7
-rw-r--r-- net/core/sock.c 1
-rw-r--r-- net/dsa/master.c 34
-rw-r--r-- net/dsa/slave.c 28
-rw-r--r-- net/ipv4/igmp.c 53
-rw-r--r-- net/ipv4/inet_fragment.c 29
-rw-r--r-- net/ipv4/ip_fragment.c 19
-rw-r--r-- net/ipv4/ip_input.c 4
-rw-r--r-- net/ipv4/ip_output.c 3
-rw-r--r-- net/ipv4/ip_sockglue.c 6
-rw-r--r-- net/ipv4/ip_tunnel_core.c 2
-rw-r--r-- net/ipv4/netfilter/ipt_MASQUERADE.c 7
-rw-r--r-- net/ipv4/netfilter/nf_nat_masquerade_ipv4.c 38
-rw-r--r-- net/ipv4/netfilter/nft_masq_ipv4.c 4
-rw-r--r-- net/ipv4/tcp_bpf.c 1
-rw-r--r-- net/ipv4/tcp_input.c 31
-rw-r--r-- net/ipv4/tcp_output.c 51
-rw-r--r-- net/ipv4/tcp_timer.c 22
-rw-r--r-- net/ipv6/addrconf.c 19
-rw-r--r-- net/ipv6/af_inet6.c 5
-rw-r--r-- net/ipv6/anycast.c 80
-rw-r--r-- net/ipv6/ip6_fib.c 4
-rw-r--r-- net/ipv6/ip6_input.c 4
-rw-r--r-- net/ipv6/ip6_output.c 45
-rw-r--r-- net/ipv6/netfilter.c 3
-rw-r--r-- net/ipv6/netfilter/ip6t_MASQUERADE.c 8
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_reasm.c 21
-rw-r--r-- net/ipv6/netfilter/nf_nat_masquerade_ipv6.c 49
-rw-r--r-- net/ipv6/netfilter/nft_masq_ipv6.c 4
-rw-r--r-- net/ipv6/reassembly.c 8
-rw-r--r-- net/ipv6/route.c 14
-rw-r--r-- net/ipv6/seg6_iptunnel.c 1
-rw-r--r-- net/l2tp/l2tp_core.c 9
-rw-r--r-- net/mac80211/cfg.c 7
-rw-r--r-- net/mac80211/iface.c 2
-rw-r--r-- net/mac80211/mlme.c 12
-rw-r--r-- net/mac80211/rx.c 5
-rw-r--r-- net/mac80211/status.c 2
-rw-r--r-- net/mac80211/tx.c 4
-rw-r--r-- net/netfilter/ipset/ip_set_core.c 43
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netportnet.c 8
-rw-r--r-- net/netfilter/ipset/ip_set_list_set.c 17
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c 3
-rw-r--r-- net/netfilter/ipvs/ip_vs_sync.c 2
-rw-r--r-- net/netfilter/nf_conncount.c 44
-rw-r--r-- net/netfilter/nf_conntrack_core.c 13
-rw-r--r-- net/netfilter/nf_conntrack_proto_dccp.c 13
-rw-r--r-- net/netfilter/nf_conntrack_proto_generic.c 11
-rw-r--r-- net/netfilter/nf_conntrack_proto_gre.c 14
-rw-r--r-- net/netfilter/nf_conntrack_proto_icmp.c 11
-rw-r--r-- net/netfilter/nf_conntrack_proto_icmpv6.c 11
-rw-r--r-- net/netfilter/nf_conntrack_proto_sctp.c 11
-rw-r--r-- net/netfilter/nf_conntrack_proto_tcp.c 15
-rw-r--r-- net/netfilter/nf_conntrack_proto_udp.c 11
-rw-r--r-- net/netfilter/nf_tables_api.c 46
-rw-r--r-- net/netfilter/nfnetlink_cttimeout.c 58
-rw-r--r-- net/netfilter/nft_compat.c 24
-rw-r--r-- net/netfilter/nft_flow_offload.c 5
-rw-r--r-- net/netfilter/nft_numgen.c 127
-rw-r--r-- net/netfilter/nft_osf.c 2
-rw-r--r-- net/netfilter/xt_IDLETIMER.c 20
-rw-r--r-- net/netfilter/xt_RATEEST.c 10
-rw-r--r-- net/netfilter/xt_hashlimit.c 9
-rw-r--r-- net/openvswitch/conntrack.c 5
-rw-r--r-- net/openvswitch/flow_netlink.c 4
-rw-r--r-- net/packet/af_packet.c 4
-rw-r--r-- net/rxrpc/af_rxrpc.c 27
-rw-r--r-- net/rxrpc/ar-internal.h 1
-rw-r--r-- net/rxrpc/call_event.c 18
-rw-r--r-- net/rxrpc/output.c 35
-rw-r--r-- net/sched/act_mirred.c 3
-rw-r--r-- net/sched/act_pedit.c 3
-rw-r--r-- net/sched/act_police.c 60
-rw-r--r-- net/sched/cls_flower.c 37
-rw-r--r-- net/sched/sch_fq.c 31
-rw-r--r-- net/sched/sch_netem.c 12
-rw-r--r-- net/sctp/associola.c 19
-rw-r--r-- net/sctp/chunk.c 6
-rw-r--r-- net/sctp/output.c 25
-rw-r--r-- net/sctp/outqueue.c 2
-rw-r--r-- net/sctp/sm_make_chunk.c 3
-rw-r--r-- net/sctp/socket.c 37
-rw-r--r-- net/sctp/stream.c 1
-rw-r--r-- net/smc/af_smc.c 11
-rw-r--r-- net/smc/smc_cdc.c 26
-rw-r--r-- net/smc/smc_cdc.h 60
-rw-r--r-- net/smc/smc_clc.c 4
-rw-r--r-- net/smc/smc_core.c 20
-rw-r--r-- net/smc/smc_core.h 5
-rw-r--r-- net/smc/smc_ism.c 43
-rw-r--r-- net/smc/smc_ism.h 1
-rw-r--r-- net/smc/smc_wr.c 4
-rw-r--r-- net/socket.c 8
-rw-r--r-- net/sunrpc/auth_generic.c 8
-rw-r--r-- net/sunrpc/auth_gss/auth_gss.c 65
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_mech.c 16
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_seal.c 28
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_wrap.c 4
-rw-r--r-- net/sunrpc/clnt.c 8
-rw-r--r-- net/sunrpc/svcsock.c 2
-rw-r--r-- net/sunrpc/xdr.c 7
-rw-r--r-- net/sunrpc/xprt.c 13
-rw-r--r-- net/sunrpc/xprtsock.c 85
-rw-r--r-- net/tipc/discover.c 19
-rw-r--r-- net/tipc/link.c 11
-rw-r--r-- net/tipc/net.c 45
-rw-r--r-- net/tipc/net.h 2
-rw-r--r-- net/tipc/node.c 7
-rw-r--r-- net/tipc/socket.c 15
-rw-r--r-- net/tipc/topsrv.c 2
-rw-r--r-- net/tls/tls_device.c 4
-rw-r--r-- net/tls/tls_sw.c 4
-rw-r--r-- net/wireless/mlme.c 4
-rw-r--r-- net/wireless/nl80211.c 1
-rw-r--r-- net/wireless/sme.c 8
-rw-r--r-- net/wireless/util.c 2
-rw-r--r-- net/x25/af_x25.c 18
-rw-r--r-- net/x25/x25_in.c 9
-rw-r--r-- net/xfrm/Kconfig 1
-rw-r--r-- net/xfrm/xfrm_state.c 2
-rw-r--r-- net/xfrm/xfrm_user.c 2
138 files changed, 1351 insertions, 944 deletions
diff --git a/net/9p/client.c b/net/9p/client.c
index 5f23e18eecc0..2c9a17b9b46b 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -2066,7 +2066,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
struct kvec kv = {.iov_base = data, .iov_len = count};
struct iov_iter to;
- iov_iter_kvec(&to, READ | ITER_KVEC, &kv, 1, count);
+ iov_iter_kvec(&to, READ, &kv, 1, count);
p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n",
fid->fid, (unsigned long long) offset, count);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index eb596c2ed546..b1d39cabf125 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -329,7 +329,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
if (!iov_iter_count(data))
return 0;
- if (!(data->type & ITER_KVEC)) {
+ if (!iov_iter_is_kvec(data)) {
int n;
/*
* We allow only p9_max_pages pinned. We wait for the
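Note: both 9p hunks above adapt to the v4.20 iov_iter API rework, where the iterator type (ITER_KVEC and friends) is implied by the constructor, callers pass only the data direction, and open-coded type tests become iov_iter_is_kvec(). A minimal sketch of the new convention; the helper function and socket plumbing here are illustrative, not from this patch:

	#include <linux/uio.h>
	#include <net/sock.h>

	/* hypothetical helper, for illustration only */
	static int recv_into_kernel_buf(struct socket *sock, void *buf, size_t len)
	{
		struct kvec kv = { .iov_base = buf, .iov_len = len };
		struct msghdr msg = { };

		/* pre-4.20: iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, ...) */
		iov_iter_kvec(&msg.msg_iter, READ, &kv, 1, len);

		/* replaces open-coded "data->type & ITER_KVEC" tests */
		if (!iov_iter_is_kvec(&msg.msg_iter))
			return -EINVAL;

		return sock_recvmsg(sock, &msg, msg.msg_flags);
	}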
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 9f481cfdf77d..e8090f099eb8 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -352,19 +352,21 @@ out:
*/
int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface)
{
+ static const size_t tvlv_padding = sizeof(__be32);
struct batadv_elp_packet *elp_packet;
unsigned char *elp_buff;
u32 random_seqno;
size_t size;
int res = -ENOMEM;
- size = ETH_HLEN + NET_IP_ALIGN + BATADV_ELP_HLEN;
+ size = ETH_HLEN + NET_IP_ALIGN + BATADV_ELP_HLEN + tvlv_padding;
hard_iface->bat_v.elp_skb = dev_alloc_skb(size);
if (!hard_iface->bat_v.elp_skb)
goto out;
skb_reserve(hard_iface->bat_v.elp_skb, ETH_HLEN + NET_IP_ALIGN);
- elp_buff = skb_put_zero(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN);
+ elp_buff = skb_put_zero(hard_iface->bat_v.elp_skb,
+ BATADV_ELP_HLEN + tvlv_padding);
elp_packet = (struct batadv_elp_packet *)elp_buff;
elp_packet->packet_type = BATADV_ELP;
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 0fddc17106bd..5b71a289d04f 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -275,7 +275,7 @@ batadv_frag_merge_packets(struct hlist_head *chain)
kfree(entry);
packet = (struct batadv_frag_packet *)skb_out->data;
- size = ntohs(packet->total_size);
+ size = ntohs(packet->total_size) + hdr_size;
/* Make room for the rest of the fragments. */
if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) {
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 4e2576fc0c59..828e87fe8027 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -467,7 +467,7 @@ static int send_pkt(struct l2cap_chan *chan, struct sk_buff *skb,
iv.iov_len = skb->len;
memset(&msg, 0, sizeof(msg));
- iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iv, 1, skb->len);
+ iov_iter_kvec(&msg.msg_iter, WRITE, &iv, 1, skb->len);
err = l2cap_chan_send(chan, &msg, skb->len);
if (err > 0) {
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index 51c2cf2d8923..58fc6333d412 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -63,7 +63,7 @@ static void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *dat
memset(&msg, 0, sizeof(msg));
- iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iv, 1, total_len);
+ iov_iter_kvec(&msg.msg_iter, WRITE, &iv, 1, total_len);
l2cap_chan_send(chan, &msg, total_len);
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index a1c1b7e8a45c..c822e626761b 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -622,7 +622,7 @@ static void smp_send_cmd(struct l2cap_conn *conn, u8 code, u16 len, void *data)
memset(&msg, 0, sizeof(msg));
- iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iv, 2, 1 + len);
+ iov_iter_kvec(&msg.msg_iter, WRITE, iv, 2, 1 + len);
l2cap_chan_send(chan, &msg, 1 + len);
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index c89c22c49015..25001913d03b 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -28,12 +28,13 @@ static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
return ret;
}
-static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
+static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *ret,
+ u32 *time)
{
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 };
enum bpf_cgroup_storage_type stype;
u64 time_start, time_spent = 0;
- u32 ret = 0, i;
+ u32 i;
for_each_cgroup_storage_type(stype) {
storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
@@ -49,7 +50,7 @@ static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
repeat = 1;
time_start = ktime_get_ns();
for (i = 0; i < repeat; i++) {
- ret = bpf_test_run_one(prog, ctx, storage);
+ *ret = bpf_test_run_one(prog, ctx, storage);
if (need_resched()) {
if (signal_pending(current))
break;
@@ -65,7 +66,7 @@ static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
for_each_cgroup_storage_type(stype)
bpf_cgroup_storage_free(storage[stype]);
- return ret;
+ return 0;
}
static int bpf_test_finish(const union bpf_attr *kattr,
@@ -165,7 +166,12 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
__skb_push(skb, hh_len);
if (is_direct_pkt_access)
bpf_compute_data_pointers(skb);
- retval = bpf_test_run(prog, skb, repeat, &duration);
+ ret = bpf_test_run(prog, skb, repeat, &retval, &duration);
+ if (ret) {
+ kfree_skb(skb);
+ kfree(sk);
+ return ret;
+ }
if (!is_l2) {
if (skb_headroom(skb) < hh_len) {
int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
@@ -212,11 +218,14 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
xdp.rxq = &rxqueue->xdp_rxq;
- retval = bpf_test_run(prog, &xdp, repeat, &duration);
+ ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration);
+ if (ret)
+ goto out;
if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN ||
xdp.data_end != xdp.data + size)
size = xdp.data_end - xdp.data;
ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
+out:
kfree(data);
return ret;
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 2920e06a5403..04c19a37e500 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -102,12 +102,18 @@ struct br_tunnel_info {
struct metadata_dst *tunnel_dst;
};
+/* private vlan flags */
+enum {
+ BR_VLFLAG_PER_PORT_STATS = BIT(0),
+};
+
/**
* struct net_bridge_vlan - per-vlan entry
*
* @vnode: rhashtable member
* @vid: VLAN id
* @flags: bridge vlan flags
+ * @priv_flags: private (in-kernel) bridge vlan flags
* @stats: per-cpu VLAN statistics
* @br: if MASTER flag set, this points to a bridge struct
* @port: if MASTER flag unset, this points to a port struct
@@ -127,6 +133,7 @@ struct net_bridge_vlan {
struct rhash_head tnode;
u16 vid;
u16 flags;
+ u16 priv_flags;
struct br_vlan_stats __percpu *stats;
union {
struct net_bridge *br;
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 8c9297a01947..e84be08b8285 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -197,7 +197,7 @@ static void nbp_vlan_rcu_free(struct rcu_head *rcu)
v = container_of(rcu, struct net_bridge_vlan, rcu);
WARN_ON(br_vlan_is_master(v));
/* if we had per-port stats configured then free them here */
- if (v->brvlan->stats != v->stats)
+ if (v->priv_flags & BR_VLFLAG_PER_PORT_STATS)
free_percpu(v->stats);
v->stats = NULL;
kfree(v);
@@ -264,6 +264,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
err = -ENOMEM;
goto out_filt;
}
+ v->priv_flags |= BR_VLFLAG_PER_PORT_STATS;
} else {
v->stats = masterv->stats;
}
diff --git a/net/can/raw.c b/net/can/raw.c
index 1051eee82581..3aab7664933f 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -745,18 +745,19 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
} else
ifindex = ro->ifindex;
- if (ro->fd_frames) {
+ dev = dev_get_by_index(sock_net(sk), ifindex);
+ if (!dev)
+ return -ENXIO;
+
+ err = -EINVAL;
+ if (ro->fd_frames && dev->mtu == CANFD_MTU) {
if (unlikely(size != CANFD_MTU && size != CAN_MTU))
- return -EINVAL;
+ goto put_dev;
} else {
if (unlikely(size != CAN_MTU))
- return -EINVAL;
+ goto put_dev;
}
- dev = dev_get_by_index(sock_net(sk), ifindex);
- if (!dev)
- return -ENXIO;
-
skb = sock_alloc_send_skb(sk, size + sizeof(struct can_skb_priv),
msg->msg_flags & MSG_DONTWAIT, &err);
if (!skb)
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 88e35830198c..2f126eff275d 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -513,7 +513,7 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
if (!buf)
msg.msg_flags |= MSG_TRUNC;
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len);
+ iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, len);
r = sock_recvmsg(sock, &msg, msg.msg_flags);
if (r == -EAGAIN)
r = 0;
@@ -532,7 +532,7 @@ static int ceph_tcp_recvpage(struct socket *sock, struct page *page,
int r;
BUG_ON(page_offset + length > PAGE_SIZE);
- iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, &bvec, 1, length);
+ iov_iter_bvec(&msg.msg_iter, READ, &bvec, 1, length);
r = sock_recvmsg(sock, &msg, msg.msg_flags);
if (r == -EAGAIN)
r = 0;
@@ -580,9 +580,15 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
struct bio_vec bvec;
int ret;
- /* sendpage cannot properly handle pages with page_count == 0,
- * we need to fallback to sendmsg if that's the case */
- if (page_count(page) >= 1)
+ /*
+ * sendpage cannot properly handle pages with page_count == 0,
+ * we need to fall back to sendmsg if that's the case.
+ *
+ * Same goes for slab pages: skb_can_coalesce() allows
+ * coalescing neighboring slab objects into a single frag which
+ * triggers one of hardened usercopy checks.
+ */
+ if (page_count(page) >= 1 && !PageSlab(page))
return __ceph_tcp_sendpage(sock, page, offset, size, more);
bvec.bv_page = page;
@@ -594,7 +600,7 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
else
msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */
- iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, &bvec, 1, size);
+ iov_iter_bvec(&msg.msg_iter, WRITE, &bvec, 1, size);
ret = sock_sendmsg(sock, &msg);
if (ret == -EAGAIN)
ret = 0;
diff --git a/net/core/dev.c b/net/core/dev.c
index 77d43ae2a7bb..722d50dbf8a4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2175,6 +2175,20 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
return active;
}
+static void reset_xps_maps(struct net_device *dev,
+ struct xps_dev_maps *dev_maps,
+ bool is_rxqs_map)
+{
+ if (is_rxqs_map) {
+ static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
+ RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
+ } else {
+ RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
+ }
+ static_key_slow_dec_cpuslocked(&xps_needed);
+ kfree_rcu(dev_maps, rcu);
+}
+
static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
struct xps_dev_maps *dev_maps, unsigned int nr_ids,
u16 offset, u16 count, bool is_rxqs_map)
@@ -2186,18 +2200,15 @@ static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
j < nr_ids;)
active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
count);
- if (!active) {
- if (is_rxqs_map) {
- RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
- } else {
- RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
+ if (!active)
+ reset_xps_maps(dev, dev_maps, is_rxqs_map);
- for (i = offset + (count - 1); count--; i--)
- netdev_queue_numa_node_write(
- netdev_get_tx_queue(dev, i),
- NUMA_NO_NODE);
+ if (!is_rxqs_map) {
+ for (i = offset + (count - 1); count--; i--) {
+ netdev_queue_numa_node_write(
+ netdev_get_tx_queue(dev, i),
+ NUMA_NO_NODE);
}
- kfree_rcu(dev_maps, rcu);
}
}
@@ -2234,10 +2245,6 @@ static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
false);
out_no_maps:
- if (static_key_enabled(&xps_rxqs_needed))
- static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
-
- static_key_slow_dec_cpuslocked(&xps_needed);
mutex_unlock(&xps_map_mutex);
cpus_read_unlock();
}
@@ -2355,9 +2362,12 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
if (!new_dev_maps)
goto out_no_new_maps;
- static_key_slow_inc_cpuslocked(&xps_needed);
- if (is_rxqs_map)
- static_key_slow_inc_cpuslocked(&xps_rxqs_needed);
+ if (!dev_maps) {
+ /* Increment static keys at most once per type */
+ static_key_slow_inc_cpuslocked(&xps_needed);
+ if (is_rxqs_map)
+ static_key_slow_inc_cpuslocked(&xps_rxqs_needed);
+ }
for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
j < nr_ids;) {
@@ -2455,13 +2465,8 @@ out_no_new_maps:
}
/* free map if not active */
- if (!active) {
- if (is_rxqs_map)
- RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
- else
- RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
- kfree_rcu(dev_maps, rcu);
- }
+ if (!active)
+ reset_xps_maps(dev, dev_maps, is_rxqs_map);
out_no_maps:
mutex_unlock(&xps_map_mutex);
@@ -3272,7 +3277,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *de
}
skb = next;
- if (netif_xmit_stopped(txq) && skb) {
+ if (netif_tx_queue_stopped(txq) && skb) {
rc = NETDEV_TX_BUSY;
break;
}
@@ -5009,7 +5014,7 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo
struct net_device *orig_dev = skb->dev;
struct packet_type *pt_prev = NULL;
- list_del(&skb->list);
+ skb_list_del_init(skb);
__netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
if (!pt_prev)
continue;
@@ -5165,7 +5170,7 @@ static void netif_receive_skb_list_internal(struct list_head *head)
INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
net_timestamp_check(netdev_tstamp_prequeue, skb);
- list_del(&skb->list);
+ skb_list_del_init(skb);
if (!skb_defer_rx_timestamp(skb))
list_add_tail(&skb->list, &sublist);
}
@@ -5176,7 +5181,7 @@ static void netif_receive_skb_list_internal(struct list_head *head)
rcu_read_lock();
list_for_each_entry_safe(skb, next, head, list) {
xdp_prog = rcu_dereference(skb->dev->xdp_prog);
- list_del(&skb->list);
+ skb_list_del_init(skb);
if (do_xdp_generic(xdp_prog, skb) == XDP_PASS)
list_add_tail(&skb->list, &sublist);
}
@@ -5195,7 +5200,7 @@ static void netif_receive_skb_list_internal(struct list_head *head)
if (cpu >= 0) {
/* Will be handled, remove from list */
- list_del(&skb->list);
+ skb_list_del_init(skb);
enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
}
}
@@ -5655,6 +5660,10 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb->vlan_tci = 0;
skb->dev = napi->dev;
skb->skb_iif = 0;
+
+ /* eth_type_trans() assumes pkt_type is PACKET_HOST */
+ skb->pkt_type = PACKET_HOST;
+
skb->encapsulation = 0;
skb_shinfo(skb)->gso_type = 0;
skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
@@ -5966,11 +5975,14 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
if (work_done)
timeout = n->dev->gro_flush_timeout;
+ /* When the NAPI instance uses a timeout and keeps postponing
+ * it, we need to bound somehow the time packets are kept in
+ * the GRO layer
+ */
+ napi_gro_flush(n, !!timeout);
if (timeout)
hrtimer_start(&n->timer, ns_to_ktime(timeout),
HRTIMER_MODE_REL_PINNED);
- else
- napi_gro_flush(n, false);
}
if (unlikely(!list_empty(&n->poll_list))) {
/* If n->poll_list is not empty, we need to mask irqs */
@@ -6197,8 +6209,8 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
napi->skb = NULL;
napi->poll = poll;
if (weight > NAPI_POLL_WEIGHT)
- pr_err_once("netif_napi_add() called with weight %d on device %s\n",
- weight, dev->name);
+ netdev_err_once(dev, "%s() called with weight %d\n", __func__,
+ weight);
napi->weight = weight;
list_add(&napi->dev_list, &dev->napi_list);
napi->dev = dev;
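Note: the list_del() to skb_list_del_init() conversions above matter because list_del() poisons the next/prev pointers, while parts of the receive path still test skb->next to decide whether an skb is on a list. A sketch of the helper, essentially as defined in include/linux/skbuff.h around this release (reconstructed from memory):

	static inline void skb_mark_not_on_list(struct sk_buff *skb)
	{
		skb->next = NULL;
	}

	static inline void skb_list_del_init(struct sk_buff *skb)
	{
		__list_del_entry(&skb->list);	/* unlink without poisoning */
		skb_mark_not_on_list(skb);	/* leave skb->next == NULL  */
	}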
diff --git a/net/core/filter.c b/net/core/filter.c
index e521c5ebc7d1..8d2c629501e2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4852,18 +4852,17 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
} else {
struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
- u16 hnum = ntohs(tuple->ipv6.dport);
int sdif = inet6_sdif(skb);
if (proto == IPPROTO_TCP)
sk = __inet6_lookup(net, &tcp_hashinfo, skb, 0,
src6, tuple->ipv6.sport,
- dst6, hnum,
+ dst6, ntohs(tuple->ipv6.dport),
dif, sdif, &refcounted);
else if (likely(ipv6_bpf_stub))
sk = ipv6_bpf_stub->udp6_lib_lookup(net,
src6, tuple->ipv6.sport,
- dst6, hnum,
+ dst6, tuple->ipv6.dport,
dif, sdif,
&udp_table, skb);
#endif
@@ -4891,22 +4890,23 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
struct net *net;
family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6;
- if (unlikely(family == AF_UNSPEC || netns_id > U32_MAX || flags))
+ if (unlikely(family == AF_UNSPEC || flags ||
+ !((s32)netns_id < 0 || netns_id <= S32_MAX)))
goto out;
if (skb->dev)
caller_net = dev_net(skb->dev);
else
caller_net = sock_net(skb->sk);
- if (netns_id) {
+ if ((s32)netns_id < 0) {
+ net = caller_net;
+ sk = sk_lookup(net, tuple, skb, family, proto);
+ } else {
net = get_net_ns_by_id(caller_net, netns_id);
if (unlikely(!net))
goto out;
sk = sk_lookup(net, tuple, skb, family, proto);
put_net(net);
- } else {
- net = caller_net;
- sk = sk_lookup(net, tuple, skb, family, proto);
}
if (sk)
@@ -5436,8 +5436,8 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
if (size != size_default)
return false;
break;
- case bpf_ctx_range(struct __sk_buff, flow_keys):
- if (size != sizeof(struct bpf_flow_keys *))
+ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
+ if (size != sizeof(__u64))
return false;
break;
default:
@@ -5465,7 +5465,7 @@ static bool sk_filter_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, data_end):
- case bpf_ctx_range(struct __sk_buff, flow_keys):
+ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
}
@@ -5490,7 +5490,7 @@ static bool cg_skb_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
- case bpf_ctx_range(struct __sk_buff, flow_keys):
+ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
return false;
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_end):
@@ -5531,7 +5531,7 @@ static bool lwt_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
case bpf_ctx_range(struct __sk_buff, data_meta):
- case bpf_ctx_range(struct __sk_buff, flow_keys):
+ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
return false;
}
@@ -5757,7 +5757,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
- case bpf_ctx_range(struct __sk_buff, flow_keys):
+ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
}
@@ -5959,7 +5959,7 @@ static bool sk_skb_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
- case bpf_ctx_range(struct __sk_buff, flow_keys):
+ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
return false;
}
@@ -6040,7 +6040,7 @@ static bool flow_dissector_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
- case bpf_ctx_range(struct __sk_buff, flow_keys):
+ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
info->reg_type = PTR_TO_FLOW_KEYS;
break;
case bpf_ctx_range(struct __sk_buff, tc_classid):
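Note: the reworked bpf_sk_lookup() validity test encodes two cases in one value: ids 0..S32_MAX select a peer namespace by id, while any value that is negative when truncated to s32 (notably BPF_F_CURRENT_NETNS, i.e. -1) means "use the caller's own netns". A condensed sketch of that check, mirroring the condition added above:

	/* sketch only */
	static bool netns_id_is_acceptable(u64 netns_id)
	{
		/* negative => current netns; 0..S32_MAX => explicit netns id */
		return (s32)netns_id < 0 || netns_id <= S32_MAX;
	}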
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 676f3ad629f9..588f475019d4 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1166,8 +1166,8 @@ ip_proto_again:
break;
}
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_PORTS)) {
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS) &&
+ !(key_control->flags & FLOW_DIS_IS_FRAGMENT)) {
key_ports = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_PORTS,
target_container);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 5da9552b186b..2b9fdbc43205 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -717,7 +717,8 @@ int netpoll_setup(struct netpoll *np)
read_lock_bh(&idev->lock);
list_for_each_entry(ifp, &idev->addr_list, if_list) {
- if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
+ if (!!(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) !=
+ !!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL))
continue;
np->local_ip.in6 = ifp->addr;
err = 0;
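Note: the netpoll fix relies on the "!!" normalization idiom: both address-type tests collapse to exactly 0 or 1 before comparison, so a local candidate is skipped only when its link-local-ness differs from the remote address's. A self-contained user-space illustration (the flag value stands in for IPV6_ADDR_LINKLOCAL):

	#include <stdio.h>

	static int scope_mismatch(unsigned int a_type, unsigned int b_type,
				  unsigned int linklocal_bit)
	{
		/* !! collapses any non-zero flag test to exactly 1 */
		return !!(a_type & linklocal_bit) != !!(b_type & linklocal_bit);
	}

	int main(void)
	{
		const unsigned int LL = 0x0020;	/* stand-in for IPV6_ADDR_LINKLOCAL */

		printf("%d\n", scope_mismatch(LL, 0,  LL));	/* 1: skip candidate */
		printf("%d\n", scope_mismatch(LL, LL, LL));	/* 0: scopes match  */
		return 0;
	}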
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f679c7a7d761..7819f7804eeb 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3367,7 +3367,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
cb->seq = 0;
}
ret = dumpit(skb, cb);
- if (ret < 0)
+ if (ret)
break;
}
cb->family = idx;
@@ -3600,6 +3600,11 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
}
+ if (dev->type != ARPHRD_ETHER) {
+ NL_SET_ERR_MSG(extack, "FDB add only supported for Ethernet devices");
+ return -EINVAL;
+ }
+
addr = nla_data(tb[NDA_LLADDR]);
err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
@@ -3704,6 +3709,11 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
}
+ if (dev->type != ARPHRD_ETHER) {
+ NL_SET_ERR_MSG(extack, "FDB delete only supported for Ethernet devices");
+ return -EINVAL;
+ }
+
addr = nla_data(tb[NDA_LLADDR]);
err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
@@ -3790,6 +3800,9 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb,
{
int err;
+ if (dev->type != ARPHRD_ETHER)
+ return -EINVAL;
+
netif_addr_lock_bh(dev);
err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc);
if (err)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 946de0e24c87..a8217e221e19 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4854,6 +4854,11 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
nf_reset(skb);
nf_reset_trace(skb);
+#ifdef CONFIG_NET_SWITCHDEV
+ skb->offload_fwd_mark = 0;
+ skb->offload_mr_fwd_mark = 0;
+#endif
+
if (!xnet)
return;
@@ -4944,6 +4949,8 @@ static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
*
* This is a helper to do that correctly considering GSO_BY_FRAGS.
*
+ * @skb: GSO skb
+ *
* @seg_len: The segmented length (from skb_gso_*_seglen). In the
* GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS].
*
diff --git a/net/core/sock.c b/net/core/sock.c
index 6fcc4bc07d19..080a880a1761 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3279,6 +3279,7 @@ int sock_load_diag_module(int family, int protocol)
#ifdef CONFIG_INET
if (family == AF_INET &&
+ protocol != IPPROTO_RAW &&
!rcu_access_pointer(inet_protos[protocol]))
return -ENOENT;
#endif
diff --git a/net/dsa/master.c b/net/dsa/master.c
index c90ee3227dea..5e8c9bef78bd 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -158,8 +158,31 @@ static void dsa_master_ethtool_teardown(struct net_device *dev)
cpu_dp->orig_ethtool_ops = NULL;
}
+static ssize_t tagging_show(struct device *d, struct device_attribute *attr,
+ char *buf)
+{
+ struct net_device *dev = to_net_dev(d);
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+
+ return sprintf(buf, "%s\n",
+ dsa_tag_protocol_to_str(cpu_dp->tag_ops));
+}
+static DEVICE_ATTR_RO(tagging);
+
+static struct attribute *dsa_slave_attrs[] = {
+ &dev_attr_tagging.attr,
+ NULL
+};
+
+static const struct attribute_group dsa_group = {
+ .name = "dsa",
+ .attrs = dsa_slave_attrs,
+};
+
int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
{
+ int ret;
+
/* If we use a tagging format that doesn't have an ethertype
* field, make sure that all packets from this point on get
* sent to the tag format's receive function.
@@ -168,11 +191,20 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
dev->dsa_ptr = cpu_dp;
- return dsa_master_ethtool_setup(dev);
+ ret = dsa_master_ethtool_setup(dev);
+ if (ret)
+ return ret;
+
+ ret = sysfs_create_group(&dev->dev.kobj, &dsa_group);
+ if (ret)
+ dsa_master_ethtool_teardown(dev);
+
+ return ret;
}
void dsa_master_teardown(struct net_device *dev)
{
+ sysfs_remove_group(&dev->dev.kobj, &dsa_group);
dsa_master_ethtool_teardown(dev);
dev->dsa_ptr = NULL;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 7d0c19e7edcf..aec78f5aca72 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1058,27 +1058,6 @@ static struct device_type dsa_type = {
.name = "dsa",
};
-static ssize_t tagging_show(struct device *d, struct device_attribute *attr,
- char *buf)
-{
- struct net_device *dev = to_net_dev(d);
- struct dsa_port *dp = dsa_slave_to_port(dev);
-
- return sprintf(buf, "%s\n",
- dsa_tag_protocol_to_str(dp->cpu_dp->tag_ops));
-}
-static DEVICE_ATTR_RO(tagging);
-
-static struct attribute *dsa_slave_attrs[] = {
- &dev_attr_tagging.attr,
- NULL
-};
-
-static const struct attribute_group dsa_group = {
- .name = "dsa",
- .attrs = dsa_slave_attrs,
-};
-
static void dsa_slave_phylink_validate(struct net_device *dev,
unsigned long *supported,
struct phylink_link_state *state)
@@ -1374,14 +1353,8 @@ int dsa_slave_create(struct dsa_port *port)
goto out_phy;
}
- ret = sysfs_create_group(&slave_dev->dev.kobj, &dsa_group);
- if (ret)
- goto out_unreg;
-
return 0;
-out_unreg:
- unregister_netdev(slave_dev);
out_phy:
rtnl_lock();
phylink_disconnect_phy(p->dp->pl);
@@ -1405,7 +1378,6 @@ void dsa_slave_destroy(struct net_device *slave_dev)
rtnl_unlock();
dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER);
- sysfs_remove_group(&slave_dev->dev.kobj, &dsa_group);
unregister_netdev(slave_dev);
phylink_destroy(dp->pl);
free_percpu(p->stats64);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 4da39446da2d..765b2b32c4a4 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -111,13 +111,10 @@
#ifdef CONFIG_IP_MULTICAST
/* Parameter names and values are taken from igmp-v2-06 draft */
-#define IGMP_V1_ROUTER_PRESENT_TIMEOUT (400*HZ)
-#define IGMP_V2_ROUTER_PRESENT_TIMEOUT (400*HZ)
#define IGMP_V2_UNSOLICITED_REPORT_INTERVAL (10*HZ)
#define IGMP_V3_UNSOLICITED_REPORT_INTERVAL (1*HZ)
+#define IGMP_QUERY_INTERVAL (125*HZ)
#define IGMP_QUERY_RESPONSE_INTERVAL (10*HZ)
-#define IGMP_QUERY_ROBUSTNESS_VARIABLE 2
-
#define IGMP_INITIAL_REPORT_DELAY (1)
@@ -935,13 +932,15 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
max_delay = IGMP_QUERY_RESPONSE_INTERVAL;
in_dev->mr_v1_seen = jiffies +
- IGMP_V1_ROUTER_PRESENT_TIMEOUT;
+ (in_dev->mr_qrv * in_dev->mr_qi) +
+ in_dev->mr_qri;
group = 0;
} else {
/* v2 router present */
max_delay = ih->code*(HZ/IGMP_TIMER_SCALE);
in_dev->mr_v2_seen = jiffies +
- IGMP_V2_ROUTER_PRESENT_TIMEOUT;
+ (in_dev->mr_qrv * in_dev->mr_qi) +
+ in_dev->mr_qri;
}
/* cancel the interface change timer */
in_dev->mr_ifc_count = 0;
@@ -981,8 +980,21 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
if (!max_delay)
max_delay = 1; /* can't mod w/ 0 */
in_dev->mr_maxdelay = max_delay;
- if (ih3->qrv)
- in_dev->mr_qrv = ih3->qrv;
+
+ /* RFC3376, 4.1.6. QRV and 4.1.7. QQIC, when the most recently
+ * received value was zero, use the default or statically
+ * configured value.
+ */
+ in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
+ in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
+
+ /* RFC3376, 8.3. Query Response Interval:
+ * The number of seconds represented by the [Query Response
+ * Interval] must be less than the [Query Interval].
+ */
+ if (in_dev->mr_qri >= in_dev->mr_qi)
+ in_dev->mr_qri = (in_dev->mr_qi/HZ - 1)*HZ;
+
if (!group) { /* general query */
if (ih3->nsrcs)
return true; /* no sources allowed */
@@ -1723,18 +1735,30 @@ void ip_mc_down(struct in_device *in_dev)
ip_mc_dec_group(in_dev, IGMP_ALL_HOSTS);
}
-void ip_mc_init_dev(struct in_device *in_dev)
-{
#ifdef CONFIG_IP_MULTICAST
+static void ip_mc_reset(struct in_device *in_dev)
+{
struct net *net = dev_net(in_dev->dev);
+
+ in_dev->mr_qi = IGMP_QUERY_INTERVAL;
+ in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
+ in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
+}
+#else
+static void ip_mc_reset(struct in_device *in_dev)
+{
+}
#endif
+
+void ip_mc_init_dev(struct in_device *in_dev)
+{
ASSERT_RTNL();
#ifdef CONFIG_IP_MULTICAST
timer_setup(&in_dev->mr_gq_timer, igmp_gq_timer_expire, 0);
timer_setup(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, 0);
- in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
#endif
+ ip_mc_reset(in_dev);
spin_lock_init(&in_dev->mc_tomb_lock);
}
@@ -1744,15 +1768,10 @@ void ip_mc_init_dev(struct in_device *in_dev)
void ip_mc_up(struct in_device *in_dev)
{
struct ip_mc_list *pmc;
-#ifdef CONFIG_IP_MULTICAST
- struct net *net = dev_net(in_dev->dev);
-#endif
ASSERT_RTNL();
-#ifdef CONFIG_IP_MULTICAST
- in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
-#endif
+ ip_mc_reset(in_dev);
ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
for_each_pmc_rtnl(in_dev, pmc) {
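Note: the igmp.c change replaces the hard-coded 400-second "router present" timeouts with the RFC 3376 section 8.12 formula: Older Version Querier Present Timeout = (Robustness Variable * Query Interval) + Query Response Interval. With the defaults kept above, the arithmetic works out as in this sketch (seconds used here for clarity; the kernel keeps these fields in jiffies):

	#include <stdio.h>

	int main(void)
	{
		unsigned int qrv = 2;	/* sysctl_igmp_qrv default            */
		unsigned int qi  = 125;	/* IGMP_QUERY_INTERVAL, in seconds    */
		unsigned int qri = 10;	/* IGMP_QUERY_RESPONSE_INTERVAL, in s */

		/* RFC 3376, 8.12: Older Version Querier Present Timeout */
		printf("%u s\n", qrv * qi + qri);	/* 260 s, not the old 400 s */
		return 0;
	}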
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index bcb11f3a27c0..760a9e52e02b 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -178,21 +178,22 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
}
static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
- void *arg)
+ void *arg,
+ struct inet_frag_queue **prev)
{
struct inet_frags *f = nf->f;
struct inet_frag_queue *q;
- int err;
q = inet_frag_alloc(nf, f, arg);
- if (!q)
+ if (!q) {
+ *prev = ERR_PTR(-ENOMEM);
return NULL;
-
+ }
mod_timer(&q->timer, jiffies + nf->timeout);
- err = rhashtable_insert_fast(&nf->rhashtable, &q->node,
- f->rhash_params);
- if (err < 0) {
+ *prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key,
+ &q->node, f->rhash_params);
+ if (*prev) {
q->flags |= INET_FRAG_COMPLETE;
inet_frag_kill(q);
inet_frag_destroy(q);
@@ -204,22 +205,22 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
{
- struct inet_frag_queue *fq;
+ struct inet_frag_queue *fq = NULL, *prev;
if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
return NULL;
rcu_read_lock();
- fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
- if (fq) {
+ prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
+ if (!prev)
+ fq = inet_frag_create(nf, key, &prev);
+ if (prev && !IS_ERR(prev)) {
+ fq = prev;
if (!refcount_inc_not_zero(&fq->refcnt))
fq = NULL;
- rcu_read_unlock();
- return fq;
}
rcu_read_unlock();
-
- return inet_frag_create(nf, key);
+ return fq;
}
EXPORT_SYMBOL(inet_frag_find);
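Note: the inet_frag_find() rework closes a lookup/insert race by funneling creation through rhashtable_lookup_get_insert_key(), which performs the lookup and the insert as one atomic operation. A condensed sketch of the resulting contract, with error handling simplified and the wrapper name hypothetical:

	/* sketch: get-or-create without a lookup/insert race */
	static struct inet_frag_queue *frag_get_or_create(struct netns_frags *nf,
							  void *key)
	{
		struct inet_frag_queue *q, *prev;

		q = inet_frag_alloc(nf, nf->f, key);
		if (!q)
			return ERR_PTR(-ENOMEM);

		mod_timer(&q->timer, jiffies + nf->timeout);
		prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key,
							&q->node,
							nf->f->rhash_params);
		if (!prev)
			return q;	/* we inserted the new queue */

		/* Either another CPU inserted first (prev is theirs) or the
		 * insert failed (prev is an ERR_PTR); drop our copy either way.
		 */
		q->flags |= INET_FRAG_COMPLETE;
		inet_frag_kill(q);
		inet_frag_destroy(q);
		return prev;
	}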
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 9b0158fa431f..aa0b22697998 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -515,6 +515,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
struct rb_node *rbn;
int len;
int ihlen;
+ int delta;
int err;
u8 ecn;
@@ -556,10 +557,16 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
if (len > 65535)
goto out_oversize;
+ delta = - head->truesize;
+
/* Head of list must not be cloned. */
if (skb_unclone(head, GFP_ATOMIC))
goto out_nomem;
+ delta += head->truesize;
+ if (delta)
+ add_frag_mem_limit(qp->q.net, delta);
+
/* If the first fragment is fragmented itself, we split
* it to two chunks: the first with data and paged part
* and the second, holding only fragments. */
@@ -722,10 +729,14 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
if (ip_is_fragment(&iph)) {
skb = skb_share_check(skb, GFP_ATOMIC);
if (skb) {
- if (!pskb_may_pull(skb, netoff + iph.ihl * 4))
- return skb;
- if (pskb_trim_rcsum(skb, netoff + len))
- return skb;
+ if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) {
+ kfree_skb(skb);
+ return NULL;
+ }
+ if (pskb_trim_rcsum(skb, netoff + len)) {
+ kfree_skb(skb);
+ return NULL;
+ }
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
if (ip_defrag(net, skb, user))
return NULL;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 35a786c0aaa0..e609b08c9df4 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -547,7 +547,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
list_for_each_entry_safe(skb, next, head, list) {
struct dst_entry *dst;
- list_del(&skb->list);
+ skb_list_del_init(skb);
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
*/
@@ -594,7 +594,7 @@ void ip_list_rcv(struct list_head *head, struct packet_type *pt,
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
- list_del(&skb->list);
+ skb_list_del_init(skb);
skb = ip_rcv_core(skb, net);
if (skb == NULL)
continue;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c09219e7f230..5dbec21856f4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -939,7 +939,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
- unsigned int pagedlen = 0;
+ unsigned int pagedlen;
struct sk_buff *skb_prev;
alloc_new_skb:
skb_prev = skb;
@@ -956,6 +956,7 @@ alloc_new_skb:
if (datalen > mtu - fragheaderlen)
datalen = maxfraglen - fragheaderlen;
fraglen = datalen + fragheaderlen;
+ pagedlen = 0;
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 26c36cccabdc..fffcc130900e 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1246,7 +1246,7 @@ int ip_setsockopt(struct sock *sk, int level,
return -ENOPROTOOPT;
err = do_ip_setsockopt(sk, level, optname, optval, optlen);
-#ifdef CONFIG_BPFILTER
+#if IS_ENABLED(CONFIG_BPFILTER_UMH)
if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
optname < BPFILTER_IPT_SET_MAX)
err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
@@ -1559,7 +1559,7 @@ int ip_getsockopt(struct sock *sk, int level,
int err;
err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
-#ifdef CONFIG_BPFILTER
+#if IS_ENABLED(CONFIG_BPFILTER_UMH)
if (optname >= BPFILTER_IPT_SO_GET_INFO &&
optname < BPFILTER_IPT_GET_MAX)
err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
@@ -1596,7 +1596,7 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
err = do_ip_getsockopt(sk, level, optname, optval, optlen,
MSG_CMSG_COMPAT);
-#ifdef CONFIG_BPFILTER
+#if IS_ENABLED(CONFIG_BPFILTER_UMH)
if (optname >= BPFILTER_IPT_SO_GET_INFO &&
optname < BPFILTER_IPT_GET_MAX)
err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index dde671e97829..c248e0dccbe1 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -80,7 +80,7 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
iph->version = 4;
iph->ihl = sizeof(struct iphdr) >> 2;
- iph->frag_off = df;
+ iph->frag_off = ip_mtu_locked(&rt->dst) ? 0 : df;
iph->protocol = proto;
iph->tos = tos;
iph->daddr = dst;
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index ce1512b02cb2..fd3f9e8a74da 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -81,9 +81,12 @@ static int __init masquerade_tg_init(void)
int ret;
ret = xt_register_target(&masquerade_tg_reg);
+ if (ret)
+ return ret;
- if (ret == 0)
- nf_nat_masquerade_ipv4_register_notifier();
+ ret = nf_nat_masquerade_ipv4_register_notifier();
+ if (ret)
+ xt_unregister_target(&masquerade_tg_reg);
return ret;
}
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index a9d5e013e555..41327bb99093 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -147,28 +147,50 @@ static struct notifier_block masq_inet_notifier = {
.notifier_call = masq_inet_event,
};
-static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
+static int masq_refcnt;
+static DEFINE_MUTEX(masq_mutex);
-void nf_nat_masquerade_ipv4_register_notifier(void)
+int nf_nat_masquerade_ipv4_register_notifier(void)
{
+ int ret = 0;
+
+ mutex_lock(&masq_mutex);
/* check if the notifier was already set */
- if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
- return;
+ if (++masq_refcnt > 1)
+ goto out_unlock;
/* Register for device down reports */
- register_netdevice_notifier(&masq_dev_notifier);
+ ret = register_netdevice_notifier(&masq_dev_notifier);
+ if (ret)
+ goto err_dec;
/* Register IP address change reports */
- register_inetaddr_notifier(&masq_inet_notifier);
+ ret = register_inetaddr_notifier(&masq_inet_notifier);
+ if (ret)
+ goto err_unregister;
+
+ mutex_unlock(&masq_mutex);
+ return ret;
+
+err_unregister:
+ unregister_netdevice_notifier(&masq_dev_notifier);
+err_dec:
+ masq_refcnt--;
+out_unlock:
+ mutex_unlock(&masq_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);
void nf_nat_masquerade_ipv4_unregister_notifier(void)
{
+ mutex_lock(&masq_mutex);
/* check if the notifier still has clients */
- if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
- return;
+ if (--masq_refcnt > 0)
+ goto out_unlock;
unregister_netdevice_notifier(&masq_dev_notifier);
unregister_inetaddr_notifier(&masq_inet_notifier);
+out_unlock:
+ mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
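Note: the masquerade rework swaps a racy atomic_t for a plain refcount guarded by a mutex, and finally propagates registration failures with proper unwinding. The same shape in a self-contained user-space sketch; register_a(), register_b(), and unregister_a() are hypothetical stand-ins for the two notifier registrations:

	#include <pthread.h>

	static int refcnt;
	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

	int register_a(void);	/* hypothetical stand-ins */
	int register_b(void);
	void unregister_a(void);

	int subsys_register(void)
	{
		int ret = 0;

		pthread_mutex_lock(&lock);
		if (++refcnt > 1)	/* already set up by a prior user */
			goto out_unlock;

		ret = register_a();
		if (ret)
			goto err_dec;
		ret = register_b();
		if (ret)
			goto err_unregister;
		goto out_unlock;

	err_unregister:
		unregister_a();
	err_dec:
		refcnt--;
	out_unlock:
		pthread_mutex_unlock(&lock);
		return ret;
	}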
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index f1193e1e928a..6847de1d1db8 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -69,7 +69,9 @@ static int __init nft_masq_ipv4_module_init(void)
if (ret < 0)
return ret;
- nf_nat_masquerade_ipv4_register_notifier();
+ ret = nf_nat_masquerade_ipv4_register_notifier();
+ if (ret)
+ nft_unregister_expr(&nft_masq_ipv4_type);
return ret;
}
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index b7918d4caa30..3b45fe530f91 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -145,6 +145,7 @@ msg_bytes_ready:
ret = err;
goto out;
}
+ copied = -EAGAIN;
}
ret = copied;
out:
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2868ef28ce52..a9d9555a973f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -579,10 +579,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
u32 delta_us;
- if (!delta)
- delta = 1;
- delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
- tcp_rcv_rtt_update(tp, delta_us, 0);
+ if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+ if (!delta)
+ delta = 1;
+ delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+ tcp_rcv_rtt_update(tp, delta_us, 0);
+ }
}
}
@@ -2910,9 +2912,11 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
flag & FLAG_ACKED) {
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
- u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
- seq_rtt_us = ca_rtt_us = delta_us;
+ if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+ seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+ ca_rtt_us = seq_rtt_us;
+ }
}
rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
if (seq_rtt_us < 0)
@@ -4268,7 +4272,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
* If the sack array is full, forget about the last one.
*/
if (this_sack >= TCP_NUM_SACKS) {
- if (tp->compressed_ack)
+ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
tcp_send_ack(sk);
this_sack--;
tp->rx_opt.num_sacks--;
@@ -4363,6 +4367,7 @@ static bool tcp_try_coalesce(struct sock *sk,
if (TCP_SKB_CB(from)->has_rxtstamp) {
TCP_SKB_CB(to)->has_rxtstamp = true;
to->tstamp = from->tstamp;
+ skb_hwtstamps(to)->hwtstamp = skb_hwtstamps(from)->hwtstamp;
}
return true;
@@ -5188,7 +5193,17 @@ send_now:
if (!tcp_is_sack(tp) ||
tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
goto send_now;
- tp->compressed_ack++;
+
+ if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
+ tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
+ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
+ tp->compressed_ack - TCP_FASTRETRANS_THRESH);
+ tp->compressed_ack = 0;
+ }
+
+ if (++tp->compressed_ack <= TCP_FASTRETRANS_THRESH)
+ goto send_now;
if (hrtimer_is_queued(&tp->compressed_ack_timer))
return;
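Note: both RTT hunks above add the same guard. Timestamps tick at TCP_TS_HZ (1000 Hz), so converting a timestamp delta to microseconds multiplies by USEC_PER_SEC / TCP_TS_HZ = 1000, and a delta of INT_MAX / 1000 or more (about 2147 seconds, e.g. from a corrupted or hostile echoed timestamp) would overflow the 32-bit microsecond value. A stand-alone illustration of the guard:

	#include <stdio.h>
	#include <limits.h>

	int main(void)
	{
		unsigned int delta = 3000000;	/* 3000 s: clearly bogus */

		if (delta < INT_MAX / 1000)	/* guard before scaling */
			printf("rtt sample: %u us\n", delta * 1000);
		else
			printf("sample discarded\n");	/* taken for this delta */
		return 0;
	}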
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9c34b97d365d..d1676d8a6ed7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -180,10 +180,10 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
{
struct tcp_sock *tp = tcp_sk(sk);
- if (unlikely(tp->compressed_ack)) {
+ if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) {
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
- tp->compressed_ack);
- tp->compressed_ack = 0;
+ tp->compressed_ack - TCP_FASTRETRANS_THRESH);
+ tp->compressed_ack = TCP_FASTRETRANS_THRESH;
if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
__sock_put(sk);
}
@@ -1904,7 +1904,9 @@ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
* This algorithm is from John Heffner.
*/
static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
- bool *is_cwnd_limited, u32 max_segs)
+ bool *is_cwnd_limited,
+ bool *is_rwnd_limited,
+ u32 max_segs)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 age, send_win, cong_win, limit, in_flight;
@@ -1912,9 +1914,6 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
struct sk_buff *head;
int win_divisor;
- if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
- goto send_now;
-
if (icsk->icsk_ca_state >= TCP_CA_Recovery)
goto send_now;
@@ -1973,10 +1972,27 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
if (age < (tp->srtt_us >> 4))
goto send_now;
- /* Ok, it looks like it is advisable to defer. */
+ /* Ok, it looks like it is advisable to defer.
+ * Three cases are tracked :
+ * 1) We are cwnd-limited
+ * 2) We are rwnd-limited
+ * 3) We are application limited.
+ */
+ if (cong_win < send_win) {
+ if (cong_win <= skb->len) {
+ *is_cwnd_limited = true;
+ return true;
+ }
+ } else {
+ if (send_win <= skb->len) {
+ *is_rwnd_limited = true;
+ return true;
+ }
+ }
- if (cong_win < send_win && cong_win <= skb->len)
- *is_cwnd_limited = true;
+ /* If this packet won't get more data, do not wait. */
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+ goto send_now;
return true;
@@ -2356,7 +2372,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
} else {
if (!push_one &&
tcp_tso_should_defer(sk, skb, &is_cwnd_limited,
- max_segs))
+ &is_rwnd_limited, max_segs))
break;
}
@@ -2494,15 +2510,18 @@ void tcp_send_loss_probe(struct sock *sk)
goto rearm_timer;
}
skb = skb_rb_last(&sk->tcp_rtx_queue);
+ if (unlikely(!skb)) {
+ WARN_ONCE(tp->packets_out,
+ "invalid inflight: %u state %u cwnd %u mss %d\n",
+ tp->packets_out, sk->sk_state, tp->snd_cwnd, mss);
+ inet_csk(sk)->icsk_pending = 0;
+ return;
+ }
/* At most one outstanding TLP retransmission. */
if (tp->tlp_high_seq)
goto rearm_timer;
- /* Retransmit last segment. */
- if (WARN_ON(!skb))
- goto rearm_timer;
-
if (skb_still_in_host_queue(sk, skb))
goto rearm_timer;
@@ -2920,7 +2939,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
trace_tcp_retransmit_skb(sk, skb);
} else if (err != -EBUSY) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs);
}
return err;
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 676020663ce8..f87dbc78b6bc 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -40,15 +40,17 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
u32 elapsed, start_ts;
+ s32 remaining;
start_ts = tcp_retransmit_stamp(sk);
if (!icsk->icsk_user_timeout || !start_ts)
return icsk->icsk_rto;
elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
- if (elapsed >= icsk->icsk_user_timeout)
+ remaining = icsk->icsk_user_timeout - elapsed;
+ if (remaining <= 0)
return 1; /* user timeout has passed; fire ASAP */
- else
- return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(icsk->icsk_user_timeout - elapsed));
+
+ return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining));
}
/**
@@ -209,7 +211,7 @@ static bool retransmits_timed_out(struct sock *sk,
(boundary - linear_backoff_thresh) * TCP_RTO_MAX;
timeout = jiffies_to_msecs(timeout);
}
- return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= timeout;
+ return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
}
/* A write timeout has occurred. Process the after effects. */
@@ -376,7 +378,7 @@ static void tcp_probe_timer(struct sock *sk)
return;
}
- if (icsk->icsk_probes_out > max_probes) {
+ if (icsk->icsk_probes_out >= max_probes) {
abort: tcp_write_err(sk);
} else {
/* Only send another probe if we didn't close things up. */
@@ -482,11 +484,12 @@ void tcp_retransmit_timer(struct sock *sk)
goto out_reset_timer;
}
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTS);
if (tcp_write_timeout(sk))
goto out;
if (icsk->icsk_retransmits == 0) {
- int mib_idx;
+ int mib_idx = 0;
if (icsk->icsk_ca_state == TCP_CA_Recovery) {
if (tcp_is_sack(tp))
@@ -501,10 +504,9 @@ void tcp_retransmit_timer(struct sock *sk)
mib_idx = LINUX_MIB_TCPSACKFAILURES;
else
mib_idx = LINUX_MIB_TCPRENOFAILURES;
- } else {
- mib_idx = LINUX_MIB_TCPTIMEOUTS;
}
- __NET_INC_STATS(sock_net(sk), mib_idx);
+ if (mib_idx)
+ __NET_INC_STATS(sock_net(sk), mib_idx);
}
tcp_enter_loss(sk);
@@ -740,7 +742,7 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
- if (tp->compressed_ack)
+ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
tcp_send_ack(sk);
} else {
if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
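Note: the retransmits_timed_out() change converts an absolute ">=" into the wrap-safe signed-difference idiom used throughout TCP's timestamp code: subtract in unsigned arithmetic, then test the sign, which stays correct across a 32-bit wrap. A user-space demonstration:

	#include <stdio.h>
	#include <stdint.h>

	static int timed_out(uint32_t now, uint32_t start, uint32_t timeout)
	{
		/* correct even after "now" wraps past 2^32 */
		return (int32_t)(now - start - timeout) >= 0;
	}

	int main(void)
	{
		uint32_t start = 0xffffff00u;	/* started just before the wrap */

		printf("%d\n", timed_out(0x00000010u, start, 0x200));	/* 0: not yet */
		printf("%d\n", timed_out(0x00000300u, start, 0x200));	/* 1: expired */
		return 0;
	}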
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 63a808d5af15..045597b9a7c0 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -179,7 +179,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp);
static void addrconf_dad_work(struct work_struct *w);
static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
bool send_na);
-static void addrconf_dad_run(struct inet6_dev *idev);
+static void addrconf_dad_run(struct inet6_dev *idev, bool restart);
static void addrconf_rs_timer(struct timer_list *t);
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
@@ -3439,6 +3439,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_change_info *change_info;
struct netdev_notifier_changeupper_info *info;
struct inet6_dev *idev = __in6_dev_get(dev);
struct net *net = dev_net(dev);
@@ -3513,7 +3514,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
break;
}
- if (idev) {
+ if (!IS_ERR_OR_NULL(idev)) {
if (idev->if_flags & IF_READY) {
/* device is already configured -
* but resend MLD reports, we might
@@ -3521,6 +3522,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
* multicast snooping switches
*/
ipv6_mc_up(idev);
+ change_info = ptr;
+ if (change_info->flags_changed & IFF_NOARP)
+ addrconf_dad_run(idev, true);
rt6_sync_up(dev, RTNH_F_LINKDOWN);
break;
}
@@ -3555,7 +3559,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (!IS_ERR_OR_NULL(idev)) {
if (run_pending)
- addrconf_dad_run(idev);
+ addrconf_dad_run(idev, false);
/* Device has an address by now */
rt6_sync_up(dev, RTNH_F_DEAD);
@@ -4173,16 +4177,19 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
addrconf_verify_rtnl();
}
-static void addrconf_dad_run(struct inet6_dev *idev)
+static void addrconf_dad_run(struct inet6_dev *idev, bool restart)
{
struct inet6_ifaddr *ifp;
read_lock_bh(&idev->lock);
list_for_each_entry(ifp, &idev->addr_list, if_list) {
spin_lock(&ifp->lock);
- if (ifp->flags & IFA_F_TENTATIVE &&
- ifp->state == INET6_IFADDR_STATE_DAD)
+ if ((ifp->flags & IFA_F_TENTATIVE &&
+ ifp->state == INET6_IFADDR_STATE_DAD) || restart) {
+ if (restart)
+ ifp->state = INET6_IFADDR_STATE_PREDAD;
addrconf_dad_kick(ifp);
+ }
spin_unlock(&ifp->lock);
}
read_unlock_bh(&idev->lock);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3f4d61017a69..f0cd291034f0 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1001,6 +1001,9 @@ static int __init inet6_init(void)
err = ip6_flowlabel_init();
if (err)
goto ip6_flowlabel_fail;
+ err = ipv6_anycast_init();
+ if (err)
+ goto ipv6_anycast_fail;
err = addrconf_init();
if (err)
goto addrconf_fail;
@@ -1091,6 +1094,8 @@ ipv6_frag_fail:
ipv6_exthdrs_fail:
addrconf_cleanup();
addrconf_fail:
+ ipv6_anycast_cleanup();
+ipv6_anycast_fail:
ip6_flowlabel_cleanup();
ip6_flowlabel_fail:
ndisc_late_cleanup();
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 4e0ff7031edd..94999058e110 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -44,8 +44,22 @@
#include <net/checksum.h>
+#define IN6_ADDR_HSIZE_SHIFT 8
+#define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT)
+/* anycast address hash table
+ */
+static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
+static DEFINE_SPINLOCK(acaddr_hash_lock);
+
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
+static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+ u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+
+ return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
+}
+
/*
* socket join an anycast group
*/
@@ -204,16 +218,39 @@ void ipv6_sock_ac_close(struct sock *sk)
rtnl_unlock();
}
+static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca)
+{
+ unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr);
+
+ spin_lock(&acaddr_hash_lock);
+ hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]);
+ spin_unlock(&acaddr_hash_lock);
+}
+
+static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca)
+{
+ spin_lock(&acaddr_hash_lock);
+ hlist_del_init_rcu(&aca->aca_addr_lst);
+ spin_unlock(&acaddr_hash_lock);
+}
+
static void aca_get(struct ifacaddr6 *aca)
{
refcount_inc(&aca->aca_refcnt);
}
+static void aca_free_rcu(struct rcu_head *h)
+{
+ struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu);
+
+ fib6_info_release(aca->aca_rt);
+ kfree(aca);
+}
+
static void aca_put(struct ifacaddr6 *ac)
{
if (refcount_dec_and_test(&ac->aca_refcnt)) {
- fib6_info_release(ac->aca_rt);
- kfree(ac);
+ call_rcu(&ac->rcu, aca_free_rcu);
}
}
@@ -229,6 +266,7 @@ static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
aca->aca_addr = *addr;
fib6_info_hold(f6i);
aca->aca_rt = f6i;
+ INIT_HLIST_NODE(&aca->aca_addr_lst);
aca->aca_users = 1;
/* aca_tstamp should be updated upon changes */
aca->aca_cstamp = aca->aca_tstamp = jiffies;
@@ -285,6 +323,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
aca_get(aca);
write_unlock_bh(&idev->lock);
+ ipv6_add_acaddr_hash(net, aca);
+
ip6_ins_rt(net, f6i);
addrconf_join_solict(idev->dev, &aca->aca_addr);
@@ -325,6 +365,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
else
idev->ac_list = aca->aca_next;
write_unlock_bh(&idev->lock);
+ ipv6_del_acaddr_hash(aca);
addrconf_leave_solict(idev, &aca->aca_addr);
ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
@@ -352,6 +393,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
idev->ac_list = aca->aca_next;
write_unlock_bh(&idev->lock);
+ ipv6_del_acaddr_hash(aca);
+
addrconf_leave_solict(idev, &aca->aca_addr);
ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
@@ -390,17 +433,25 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad
bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
const struct in6_addr *addr)
{
+ unsigned int hash = inet6_acaddr_hash(net, addr);
+ struct net_device *nh_dev;
+ struct ifacaddr6 *aca;
bool found = false;
rcu_read_lock();
if (dev)
found = ipv6_chk_acast_dev(dev, addr);
else
- for_each_netdev_rcu(net, dev)
- if (ipv6_chk_acast_dev(dev, addr)) {
+ hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash],
+ aca_addr_lst) {
+ nh_dev = fib6_info_nh_dev(aca->aca_rt);
+ if (!nh_dev || !net_eq(dev_net(nh_dev), net))
+ continue;
+ if (ipv6_addr_equal(&aca->aca_addr, addr)) {
found = true;
break;
}
+ }
rcu_read_unlock();
return found;
}
@@ -540,3 +591,24 @@ void ac6_proc_exit(struct net *net)
remove_proc_entry("anycast6", net->proc_net);
}
#endif
+
+/* Init / cleanup code
+ */
+int __init ipv6_anycast_init(void)
+{
+ int i;
+
+ for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
+ return 0;
+}
+
+void ipv6_anycast_cleanup(void)
+{
+ int i;
+
+ spin_lock(&acaddr_hash_lock);
+ for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
+ spin_unlock(&acaddr_hash_lock);
+}
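
The anycast change replaces the per-device scan in ipv6_chk_acast_addr() with a 256-bucket global hash keyed by address and namespace, so a lookup no longer costs O(number of netdevs). A minimal userspace sketch of the bucket computation, with the kernel helpers (ipv6_addr_hash, net_hash_mix, hash_32) reduced to stand-ins; all names below are illustrative, not the kernel's:

#include <stdint.h>
#include <stdio.h>

#define HSIZE_SHIFT 8
#define HSIZE (1u << HSIZE_SHIFT)

static uint32_t fold_addr(const uint32_t a[4])
{
	return a[0] ^ a[1] ^ a[2] ^ a[3];	/* stand-in for ipv6_addr_hash() */
}

static uint32_t hash32(uint32_t val, unsigned int bits)
{
	return (val * 0x61C88647u) >> (32 - bits);	/* golden-ratio multiplicative hash */
}

static unsigned int acaddr_bucket(uint32_t net_salt, const uint32_t a[4])
{
	return hash32(fold_addr(a) ^ net_salt, HSIZE_SHIFT);
}

int main(void)
{
	uint32_t addr[4] = { 0x20010db8, 0, 0, 0x1 };

	printf("bucket %u of %u\n", acaddr_bucket(0xdeadbeef, addr), HSIZE);
	return 0;
}

Insertion and deletion take a plain spinlock; lookups walk the bucket under RCU, which is why aca_put() now defers the final kfree through call_rcu().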
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1b8bc008b53b..ae3786132c23 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -591,7 +591,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
/* fib entries are never clones */
if (arg.filter.flags & RTM_F_CLONED)
- return skb->len;
+ goto out;
w = (void *)cb->args[2];
if (!w) {
@@ -621,7 +621,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
tb = fib6_get_table(net, arg.filter.table_id);
if (!tb) {
if (arg.filter.dump_all_families)
- return skb->len;
+ goto out;
NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
return -ENOENT;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 96577e742afd..c1d85830c906 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -95,7 +95,7 @@ static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
list_for_each_entry_safe(skb, next, head, list) {
struct dst_entry *dst;
- list_del(&skb->list);
+ skb_list_del_init(skb);
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
*/
@@ -296,7 +296,7 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
- list_del(&skb->list);
+ skb_list_del_init(skb);
skb = ip6_rcv_core(skb, dev, net);
if (skb == NULL)
continue;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 89e0d5118afe..fcd3c66ded16 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -195,37 +195,37 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
const struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *first_hop = &fl6->daddr;
struct dst_entry *dst = skb_dst(skb);
+ unsigned int head_room;
struct ipv6hdr *hdr;
u8 proto = fl6->flowi6_proto;
int seg_len = skb->len;
int hlimit = -1;
u32 mtu;
- if (opt) {
- unsigned int head_room;
+ head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
+ if (opt)
+ head_room += opt->opt_nflen + opt->opt_flen;
- /* First: exthdrs may take lots of space (~8K for now)
- MAX_HEADER is not enough.
- */
- head_room = opt->opt_nflen + opt->opt_flen;
- seg_len += head_room;
- head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
-
- if (skb_headroom(skb) < head_room) {
- struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
- if (!skb2) {
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTDISCARDS);
- kfree_skb(skb);
- return -ENOBUFS;
- }
- if (skb->sk)
- skb_set_owner_w(skb2, skb->sk);
- consume_skb(skb);
- skb = skb2;
+ if (unlikely(skb_headroom(skb) < head_room)) {
+ struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
+ if (!skb2) {
+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_OUTDISCARDS);
+ kfree_skb(skb);
+ return -ENOBUFS;
}
+ if (skb->sk)
+ skb_set_owner_w(skb2, skb->sk);
+ consume_skb(skb);
+ skb = skb2;
+ }
+
+ if (opt) {
+ seg_len += opt->opt_nflen + opt->opt_flen;
+
if (opt->opt_flen)
ipv6_push_frag_opts(skb, opt, &proto);
+
if (opt->opt_nflen)
ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
&fl6->saddr);
@@ -1354,7 +1354,7 @@ emsgsize:
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
- unsigned int pagedlen = 0;
+ unsigned int pagedlen;
alloc_new_skb:
/* There's no room in the current skb */
if (skb)
@@ -1378,6 +1378,7 @@ alloc_new_skb:
if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
fraglen = datalen + fragheaderlen;
+ pagedlen = 0;
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
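
The ip6_xmit() rework computes the worst-case head_room unconditionally (IPv6 header plus link-layer reserve, plus extension headers only when present), so a too-small headroom is handled once before any header is pushed; previously the reallocation only ran under if (opt), and a packet with no options but insufficient headroom went through unfixed. The second hunk resets pagedlen on every pass through alloc_new_skb, instead of once per call. A userspace sketch of the grow-once headroom pattern, with skb_realloc_headroom reduced to malloc plus memcpy (names illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct buf {
	unsigned char *head;	/* start of allocation */
	unsigned char *data;	/* current payload start */
	size_t len;
};

static int ensure_headroom(struct buf *b, size_t need)
{
	size_t headroom = (size_t)(b->data - b->head);
	unsigned char *nhead;

	if (headroom >= need)
		return 0;
	nhead = malloc(need + b->len);
	if (!nhead)
		return -1;		/* caller drops the packet: -ENOBUFS */
	memcpy(nhead + need, b->data, b->len);
	free(b->head);
	b->head = nhead;
	b->data = nhead + need;
	return 0;
}

int main(void)
{
	struct buf b;

	b.head = malloc(64);
	if (!b.head)
		return 1;
	b.data = b.head + 8;	/* only 8 bytes of headroom */
	b.len = 40;
	memset(b.data, 0xab, b.len);
	if (ensure_headroom(&b, 56) == 0)
		printf("headroom grown, data at offset %td\n", b.data - b.head);
	free(b.head);
	return 0;
}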
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 5ae8e1c51079..8b075f0bc351 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -24,7 +24,8 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
unsigned int hh_len;
struct dst_entry *dst;
struct flowi6 fl6 = {
- .flowi6_oif = sk ? sk->sk_bound_dev_if : 0,
+ .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if :
+ rt6_need_strict(&iph->daddr) ? skb_dst(skb)->dev->ifindex : 0,
.flowi6_mark = skb->mark,
.flowi6_uid = sock_net_uid(net, sk),
.daddr = iph->daddr,
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 491f808e356a..29c7f1915a96 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -58,8 +58,12 @@ static int __init masquerade_tg6_init(void)
int err;
err = xt_register_target(&masquerade_tg6_reg);
- if (err == 0)
- nf_nat_masquerade_ipv6_register_notifier();
+ if (err)
+ return err;
+
+ err = nf_nat_masquerade_ipv6_register_notifier();
+ if (err)
+ xt_unregister_target(&masquerade_tg6_reg);
return err;
}
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index b8ac369f98ad..181da2c40f9a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -341,7 +341,7 @@ static bool
nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev)
{
struct sk_buff *fp, *head = fq->q.fragments;
- int payload_len;
+ int payload_len, delta;
u8 ecn;
inet_frag_kill(&fq->q);
@@ -363,10 +363,16 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic
return false;
}
+ delta = - head->truesize;
+
/* Head of list must not be cloned. */
if (skb_unclone(head, GFP_ATOMIC))
return false;
+ delta += head->truesize;
+ if (delta)
+ add_frag_mem_limit(fq->q.net, delta);
+
/* If the first fragment is fragmented itself, we split
* it to two chunks: the first with data and paged part
* and the second, holding only fragments. */
@@ -587,11 +593,16 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
*/
ret = -EINPROGRESS;
if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
- fq->q.meat == fq->q.len &&
- nf_ct_frag6_reasm(fq, skb, dev))
- ret = 0;
- else
+ fq->q.meat == fq->q.len) {
+ unsigned long orefdst = skb->_skb_refdst;
+
+ skb->_skb_refdst = 0UL;
+ if (nf_ct_frag6_reasm(fq, skb, dev))
+ ret = 0;
+ skb->_skb_refdst = orefdst;
+ } else {
skb_dst_drop(skb);
+ }
out_unlock:
spin_unlock_bh(&fq->q.lock);
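
skb_unclone() may replace the head skb's data and change its truesize, so the reassembly path now charges only the difference to the frag memory limit instead of letting the accounting drift. A stubbed-out sketch of that delta bookkeeping (the helpers below are stand-ins, not kernel API):

#include <stdio.h>

static long frag_mem;			/* stands in for the per-netns counter */

static void add_frag_mem_limit(long delta)
{
	frag_mem += delta;
}

static int unclone(long *truesize)
{
	*truesize += 256;		/* pretend the private copy grew the skb */
	return 0;
}

int main(void)
{
	long truesize = 1024;
	long delta = -truesize;		/* remember the size before unclone */

	if (unclone(&truesize))
		return 1;
	delta += truesize;		/* now holds only the growth */
	if (delta)
		add_frag_mem_limit(delta);
	printf("charged %ld extra bytes\n", frag_mem);
	return 0;
}

The nf_ct_frag6_gather() hunk is a separate fix: it parks skb->_skb_refdst across reassembly so the destination reference survives instead of being leaked or dropped with the fragment list.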
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 3e4bf2286abe..0ad0da5a2600 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -132,8 +132,8 @@ static void iterate_cleanup_work(struct work_struct *work)
* of ipv6 addresses being deleted), we also need to add an upper
* limit to the number of queued work items.
*/
-static int masq_inet_event(struct notifier_block *this,
- unsigned long event, void *ptr)
+static int masq_inet6_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
{
struct inet6_ifaddr *ifa = ptr;
const struct net_device *dev;
@@ -171,30 +171,53 @@ static int masq_inet_event(struct notifier_block *this,
return NOTIFY_DONE;
}
-static struct notifier_block masq_inet_notifier = {
- .notifier_call = masq_inet_event,
+static struct notifier_block masq_inet6_notifier = {
+ .notifier_call = masq_inet6_event,
};
-static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
+static int masq_refcnt;
+static DEFINE_MUTEX(masq_mutex);
-void nf_nat_masquerade_ipv6_register_notifier(void)
+int nf_nat_masquerade_ipv6_register_notifier(void)
{
+ int ret = 0;
+
+ mutex_lock(&masq_mutex);
/* check if the notifier is already set */
- if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
- return;
+ if (++masq_refcnt > 1)
+ goto out_unlock;
+
+ ret = register_netdevice_notifier(&masq_dev_notifier);
+ if (ret)
+ goto err_dec;
+
+ ret = register_inet6addr_notifier(&masq_inet6_notifier);
+ if (ret)
+ goto err_unregister;
- register_netdevice_notifier(&masq_dev_notifier);
- register_inet6addr_notifier(&masq_inet_notifier);
+ mutex_unlock(&masq_mutex);
+ return ret;
+
+err_unregister:
+ unregister_netdevice_notifier(&masq_dev_notifier);
+err_dec:
+ masq_refcnt--;
+out_unlock:
+ mutex_unlock(&masq_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);
void nf_nat_masquerade_ipv6_unregister_notifier(void)
{
+ mutex_lock(&masq_mutex);
/* check if the notifier still has clients */
- if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
- return;
+ if (--masq_refcnt > 0)
+ goto out_unlock;
- unregister_inet6addr_notifier(&masq_inet_notifier);
+ unregister_inet6addr_notifier(&masq_inet6_notifier);
unregister_netdevice_notifier(&masq_dev_notifier);
+out_unlock:
+ mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
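
The registration rework replaces the bare atomic refcount with an integer guarded by a mutex, which makes it possible to propagate failures from the two register calls and unwind cleanly: if the second notifier fails, the first is unregistered and the refcount is rolled back. A userspace sketch of the same pattern, with the notifier calls stubbed out (one stub deliberately fails to exercise the error path):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t masq_mutex = PTHREAD_MUTEX_INITIALIZER;
static int masq_refcnt;

static int register_dev_notifier(void)  { return 0; }
static int register_addr_notifier(void) { return -1; }	/* force the error path */
static void unregister_dev_notifier(void) { }

static int masq_register(void)
{
	int ret = 0;

	pthread_mutex_lock(&masq_mutex);
	if (++masq_refcnt > 1)
		goto out_unlock;	/* already registered by another user */

	ret = register_dev_notifier();
	if (ret)
		goto err_dec;
	ret = register_addr_notifier();
	if (ret)
		goto err_unregister;
	goto out_unlock;

err_unregister:
	unregister_dev_notifier();
err_dec:
	masq_refcnt--;		/* roll back so a later attempt retries */
out_unlock:
	pthread_mutex_unlock(&masq_mutex);
	return ret;
}

int main(void)
{
	printf("register -> %d, refcnt %d\n", masq_register(), masq_refcnt);
	return 0;
}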
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
index dd0122f3cffe..e06c82e9dfcd 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -70,7 +70,9 @@ static int __init nft_masq_ipv6_module_init(void)
if (ret < 0)
return ret;
- nf_nat_masquerade_ipv6_register_notifier();
+ ret = nf_nat_masquerade_ipv6_register_notifier();
+ if (ret)
+ nft_unregister_expr(&nft_masq_ipv6_type);
return ret;
}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 5c3c92713096..aa26c45486d9 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -281,7 +281,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
{
struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
struct sk_buff *fp, *head = fq->q.fragments;
- int payload_len;
+ int payload_len, delta;
unsigned int nhoff;
int sum_truesize;
u8 ecn;
@@ -322,10 +322,16 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
if (payload_len > IPV6_MAXPLEN)
goto out_oversize;
+ delta = - head->truesize;
+
/* Head of list must not be cloned. */
if (skb_unclone(head, GFP_ATOMIC))
goto out_oom;
+ delta += head->truesize;
+ if (delta)
+ add_frag_mem_limit(fq->q.net, delta);
+
/* If the first fragment is fragmented itself, we split
* it to two chunks: the first with data and paged part
* and the second, holding only fragments. */
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 2a7423c39456..059f0531f7c1 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2232,8 +2232,7 @@ static void ip6_link_failure(struct sk_buff *skb)
if (rt) {
rcu_read_lock();
if (rt->rt6i_flags & RTF_CACHE) {
- if (dst_hold_safe(&rt->dst))
- rt6_remove_exception_rt(rt);
+ rt6_remove_exception_rt(rt);
} else {
struct fib6_info *from;
struct fib6_node *fn;
@@ -2360,10 +2359,13 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu);
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
+ int oif = sk->sk_bound_dev_if;
struct dst_entry *dst;
- ip6_update_pmtu(skb, sock_net(sk), mtu,
- sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
+ if (!oif && skb->dev)
+ oif = l3mdev_master_ifindex(skb->dev);
+
+ ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
dst = __sk_dst_get(sk);
if (!dst || !dst->obsolete ||
@@ -3214,8 +3216,8 @@ static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
if (cfg->fc_flags & RTF_GATEWAY &&
!ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
goto out;
- if (dst_hold_safe(&rt->dst))
- rc = rt6_remove_exception_rt(rt);
+
+ rc = rt6_remove_exception_rt(rt);
out:
return rc;
}
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index a8854dd3e9c5..8181ee7e1e27 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -347,6 +347,7 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct flowi6 fl6;
+ memset(&fl6, 0, sizeof(fl6));
fl6.daddr = hdr->daddr;
fl6.saddr = hdr->saddr;
fl6.flowlabel = ip6_flowinfo(hdr);
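
fl6 lives on the stack, and seg6_output() only assigns a few of its fields; without the memset the remaining fields (flowi6_oif, flowi6_mark, and so on) hold whatever the stack last held, making the route lookup nondeterministic. A sketch of the bug shape and the fix (struct and names are illustrative):

#include <string.h>

struct flow_key {
	int oif;
	unsigned int mark;
	unsigned int daddr[4];
};

static void build_key(struct flow_key *fl, const unsigned int daddr[4])
{
	memset(fl, 0, sizeof(*fl));	/* the fix: zero everything first */
	memcpy(fl->daddr, daddr, sizeof(fl->daddr));
	/* oif/mark intentionally stay 0 unless the caller sets them */
}

int main(void)
{
	unsigned int dst[4] = { 0x20010db8, 0, 0, 1 };
	struct flow_key fl;

	build_key(&fl, dst);
	return fl.oif;		/* deterministically 0 thanks to the memset */
}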
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 82cdf9020b53..26f1d435696a 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1490,12 +1490,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
goto err_sock;
}
- sk = sock->sk;
-
- sock_hold(sk);
- tunnel->sock = sk;
tunnel->l2tp_net = net;
-
pn = l2tp_pernet(net);
spin_lock_bh(&pn->l2tp_tunnel_list_lock);
@@ -1510,6 +1505,10 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+ sk = sock->sk;
+ sock_hold(sk);
+ tunnel->sock = sk;
+
if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
struct udp_tunnel_sock_cfg udp_cfg = {
.sk_user_data = tunnel,
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 51622333d460..818aa0060349 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2891,7 +2891,7 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon)
len = beacon->head_len + beacon->tail_len + beacon->beacon_ies_len +
beacon->proberesp_ies_len + beacon->assocresp_ies_len +
- beacon->probe_resp_len;
+ beacon->probe_resp_len + beacon->lci_len + beacon->civicloc_len;
new_beacon = kzalloc(sizeof(*new_beacon) + len, GFP_KERNEL);
if (!new_beacon)
@@ -2934,8 +2934,9 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon)
memcpy(pos, beacon->probe_resp, beacon->probe_resp_len);
pos += beacon->probe_resp_len;
}
- if (beacon->ftm_responder)
- new_beacon->ftm_responder = beacon->ftm_responder;
+
+ /* might copy -1, meaning no changes requested */
+ new_beacon->ftm_responder = beacon->ftm_responder;
if (beacon->lci) {
new_beacon->lci_len = beacon->lci_len;
new_beacon->lci = pos;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 5836ddeac9e3..5f3c81e705c7 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1015,6 +1015,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
if (local->open_count == 0)
ieee80211_clear_tx_pending(local);
+ sdata->vif.bss_conf.beacon_int = 0;
+
/*
* If the interface goes down while suspended, presumably because
* the device was unplugged and that happens before our resume,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index d2bc8d57c87e..bcf5ffc1567a 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2766,6 +2766,7 @@ static bool ieee80211_mark_sta_auth(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct sta_info *sta;
+ bool result = true;
sdata_info(sdata, "authenticated\n");
ifmgd->auth_data->done = true;
@@ -2778,15 +2779,18 @@ static bool ieee80211_mark_sta_auth(struct ieee80211_sub_if_data *sdata,
sta = sta_info_get(sdata, bssid);
if (!sta) {
WARN_ONCE(1, "%s: STA %pM not found", sdata->name, bssid);
- return false;
+ result = false;
+ goto out;
}
if (sta_info_move_state(sta, IEEE80211_STA_AUTH)) {
sdata_info(sdata, "failed moving %pM to auth\n", bssid);
- return false;
+ result = false;
+ goto out;
}
- mutex_unlock(&sdata->local->sta_mtx);
- return true;
+out:
+ mutex_unlock(&sdata->local->sta_mtx);
+ return result;
}
static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 3bd3b5769797..428f7ad5f9b5 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1403,6 +1403,7 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx)
return RX_CONTINUE;
if (ieee80211_is_ctl(hdr->frame_control) ||
+ ieee80211_is_nullfunc(hdr->frame_control) ||
ieee80211_is_qos_nullfunc(hdr->frame_control) ||
is_multicast_ether_addr(hdr->addr1))
return RX_CONTINUE;
@@ -3063,7 +3064,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
cfg80211_sta_opmode_change_notify(sdata->dev,
rx->sta->addr,
&sta_opmode,
- GFP_KERNEL);
+ GFP_ATOMIC);
goto handled;
}
case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: {
@@ -3100,7 +3101,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
cfg80211_sta_opmode_change_notify(sdata->dev,
rx->sta->addr,
&sta_opmode,
- GFP_KERNEL);
+ GFP_ATOMIC);
goto handled;
}
default:
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index aa4afbf0abaf..a794ca729000 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -964,6 +964,8 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
/* Track when last TDLS packet was ACKed */
if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH))
sta->status_stats.last_tdls_pkt_time = jiffies;
+ } else if (test_sta_flag(sta, WLAN_STA_PS_STA)) {
+ return;
} else {
ieee80211_lost_packet(sta, info);
}
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index e0ccee23fbcd..1f536ba573b4 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -439,8 +439,8 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
if (ieee80211_hw_check(&tx->local->hw, QUEUE_CONTROL))
info->hw_queue = tx->sdata->vif.cab_queue;
- /* no stations in PS mode */
- if (!atomic_read(&ps->num_sta_ps))
+ /* no stations in PS mode and no buffered packets */
+ if (!atomic_read(&ps->num_sta_ps) && skb_queue_empty(&ps->bc_buf))
return TX_CONTINUE;
info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index bc4bd247bb7d..1577f2f76060 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -55,11 +55,15 @@ MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
MODULE_DESCRIPTION("core IP set support");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
-/* When the nfnl mutex is held: */
+/* When the nfnl mutex or ip_set_ref_lock is held: */
#define ip_set_dereference(p) \
- rcu_dereference_protected(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
+ rcu_dereference_protected(p, \
+ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \
+ lockdep_is_held(&ip_set_ref_lock))
#define ip_set(inst, id) \
ip_set_dereference((inst)->ip_set_list)[id]
+#define ip_set_ref_netlink(inst,id) \
+ rcu_dereference_raw((inst)->ip_set_list)[id]
/* The set types are implemented in modules and registered set types
* can be found in ip_set_type_list. Adding/deleting types is
@@ -693,21 +697,20 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index)
EXPORT_SYMBOL_GPL(ip_set_put_byindex);
/* Get the name of a set behind a set index.
- * We assume the set is referenced, so it does exist and
- * can't be destroyed. The set cannot be renamed due to
- * the referencing either.
- *
+ * Set itself is protected by RCU, but its name isn't: to protect against
+ * renaming, grab ip_set_ref_lock as reader (see ip_set_rename()) and copy the
+ * name.
*/
-const char *
-ip_set_name_byindex(struct net *net, ip_set_id_t index)
+void
+ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
{
- const struct ip_set *set = ip_set_rcu_get(net, index);
+ struct ip_set *set = ip_set_rcu_get(net, index);
BUG_ON(!set);
- BUG_ON(set->ref == 0);
- /* Referenced, so it's safe */
- return set->name;
+ read_lock_bh(&ip_set_ref_lock);
+ strncpy(name, set->name, IPSET_MAXNAMELEN);
+ read_unlock_bh(&ip_set_ref_lock);
}
EXPORT_SYMBOL_GPL(ip_set_name_byindex);
@@ -961,7 +964,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl,
/* Wraparound */
goto cleanup;
- list = kcalloc(i, sizeof(struct ip_set *), GFP_KERNEL);
+ list = kvcalloc(i, sizeof(struct ip_set *), GFP_KERNEL);
if (!list)
goto cleanup;
/* nfnl mutex is held, both lists are valid */
@@ -973,7 +976,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl,
/* Use new list */
index = inst->ip_set_max;
inst->ip_set_max = i;
- kfree(tmp);
+ kvfree(tmp);
ret = 0;
} else if (ret) {
goto cleanup;
@@ -1153,7 +1156,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
if (!set)
return -ENOENT;
- read_lock_bh(&ip_set_ref_lock);
+ write_lock_bh(&ip_set_ref_lock);
if (set->ref != 0) {
ret = -IPSET_ERR_REFERENCED;
goto out;
@@ -1170,7 +1173,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
strncpy(set->name, name2, IPSET_MAXNAMELEN);
out:
- read_unlock_bh(&ip_set_ref_lock);
+ write_unlock_bh(&ip_set_ref_lock);
return ret;
}
@@ -1252,7 +1255,7 @@ ip_set_dump_done(struct netlink_callback *cb)
struct ip_set_net *inst =
(struct ip_set_net *)cb->args[IPSET_CB_NET];
ip_set_id_t index = (ip_set_id_t)cb->args[IPSET_CB_INDEX];
- struct ip_set *set = ip_set(inst, index);
+ struct ip_set *set = ip_set_ref_netlink(inst, index);
if (set->variant->uref)
set->variant->uref(set, cb, false);
@@ -1441,7 +1444,7 @@ next_set:
release_refcount:
/* If there was an error or set is done, release set */
if (ret || !cb->args[IPSET_CB_ARG0]) {
- set = ip_set(inst, index);
+ set = ip_set_ref_netlink(inst, index);
if (set->variant->uref)
set->variant->uref(set, cb, false);
pr_debug("release set %s\n", set->name);
@@ -2059,7 +2062,7 @@ ip_set_net_init(struct net *net)
if (inst->ip_set_max >= IPSET_INVALID_ID)
inst->ip_set_max = IPSET_INVALID_ID - 1;
- list = kcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL);
+ list = kvcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL);
if (!list)
return -ENOMEM;
inst->is_deleted = false;
@@ -2087,7 +2090,7 @@ ip_set_net_exit(struct net *net)
}
}
nfnl_unlock(NFNL_SUBSYS_IPSET);
- kfree(rcu_dereference_protected(inst->ip_set_list, 1));
+ kvfree(rcu_dereference_protected(inst->ip_set_list, 1));
}
static struct pernet_operations ip_set_net_ops = {
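
Returning a pointer to set->name was only safe while a referenced set could never be renamed; once rename is permitted, readers must copy the name while holding the same lock that rename now takes as writer (note ip_set_rename() switching from read_lock_bh to write_lock_bh above). A userspace sketch of that reader-copy discipline, assuming a pthread rwlock in place of ip_set_ref_lock:

#include <pthread.h>
#include <stdio.h>
#include <string.h>

#define MAXNAMELEN 32

static pthread_rwlock_t ref_lock = PTHREAD_RWLOCK_INITIALIZER;
static char set_name[MAXNAMELEN] = "clients";

static void name_byindex(char *out)
{
	pthread_rwlock_rdlock(&ref_lock);	/* excludes a concurrent rename */
	strncpy(out, set_name, MAXNAMELEN);
	pthread_rwlock_unlock(&ref_lock);
}

static void rename_set(const char *newname)
{
	pthread_rwlock_wrlock(&ref_lock);	/* excludes concurrent copies */
	strncpy(set_name, newname, MAXNAMELEN - 1);
	pthread_rwlock_unlock(&ref_lock);
}

int main(void)
{
	char name[MAXNAMELEN];

	name_byindex(name);
	printf("%s\n", name);
	rename_set("clients_v2");
	name_byindex(name);
	printf("%s\n", name);
	return 0;
}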
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index d391485a6acd..613e18e720a4 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -213,13 +213,13 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_CIDR]) {
e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
+ if (e.cidr[0] > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
}
if (tb[IPSET_ATTR_CIDR2]) {
e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
- if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
+ if (e.cidr[1] > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
}
@@ -493,13 +493,13 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_CIDR]) {
e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
+ if (e.cidr[0] > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
}
if (tb[IPSET_ATTR_CIDR2]) {
e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
- if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
+ if (e.cidr[1] > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
}
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 072a658fde04..4eef55da0878 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -148,9 +148,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
{
struct set_elem *e = container_of(rcu, struct set_elem, rcu);
struct ip_set *set = e->set;
- struct list_set *map = set->data;
- ip_set_put_byindex(map->net, e->id);
ip_set_ext_destroy(set, e);
kfree(e);
}
@@ -158,15 +156,21 @@ __list_set_del_rcu(struct rcu_head * rcu)
static inline void
list_set_del(struct ip_set *set, struct set_elem *e)
{
+ struct list_set *map = set->data;
+
set->elements--;
list_del_rcu(&e->list);
+ ip_set_put_byindex(map->net, e->id);
call_rcu(&e->rcu, __list_set_del_rcu);
}
static inline void
-list_set_replace(struct set_elem *e, struct set_elem *old)
+list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
{
+ struct list_set *map = set->data;
+
list_replace_rcu(&old->list, &e->list);
+ ip_set_put_byindex(map->net, old->id);
call_rcu(&old->rcu, __list_set_del_rcu);
}
@@ -298,7 +302,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
INIT_LIST_HEAD(&e->list);
list_set_init_extensions(set, ext, e);
if (n)
- list_set_replace(e, n);
+ list_set_replace(set, e, n);
else if (next)
list_add_tail_rcu(&e->list, &next->list);
else if (prev)
@@ -486,6 +490,7 @@ list_set_list(const struct ip_set *set,
const struct list_set *map = set->data;
struct nlattr *atd, *nested;
u32 i = 0, first = cb->args[IPSET_CB_ARG0];
+ char name[IPSET_MAXNAMELEN];
struct set_elem *e;
int ret = 0;
@@ -504,8 +509,8 @@ list_set_list(const struct ip_set *set,
nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
if (!nested)
goto nla_put_failure;
- if (nla_put_string(skb, IPSET_ATTR_NAME,
- ip_set_name_byindex(map->net, e->id)))
+ ip_set_name_byindex(map->net, e->id, name);
+ if (nla_put_string(skb, IPSET_ATTR_NAME, name))
goto nla_put_failure;
if (ip_set_put_extensions(skb, set, e, true))
goto nla_put_failure;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 83395bf6dc35..432141f04af3 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3980,6 +3980,9 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
static struct notifier_block ip_vs_dst_notifier = {
.notifier_call = ip_vs_dst_event,
+#ifdef CONFIG_IP_VS_IPV6
+ .priority = ADDRCONF_NOTIFY_PRIORITY + 5,
+#endif
};
int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index d4020c5e831d..2526be6b3d90 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1616,7 +1616,7 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
EnterFunction(7);
/* Receive a packet */
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, buflen);
+ iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, buflen);
len = sock_recvmsg(sock, &msg, MSG_DONTWAIT);
if (len < 0)
return len;
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 02ca7df793f5..b6d0f6deea86 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -49,6 +49,7 @@ struct nf_conncount_tuple {
struct nf_conntrack_zone zone;
int cpu;
u32 jiffies32;
+ bool dead;
struct rcu_head rcu_head;
};
@@ -106,15 +107,16 @@ nf_conncount_add(struct nf_conncount_list *list,
conn->zone = *zone;
conn->cpu = raw_smp_processor_id();
conn->jiffies32 = (u32)jiffies;
- spin_lock(&list->list_lock);
+ conn->dead = false;
+ spin_lock_bh(&list->list_lock);
if (list->dead == true) {
kmem_cache_free(conncount_conn_cachep, conn);
- spin_unlock(&list->list_lock);
+ spin_unlock_bh(&list->list_lock);
return NF_CONNCOUNT_SKIP;
}
list_add_tail(&conn->node, &list->head);
list->count++;
- spin_unlock(&list->list_lock);
+ spin_unlock_bh(&list->list_lock);
return NF_CONNCOUNT_ADDED;
}
EXPORT_SYMBOL_GPL(nf_conncount_add);
@@ -132,19 +134,22 @@ static bool conn_free(struct nf_conncount_list *list,
{
bool free_entry = false;
- spin_lock(&list->list_lock);
+ spin_lock_bh(&list->list_lock);
- if (list->count == 0) {
- spin_unlock(&list->list_lock);
- return free_entry;
+ if (conn->dead) {
+ spin_unlock_bh(&list->list_lock);
+ return free_entry;
}
list->count--;
+ conn->dead = true;
list_del_rcu(&conn->node);
- if (list->count == 0)
+ if (list->count == 0) {
+ list->dead = true;
free_entry = true;
+ }
- spin_unlock(&list->list_lock);
+ spin_unlock_bh(&list->list_lock);
call_rcu(&conn->rcu_head, __conn_free);
return free_entry;
}
@@ -245,7 +250,7 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
{
spin_lock_init(&list->list_lock);
INIT_LIST_HEAD(&list->head);
- list->count = 1;
+ list->count = 0;
list->dead = false;
}
EXPORT_SYMBOL_GPL(nf_conncount_list_init);
@@ -259,6 +264,7 @@ bool nf_conncount_gc_list(struct net *net,
struct nf_conn *found_ct;
unsigned int collected = 0;
bool free_entry = false;
+ bool ret = false;
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
found = find_or_evict(net, list, conn, &free_entry);
@@ -288,7 +294,15 @@ bool nf_conncount_gc_list(struct net *net,
if (collected > CONNCOUNT_GC_MAX_NODES)
return false;
}
- return false;
+
+ spin_lock_bh(&list->list_lock);
+ if (!list->count) {
+ list->dead = true;
+ ret = true;
+ }
+ spin_unlock_bh(&list->list_lock);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_conncount_gc_list);
@@ -309,11 +323,8 @@ static void tree_nodes_free(struct rb_root *root,
while (gc_count) {
rbconn = gc_nodes[--gc_count];
spin_lock(&rbconn->list.list_lock);
- if (rbconn->list.count == 0 && rbconn->list.dead == false) {
- rbconn->list.dead = true;
- rb_erase(&rbconn->node, root);
- call_rcu(&rbconn->rcu_head, __tree_nodes_free);
- }
+ rb_erase(&rbconn->node, root);
+ call_rcu(&rbconn->rcu_head, __tree_nodes_free);
spin_unlock(&rbconn->list.list_lock);
}
}
@@ -414,6 +425,7 @@ insert_tree(struct net *net,
nf_conncount_list_init(&rbconn->list);
list_add(&conn->node, &rbconn->list.head);
count = 1;
+ rbconn->list.count = count;
rb_link_node(&rbconn->node, parent, rbnode);
rb_insert_color(&rbconn->node, root);
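
conn_free() could previously run twice for the same node (garbage collection racing the packet path) and unlink it twice, and the old "count == 0" guard did not distinguish an already-freed node from an empty list. The fix tags each node dead under the list lock so only the first caller performs the unlink, and marks the list itself dead when its last node goes. A single-threaded sketch of the idempotence this buys (lock elided, structures illustrative):

#include <stdbool.h>
#include <stdio.h>

struct node {
	bool dead;
};

struct list {
	int count;
	bool dead;
};

static bool conn_free(struct list *l, struct node *n)
{
	/* the list spinlock would be held across this whole function */
	if (n->dead)
		return false;		/* another path already freed it */
	n->dead = true;
	l->count--;
	if (l->count == 0)
		l->dead = true;		/* last node: mark list for teardown */
	return true;
}

int main(void)
{
	struct list l = { .count = 1 };
	struct node n = { 0 };

	printf("first  free: %d\n", conn_free(&l, &n));
	printf("second free: %d\n", conn_free(&l, &n));	/* safely a no-op */
	printf("list dead: %d, count: %d\n", l.dead, l.count);
	return 0;
}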
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index ca1168d67fac..e92e749aff53 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1073,19 +1073,22 @@ static unsigned int early_drop_list(struct net *net,
return drops;
}
-static noinline int early_drop(struct net *net, unsigned int _hash)
+static noinline int early_drop(struct net *net, unsigned int hash)
{
- unsigned int i;
+ unsigned int i, bucket;
for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
struct hlist_nulls_head *ct_hash;
- unsigned int hash, hsize, drops;
+ unsigned int hsize, drops;
rcu_read_lock();
nf_conntrack_get_ht(&ct_hash, &hsize);
- hash = reciprocal_scale(_hash++, hsize);
+ if (!i)
+ bucket = reciprocal_scale(hash, hsize);
+ else
+ bucket = (bucket + 1) % hsize;
- drops = early_drop_list(net, &ct_hash[hash]);
+ drops = early_drop_list(net, &ct_hash[bucket]);
rcu_read_unlock();
if (drops) {
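
Rescaling the incremented hash on every iteration could map several iterations onto the same bucket; walking consecutive buckets from a single scaled start point guarantees NF_CT_EVICTION_RANGE distinct buckets are examined. A sketch of the traversal, with reciprocal_scale() spelled out as the 64-bit multiply-shift it is:

#include <stdint.h>
#include <stdio.h>

#define EVICTION_RANGE 8

static unsigned int reciprocal_scale(uint32_t val, uint32_t ep_ro)
{
	return (uint32_t)(((uint64_t)val * ep_ro) >> 32);	/* maps val into [0, ep_ro) */
}

int main(void)
{
	unsigned int hsize = 16384, hash = 0xabcdef12, bucket = 0;
	unsigned int i;

	for (i = 0; i < EVICTION_RANGE; i++) {
		if (!i)
			bucket = reciprocal_scale(hash, hsize);
		else
			bucket = (bucket + 1) % hsize;	/* step to the next bucket */
		printf("try bucket %u\n", bucket);
	}
	return 0;
}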
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 171e9e122e5f..023c1445bc39 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -384,11 +384,6 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
},
};
-static inline struct nf_dccp_net *dccp_pernet(struct net *net)
-{
- return &net->ct.nf_ct_proto.dccp;
-}
-
static noinline bool
dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
const struct dccp_hdr *dh)
@@ -401,7 +396,7 @@ dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE];
switch (state) {
default:
- dn = dccp_pernet(net);
+ dn = nf_dccp_pernet(net);
if (dn->dccp_loose == 0) {
msg = "not picking up existing connection ";
goto out_invalid;
@@ -568,7 +563,7 @@ static int dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
timeouts = nf_ct_timeout_lookup(ct);
if (!timeouts)
- timeouts = dccp_pernet(nf_ct_net(ct))->dccp_timeout;
+ timeouts = nf_dccp_pernet(nf_ct_net(ct))->dccp_timeout;
nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
return NF_ACCEPT;
@@ -681,7 +676,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
- struct nf_dccp_net *dn = dccp_pernet(net);
+ struct nf_dccp_net *dn = nf_dccp_pernet(net);
unsigned int *timeouts = data;
int i;
@@ -814,7 +809,7 @@ static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn,
static int dccp_init_net(struct net *net)
{
- struct nf_dccp_net *dn = dccp_pernet(net);
+ struct nf_dccp_net *dn = nf_dccp_pernet(net);
struct nf_proto_net *pn = &dn->pn;
if (!pn->users) {
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index e10e867e0b55..5da19d5fbc76 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -27,11 +27,6 @@ static bool nf_generic_should_process(u8 proto)
}
}
-static inline struct nf_generic_net *generic_pernet(struct net *net)
-{
- return &net->ct.nf_ct_proto.generic;
-}
-
static bool generic_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
struct net *net, struct nf_conntrack_tuple *tuple)
@@ -58,7 +53,7 @@ static int generic_packet(struct nf_conn *ct,
}
if (!timeout)
- timeout = &generic_pernet(nf_ct_net(ct))->timeout;
+ timeout = &nf_generic_pernet(nf_ct_net(ct))->timeout;
nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
return NF_ACCEPT;
@@ -72,7 +67,7 @@ static int generic_packet(struct nf_conn *ct,
static int generic_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
- struct nf_generic_net *gn = generic_pernet(net);
+ struct nf_generic_net *gn = nf_generic_pernet(net);
unsigned int *timeout = data;
if (!timeout)
@@ -138,7 +133,7 @@ static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn,
static int generic_init_net(struct net *net)
{
- struct nf_generic_net *gn = generic_pernet(net);
+ struct nf_generic_net *gn = nf_generic_pernet(net);
struct nf_proto_net *pn = &gn->pn;
gn->timeout = nf_ct_generic_timeout;
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 9b48dc8b4b88..2a5e56c6d8d9 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -43,24 +43,12 @@
#include <linux/netfilter/nf_conntrack_proto_gre.h>
#include <linux/netfilter/nf_conntrack_pptp.h>
-enum grep_conntrack {
- GRE_CT_UNREPLIED,
- GRE_CT_REPLIED,
- GRE_CT_MAX
-};
-
static const unsigned int gre_timeouts[GRE_CT_MAX] = {
[GRE_CT_UNREPLIED] = 30*HZ,
[GRE_CT_REPLIED] = 180*HZ,
};
static unsigned int proto_gre_net_id __read_mostly;
-struct netns_proto_gre {
- struct nf_proto_net nf;
- rwlock_t keymap_lock;
- struct list_head keymap_list;
- unsigned int gre_timeouts[GRE_CT_MAX];
-};
static inline struct netns_proto_gre *gre_pernet(struct net *net)
{
@@ -402,6 +390,8 @@ static int __init nf_ct_proto_gre_init(void)
{
int ret;
+ BUILD_BUG_ON(offsetof(struct netns_proto_gre, nf) != 0);
+
ret = register_pernet_subsys(&proto_gre_net_ops);
if (ret < 0)
goto out_pernet;
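
Moving struct netns_proto_gre into a header lets the cttimeout code reach the GRE timeouts through the generic net_id, but that code casts the per-netns area to its first member, so the layout assumption is now enforced at build time with BUILD_BUG_ON. The same check in plain C11, as a compile-time guard rather than a runtime one (types illustrative):

#include <stddef.h>

struct proto_net { int users; };

struct netns_proto_gre {
	struct proto_net nf;		/* must remain the first member */
	unsigned int gre_timeouts[2];
};

_Static_assert(offsetof(struct netns_proto_gre, nf) == 0,
	       "nf must be the first member");

int main(void) { return 0; }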
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index 3598520bd19b..de64d8a5fdfd 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -25,11 +25,6 @@
static const unsigned int nf_ct_icmp_timeout = 30*HZ;
-static inline struct nf_icmp_net *icmp_pernet(struct net *net)
-{
- return &net->ct.nf_ct_proto.icmp;
-}
-
static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
struct net *net, struct nf_conntrack_tuple *tuple)
{
@@ -103,7 +98,7 @@ static int icmp_packet(struct nf_conn *ct,
}
if (!timeout)
- timeout = &icmp_pernet(nf_ct_net(ct))->timeout;
+ timeout = &nf_icmp_pernet(nf_ct_net(ct))->timeout;
nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
return NF_ACCEPT;
@@ -275,7 +270,7 @@ static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
unsigned int *timeout = data;
- struct nf_icmp_net *in = icmp_pernet(net);
+ struct nf_icmp_net *in = nf_icmp_pernet(net);
if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
if (!timeout)
@@ -337,7 +332,7 @@ static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
static int icmp_init_net(struct net *net)
{
- struct nf_icmp_net *in = icmp_pernet(net);
+ struct nf_icmp_net *in = nf_icmp_pernet(net);
struct nf_proto_net *pn = &in->pn;
in->timeout = nf_ct_icmp_timeout;
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index 378618feed5d..a15eefb8e317 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -30,11 +30,6 @@
static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
-static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
-{
- return &net->ct.nf_ct_proto.icmpv6;
-}
-
static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
struct net *net,
@@ -87,7 +82,7 @@ static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
static unsigned int *icmpv6_get_timeouts(struct net *net)
{
- return &icmpv6_pernet(net)->timeout;
+ return &nf_icmpv6_pernet(net)->timeout;
}
/* Returns verdict for packet, or -1 for invalid. */
@@ -286,7 +281,7 @@ static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
unsigned int *timeout = data;
- struct nf_icmp_net *in = icmpv6_pernet(net);
+ struct nf_icmp_net *in = nf_icmpv6_pernet(net);
if (!timeout)
timeout = icmpv6_get_timeouts(net);
@@ -348,7 +343,7 @@ static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
static int icmpv6_init_net(struct net *net)
{
- struct nf_icmp_net *in = icmpv6_pernet(net);
+ struct nf_icmp_net *in = nf_icmpv6_pernet(net);
struct nf_proto_net *pn = &in->pn;
in->timeout = nf_ct_icmpv6_timeout;
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 3d719d3eb9a3..d53e3e78f605 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -146,11 +146,6 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
}
};
-static inline struct nf_sctp_net *sctp_pernet(struct net *net)
-{
- return &net->ct.nf_ct_proto.sctp;
-}
-
#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Print out the private part of the conntrack. */
static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
@@ -480,7 +475,7 @@ static int sctp_packet(struct nf_conn *ct,
timeouts = nf_ct_timeout_lookup(ct);
if (!timeouts)
- timeouts = sctp_pernet(nf_ct_net(ct))->timeouts;
+ timeouts = nf_sctp_pernet(nf_ct_net(ct))->timeouts;
nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
@@ -599,7 +594,7 @@ static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
unsigned int *timeouts = data;
- struct nf_sctp_net *sn = sctp_pernet(net);
+ struct nf_sctp_net *sn = nf_sctp_pernet(net);
int i;
/* set default SCTP timeouts. */
@@ -736,7 +731,7 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
static int sctp_init_net(struct net *net)
{
- struct nf_sctp_net *sn = sctp_pernet(net);
+ struct nf_sctp_net *sn = nf_sctp_pernet(net);
struct nf_proto_net *pn = &sn->pn;
if (!pn->users) {
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 1bcf9984d45e..4dcbd51a8e97 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -272,11 +272,6 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
}
};
-static inline struct nf_tcp_net *tcp_pernet(struct net *net)
-{
- return &net->ct.nf_ct_proto.tcp;
-}
-
#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Print out the private part of the conntrack. */
static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
@@ -475,7 +470,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
const struct tcphdr *tcph)
{
struct net *net = nf_ct_net(ct);
- struct nf_tcp_net *tn = tcp_pernet(net);
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
struct ip_ct_tcp_state *sender = &state->seen[dir];
struct ip_ct_tcp_state *receiver = &state->seen[!dir];
const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
@@ -767,7 +762,7 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
{
enum tcp_conntrack new_state;
struct net *net = nf_ct_net(ct);
- const struct nf_tcp_net *tn = tcp_pernet(net);
+ const struct nf_tcp_net *tn = nf_tcp_pernet(net);
const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
@@ -841,7 +836,7 @@ static int tcp_packet(struct nf_conn *ct,
const struct nf_hook_state *state)
{
struct net *net = nf_ct_net(ct);
- struct nf_tcp_net *tn = tcp_pernet(net);
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
struct nf_conntrack_tuple *tuple;
enum tcp_conntrack new_state, old_state;
unsigned int index, *timeouts;
@@ -1283,7 +1278,7 @@ static unsigned int tcp_nlattr_tuple_size(void)
static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
- struct nf_tcp_net *tn = tcp_pernet(net);
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
unsigned int *timeouts = data;
int i;
@@ -1508,7 +1503,7 @@ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
static int tcp_init_net(struct net *net)
{
- struct nf_tcp_net *tn = tcp_pernet(net);
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
struct nf_proto_net *pn = &tn->pn;
if (!pn->users) {
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index a7aa70370913..c879d8d78cfd 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -32,14 +32,9 @@ static const unsigned int udp_timeouts[UDP_CT_MAX] = {
[UDP_CT_REPLIED] = 180*HZ,
};
-static inline struct nf_udp_net *udp_pernet(struct net *net)
-{
- return &net->ct.nf_ct_proto.udp;
-}
-
static unsigned int *udp_get_timeouts(struct net *net)
{
- return udp_pernet(net)->timeouts;
+ return nf_udp_pernet(net)->timeouts;
}
static void udp_error_log(const struct sk_buff *skb,
@@ -212,7 +207,7 @@ static int udp_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
unsigned int *timeouts = data;
- struct nf_udp_net *un = udp_pernet(net);
+ struct nf_udp_net *un = nf_udp_pernet(net);
if (!timeouts)
timeouts = un->timeouts;
@@ -292,7 +287,7 @@ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn,
static int udp_init_net(struct net *net)
{
- struct nf_udp_net *un = udp_pernet(net);
+ struct nf_udp_net *un = nf_udp_pernet(net);
struct nf_proto_net *pn = &un->pn;
if (!pn->users) {
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 42487d01a3ed..2e61aab6ed73 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2457,7 +2457,7 @@ err:
static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
struct nft_rule *rule)
{
- struct nft_expr *expr;
+ struct nft_expr *expr, *next;
/*
* Careful: some expressions might not be initialized in case this
@@ -2465,8 +2465,9 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
*/
expr = nft_expr_first(rule);
while (expr != nft_expr_last(rule) && expr->ops) {
+ next = nft_expr_next(expr);
nf_tables_expr_destroy(ctx, expr);
- expr = nft_expr_next(expr);
+ expr = next;
}
kfree(rule);
}
@@ -2589,17 +2590,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
if (chain->use == UINT_MAX)
return -EOVERFLOW;
- }
-
- if (nla[NFTA_RULE_POSITION]) {
- if (!(nlh->nlmsg_flags & NLM_F_CREATE))
- return -EOPNOTSUPP;
- pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
- old_rule = __nft_rule_lookup(chain, pos_handle);
- if (IS_ERR(old_rule)) {
- NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
- return PTR_ERR(old_rule);
+ if (nla[NFTA_RULE_POSITION]) {
+ pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
+ old_rule = __nft_rule_lookup(chain, pos_handle);
+ if (IS_ERR(old_rule)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
+ return PTR_ERR(old_rule);
+ }
}
}
@@ -2669,21 +2667,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
}
if (nlh->nlmsg_flags & NLM_F_REPLACE) {
- if (!nft_is_active_next(net, old_rule)) {
- err = -ENOENT;
- goto err2;
- }
- trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE,
- old_rule);
+ trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
if (trans == NULL) {
err = -ENOMEM;
goto err2;
}
- nft_deactivate_next(net, old_rule);
- chain->use--;
-
- if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
- err = -ENOMEM;
+ err = nft_delrule(&ctx, old_rule);
+ if (err < 0) {
+ nft_trans_destroy(trans);
goto err2;
}
@@ -6324,7 +6315,7 @@ static void nf_tables_commit_chain_free_rules_old(struct nft_rule **rules)
call_rcu(&old->h, __nf_tables_commit_chain_free_rules_old);
}
-static void nf_tables_commit_chain_active(struct net *net, struct nft_chain *chain)
+static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
{
struct nft_rule **g0, **g1;
bool next_genbit;
@@ -6441,11 +6432,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
/* step 2. Make rules_gen_X visible to packet path */
list_for_each_entry(table, &net->nft.tables, list) {
- list_for_each_entry(chain, &table->chains, list) {
- if (!nft_is_active_next(net, chain))
- continue;
- nf_tables_commit_chain_active(net, chain);
- }
+ list_for_each_entry(chain, &table->chains, list)
+ nf_tables_commit_chain(net, chain);
}
/*
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index e7a50af1b3d6..109b0d27345a 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -382,7 +382,8 @@ err:
static int
cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
u32 seq, u32 type, int event, u16 l3num,
- const struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto,
+ const unsigned int *timeouts)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
@@ -408,7 +409,7 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
if (!nest_parms)
goto nla_put_failure;
- ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, NULL);
+ ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts);
if (ret < 0)
goto nla_put_failure;
@@ -430,6 +431,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
struct netlink_ext_ack *extack)
{
const struct nf_conntrack_l4proto *l4proto;
+ unsigned int *timeouts = NULL;
struct sk_buff *skb2;
int ret, err;
__u16 l3num;
@@ -442,12 +444,55 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
l4proto = nf_ct_l4proto_find_get(l4num);
- /* This protocol is not supported, skip. */
- if (l4proto->l4proto != l4num) {
- err = -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ if (l4proto->l4proto != l4num)
goto err;
+
+ switch (l4proto->l4proto) {
+ case IPPROTO_ICMP:
+ timeouts = &nf_icmp_pernet(net)->timeout;
+ break;
+ case IPPROTO_TCP:
+ timeouts = nf_tcp_pernet(net)->timeouts;
+ break;
+ case IPPROTO_UDP: /* fallthrough */
+ case IPPROTO_UDPLITE:
+ timeouts = nf_udp_pernet(net)->timeouts;
+ break;
+ case IPPROTO_DCCP:
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ timeouts = nf_dccp_pernet(net)->dccp_timeout;
+#endif
+ break;
+ case IPPROTO_ICMPV6:
+ timeouts = &nf_icmpv6_pernet(net)->timeout;
+ break;
+ case IPPROTO_SCTP:
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ timeouts = nf_sctp_pernet(net)->timeouts;
+#endif
+ break;
+ case IPPROTO_GRE:
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ if (l4proto->net_id) {
+ struct netns_proto_gre *net_gre;
+
+ net_gre = net_generic(net, *l4proto->net_id);
+ timeouts = net_gre->gre_timeouts;
+ }
+#endif
+ break;
+ case 255:
+ timeouts = &nf_generic_pernet(net)->timeout;
+ break;
+ default:
+ WARN_ONCE(1, "Missing timeouts for proto %d", l4proto->l4proto);
+ break;
}
+ if (!timeouts)
+ goto err;
+
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (skb2 == NULL) {
err = -ENOMEM;
@@ -458,8 +503,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
nlh->nlmsg_seq,
NFNL_MSG_TYPE(nlh->nlmsg_type),
IPCTNL_MSG_TIMEOUT_DEFAULT_SET,
- l3num,
- l4proto);
+ l3num, l4proto, timeouts);
if (ret <= 0) {
kfree_skb(skb2);
err = -ENOMEM;
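
Dumping the default timeouts now resolves the per-netns timeout table up front, per protocol, and fails with -EOPNOTSUPP when the protocol has no table compiled in, instead of handing obj_to_nlattr() a NULL and dumping stale or bogus values. The shape of that lookup, greatly simplified (the arrays below are placeholders for the per-netns tables):

#include <stdio.h>

static unsigned int tcp_timeouts[14];
static unsigned int udp_timeouts[2];

static unsigned int *default_timeouts(int proto)
{
	switch (proto) {
	case 6:  return tcp_timeouts;	/* IPPROTO_TCP */
	case 17: return udp_timeouts;	/* IPPROTO_UDP */
	default: return NULL;		/* unsupported or not compiled in */
	}
}

int main(void)
{
	printf("tcp: %p\n", (void *)default_timeouts(6));
	printf("gre: %p\n", (void *)default_timeouts(47));
	return 0;
}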
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 768292eac2a4..7334e0b80a5e 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -54,9 +54,11 @@ static bool nft_xt_put(struct nft_xt *xt)
return false;
}
-static int nft_compat_chain_validate_dependency(const char *tablename,
- const struct nft_chain *chain)
+static int nft_compat_chain_validate_dependency(const struct nft_ctx *ctx,
+ const char *tablename)
{
+ enum nft_chain_types type = NFT_CHAIN_T_DEFAULT;
+ const struct nft_chain *chain = ctx->chain;
const struct nft_base_chain *basechain;
if (!tablename ||
@@ -64,9 +66,12 @@ static int nft_compat_chain_validate_dependency(const char *tablename,
return 0;
basechain = nft_base_chain(chain);
- if (strcmp(tablename, "nat") == 0 &&
- basechain->type->type != NFT_CHAIN_T_NAT)
- return -EINVAL;
+ if (strcmp(tablename, "nat") == 0) {
+ if (ctx->family != NFPROTO_BRIDGE)
+ type = NFT_CHAIN_T_NAT;
+ if (basechain->type->type != type)
+ return -EINVAL;
+ }
return 0;
}
@@ -342,8 +347,7 @@ static int nft_target_validate(const struct nft_ctx *ctx,
if (target->hooks && !(hook_mask & target->hooks))
return -EINVAL;
- ret = nft_compat_chain_validate_dependency(target->table,
- ctx->chain);
+ ret = nft_compat_chain_validate_dependency(ctx, target->table);
if (ret < 0)
return ret;
}
@@ -516,6 +520,7 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr,
void *info)
{
struct xt_match *match = expr->ops->data;
+ struct module *me = match->me;
struct xt_mtdtor_param par;
par.net = ctx->net;
@@ -526,7 +531,7 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr,
par.match->destroy(&par);
if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
- module_put(match->me);
+ module_put(me);
}
static void
@@ -590,8 +595,7 @@ static int nft_match_validate(const struct nft_ctx *ctx,
if (match->hooks && !(hook_mask & match->hooks))
return -EINVAL;
- ret = nft_compat_chain_validate_dependency(match->table,
- ctx->chain);
+ ret = nft_compat_chain_validate_dependency(ctx, match->table);
if (ret < 0)
return ret;
}
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index e82d9a966c45..974525eb92df 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -214,7 +214,9 @@ static int __init nft_flow_offload_module_init(void)
{
int err;
- register_netdevice_notifier(&flow_offload_netdev_notifier);
+ err = register_netdevice_notifier(&flow_offload_netdev_notifier);
+ if (err)
+ goto err;
err = nft_register_expr(&nft_flow_offload_type);
if (err < 0)
@@ -224,6 +226,7 @@ static int __init nft_flow_offload_module_init(void)
register_expr:
unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+err:
return err;
}
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index 649d1700ec5b..3cc1b3dc3c3c 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -24,7 +24,6 @@ struct nft_ng_inc {
u32 modulus;
atomic_t counter;
u32 offset;
- struct nft_set *map;
};
static u32 nft_ng_inc_gen(struct nft_ng_inc *priv)
@@ -48,34 +47,11 @@ static void nft_ng_inc_eval(const struct nft_expr *expr,
regs->data[priv->dreg] = nft_ng_inc_gen(priv);
}
-static void nft_ng_inc_map_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
-{
- struct nft_ng_inc *priv = nft_expr_priv(expr);
- const struct nft_set *map = priv->map;
- const struct nft_set_ext *ext;
- u32 result;
- bool found;
-
- result = nft_ng_inc_gen(priv);
- found = map->ops->lookup(nft_net(pkt), map, &result, &ext);
-
- if (!found)
- return;
-
- nft_data_copy(&regs->data[priv->dreg],
- nft_set_ext_data(ext), map->dlen);
-}
-
static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
[NFTA_NG_DREG] = { .type = NLA_U32 },
[NFTA_NG_MODULUS] = { .type = NLA_U32 },
[NFTA_NG_TYPE] = { .type = NLA_U32 },
[NFTA_NG_OFFSET] = { .type = NLA_U32 },
- [NFTA_NG_SET_NAME] = { .type = NLA_STRING,
- .len = NFT_SET_MAXNAMELEN - 1 },
- [NFTA_NG_SET_ID] = { .type = NLA_U32 },
};
static int nft_ng_inc_init(const struct nft_ctx *ctx,
@@ -101,22 +77,6 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
NFT_DATA_VALUE, sizeof(u32));
}
-static int nft_ng_inc_map_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
-{
- struct nft_ng_inc *priv = nft_expr_priv(expr);
- u8 genmask = nft_genmask_next(ctx->net);
-
- nft_ng_inc_init(ctx, expr, tb);
-
- priv->map = nft_set_lookup_global(ctx->net, ctx->table,
- tb[NFTA_NG_SET_NAME],
- tb[NFTA_NG_SET_ID], genmask);
-
- return PTR_ERR_OR_ZERO(priv->map);
-}
-
static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
u32 modulus, enum nft_ng_types type, u32 offset)
{
@@ -143,27 +103,10 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
priv->offset);
}
-static int nft_ng_inc_map_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
-{
- const struct nft_ng_inc *priv = nft_expr_priv(expr);
-
- if (nft_ng_dump(skb, priv->dreg, priv->modulus,
- NFT_NG_INCREMENTAL, priv->offset) ||
- nla_put_string(skb, NFTA_NG_SET_NAME, priv->map->name))
- goto nla_put_failure;
-
- return 0;
-
-nla_put_failure:
- return -1;
-}
-
struct nft_ng_random {
enum nft_registers dreg:8;
u32 modulus;
u32 offset;
- struct nft_set *map;
};
static u32 nft_ng_random_gen(struct nft_ng_random *priv)
@@ -183,25 +126,6 @@ static void nft_ng_random_eval(const struct nft_expr *expr,
regs->data[priv->dreg] = nft_ng_random_gen(priv);
}
-static void nft_ng_random_map_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
-{
- struct nft_ng_random *priv = nft_expr_priv(expr);
- const struct nft_set *map = priv->map;
- const struct nft_set_ext *ext;
- u32 result;
- bool found;
-
- result = nft_ng_random_gen(priv);
- found = map->ops->lookup(nft_net(pkt), map, &result, &ext);
- if (!found)
- return;
-
- nft_data_copy(&regs->data[priv->dreg],
- nft_set_ext_data(ext), map->dlen);
-}
-
static int nft_ng_random_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
@@ -226,21 +150,6 @@ static int nft_ng_random_init(const struct nft_ctx *ctx,
NFT_DATA_VALUE, sizeof(u32));
}
-static int nft_ng_random_map_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
-{
- struct nft_ng_random *priv = nft_expr_priv(expr);
- u8 genmask = nft_genmask_next(ctx->net);
-
- nft_ng_random_init(ctx, expr, tb);
- priv->map = nft_set_lookup_global(ctx->net, ctx->table,
- tb[NFTA_NG_SET_NAME],
- tb[NFTA_NG_SET_ID], genmask);
-
- return PTR_ERR_OR_ZERO(priv->map);
-}
-
static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_ng_random *priv = nft_expr_priv(expr);
@@ -249,22 +158,6 @@ static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
priv->offset);
}
-static int nft_ng_random_map_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
-{
- const struct nft_ng_random *priv = nft_expr_priv(expr);
-
- if (nft_ng_dump(skb, priv->dreg, priv->modulus,
- NFT_NG_RANDOM, priv->offset) ||
- nla_put_string(skb, NFTA_NG_SET_NAME, priv->map->name))
- goto nla_put_failure;
-
- return 0;
-
-nla_put_failure:
- return -1;
-}
-
static struct nft_expr_type nft_ng_type;
static const struct nft_expr_ops nft_ng_inc_ops = {
.type = &nft_ng_type,
@@ -274,14 +167,6 @@ static const struct nft_expr_ops nft_ng_inc_ops = {
.dump = nft_ng_inc_dump,
};
-static const struct nft_expr_ops nft_ng_inc_map_ops = {
- .type = &nft_ng_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)),
- .eval = nft_ng_inc_map_eval,
- .init = nft_ng_inc_map_init,
- .dump = nft_ng_inc_map_dump,
-};
-
static const struct nft_expr_ops nft_ng_random_ops = {
.type = &nft_ng_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)),
@@ -290,14 +175,6 @@ static const struct nft_expr_ops nft_ng_random_ops = {
.dump = nft_ng_random_dump,
};
-static const struct nft_expr_ops nft_ng_random_map_ops = {
- .type = &nft_ng_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)),
- .eval = nft_ng_random_map_eval,
- .init = nft_ng_random_map_init,
- .dump = nft_ng_random_map_dump,
-};
-
static const struct nft_expr_ops *
nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
{
@@ -312,12 +189,8 @@ nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
switch (type) {
case NFT_NG_INCREMENTAL:
- if (tb[NFTA_NG_SET_NAME])
- return &nft_ng_inc_map_ops;
return &nft_ng_inc_ops;
case NFT_NG_RANDOM:
- if (tb[NFTA_NG_SET_NAME])
- return &nft_ng_random_map_ops;
return &nft_ng_random_ops;
}
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index ca5e5d8c5ef8..b13618c764ec 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -50,7 +50,7 @@ static int nft_osf_init(const struct nft_ctx *ctx,
int err;
u8 ttl;
- if (nla_get_u8(tb[NFTA_OSF_TTL])) {
+ if (tb[NFTA_OSF_TTL]) {
ttl = nla_get_u8(tb[NFTA_OSF_TTL]);
if (ttl > 2)
return -EINVAL;
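The crash fixed in this hunk came from calling nla_get_u8() on a possibly-absent attribute in order to decide whether it was present. A minimal userspace sketch of the corrected pattern, with a plain nullable pointer standing in for the netlink attribute (nothing below is nftables API):

#include <stdio.h>
#include <stdint.h>

struct attr { uint8_t value; };		/* stand-in for a netlink attribute */

static int parse_ttl(const struct attr *ttl_attr, uint8_t *ttl_out)
{
	if (ttl_attr) {			/* presence check first... */
		uint8_t ttl = ttl_attr->value;	/* ...then the read is safe */

		if (ttl > 2)
			return -1;	/* -EINVAL in the kernel */
		*ttl_out = ttl;
	}
	return 0;			/* absent attribute: keep the default */
}

int main(void)
{
	uint8_t ttl = 0;
	struct attr a = { .value = 1 };
	int ret = parse_ttl(&a, &ttl);

	printf("present: %d (ttl=%u)\n", ret, ttl);
	printf("absent:  %d\n", parse_ttl(NULL, &ttl));
	return 0;
}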
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index c6acfc2d9c84..eb4cbd244c3d 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -114,6 +114,22 @@ static void idletimer_tg_expired(struct timer_list *t)
schedule_work(&timer->work);
}
+static int idletimer_check_sysfs_name(const char *name, unsigned int size)
+{
+ int ret;
+
+ ret = xt_check_proc_name(name, size);
+ if (ret < 0)
+ return ret;
+
+ if (!strcmp(name, "power") ||
+ !strcmp(name, "subsystem") ||
+ !strcmp(name, "uevent"))
+ return -EINVAL;
+
+ return 0;
+}
+
static int idletimer_tg_create(struct idletimer_tg_info *info)
{
int ret;
@@ -124,6 +140,10 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
goto out;
}
+ ret = idletimer_check_sysfs_name(info->label, sizeof(info->label));
+ if (ret < 0)
+ goto out_free_timer;
+
sysfs_attr_init(&info->timer->attr.attr);
info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL);
if (!info->timer->attr.attr.name) {
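The helper added above refuses label names that would collide with the attribute files every sysfs device directory already carries. A userspace sketch of the same validation, assuming a rough length/character check in place of xt_check_proc_name():

#include <stdio.h>
#include <string.h>

static int check_sysfs_name(const char *name, unsigned int size)
{
	if (strnlen(name, size) == size)
		return -1;		/* not NUL-terminated within the buffer */
	if (name[0] == '\0' || strchr(name, '/'))
		return -1;		/* rough stand-in for proc-name rules */
	if (!strcmp(name, "power") ||
	    !strcmp(name, "subsystem") ||
	    !strcmp(name, "uevent"))
		return -1;		/* reserved device attributes */
	return 0;
}

int main(void)
{
	printf("mytimer: %d\n", check_sysfs_name("mytimer", 28));
	printf("power:   %d\n", check_sysfs_name("power", 28));
	return 0;
}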
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index dec843cadf46..9e05c86ba5c4 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -201,18 +201,8 @@ static __net_init int xt_rateest_net_init(struct net *net)
return 0;
}
-static void __net_exit xt_rateest_net_exit(struct net *net)
-{
- struct xt_rateest_net *xn = net_generic(net, xt_rateest_id);
- int i;
-
- for (i = 0; i < ARRAY_SIZE(xn->hash); i++)
- WARN_ON_ONCE(!hlist_empty(&xn->hash[i]));
-}
-
static struct pernet_operations xt_rateest_net_ops = {
.init = xt_rateest_net_init,
- .exit = xt_rateest_net_exit,
.id = &xt_rateest_id,
.size = sizeof(struct xt_rateest_net),
};
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 3e7d259e5d8d..1ad4017f9b73 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -295,9 +295,10 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
/* copy match config into hashtable config */
ret = cfg_copy(&hinfo->cfg, (void *)cfg, 3);
-
- if (ret)
+ if (ret) {
+ vfree(hinfo);
return ret;
+ }
hinfo->cfg.size = size;
if (hinfo->cfg.max == 0)
@@ -814,7 +815,6 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
int ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
-
if (ret)
return ret;
@@ -830,7 +830,6 @@ hashlimit_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
int ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
-
if (ret)
return ret;
@@ -921,7 +920,6 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
return ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
-
if (ret)
return ret;
@@ -940,7 +938,6 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par)
return ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
-
if (ret)
return ret;
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 6bec37ab4472..cd94f925495a 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1166,7 +1166,7 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
if (err) {
net_warn_ratelimited("openvswitch: zone: %u "
- "execeeds conntrack limit\n",
+ "exceeds conntrack limit\n",
info->zone.id);
return err;
}
@@ -1203,7 +1203,8 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
&info->labels.mask);
if (err)
return err;
- } else if (labels_nonzero(&info->labels.mask)) {
+ } else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
+ labels_nonzero(&info->labels.mask)) {
err = ovs_ct_set_labels(ct, key, &info->labels.value,
&info->labels.mask);
if (err)
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index a70097ecf33c..865ecef68196 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -3030,7 +3030,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
* is already present */
if (mac_proto != MAC_PROTO_NONE)
return -EINVAL;
- mac_proto = MAC_PROTO_NONE;
+ mac_proto = MAC_PROTO_ETHERNET;
break;
case OVS_ACTION_ATTR_POP_ETH:
@@ -3038,7 +3038,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return -EINVAL;
if (vlan_tci & htons(VLAN_TAG_PRESENT))
return -EINVAL;
- mac_proto = MAC_PROTO_ETHERNET;
+ mac_proto = MAC_PROTO_NONE;
break;
case OVS_ACTION_ATTR_PUSH_NSH:
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ec3095f13aae..a74650e98f42 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2394,7 +2394,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
void *ph;
__u32 ts;
- ph = skb_shinfo(skb)->destructor_arg;
+ ph = skb_zcopy_get_nouarg(skb);
packet_dec_pending(&po->tx_ring);
ts = __packet_set_timestamp(po, ph, skb);
@@ -2461,7 +2461,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb->mark = po->sk.sk_mark;
skb->tstamp = sockc->transmit_time;
sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
- skb_shinfo(skb)->destructor_arg = ph.raw;
+ skb_zcopy_set_nouarg(skb, ph.raw);
skb_reserve(skb, hlen);
skb_reset_network_header(skb);
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 64362d078da8..a2522f9d71e2 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -375,17 +375,36 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call);
* getting ACKs from the server. Returns a number representing the life state
* which can be compared to that returned by a previous call.
*
- * If this is a client call, ping ACKs will be sent to the server to find out
- * whether it's still responsive and whether the call is still alive on the
- * server.
+ * If the life state stalls, rxrpc_kernel_probe_life() should be called, and
+ * the caller should then wait for about 2*RTT before checking again.
*/
-u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call)
+u32 rxrpc_kernel_check_life(const struct socket *sock,
+ const struct rxrpc_call *call)
{
return call->acks_latest;
}
EXPORT_SYMBOL(rxrpc_kernel_check_life);
/**
+ * rxrpc_kernel_probe_life - Poke the peer to see if it's still alive
+ * @sock: The socket the call is on
+ * @call: The call to check
+ *
+ * In conjunction with rxrpc_kernel_check_life(), allow a kernel service to
+ * find out whether a call is still alive by pinging it. This should cause the
+ * life state to be bumped in about 2*RTT.
+ *
+ * This must be called in TASK_RUNNING state on pain of might_sleep() objecting.
+ */
+void rxrpc_kernel_probe_life(struct socket *sock, struct rxrpc_call *call)
+{
+ rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
+ rxrpc_propose_ack_ping_for_check_life);
+ rxrpc_send_ack_packet(call, true, NULL);
+}
+EXPORT_SYMBOL(rxrpc_kernel_probe_life);
+
+/**
* rxrpc_kernel_get_epoch - Retrieve the epoch value from a call.
* @sock: The socket the call is on
* @call: The call to query
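A sketch of how a kernel service is apparently expected to combine the two exports: sample the life counter, fire a probe, and re-check after roughly two round-trip times. The rxrpc machinery is modeled by a bare counter here; names and timing are illustrative only:

#include <stdio.h>
#include <stdbool.h>

static unsigned int acks_latest;	/* bumped whenever an ACK arrives */

static unsigned int check_life(void)	{ return acks_latest; }
static void probe_life(void)		{ acks_latest++; } /* live peer ACKs the ping */

static bool call_seems_alive(void)
{
	unsigned int before = check_life();

	probe_life();
	/* ...sleep for ~2*RTT here, in TASK_RUNNING context... */
	return check_life() != before;
}

int main(void)
{
	printf("alive=%d\n", call_seems_alive());
	return 0;
}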
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 382196e57a26..bc628acf4f4f 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -611,6 +611,7 @@ struct rxrpc_call {
* not hard-ACK'd packet follows this.
*/
rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */
+ u16 tx_backoff; /* Delay to insert due to Tx failure */
/* TCP-style slow-start congestion control [RFC5681]. Since the SMSS
* is fixed, we keep these numbers in terms of segments (ie. DATA
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 8e7434e92097..468efc3660c0 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -123,6 +123,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
else
ack_at = expiry;
+ ack_at += READ_ONCE(call->tx_backoff);
ack_at += now;
if (time_before(ack_at, call->ack_at)) {
WRITE_ONCE(call->ack_at, ack_at);
@@ -311,6 +312,7 @@ void rxrpc_process_call(struct work_struct *work)
container_of(work, struct rxrpc_call, processor);
rxrpc_serial_t *send_ack;
unsigned long now, next, t;
+ unsigned int iterations = 0;
rxrpc_see_call(call);
@@ -319,6 +321,11 @@ void rxrpc_process_call(struct work_struct *work)
call->debug_id, rxrpc_call_states[call->state], call->events);
recheck_state:
+ /* Limit the number of times we do this before returning to the manager */
+ iterations++;
+ if (iterations > 5)
+ goto requeue;
+
if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
rxrpc_send_abort_packet(call);
goto recheck_state;
@@ -447,13 +454,16 @@ recheck_state:
rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart);
/* other events may have been raised since we started checking */
- if (call->events && call->state < RXRPC_CALL_COMPLETE) {
- __rxrpc_queue_call(call);
- goto out;
- }
+ if (call->events && call->state < RXRPC_CALL_COMPLETE)
+ goto requeue;
out_put:
rxrpc_put_call(call, rxrpc_call_put);
out:
_leave("");
+ return;
+
+requeue:
+ __rxrpc_queue_call(call);
+ goto out;
}
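The iteration cap above bounds how long one pass of the call processor can monopolise a worker: after five passes the work item requeues itself instead of looping on. A userspace sketch of the same shape, with a flag standing in for __rxrpc_queue_call():

#include <stdio.h>
#include <stdbool.h>

static bool requeued;
static unsigned int pending_events = 12;

static void process_call(void)
{
	unsigned int iterations = 0;

	while (pending_events) {
		if (++iterations > 5) {		/* same cap as the patch */
			requeued = true;	/* __rxrpc_queue_call() analogue */
			return;
		}
		pending_events--;		/* handle one event */
	}
}

int main(void)
{
	while (pending_events) {
		requeued = false;
		process_call();
		printf("left=%u requeued=%d\n", pending_events, requeued);
	}
	return 0;
}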
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 189418888839..736aa9281100 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -35,6 +35,21 @@ struct rxrpc_abort_buffer {
static const char rxrpc_keepalive_string[] = "";
/*
+ * Increase Tx backoff on transmission failure and clear it on success.
+ */
+static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret)
+{
+ if (ret < 0) {
+ u16 tx_backoff = READ_ONCE(call->tx_backoff);
+
+ if (tx_backoff < HZ)
+ WRITE_ONCE(call->tx_backoff, tx_backoff + 1);
+ } else {
+ WRITE_ONCE(call->tx_backoff, 0);
+ }
+}
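The backoff policy just added is additive increase on failure, clamped at HZ, with an immediate reset on any success. A standalone sketch of the arithmetic, with plain loads/stores standing in for READ_ONCE/WRITE_ONCE and an assumed jiffy rate:

#include <stdio.h>

#define HZ 100				/* assumed jiffy rate for the sketch */

static unsigned short tx_backoff;

static void tx_backoff_sketch(int ret)
{
	if (ret < 0) {
		if (tx_backoff < HZ)
			tx_backoff++;	/* additive increase on failure */
	} else {
		tx_backoff = 0;		/* immediate reset on success */
	}
}

int main(void)
{
	for (int i = 0; i < 3; i++)
		tx_backoff_sketch(-1);
	printf("after 3 failures: %u jiffies\n", tx_backoff);
	tx_backoff_sketch(0);
	printf("after success:    %u jiffies\n", tx_backoff);
	return 0;
}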
+
+/*
* Arrange for a keepalive ping a certain time after we last transmitted. This
* lets the far side know we're still interested in this call and helps keep
* the route through any intervening firewall open.
@@ -210,6 +225,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
else
trace_rxrpc_tx_packet(call->debug_id, &pkt->whdr,
rxrpc_tx_point_call_ack);
+ rxrpc_tx_backoff(call, ret);
if (call->state < RXRPC_CALL_COMPLETE) {
if (ret < 0) {
@@ -218,7 +234,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
rxrpc_propose_ACK(call, pkt->ack.reason,
ntohs(pkt->ack.maxSkew),
ntohl(pkt->ack.serial),
- true, true,
+ false, true,
rxrpc_propose_ack_retry_tx);
} else {
spin_lock_bh(&call->lock);
@@ -300,7 +316,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
else
trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr,
rxrpc_tx_point_call_abort);
-
+ rxrpc_tx_backoff(call, ret);
rxrpc_put_connection(conn);
return ret;
@@ -413,6 +429,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
else
trace_rxrpc_tx_packet(call->debug_id, &whdr,
rxrpc_tx_point_call_data_nofrag);
+ rxrpc_tx_backoff(call, ret);
if (ret == -EMSGSIZE)
goto send_fragmentable;
@@ -445,9 +462,18 @@ done:
rxrpc_reduce_call_timer(call, expect_rx_by, nowj,
rxrpc_timer_set_for_normal);
}
- }
- rxrpc_set_keepalive(call);
+ rxrpc_set_keepalive(call);
+ } else {
+ /* Cancel the call if the initial transmission fails,
+ * particularly if that's due to network routing issues that
+ * aren't going away anytime soon. The layer above can arrange
+ * the retransmission.
+ */
+ if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags))
+ rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+ RX_USER_ABORT, ret);
+ }
_leave(" = %d [%u]", ret, call->peer->maxdata);
return ret;
@@ -506,6 +532,7 @@ send_fragmentable:
else
trace_rxrpc_tx_packet(call->debug_id, &whdr,
rxrpc_tx_point_call_data_frag);
+ rxrpc_tx_backoff(call, ret);
up_write(&conn->params.local->defrag_sem);
goto done;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 1dae5f2b358f..c8cf4d10c435 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -258,7 +258,8 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
if (is_redirect) {
skb2->tc_redirected = 1;
skb2->tc_from_ingress = skb2->tc_at_ingress;
-
+ if (skb2->tc_from_ingress)
+ skb2->tstamp = 0;
/* let the caller reinsert the packet, if possible */
if (use_reinsert) {
res->ingress = want_ingress;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index da3dd0f68cc2..2b372a06b432 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -201,7 +201,8 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
goto out_release;
}
} else {
- return err;
+ ret = err;
+ goto out_free;
}
p = to_pedit(*a);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 052855d47354..ec8ec55e0fe8 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -27,10 +27,7 @@ struct tcf_police_params {
u32 tcfp_ewma_rate;
s64 tcfp_burst;
u32 tcfp_mtu;
- s64 tcfp_toks;
- s64 tcfp_ptoks;
s64 tcfp_mtu_ptoks;
- s64 tcfp_t_c;
struct psched_ratecfg rate;
bool rate_present;
struct psched_ratecfg peak;
@@ -41,6 +38,11 @@ struct tcf_police_params {
struct tcf_police {
struct tc_action common;
struct tcf_police_params __rcu *params;
+
+ spinlock_t tcfp_lock ____cacheline_aligned_in_smp;
+ s64 tcfp_toks;
+ s64 tcfp_ptoks;
+ s64 tcfp_t_c;
};
#define to_police(pc) ((struct tcf_police *)pc)
@@ -83,7 +85,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
int ovr, int bind, bool rtnl_held,
struct netlink_ext_ack *extack)
{
- int ret = 0, err;
+ int ret = 0, tcfp_result = TC_ACT_OK, err, size;
struct nlattr *tb[TCA_POLICE_MAX + 1];
struct tc_police *parm;
struct tcf_police *police;
@@ -91,7 +93,6 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
struct tc_action_net *tn = net_generic(net, police_net_id);
struct tcf_police_params *new;
bool exists = false;
- int size;
if (nla == NULL)
return -EINVAL;
@@ -122,6 +123,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
return ret;
}
ret = ACT_P_CREATED;
+ spin_lock_init(&(to_police(*a)->tcfp_lock));
} else if (!ovr) {
tcf_idr_release(*a, bind);
return -EEXIST;
@@ -157,6 +159,16 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
goto failure;
}
+ if (tb[TCA_POLICE_RESULT]) {
+ tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]);
+ if (TC_ACT_EXT_CMP(tcfp_result, TC_ACT_GOTO_CHAIN)) {
+ NL_SET_ERR_MSG(extack,
+ "goto chain not allowed on fallback");
+ err = -EINVAL;
+ goto failure;
+ }
+ }
+
new = kzalloc(sizeof(*new), GFP_KERNEL);
if (unlikely(!new)) {
err = -ENOMEM;
@@ -164,6 +176,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
}
/* No failure allowed after this point */
+ new->tcfp_result = tcfp_result;
new->tcfp_mtu = parm->mtu;
if (!new->tcfp_mtu) {
new->tcfp_mtu = ~0;
@@ -186,28 +199,20 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
}
new->tcfp_burst = PSCHED_TICKS2NS(parm->burst);
- new->tcfp_toks = new->tcfp_burst;
- if (new->peak_present) {
+ if (new->peak_present)
new->tcfp_mtu_ptoks = (s64)psched_l2t_ns(&new->peak,
new->tcfp_mtu);
- new->tcfp_ptoks = new->tcfp_mtu_ptoks;
- }
if (tb[TCA_POLICE_AVRATE])
new->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
- if (tb[TCA_POLICE_RESULT]) {
- new->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]);
- if (TC_ACT_EXT_CMP(new->tcfp_result, TC_ACT_GOTO_CHAIN)) {
- NL_SET_ERR_MSG(extack,
- "goto chain not allowed on fallback");
- err = -EINVAL;
- goto failure;
- }
- }
-
spin_lock_bh(&police->tcf_lock);
- new->tcfp_t_c = ktime_get_ns();
+ spin_lock_bh(&police->tcfp_lock);
+ police->tcfp_t_c = ktime_get_ns();
+ police->tcfp_toks = new->tcfp_burst;
+ if (new->peak_present)
+ police->tcfp_ptoks = new->tcfp_mtu_ptoks;
+ spin_unlock_bh(&police->tcfp_lock);
police->tcf_action = parm->action;
rcu_swap_protected(police->params,
new,
@@ -257,25 +262,28 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
}
now = ktime_get_ns();
- toks = min_t(s64, now - p->tcfp_t_c, p->tcfp_burst);
+ spin_lock_bh(&police->tcfp_lock);
+ toks = min_t(s64, now - police->tcfp_t_c, p->tcfp_burst);
if (p->peak_present) {
- ptoks = toks + p->tcfp_ptoks;
+ ptoks = toks + police->tcfp_ptoks;
if (ptoks > p->tcfp_mtu_ptoks)
ptoks = p->tcfp_mtu_ptoks;
ptoks -= (s64)psched_l2t_ns(&p->peak,
qdisc_pkt_len(skb));
}
- toks += p->tcfp_toks;
+ toks += police->tcfp_toks;
if (toks > p->tcfp_burst)
toks = p->tcfp_burst;
toks -= (s64)psched_l2t_ns(&p->rate, qdisc_pkt_len(skb));
if ((toks|ptoks) >= 0) {
- p->tcfp_t_c = now;
- p->tcfp_toks = toks;
- p->tcfp_ptoks = ptoks;
+ police->tcfp_t_c = now;
+ police->tcfp_toks = toks;
+ police->tcfp_ptoks = ptoks;
+ spin_unlock_bh(&police->tcfp_lock);
ret = p->tcfp_result;
goto inc_drops;
}
+ spin_unlock_bh(&police->tcfp_lock);
}
inc_overlimits:
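The act_police rework moves the mutable bucket state (tcfp_toks, tcfp_ptoks, tcfp_t_c) out of the RCU-swapped params and under its own tcfp_lock, so the arithmetic itself is unchanged. A userspace sketch of that single-rate token-bucket decision, with tokens measured as nanoseconds of credit and an approximated psched_l2t_ns(); the rate and burst constants are assumptions:

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

static const uint64_t rate_Bps = 12500000;	/* 100 Mbit/s, assumed */
static const int64_t burst_ns = 300000;		/* 0.3 ms worth of credit */
static int64_t toks, t_c;			/* tcfp_toks / tcfp_t_c analogues */

static int64_t l2t_ns(unsigned int len)		/* psched_l2t_ns() analogue */
{
	return (int64_t)((uint64_t)len * NSEC_PER_SEC / rate_Bps);
}

/* Returns 1 = conform (pass), 0 = exceed (overlimit). */
static int police(unsigned int pkt_len, int64_t now)
{
	int64_t t = now - t_c;

	if (t > burst_ns)
		t = burst_ns;		/* cap credit earned since last pass */
	t += toks;
	if (t > burst_ns)
		t = burst_ns;
	t -= l2t_ns(pkt_len);
	if (t >= 0) {
		t_c = now;		/* commit state only on conform */
		toks = t;
		return 1;
	}
	return 0;
}

int main(void)
{
	toks = burst_ns;
	for (int i = 0; i < 4; i++)
		printf("pkt %d: %s\n", i, police(1500, 0) ? "pass" : "drop");
	return 0;
}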
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 9aada2d0ef06..71312d7bd8f4 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -709,11 +709,23 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
struct netlink_ext_ack *extack)
{
const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL;
- int option_len, key_depth, msk_depth = 0;
+ int err, option_len, key_depth, msk_depth = 0;
+
+ err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS],
+ TCA_FLOWER_KEY_ENC_OPTS_MAX,
+ enc_opts_policy, extack);
+ if (err)
+ return err;
nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]);
if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) {
+ err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK],
+ TCA_FLOWER_KEY_ENC_OPTS_MAX,
+ enc_opts_policy, extack);
+ if (err)
+ return err;
+
nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
}
@@ -1226,18 +1238,16 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
if (err)
goto errout_idr;
- if (!tc_skip_sw(fnew->flags)) {
- if (!fold && fl_lookup(fnew->mask, &fnew->mkey)) {
- err = -EEXIST;
- goto errout_mask;
- }
-
- err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node,
- fnew->mask->filter_ht_params);
- if (err)
- goto errout_mask;
+ if (!fold && fl_lookup(fnew->mask, &fnew->mkey)) {
+ err = -EEXIST;
+ goto errout_mask;
}
+ err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node,
+ fnew->mask->filter_ht_params);
+ if (err)
+ goto errout_mask;
+
if (!tc_skip_hw(fnew->flags)) {
err = fl_hw_replace_filter(tp, fnew, extack);
if (err)
@@ -1291,9 +1301,8 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f = arg;
- if (!tc_skip_sw(f->flags))
- rhashtable_remove_fast(&f->mask->ht, &f->ht_node,
- f->mask->filter_ht_params);
+ rhashtable_remove_fast(&f->mask->ht, &f->ht_node,
+ f->mask->filter_ht_params);
__fl_delete(tp, f, extack);
*last = list_empty(&head->masks);
return 0;
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 4b1af706896c..25a7cf6d380f 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -469,22 +469,29 @@ begin:
goto begin;
}
prefetch(&skb->end);
- f->credit -= qdisc_pkt_len(skb);
+ plen = qdisc_pkt_len(skb);
+ f->credit -= plen;
- if (ktime_to_ns(skb->tstamp) || !q->rate_enable)
+ if (!q->rate_enable)
goto out;
rate = q->flow_max_rate;
- if (skb->sk)
- rate = min(skb->sk->sk_pacing_rate, rate);
-
- if (rate <= q->low_rate_threshold) {
- f->credit = 0;
- plen = qdisc_pkt_len(skb);
- } else {
- plen = max(qdisc_pkt_len(skb), q->quantum);
- if (f->credit > 0)
- goto out;
+
+ /* If EDT time was provided for this skb, we need to
+ * update f->time_next_packet only if this qdisc enforces
+ * a flow max rate.
+ */
+ if (!skb->tstamp) {
+ if (skb->sk)
+ rate = min(skb->sk->sk_pacing_rate, rate);
+
+ if (rate <= q->low_rate_threshold) {
+ f->credit = 0;
+ } else {
+ plen = max(plen, q->quantum);
+ if (f->credit > 0)
+ goto out;
+ }
}
if (rate != ~0UL) {
u64 len = (u64)plen * NSEC_PER_SEC;
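The tail of this hunk converts a packet length into a transmit spacing at the chosen byte rate; the EDT change above means the per-flow pacing inputs are only recomputed when the skb carries no tstamp (or a flow max rate is enforced). A sketch of just that length-to-delay conversion:

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

static uint64_t pacing_delay_ns(unsigned int plen, unsigned long rate)
{
	if (rate == ~0UL)
		return 0;		/* unlimited: no spacing */
	return (uint64_t)plen * NSEC_PER_SEC / rate;
}

int main(void)
{
	/* 1500-byte packet at 1.25 MB/s (10 Mbit/s) -> 1.2 ms spacing */
	printf("%llu ns\n",
	       (unsigned long long)pacing_delay_ns(1500, 1250000));
	return 0;
}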
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 57b3ad9394ad..22cd46a60057 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -431,6 +431,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
int count = 1;
int rc = NET_XMIT_SUCCESS;
+ /* Do not fool qdisc_drop_all() */
+ skb->prev = NULL;
+
/* Random duplication */
if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
++count;
@@ -648,15 +651,6 @@ deliver:
*/
skb->dev = qdisc_dev(sch);
-#ifdef CONFIG_NET_CLS_ACT
- /*
- * If it's at ingress let's pretend the delay is
- * from the network (tstamp will be updated).
- */
- if (skb->tc_redirected && skb->tc_from_ingress)
- skb->tstamp = 0;
-#endif
-
if (q->slot.slot_next) {
q->slot.packets_left--;
q->slot.bytes_left -= qdisc_pkt_len(skb);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index a827a1f562bf..914750b819b2 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -118,9 +118,6 @@ static struct sctp_association *sctp_association_init(
asoc->flowlabel = sp->flowlabel;
asoc->dscp = sp->dscp;
- /* Initialize default path MTU. */
- asoc->pathmtu = sp->pathmtu;
-
/* Set association default SACK delay */
asoc->sackdelay = msecs_to_jiffies(sp->sackdelay);
asoc->sackfreq = sp->sackfreq;
@@ -252,6 +249,10 @@ static struct sctp_association *sctp_association_init(
0, gfp))
goto fail_init;
+ /* Initialize default path MTU. */
+ asoc->pathmtu = sp->pathmtu;
+ sctp_assoc_update_frag_point(asoc);
+
/* Assume that peer would support both address types unless we are
* told otherwise.
*/
@@ -434,7 +435,7 @@ static void sctp_association_destroy(struct sctp_association *asoc)
WARN_ON(atomic_read(&asoc->rmem_alloc));
- kfree(asoc);
+ kfree_rcu(asoc, rcu);
SCTP_DBG_OBJCNT_DEC(assoc);
}
@@ -499,8 +500,9 @@ void sctp_assoc_set_primary(struct sctp_association *asoc,
void sctp_assoc_rm_peer(struct sctp_association *asoc,
struct sctp_transport *peer)
{
- struct list_head *pos;
- struct sctp_transport *transport;
+ struct sctp_transport *transport;
+ struct list_head *pos;
+ struct sctp_chunk *ch;
pr_debug("%s: association:%p addr:%pISpc\n",
__func__, asoc, &peer->ipaddr.sa);
@@ -564,7 +566,6 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
*/
if (!list_empty(&peer->transmitted)) {
struct sctp_transport *active = asoc->peer.active_path;
- struct sctp_chunk *ch;
/* Reset the transport of each chunk on this list */
list_for_each_entry(ch, &peer->transmitted,
@@ -586,6 +587,10 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
sctp_transport_hold(active);
}
+ list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list)
+ if (ch->transport == peer)
+ ch->transport = NULL;
+
asoc->peer.transport_count--;
sctp_transport_free(peer);
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index ce8087846f05..d2048de86e7c 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -191,6 +191,12 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
* the packet
*/
max_data = asoc->frag_point;
+ if (unlikely(!max_data)) {
+ max_data = sctp_min_frag_point(sctp_sk(asoc->base.sk),
+ sctp_datachk_len(&asoc->stream));
+ pr_warn_ratelimited("%s: asoc:%p frag_point is zero, forcing max_data to default minimum (%zu)",
+ __func__, asoc, max_data);
+ }
/* If the peer requested that we authenticate DATA chunks
* we need to account for bundling of the AUTH chunks along with
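The guard added above falls back to the smallest sane fragmentation point when the negotiated one is somehow zero, rather than slicing the message into zero-byte chunks. A userspace sketch of that fallback; the constants are illustrative stand-ins for SCTP_DEFAULT_MINSEGMENT and the header overhead that sctp_min_frag_point() subtracts:

#include <stdio.h>

#define MIN_SEGMENT	512	/* stand-in for SCTP_DEFAULT_MINSEGMENT */
#define HDR_OVERHEAD	48	/* stand-in for IP/SCTP/data-chunk headers */

static unsigned int max_data_for(unsigned int frag_point)
{
	unsigned int max_data = frag_point;

	if (max_data == 0) {	/* unlikely(), but fatal if hit */
		max_data = MIN_SEGMENT - HDR_OVERHEAD;
		fprintf(stderr, "frag_point is zero, forcing minimum %u\n",
			max_data);
	}
	return max_data;
}

int main(void)
{
	printf("normal: %u\n", max_data_for(1400));
	printf("broken: %u\n", max_data_for(0));
	return 0;
}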
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 67939ad99c01..025f48e14a91 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -118,6 +118,9 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
sctp_transport_route(tp, NULL, sp);
if (asoc->param_flags & SPP_PMTUD_ENABLE)
sctp_assoc_sync_pmtu(asoc);
+ } else if (!sctp_transport_pmtu_check(tp)) {
+ if (asoc->param_flags & SPP_PMTUD_ENABLE)
+ sctp_assoc_sync_pmtu(asoc);
}
if (asoc->pmtu_pending) {
@@ -396,25 +399,6 @@ finish:
return retval;
}
-static void sctp_packet_release_owner(struct sk_buff *skb)
-{
- sk_free(skb->sk);
-}
-
-static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk)
-{
- skb_orphan(skb);
- skb->sk = sk;
- skb->destructor = sctp_packet_release_owner;
-
- /*
- * The data chunks have already been accounted for in sctp_sendmsg(),
- * therefore only reserve a single byte to keep socket around until
- * the packet has been transmitted.
- */
- refcount_inc(&sk->sk_wmem_alloc);
-}
-
static void sctp_packet_gso_append(struct sk_buff *head, struct sk_buff *skb)
{
if (SCTP_OUTPUT_CB(head)->last == head)
@@ -426,6 +410,7 @@ static void sctp_packet_gso_append(struct sk_buff *head, struct sk_buff *skb)
head->truesize += skb->truesize;
head->data_len += skb->len;
head->len += skb->len;
+ refcount_add(skb->truesize, &head->sk->sk_wmem_alloc);
__skb_header_release(skb);
}
@@ -601,7 +586,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
if (!head)
goto out;
skb_reserve(head, packet->overhead + MAX_HEADER);
- sctp_packet_set_owner_w(head, sk);
+ skb_set_owner_w(head, sk);
/* set sctp header */
sh = skb_push(head, sizeof(struct sctphdr));
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 9cb854b05342..c37e1c2dec9d 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -212,7 +212,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
INIT_LIST_HEAD(&q->retransmit);
INIT_LIST_HEAD(&q->sacked);
INIT_LIST_HEAD(&q->abandoned);
- sctp_sched_set_sched(asoc, SCTP_SS_FCFS);
+ sctp_sched_set_sched(asoc, SCTP_SS_DEFAULT);
}
/* Free the outqueue structure and any related pending chunks.
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 4a4fd1971255..f4ac6c592e13 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2462,6 +2462,9 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
asoc->c.sinit_max_instreams, gfp))
goto clean_up;
+ /* Update frag_point when stream_interleave may have changed. */
+ sctp_assoc_update_frag_point(asoc);
+
if (!asoc->temp && sctp_assoc_set_id(asoc, gfp))
goto clean_up;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index fc0386e8ff23..b8cebd5a87e5 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3324,8 +3324,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
__u16 datasize = asoc ? sctp_datachk_len(&asoc->stream) :
sizeof(struct sctp_data_chunk);
- min_len = sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT,
- datasize);
+ min_len = sctp_min_frag_point(sp, datasize);
max_len = SCTP_MAX_CHUNK_LEN - datasize;
if (val < min_len || val > max_len)
@@ -3940,32 +3939,16 @@ static int sctp_setsockopt_pr_supported(struct sock *sk,
unsigned int optlen)
{
struct sctp_assoc_value params;
- struct sctp_association *asoc;
- int retval = -EINVAL;
if (optlen != sizeof(params))
- goto out;
-
- if (copy_from_user(&params, optval, optlen)) {
- retval = -EFAULT;
- goto out;
- }
-
- asoc = sctp_id2assoc(sk, params.assoc_id);
- if (asoc) {
- asoc->prsctp_enable = !!params.assoc_value;
- } else if (!params.assoc_id) {
- struct sctp_sock *sp = sctp_sk(sk);
+ return -EINVAL;
- sp->ep->prsctp_enable = !!params.assoc_value;
- } else {
- goto out;
- }
+ if (copy_from_user(&params, optval, optlen))
+ return -EFAULT;
- retval = 0;
+ sctp_sk(sk)->ep->prsctp_enable = !!params.assoc_value;
-out:
- return retval;
+ return 0;
}
static int sctp_setsockopt_default_prinfo(struct sock *sk,
@@ -7083,14 +7066,15 @@ static int sctp_getsockopt_pr_assocstatus(struct sock *sk, int len,
}
policy = params.sprstat_policy;
- if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)))
+ if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)) ||
+ ((policy & SCTP_PR_SCTP_ALL) && (policy & SCTP_PR_SCTP_MASK)))
goto out;
asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
if (!asoc)
goto out;
- if (policy & SCTP_PR_SCTP_ALL) {
+ if (policy == SCTP_PR_SCTP_ALL) {
params.sprstat_abandoned_unsent = 0;
params.sprstat_abandoned_sent = 0;
for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
@@ -7142,7 +7126,8 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
}
policy = params.sprstat_policy;
- if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)))
+ if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)) ||
+ ((policy & SCTP_PR_SCTP_ALL) && (policy & SCTP_PR_SCTP_MASK)))
goto out;
asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index ffb940d3b57c..3892e7630f3a 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -535,7 +535,6 @@ int sctp_send_add_streams(struct sctp_association *asoc,
goto out;
}
- stream->incnt = incnt;
stream->outcnt = outcnt;
asoc->strreset_outstanding = !!out + !!in;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 80e2119f1c70..5fbaf1901571 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -127,6 +127,8 @@ static int smc_release(struct socket *sock)
smc = smc_sk(sk);
/* cleanup for a dangling non-blocking connect */
+ if (smc->connect_info && sk->sk_state == SMC_INIT)
+ tcp_abort(smc->clcsock->sk, ECONNABORTED);
flush_work(&smc->connect_work);
kfree(smc->connect_info);
smc->connect_info = NULL;
@@ -547,7 +549,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
mutex_lock(&smc_create_lgr_pending);
local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
- ibport, &aclc->lcl, NULL, 0);
+ ibport, ntoh24(aclc->qpn), &aclc->lcl,
+ NULL, 0);
if (local_contact < 0) {
if (local_contact == -ENOMEM)
reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
@@ -618,7 +621,7 @@ static int smc_connect_ism(struct smc_sock *smc,
int rc = 0;
mutex_lock(&smc_create_lgr_pending);
- local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0,
+ local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, 0,
NULL, ismdev, aclc->gid);
if (local_contact < 0)
return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0);
@@ -1083,7 +1086,7 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
int *local_contact)
{
/* allocate connection / link group */
- *local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport,
+ *local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport, 0,
&pclc->lcl, NULL, 0);
if (*local_contact < 0) {
if (*local_contact == -ENOMEM)
@@ -1107,7 +1110,7 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
struct smc_clc_msg_smcd *pclc_smcd;
pclc_smcd = smc_get_clc_msg_smcd(pclc);
- *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, NULL,
+ *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, 0, NULL,
ismdev, pclc_smcd->gid);
if (*local_contact < 0) {
if (*local_contact == -ENOMEM)
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index ed5dcf03fe0b..db83332ac1c8 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -81,7 +81,7 @@ static inline void smc_cdc_add_pending_send(struct smc_connection *conn,
sizeof(struct smc_cdc_msg) > SMC_WR_BUF_SIZE,
"must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_cdc_msg)");
BUILD_BUG_ON_MSG(
- sizeof(struct smc_cdc_msg) != SMC_WR_TX_SIZE,
+ offsetofend(struct smc_cdc_msg, reserved) > SMC_WR_TX_SIZE,
"must adapt SMC_WR_TX_SIZE to sizeof(struct smc_cdc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()");
BUILD_BUG_ON_MSG(
sizeof(struct smc_cdc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE,
@@ -177,23 +177,24 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
int smcd_cdc_msg_send(struct smc_connection *conn)
{
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+ union smc_host_cursor curs;
struct smcd_cdc_msg cdc;
int rc, diff;
memset(&cdc, 0, sizeof(cdc));
cdc.common.type = SMC_CDC_MSG_TYPE;
- cdc.prod_wrap = conn->local_tx_ctrl.prod.wrap;
- cdc.prod_count = conn->local_tx_ctrl.prod.count;
-
- cdc.cons_wrap = conn->local_tx_ctrl.cons.wrap;
- cdc.cons_count = conn->local_tx_ctrl.cons.count;
- cdc.prod_flags = conn->local_tx_ctrl.prod_flags;
- cdc.conn_state_flags = conn->local_tx_ctrl.conn_state_flags;
+ curs.acurs.counter = atomic64_read(&conn->local_tx_ctrl.prod.acurs);
+ cdc.prod.wrap = curs.wrap;
+ cdc.prod.count = curs.count;
+ curs.acurs.counter = atomic64_read(&conn->local_tx_ctrl.cons.acurs);
+ cdc.cons.wrap = curs.wrap;
+ cdc.cons.count = curs.count;
+ cdc.cons.prod_flags = conn->local_tx_ctrl.prod_flags;
+ cdc.cons.conn_state_flags = conn->local_tx_ctrl.conn_state_flags;
rc = smcd_tx_ism_write(conn, &cdc, sizeof(cdc), 0, 1);
if (rc)
return rc;
- smc_curs_copy(&conn->rx_curs_confirmed, &conn->local_tx_ctrl.cons,
- conn);
+ smc_curs_copy(&conn->rx_curs_confirmed, &curs, conn);
/* Calculate transmitted data and increment free send buffer space */
diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin,
&conn->tx_curs_sent);
@@ -331,13 +332,16 @@ static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc)
static void smcd_cdc_rx_tsklet(unsigned long data)
{
struct smc_connection *conn = (struct smc_connection *)data;
+ struct smcd_cdc_msg *data_cdc;
struct smcd_cdc_msg cdc;
struct smc_sock *smc;
if (!conn)
return;
- memcpy(&cdc, conn->rmb_desc->cpu_addr, sizeof(cdc));
+ data_cdc = (struct smcd_cdc_msg *)conn->rmb_desc->cpu_addr;
+ smcd_curs_copy(&cdc.prod, &data_cdc->prod, conn);
+ smcd_curs_copy(&cdc.cons, &data_cdc->cons, conn);
smc = container_of(conn, struct smc_sock, conn);
smc_cdc_msg_recv(smc, (struct smc_cdc_msg *)&cdc);
}
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 934df4473a7c..b5bfe38c7f9b 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -48,21 +48,31 @@ struct smc_cdc_msg {
struct smc_cdc_producer_flags prod_flags;
struct smc_cdc_conn_state_flags conn_state_flags;
u8 reserved[18];
-} __packed; /* format defined in RFC7609 */
+};
+
+/* SMC-D cursor format */
+union smcd_cdc_cursor {
+ struct {
+ u16 wrap;
+ u32 count;
+ struct smc_cdc_producer_flags prod_flags;
+ struct smc_cdc_conn_state_flags conn_state_flags;
+ } __packed;
+#ifdef KERNEL_HAS_ATOMIC64
+ atomic64_t acurs; /* for atomic processing */
+#else
+ u64 acurs; /* for atomic processing */
+#endif
+} __aligned(8);
/* CDC message for SMC-D */
struct smcd_cdc_msg {
struct smc_wr_rx_hdr common; /* Type = 0xFE */
u8 res1[7];
- u16 prod_wrap;
- u32 prod_count;
- u8 res2[2];
- u16 cons_wrap;
- u32 cons_count;
- struct smc_cdc_producer_flags prod_flags;
- struct smc_cdc_conn_state_flags conn_state_flags;
+ union smcd_cdc_cursor prod;
+ union smcd_cdc_cursor cons;
u8 res3[8];
-} __packed;
+} __aligned(8);
static inline bool smc_cdc_rxed_any_close(struct smc_connection *conn)
{
@@ -135,6 +145,21 @@ static inline void smc_curs_copy_net(union smc_cdc_cursor *tgt,
#endif
}
+static inline void smcd_curs_copy(union smcd_cdc_cursor *tgt,
+ union smcd_cdc_cursor *src,
+ struct smc_connection *conn)
+{
+#ifndef KERNEL_HAS_ATOMIC64
+ unsigned long flags;
+
+ spin_lock_irqsave(&conn->acurs_lock, flags);
+ tgt->acurs = src->acurs;
+ spin_unlock_irqrestore(&conn->acurs_lock, flags);
+#else
+ atomic64_set(&tgt->acurs, atomic64_read(&src->acurs));
+#endif
+}
+
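The cursor rework above relies on wrap and count fitting into one 64-bit word, so a producer/consumer cursor can be copied with a single atomic load or store wherever 64-bit atomics exist (the spinlocked branch is the fallback when they don't). A userspace sketch of the idea using C11 atomics and explicit bit packing, in place of the kernel's atomic64_t:

#include <stdio.h>
#include <stdint.h>
#include <stdatomic.h>

static _Atomic uint64_t shared_cursor;

static uint64_t cursor_pack(uint16_t wrap, uint32_t count)
{
	return ((uint64_t)wrap << 32) | count;
}

static void cursor_publish(uint16_t wrap, uint32_t count)
{
	atomic_store(&shared_cursor, cursor_pack(wrap, count));
}

static void cursor_snapshot(uint16_t *wrap, uint32_t *count)
{
	uint64_t raw = atomic_load(&shared_cursor);	/* one indivisible load */

	*wrap = raw >> 32;
	*count = (uint32_t)raw;	/* wrap and count are always consistent */
}

int main(void)
{
	uint16_t wrap;
	uint32_t count;

	cursor_publish(3, 4096);
	cursor_snapshot(&wrap, &count);
	printf("wrap=%u count=%u\n", wrap, count);
	return 0;
}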
/* calculate cursor difference between old and new, where old <= new */
static inline int smc_curs_diff(unsigned int size,
union smc_host_cursor *old,
@@ -222,12 +247,17 @@ static inline void smcr_cdc_msg_to_host(struct smc_host_cdc_msg *local,
static inline void smcd_cdc_msg_to_host(struct smc_host_cdc_msg *local,
struct smcd_cdc_msg *peer)
{
- local->prod.wrap = peer->prod_wrap;
- local->prod.count = peer->prod_count;
- local->cons.wrap = peer->cons_wrap;
- local->cons.count = peer->cons_count;
- local->prod_flags = peer->prod_flags;
- local->conn_state_flags = peer->conn_state_flags;
+ union smc_host_cursor temp;
+
+ temp.wrap = peer->prod.wrap;
+ temp.count = peer->prod.count;
+ atomic64_set(&local->prod.acurs, atomic64_read(&temp.acurs));
+
+ temp.wrap = peer->cons.wrap;
+ temp.count = peer->cons.count;
+ atomic64_set(&local->cons.acurs, atomic64_read(&temp.acurs));
+ local->prod_flags = peer->cons.prod_flags;
+ local->conn_state_flags = peer->cons.conn_state_flags;
}
static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 52241d679cc9..89c3a8c7859a 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -286,7 +286,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
*/
krflags = MSG_PEEK | MSG_WAITALL;
smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1,
+ iov_iter_kvec(&msg.msg_iter, READ, &vec, 1,
sizeof(struct smc_clc_msg_hdr));
len = sock_recvmsg(smc->clcsock, &msg, krflags);
if (signal_pending(current)) {
@@ -325,7 +325,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
/* receive the complete CLC message */
memset(&msg, 0, sizeof(struct msghdr));
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, datlen);
+ iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, datlen);
krflags = MSG_WAITALL;
len = sock_recvmsg(smc->clcsock, &msg, krflags);
if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) {
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 18daebcef181..1c9fa7f0261a 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -184,6 +184,8 @@ free:
if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
smc_llc_link_inactive(lnk);
+ if (lgr->is_smcd)
+ smc_ism_signal_shutdown(lgr);
smc_lgr_free(lgr);
}
}
@@ -485,7 +487,7 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
}
/* Called when SMC-D device is terminated or peer is lost */
-void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
+void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
struct smc_link_group *lgr, *l;
LIST_HEAD(lgr_free_list);
@@ -495,7 +497,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
if (lgr->is_smcd && lgr->smcd == dev &&
(!peer_gid || lgr->peer_gid == peer_gid) &&
- !list_empty(&lgr->list)) {
+ (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
__smc_lgr_terminate(lgr);
list_move(&lgr->list, &lgr_free_list);
}
@@ -506,6 +508,8 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
list_del_init(&lgr->list);
cancel_delayed_work_sync(&lgr->free_work);
+ if (!peer_gid && vlan == VLAN_VID_MASK) /* dev terminated? */
+ smc_ism_signal_shutdown(lgr);
smc_lgr_free(lgr);
}
}
@@ -559,7 +563,7 @@ out:
static bool smcr_lgr_match(struct smc_link_group *lgr,
struct smc_clc_msg_local *lcl,
- enum smc_lgr_role role)
+ enum smc_lgr_role role, u32 clcqpn)
{
return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
SMC_SYSTEMID_LEN) &&
@@ -567,7 +571,9 @@ static bool smcr_lgr_match(struct smc_link_group *lgr,
SMC_GID_SIZE) &&
!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
sizeof(lcl->mac)) &&
- lgr->role == role;
+ lgr->role == role &&
+ (lgr->role == SMC_SERV ||
+ lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn);
}
static bool smcd_lgr_match(struct smc_link_group *lgr,
@@ -578,7 +584,7 @@ static bool smcd_lgr_match(struct smc_link_group *lgr,
/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
- struct smc_ib_device *smcibdev, u8 ibport,
+ struct smc_ib_device *smcibdev, u8 ibport, u32 clcqpn,
struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
u64 peer_gid)
{
@@ -603,7 +609,7 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
list_for_each_entry(lgr, &smc_lgr_list.list, list) {
write_lock_bh(&lgr->conns_lock);
if ((is_smcd ? smcd_lgr_match(lgr, smcd, peer_gid) :
- smcr_lgr_match(lgr, lcl, role)) &&
+ smcr_lgr_match(lgr, lcl, role, clcqpn)) &&
!lgr->sync_err &&
lgr->vlan_id == vlan_id &&
(role == SMC_CLNT ||
@@ -1024,6 +1030,8 @@ void smc_core_exit(void)
smc_llc_link_inactive(lnk);
}
cancel_delayed_work_sync(&lgr->free_work);
+ if (lgr->is_smcd)
+ smc_ism_signal_shutdown(lgr);
smc_lgr_free(lgr); /* free link group */
}
}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index c156674733c9..cf98f4d6093e 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -247,7 +247,8 @@ void smc_lgr_free(struct smc_link_group *lgr);
void smc_lgr_forget(struct smc_link_group *lgr);
void smc_lgr_terminate(struct smc_link_group *lgr);
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
-void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid);
+void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
+ unsigned short vlan);
int smc_buf_create(struct smc_sock *smc, bool is_smcd);
int smc_uncompress_bufsize(u8 compressed);
int smc_rmb_rtoken_handling(struct smc_connection *conn,
@@ -262,7 +263,7 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id);
void smc_conn_free(struct smc_connection *conn);
int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
- struct smc_ib_device *smcibdev, u8 ibport,
+ struct smc_ib_device *smcibdev, u8 ibport, u32 clcqpn,
struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
u64 peer_gid);
void smcd_conn_free(struct smc_connection *conn);
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index e36f21ce7252..2fff79db1a59 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -187,22 +187,28 @@ struct smc_ism_event_work {
#define ISM_EVENT_REQUEST 0x0001
#define ISM_EVENT_RESPONSE 0x0002
#define ISM_EVENT_REQUEST_IR 0x00000001
+#define ISM_EVENT_CODE_SHUTDOWN 0x80
#define ISM_EVENT_CODE_TESTLINK 0x83
+union smcd_sw_event_info {
+ u64 info;
+ struct {
+ u8 uid[SMC_LGR_ID_SIZE];
+ unsigned short vlan_id;
+ u16 code;
+ };
+};
+
static void smcd_handle_sw_event(struct smc_ism_event_work *wrk)
{
- union {
- u64 info;
- struct {
- u32 uid;
- unsigned short vlanid;
- u16 code;
- };
- } ev_info;
+ union smcd_sw_event_info ev_info;
+ ev_info.info = wrk->event.info;
switch (wrk->event.code) {
+ case ISM_EVENT_CODE_SHUTDOWN: /* Peer shut down DMBs */
+ smc_smcd_terminate(wrk->smcd, wrk->event.tok, ev_info.vlan_id);
+ break;
case ISM_EVENT_CODE_TESTLINK: /* Activity timer */
- ev_info.info = wrk->event.info;
if (ev_info.code == ISM_EVENT_REQUEST) {
ev_info.code = ISM_EVENT_RESPONSE;
wrk->smcd->ops->signal_event(wrk->smcd,
@@ -215,6 +221,21 @@ static void smcd_handle_sw_event(struct smc_ism_event_work *wrk)
}
}
+int smc_ism_signal_shutdown(struct smc_link_group *lgr)
+{
+ int rc;
+ union smcd_sw_event_info ev_info;
+
+ memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE);
+ ev_info.vlan_id = lgr->vlan_id;
+ ev_info.code = ISM_EVENT_REQUEST;
+ rc = lgr->smcd->ops->signal_event(lgr->smcd, lgr->peer_gid,
+ ISM_EVENT_REQUEST_IR,
+ ISM_EVENT_CODE_SHUTDOWN,
+ ev_info.info);
+ return rc;
+}
+
/* worker for SMC-D events */
static void smc_ism_event_work(struct work_struct *work)
{
@@ -223,7 +244,7 @@ static void smc_ism_event_work(struct work_struct *work)
switch (wrk->event.type) {
case ISM_EVENT_GID: /* GID event, token is peer GID */
- smc_smcd_terminate(wrk->smcd, wrk->event.tok);
+ smc_smcd_terminate(wrk->smcd, wrk->event.tok, VLAN_VID_MASK);
break;
case ISM_EVENT_DMB:
break;
@@ -289,7 +310,7 @@ void smcd_unregister_dev(struct smcd_dev *smcd)
spin_unlock(&smcd_dev_list.lock);
flush_workqueue(smcd->event_wq);
destroy_workqueue(smcd->event_wq);
- smc_smcd_terminate(smcd, 0);
+ smc_smcd_terminate(smcd, 0, VLAN_VID_MASK);
device_del(&smcd->dev);
}
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index aee45b860b79..4da946cbfa29 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -45,4 +45,5 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size,
int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc);
int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos,
void *data, size_t len);
+int smc_ism_signal_shutdown(struct smc_link_group *lgr);
#endif
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 3c458d279855..c2694750a6a8 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -215,12 +215,14 @@ int smc_wr_tx_put_slot(struct smc_link *link,
pend = container_of(wr_pend_priv, struct smc_wr_tx_pend, priv);
if (pend->idx < link->wr_tx_cnt) {
+ u32 idx = pend->idx;
+
/* clear the full struct smc_wr_tx_pend including .priv */
memset(&link->wr_tx_pends[pend->idx], 0,
sizeof(link->wr_tx_pends[pend->idx]));
memset(&link->wr_tx_bufs[pend->idx], 0,
sizeof(link->wr_tx_bufs[pend->idx]));
- test_and_clear_bit(pend->idx, link->wr_tx_mask);
+ test_and_clear_bit(idx, link->wr_tx_mask);
return 1;
}
diff --git a/net/socket.c b/net/socket.c
index 99c96851469f..334fcc617ef2 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -635,7 +635,7 @@ EXPORT_SYMBOL(sock_sendmsg);
int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size)
{
- iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
+ iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
return sock_sendmsg(sock, msg);
}
EXPORT_SYMBOL(kernel_sendmsg);
@@ -648,7 +648,7 @@ int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
if (!sock->ops->sendmsg_locked)
return sock_no_sendmsg_locked(sk, msg, size);
- iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
+ iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
}
@@ -823,7 +823,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
mm_segment_t oldfs = get_fs();
int result;
- iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
+ iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
set_fs(KERNEL_DS);
result = sock_recvmsg(sock, msg, flags);
set_fs(oldfs);
@@ -853,7 +853,7 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
struct socket *sock = file->private_data;
if (unlikely(!sock->ops->splice_read))
- return -EINVAL;
+ return generic_file_splice_read(file, ppos, pipe, len, flags);
return sock->ops->splice_read(sock, ppos, pipe, len, flags);
}
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index d8831b988b1e..ab4a3be1542a 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -281,13 +281,7 @@ static bool generic_key_to_expire(struct rpc_cred *cred)
{
struct auth_cred *acred = &container_of(cred, struct generic_cred,
gc_base)->acred;
- bool ret;
-
- get_rpccred(cred);
- ret = test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
- put_rpccred(cred);
-
- return ret;
+ return test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
}
static const struct rpc_credops generic_credops = {
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 30f970cdc7f6..ba765473d1f0 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1239,36 +1239,59 @@ gss_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
return &gss_auth->rpc_auth;
}
+static struct gss_cred *
+gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
+{
+ struct gss_cred *new;
+
+ /* Make a copy of the cred so that we can reference count it */
+ new = kzalloc(sizeof(*gss_cred), GFP_NOIO);
+ if (new) {
+ struct auth_cred acred = {
+ .uid = gss_cred->gc_base.cr_uid,
+ };
+ struct gss_cl_ctx *ctx =
+ rcu_dereference_protected(gss_cred->gc_ctx, 1);
+
+ rpcauth_init_cred(&new->gc_base, &acred,
+ &gss_auth->rpc_auth,
+ &gss_nullops);
+ new->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
+ new->gc_service = gss_cred->gc_service;
+ new->gc_principal = gss_cred->gc_principal;
+ kref_get(&gss_auth->kref);
+ rcu_assign_pointer(new->gc_ctx, ctx);
+ gss_get_ctx(ctx);
+ }
+ return new;
+}
+
/*
- * gss_destroying_context will cause the RPCSEC_GSS to send a NULL RPC call
+ * gss_send_destroy_context will cause the RPCSEC_GSS to send a NULL RPC call
* to the server with the GSS control procedure field set to
* RPC_GSS_PROC_DESTROY. This should normally cause the server to release
* all RPCSEC_GSS state associated with that context.
*/
-static int
-gss_destroying_context(struct rpc_cred *cred)
+static void
+gss_send_destroy_context(struct rpc_cred *cred)
{
struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth);
struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1);
+ struct gss_cred *new;
struct rpc_task *task;
- if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0)
- return 0;
+ new = gss_dup_cred(gss_auth, gss_cred);
+ if (new) {
+ ctx->gc_proc = RPC_GSS_PROC_DESTROY;
- ctx->gc_proc = RPC_GSS_PROC_DESTROY;
- cred->cr_ops = &gss_nullops;
+ task = rpc_call_null(gss_auth->client, &new->gc_base,
+ RPC_TASK_ASYNC|RPC_TASK_SOFT);
+ if (!IS_ERR(task))
+ rpc_put_task(task);
- /* Take a reference to ensure the cred will be destroyed either
- * by the RPC call or by the put_rpccred() below */
- get_rpccred(cred);
-
- task = rpc_call_null(gss_auth->client, cred, RPC_TASK_ASYNC|RPC_TASK_SOFT);
- if (!IS_ERR(task))
- rpc_put_task(task);
-
- put_rpccred(cred);
- return 1;
+ put_rpccred(&new->gc_base);
+ }
}
/* gss_destroy_cred (and gss_free_ctx) are used to clean up after failure
@@ -1330,8 +1353,8 @@ static void
gss_destroy_cred(struct rpc_cred *cred)
{
- if (gss_destroying_context(cred))
- return;
+ if (test_and_clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0)
+ gss_send_destroy_context(cred);
gss_destroy_nullcred(cred);
}
@@ -1768,6 +1791,7 @@ priv_release_snd_buf(struct rpc_rqst *rqstp)
for (i=0; i < rqstp->rq_enc_pages_num; i++)
__free_page(rqstp->rq_enc_pages[i]);
kfree(rqstp->rq_enc_pages);
+ rqstp->rq_release_snd_buf = NULL;
}
static int
@@ -1776,6 +1800,9 @@ alloc_enc_pages(struct rpc_rqst *rqstp)
struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
int first, last, i;
+ if (rqstp->rq_release_snd_buf)
+ rqstp->rq_release_snd_buf(rqstp);
+
if (snd_buf->page_len == 0) {
rqstp->rq_enc_pages_num = 0;
return 0;
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 7f0424dfa8f6..eab71fc7af3e 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -274,6 +274,7 @@ out_err:
static int
gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
{
+ u32 seq_send;
int tmp;
p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
@@ -315,9 +316,10 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
if (IS_ERR(p))
goto out_err;
- p = simple_get_bytes(p, end, &ctx->seq_send, sizeof(ctx->seq_send));
+ p = simple_get_bytes(p, end, &seq_send, sizeof(seq_send));
if (IS_ERR(p))
goto out_err;
+ atomic_set(&ctx->seq_send, seq_send);
p = simple_get_netobj(p, end, &ctx->mech_used);
if (IS_ERR(p))
goto out_err;
@@ -607,6 +609,7 @@ static int
gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
gfp_t gfp_mask)
{
+ u64 seq_send64;
int keylen;
p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags));
@@ -617,14 +620,15 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
if (IS_ERR(p))
goto out_err;
- p = simple_get_bytes(p, end, &ctx->seq_send64, sizeof(ctx->seq_send64));
+ p = simple_get_bytes(p, end, &seq_send64, sizeof(seq_send64));
if (IS_ERR(p))
goto out_err;
+ atomic64_set(&ctx->seq_send64, seq_send64);
/* set seq_send for use by "older" enctypes */
- ctx->seq_send = ctx->seq_send64;
- if (ctx->seq_send64 != ctx->seq_send) {
- dprintk("%s: seq_send64 %lx, seq_send %x overflow?\n", __func__,
- (unsigned long)ctx->seq_send64, ctx->seq_send);
+ atomic_set(&ctx->seq_send, seq_send64);
+ if (seq_send64 != atomic_read(&ctx->seq_send)) {
+ dprintk("%s: seq_send64 %llx, seq_send %x overflow?\n", __func__,
+ seq_send64, atomic_read(&ctx->seq_send));
p = ERR_PTR(-EINVAL);
goto out_err;
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index b4adeb06660b..48fe4a591b54 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -123,30 +123,6 @@ setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
return krb5_hdr;
}
-u32
-gss_seq_send_fetch_and_inc(struct krb5_ctx *ctx)
-{
- u32 old, seq_send = READ_ONCE(ctx->seq_send);
-
- do {
- old = seq_send;
- seq_send = cmpxchg(&ctx->seq_send, old, old + 1);
- } while (old != seq_send);
- return seq_send;
-}
-
-u64
-gss_seq_send64_fetch_and_inc(struct krb5_ctx *ctx)
-{
- u64 old, seq_send = READ_ONCE(ctx->seq_send);
-
- do {
- old = seq_send;
- seq_send = cmpxchg64(&ctx->seq_send64, old, old + 1);
- } while (old != seq_send);
- return seq_send;
-}
-
static u32
gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
struct xdr_netobj *token)
@@ -177,7 +153,7 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
- seq_send = gss_seq_send_fetch_and_inc(ctx);
+ seq_send = atomic_fetch_inc(&ctx->seq_send);
if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff,
seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))
@@ -205,7 +181,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
/* Set up the sequence number. Now 64-bits in clear
* text and w/o direction indicator */
- seq_send_be64 = cpu_to_be64(gss_seq_send64_fetch_and_inc(ctx));
+ seq_send_be64 = cpu_to_be64(atomic64_fetch_inc(&ctx->seq_send64));
memcpy(krb5_hdr + 8, (char *) &seq_send_be64, 8);
if (ctx->initiate) {
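The open-coded cmpxchg loops deleted above implemented exactly fetch-and-increment, which the kernel's atomic_fetch_inc()/atomic64_fetch_inc() (and, below, C11's atomic_fetch_add) provide directly. A sketch showing the equivalence:

#include <stdio.h>
#include <stdint.h>
#include <stdatomic.h>

static _Atomic uint32_t seq_send;

/* The pattern the patch removed, expressed with C11 primitives. */
static uint32_t fetch_and_inc_cmpxchg(_Atomic uint32_t *v)
{
	uint32_t old = atomic_load(v);

	while (!atomic_compare_exchange_weak(v, &old, old + 1))
		;		/* 'old' is reloaded with the current value */
	return old;
}

int main(void)
{
	uint32_t a = fetch_and_inc_cmpxchg(&seq_send);
	uint32_t b = atomic_fetch_add(&seq_send, 1);	/* the replacement */

	printf("%u %u -> %u\n", a, b, (unsigned)atomic_load(&seq_send));
	return 0;
}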
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 962fa84e6db1..5cdde6cb703a 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -228,7 +228,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
- seq_send = gss_seq_send_fetch_and_inc(kctx);
+ seq_send = atomic_fetch_inc(&kctx->seq_send);
/* XXX would probably be more efficient to compute checksum
* and encrypt at the same time: */
@@ -475,7 +475,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
*be16ptr++ = 0;
be64ptr = (__be64 *)be16ptr;
- *be64ptr = cpu_to_be64(gss_seq_send64_fetch_and_inc(kctx));
+ *be64ptr = cpu_to_be64(atomic64_fetch_inc(&kctx->seq_send64));
err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, pages);
if (err)
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index ae3b8145da35..c6782aa47525 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1915,6 +1915,13 @@ call_connect_status(struct rpc_task *task)
struct rpc_clnt *clnt = task->tk_client;
int status = task->tk_status;
+ /* Check if the task was already transmitted */
+ if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
+ xprt_end_transmit(task);
+ task->tk_action = call_transmit_status;
+ return;
+ }
+
dprint_status(task);
trace_rpc_connect_status(task);
@@ -2302,6 +2309,7 @@ out_retry:
task->tk_status = 0;
/* Note: rpc_verify_header() may have freed the RPC slot */
if (task->tk_rqstp == req) {
+ xdr_free_bvec(&req->rq_rcv_buf);
req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0;
if (task->tk_client->cl_discrtry)
xprt_conditional_disconnect(req->rq_xprt,
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 3b525accaa68..986f3ed7d1a2 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -336,7 +336,7 @@ static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov,
rqstp->rq_xprt_hlen = 0;
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nr, buflen);
+ iov_iter_kvec(&msg.msg_iter, READ, iov, nr, buflen);
if (base != 0) {
iov_iter_advance(&msg.msg_iter, base);
buflen -= base;
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 2bbb8d38d2bf..f302c6eb8779 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -546,7 +546,7 @@ EXPORT_SYMBOL_GPL(xdr_commit_encode);
static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
size_t nbytes)
{
- static __be32 *p;
+ __be32 *p;
int space_left;
int frag1bytes, frag2bytes;
@@ -673,11 +673,10 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
WARN_ON_ONCE(xdr->iov);
return;
}
- if (fraglen) {
+ if (fraglen)
xdr->end = head->iov_base + head->iov_len;
- xdr->page_ptr--;
- }
/* (otherwise assume xdr->end is already set) */
+ xdr->page_ptr--;
head->iov_len = len;
buf->len = len;
xdr->p = head->iov_base + head->iov_len;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 86bea4520c4d..ce927002862a 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -826,8 +826,15 @@ void xprt_connect(struct rpc_task *task)
return;
if (xprt_test_and_set_connecting(xprt))
return;
- xprt->stat.connect_start = jiffies;
- xprt->ops->connect(xprt, task);
+ /* Race breaker */
+ if (!xprt_connected(xprt)) {
+ xprt->stat.connect_start = jiffies;
+ xprt->ops->connect(xprt, task);
+ } else {
+ xprt_clear_connecting(xprt);
+ task->tk_status = 0;
+ rpc_wake_up_queued_task(&xprt->pending, task);
+ }
}
xprt_release_write(xprt, task);
}
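The xprt_connect() hunk is the classic double-check around a test-and-set guard: the task that wins xprt_test_and_set_connecting() must re-check the connection state, since the transport may have connected between the first check and winning the flag. A hedged userspace sketch of the shape, with C11 atomics standing in for the kernel bitops and hypothetical stubs for xprt->ops->connect() and rpc_wake_up_queued_task() so it compiles:

#include <stdatomic.h>
#include <stdio.h>

static atomic_bool connecting;
static atomic_bool connected;

static void start_connect(void) { puts("connecting"); }	/* stub */
static void wake_waiters(void)  { puts("waking waiters"); }	/* stub */

static void connect_if_needed(void)
{
	if (atomic_load(&connected))
		return;
	if (atomic_exchange(&connecting, 1))
		return;	/* another task already owns the attempt */
	/* Race breaker: state may have changed while we raced for
	 * the flag, so re-check before starting a redundant connect. */
	if (!atomic_load(&connected)) {
		start_connect();
	} else {
		atomic_store(&connecting, 0);
		wake_waiters();
	}
}

int main(void)
{
	connect_if_needed();
	return 0;
}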
@@ -1623,6 +1630,8 @@ xprt_request_init(struct rpc_task *task)
req->rq_snd_buf.buflen = 0;
req->rq_rcv_buf.len = 0;
req->rq_rcv_buf.buflen = 0;
+ req->rq_snd_buf.bvec = NULL;
+ req->rq_rcv_buf.bvec = NULL;
req->rq_release_snd_buf = NULL;
xprt_reset_majortimeo(req);
dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 1b51e04d3566..8a5e823e0b33 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -330,18 +330,16 @@ xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp)
{
size_t i, n;
- if (!(buf->flags & XDRBUF_SPARSE_PAGES))
+ if (!want || !(buf->flags & XDRBUF_SPARSE_PAGES))
return want;
- if (want > buf->page_len)
- want = buf->page_len;
n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT;
for (i = 0; i < n; i++) {
if (buf->pages[i])
continue;
buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp);
if (!buf->pages[i]) {
- buf->page_len = (i * PAGE_SIZE) - buf->page_base;
- return buf->page_len;
+ i *= PAGE_SIZE;
+ return i > buf->page_base ? i - buf->page_base : 0;
}
}
return want;
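The error path above used to compute `(i * PAGE_SIZE) - buf->page_base`; with i == 0 that subtraction underflows the unsigned type and reports an enormous length, which the replacement clamps to zero. A small userspace illustration of the wraparound:

#include <stdio.h>
#include <stddef.h>

int main(void)
{
	size_t i = 0, page_base = 64;

	size_t bad  = i * 4096 - page_base;	/* wraps to SIZE_MAX - 63 */
	size_t good = i * 4096 > page_base ? i * 4096 - page_base : 0;

	printf("bad=%zu good=%zu\n", bad, good);
	return 0;
}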
@@ -361,7 +359,7 @@ static ssize_t
xs_read_kvec(struct socket *sock, struct msghdr *msg, int flags,
struct kvec *kvec, size_t count, size_t seek)
{
- iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, kvec, 1, count);
+ iov_iter_kvec(&msg->msg_iter, READ, kvec, 1, count);
return xs_sock_recvmsg(sock, msg, flags, seek);
}
@@ -370,7 +368,7 @@ xs_read_bvec(struct socket *sock, struct msghdr *msg, int flags,
struct bio_vec *bvec, unsigned long nr, size_t count,
size_t seek)
{
- iov_iter_bvec(&msg->msg_iter, READ | ITER_BVEC, bvec, nr, count);
+ iov_iter_bvec(&msg->msg_iter, READ, bvec, nr, count);
return xs_sock_recvmsg(sock, msg, flags, seek);
}
@@ -378,8 +376,8 @@ static ssize_t
xs_read_discard(struct socket *sock, struct msghdr *msg, int flags,
size_t count)
{
- struct kvec kvec = { 0 };
- return xs_read_kvec(sock, msg, flags | MSG_TRUNC, &kvec, count, 0);
+ iov_iter_discard(&msg->msg_iter, READ, count);
+ return sock_recvmsg(sock, msg, flags);
}
static ssize_t
@@ -398,16 +396,17 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
goto out;
if (ret != want)
- goto eagain;
+ goto out;
seek = 0;
} else {
seek -= buf->head[0].iov_len;
offset += buf->head[0].iov_len;
}
- if (seek < buf->page_len) {
- want = xs_alloc_sparse_pages(buf,
- min_t(size_t, count - offset, buf->page_len),
- GFP_NOWAIT);
+
+ want = xs_alloc_sparse_pages(buf,
+ min_t(size_t, count - offset, buf->page_len),
+ GFP_NOWAIT);
+ if (seek < want) {
ret = xs_read_bvec(sock, msg, flags, buf->bvec,
xdr_buf_pagecount(buf),
want + buf->page_base,
@@ -418,12 +417,13 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
goto out;
if (ret != want)
- goto eagain;
+ goto out;
seek = 0;
} else {
- seek -= buf->page_len;
- offset += buf->page_len;
+ seek -= want;
+ offset += want;
}
+
if (seek < buf->tail[0].iov_len) {
want = min_t(size_t, count - offset, buf->tail[0].iov_len);
ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek);
@@ -433,17 +433,13 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
goto out;
if (ret != want)
- goto eagain;
+ goto out;
} else
offset += buf->tail[0].iov_len;
ret = -EMSGSIZE;
- msg->msg_flags |= MSG_TRUNC;
out:
*read = offset - seek_init;
return ret;
-eagain:
- ret = -EAGAIN;
- goto out;
sock_err:
offset += seek;
goto out;
@@ -486,19 +482,20 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
if (transport->recv.offset == transport->recv.len) {
if (xs_read_stream_request_done(transport))
msg->msg_flags |= MSG_EOR;
- return transport->recv.copied;
+ return read;
}
switch (ret) {
+ default:
+ break;
+ case -EFAULT:
case -EMSGSIZE:
- return transport->recv.copied;
+ msg->msg_flags |= MSG_TRUNC;
+ return read;
case 0:
return -ESHUTDOWN;
- default:
- if (ret < 0)
- return ret;
}
- return -EAGAIN;
+ return ret < 0 ? ret : read;
}
static size_t
@@ -537,7 +534,7 @@ xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
ret = xs_read_stream_request(transport, msg, flags, req);
if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
- xprt_complete_bc_request(req, ret);
+ xprt_complete_bc_request(req, transport->recv.copied);
return ret;
}
@@ -570,7 +567,7 @@ xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags)
spin_lock(&xprt->queue_lock);
if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
- xprt_complete_rqst(req->rq_task, ret);
+ xprt_complete_rqst(req->rq_task, transport->recv.copied);
xprt_unpin_rqst(req);
out:
spin_unlock(&xprt->queue_lock);
@@ -591,10 +588,8 @@ xs_read_stream(struct sock_xprt *transport, int flags)
if (ret <= 0)
goto out_err;
transport->recv.offset = ret;
- if (ret != want) {
- ret = -EAGAIN;
- goto out_err;
- }
+ if (transport->recv.offset != want)
+ return transport->recv.offset;
transport->recv.len = be32_to_cpu(transport->recv.fraghdr) &
RPC_FRAGMENT_SIZE_MASK;
transport->recv.offset -= sizeof(transport->recv.fraghdr);
@@ -602,6 +597,9 @@ xs_read_stream(struct sock_xprt *transport, int flags)
}
switch (be32_to_cpu(transport->recv.calldir)) {
+ default:
+ msg.msg_flags |= MSG_TRUNC;
+ break;
case RPC_CALL:
ret = xs_read_stream_call(transport, &msg, flags);
break;
@@ -616,6 +614,9 @@ xs_read_stream(struct sock_xprt *transport, int flags)
goto out_err;
read += ret;
if (transport->recv.offset < transport->recv.len) {
+ if (!(msg.msg_flags & MSG_TRUNC))
+ return read;
+ msg.msg_flags = 0;
ret = xs_read_discard(transport->sock, &msg, flags,
transport->recv.len - transport->recv.offset);
if (ret <= 0)
@@ -623,7 +624,7 @@ xs_read_stream(struct sock_xprt *transport, int flags)
transport->recv.offset += ret;
read += ret;
if (transport->recv.offset != transport->recv.len)
- return -EAGAIN;
+ return read;
}
if (xs_read_stream_request_done(transport)) {
trace_xs_stream_read_request(transport);
@@ -633,13 +634,7 @@ xs_read_stream(struct sock_xprt *transport, int flags)
transport->recv.len = 0;
return read;
out_err:
- switch (ret) {
- case 0:
- case -ESHUTDOWN:
- xprt_force_disconnect(&transport->xprt);
- return -ESHUTDOWN;
- }
- return ret;
+ return ret != 0 ? ret : -ESHUTDOWN;
}
static void xs_stream_data_receive(struct sock_xprt *transport)
@@ -648,12 +643,12 @@ static void xs_stream_data_receive(struct sock_xprt *transport)
ssize_t ret = 0;
mutex_lock(&transport->recv_mutex);
+ clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
if (transport->sock == NULL)
goto out;
- clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
for (;;) {
ret = xs_read_stream(transport, MSG_DONTWAIT);
- if (ret <= 0)
+ if (ret < 0)
break;
read += ret;
cond_resched();
@@ -1345,10 +1340,10 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
int err;
mutex_lock(&transport->recv_mutex);
+ clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
sk = transport->inet;
if (sk == NULL)
goto out;
- clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
for (;;) {
skb = skb_recv_udp(sk, 0, 1, &err);
if (skb == NULL)
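In both receive workers above, clear_bit(XPRT_SOCK_DATA_READY, ...) moves ahead of the socket check and the drain loop. Clearing the notification before consuming data is the standard way to close a lost-wakeup window: anything queued after the clear re-sets the flag and re-arms the worker, so nothing can be drained here yet leave a stale ready indication behind. A minimal sketch of that ordering, with the kernel bitop modelled as a C11 atomic:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool data_ready;

static bool drain_one(void) { return false; }	/* stub: queue empty */

static void receive_worker(void)
{
	/* Clear FIRST: arrivals after this point re-set data_ready
	 * and reschedule the worker, so no arrival can be consumed
	 * below and still have its notification wiped out later. */
	atomic_store(&data_ready, false);
	while (drain_one())
		;
}

int main(void)
{
	atomic_store(&data_ready, true);
	receive_worker();
	return 0;
}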
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 2830709957bd..c138d68e8a69 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -166,7 +166,8 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
/* Apply trial address if we just left trial period */
if (!trial && !self) {
- tipc_net_finalize(net, tn->trial_addr);
+ tipc_sched_net_finalize(net, tn->trial_addr);
+ msg_set_prevnode(buf_msg(d->skb), tn->trial_addr);
msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
}
@@ -300,14 +301,12 @@ static void tipc_disc_timeout(struct timer_list *t)
goto exit;
}
- /* Trial period over ? */
- if (!time_before(jiffies, tn->addr_trial_end)) {
- /* Did we just leave it ? */
- if (!tipc_own_addr(net))
- tipc_net_finalize(net, tn->trial_addr);
-
- msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
- msg_set_prevnode(buf_msg(d->skb), tipc_own_addr(net));
+ /* Did we just leave the trial period? */
+ if (!time_before(jiffies, tn->addr_trial_end) && !tipc_own_addr(net)) {
+ mod_timer(&d->timer, jiffies + TIPC_DISC_INIT);
+ spin_unlock_bh(&d->lock);
+ tipc_sched_net_finalize(net, tn->trial_addr);
+ return;
}
/* Adjust timeout interval according to discovery phase */
@@ -319,6 +318,8 @@ static void tipc_disc_timeout(struct timer_list *t)
d->timer_intv = TIPC_DISC_SLOW;
else if (!d->num_nodes && d->timer_intv > TIPC_DISC_FAST)
d->timer_intv = TIPC_DISC_FAST;
+ msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
+ msg_set_prevnode(buf_msg(d->skb), tn->trial_addr);
}
mod_timer(&d->timer, jiffies + d->timer_intv);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 201c3b5bc96b..836727e363c4 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1594,14 +1594,17 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI))
l->priority = peers_prio;
- /* ACTIVATE_MSG serves as PEER_RESET if link is already down */
- if (msg_peer_stopping(hdr))
+ /* If peer is going down we want full re-establish cycle */
+ if (msg_peer_stopping(hdr)) {
rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
- else if ((mtyp == RESET_MSG) || !link_is_up(l))
+ break;
+ }
+ /* ACTIVATE_MSG serves as PEER_RESET if link is already down */
+ if (mtyp == RESET_MSG || !link_is_up(l))
rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT);
/* ACTIVATE_MSG takes up link if it was already locally reset */
- if ((mtyp == ACTIVATE_MSG) && (l->state == LINK_ESTABLISHING))
+ if (mtyp == ACTIVATE_MSG && l->state == LINK_ESTABLISHING)
rc = TIPC_LINK_UP_EVT;
l->peer_session = msg_session(hdr);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 62199cf5a56c..f076edb74338 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -104,6 +104,14 @@
* - A local spin_lock protecting the queue of subscriber events.
*/
+struct tipc_net_work {
+ struct work_struct work;
+ struct net *net;
+ u32 addr;
+};
+
+static void tipc_net_finalize(struct net *net, u32 addr);
+
int tipc_net_init(struct net *net, u8 *node_id, u32 addr)
{
if (tipc_own_id(net)) {
@@ -119,17 +127,38 @@ int tipc_net_init(struct net *net, u8 *node_id, u32 addr)
return 0;
}
-void tipc_net_finalize(struct net *net, u32 addr)
+static void tipc_net_finalize(struct net *net, u32 addr)
{
struct tipc_net *tn = tipc_net(net);
- if (!cmpxchg(&tn->node_addr, 0, addr)) {
- tipc_set_node_addr(net, addr);
- tipc_named_reinit(net);
- tipc_sk_reinit(net);
- tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
- TIPC_CLUSTER_SCOPE, 0, addr);
- }
+ if (cmpxchg(&tn->node_addr, 0, addr))
+ return;
+ tipc_set_node_addr(net, addr);
+ tipc_named_reinit(net);
+ tipc_sk_reinit(net);
+ tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
+ TIPC_CLUSTER_SCOPE, 0, addr);
+}
+
+static void tipc_net_finalize_work(struct work_struct *work)
+{
+ struct tipc_net_work *fwork;
+
+ fwork = container_of(work, struct tipc_net_work, work);
+ tipc_net_finalize(fwork->net, fwork->addr);
+ kfree(fwork);
+}
+
+void tipc_sched_net_finalize(struct net *net, u32 addr)
+{
+ struct tipc_net_work *fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC);
+
+ if (!fwork)
+ return;
+ INIT_WORK(&fwork->work, tipc_net_finalize_work);
+ fwork->net = net;
+ fwork->addr = addr;
+ schedule_work(&fwork->work);
}
void tipc_net_stop(struct net *net)
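tipc_sched_net_finalize() exists because the contexts that detect the end of the address trial (a timer callback, the discovery receive path) are poor places to run tipc_net_finalize() directly, so the call is packaged into a heap-allocated work item and punted to a workqueue. A userspace analogue of the same ownership-transfer shape, with a detached pthread standing in for schedule_work() (all names illustrative):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct finalize_work {
	uint32_t addr;	/* payload carried into the deferred context */
};

static void net_finalize(uint32_t addr)
{
	printf("finalizing with addr %u\n", addr);	/* may sleep, take locks */
}

static void *finalize_worker(void *arg)
{
	struct finalize_work *w = arg;

	net_finalize(w->addr);
	free(w);	/* the work item owns itself, as in the patch */
	return NULL;
}

/* Callable from a context that must not block: package and defer. */
static void sched_net_finalize(uint32_t addr)
{
	struct finalize_work *w = malloc(sizeof(*w));
	pthread_t t;

	if (!w)
		return;
	w->addr = addr;
	if (pthread_create(&t, NULL, finalize_worker, w) != 0) {
		free(w);
		return;
	}
	pthread_detach(t);
}

int main(void)
{
	sched_net_finalize(4711);
	pthread_exit(NULL);	/* let the detached worker finish */
}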
diff --git a/net/tipc/net.h b/net/tipc/net.h
index 09ad02b50bb1..b7f2e364eb99 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -42,7 +42,7 @@
extern const struct nla_policy tipc_nl_net_policy[];
int tipc_net_init(struct net *net, u8 *node_id, u32 addr);
-void tipc_net_finalize(struct net *net, u32 addr);
+void tipc_sched_net_finalize(struct net *net, u32 addr);
void tipc_net_stop(struct net *net);
int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 2afc4f8c37a7..488019766433 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -584,12 +584,15 @@ static void tipc_node_clear_links(struct tipc_node *node)
/* tipc_node_cleanup - delete nodes that do not
* have active links for NODE_CLEANUP_AFTER time
*/
-static int tipc_node_cleanup(struct tipc_node *peer)
+static bool tipc_node_cleanup(struct tipc_node *peer)
{
struct tipc_net *tn = tipc_net(peer->net);
bool deleted = false;
- spin_lock_bh(&tn->node_list_lock);
+ /* If lock held by tipc_node_stop() the node will be deleted anyway */
+ if (!spin_trylock_bh(&tn->node_list_lock))
+ return false;
+
tipc_node_write_lock(peer);
if (!node_is_up(peer) && time_after(jiffies, peer->delete_at)) {
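The spin_trylock_bh() conversion above encodes a shutdown-ordering fact, as the patch comment says: if the cleanup timer cannot take node_list_lock, the holder is tipc_node_stop(), which deletes every node anyway, so backing off beats blocking against teardown. The same shape in userspace pthread terms:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t node_list_lock = PTHREAD_MUTEX_INITIALIZER;

static bool try_cleanup(void)
{
	/* Contended means someone is tearing the list down and will
	 * delete the node anyway: report failure rather than block. */
	if (pthread_mutex_trylock(&node_list_lock) != 0)
		return false;
	/* ... walk the list, delete nodes idle past their deadline ... */
	pthread_mutex_unlock(&node_list_lock);
	return true;
}

int main(void)
{
	printf("cleaned: %d\n", (int)try_cleanup());
	return 0;
}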
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 636e6131769d..b57b1be7252b 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1555,16 +1555,17 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
/**
* tipc_sk_anc_data_recv - optionally capture ancillary data for received message
* @m: descriptor for message info
- * @msg: received message header
+ * @skb: received message buffer
* @tsk: TIPC port associated with message
*
 * Note: Ancillary data is not captured if not requested by the receiver.
*
* Returns 0 if successful, otherwise errno
*/
-static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
+static int tipc_sk_anc_data_recv(struct msghdr *m, struct sk_buff *skb,
struct tipc_sock *tsk)
{
+ struct tipc_msg *msg;
u32 anc_data[3];
u32 err;
u32 dest_type;
@@ -1573,6 +1574,7 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
if (likely(m->msg_controllen == 0))
return 0;
+ msg = buf_msg(skb);
/* Optionally capture errored message object(s) */
err = msg ? msg_errcode(msg) : 0;
@@ -1583,6 +1585,9 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
if (res)
return res;
if (anc_data[1]) {
+ if (skb_linearize(skb))
+ return -ENOMEM;
+ msg = buf_msg(skb);
res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
msg_data(msg));
if (res)
@@ -1744,9 +1749,10 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
/* Collect msg meta data, including error code and rejected data */
tipc_sk_set_orig_addr(m, skb);
- rc = tipc_sk_anc_data_recv(m, hdr, tsk);
+ rc = tipc_sk_anc_data_recv(m, skb, tsk);
if (unlikely(rc))
goto exit;
+ hdr = buf_msg(skb);
/* Capture data if non-error msg, otherwise just set return value */
if (likely(!err)) {
@@ -1856,9 +1862,10 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
/* Collect msg meta data, incl. error code and rejected data */
if (!copied) {
tipc_sk_set_orig_addr(m, skb);
- rc = tipc_sk_anc_data_recv(m, hdr, tsk);
+ rc = tipc_sk_anc_data_recv(m, skb, tsk);
if (rc)
break;
+ hdr = buf_msg(skb);
}
/* Copy data if msg ok, otherwise return error/partial data */
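tipc_sk_anc_data_recv() now takes the skb itself and calls skb_linearize() before exposing msg_data() to put_cmsg(): the payload may live in paged fragments, so a raw pointer over it is only valid once the buffer is contiguous, and since linearizing can move the data, the header pointer is recomputed afterwards (hence the `hdr = buf_msg(skb);` re-reads in both callers). A userspace analogue of flatten-before-parse:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct frag { const char *base; size_t len; };

/* Copy scattered fragments into one contiguous buffer; only then is
 * a single raw pointer across the whole payload meaningful. */
static char *linearize(const struct frag *frags, size_t n, size_t *out_len)
{
	size_t total = 0, off = 0, i;
	char *flat;

	for (i = 0; i < n; i++)
		total += frags[i].len;
	flat = malloc(total);
	if (!flat)
		return NULL;
	for (i = 0; i < n; i++) {
		memcpy(flat + off, frags[i].base, frags[i].len);
		off += frags[i].len;
	}
	*out_len = total;
	return flat;
}

int main(void)
{
	struct frag f[] = { { "hel", 3 }, { "lo", 2 } };
	size_t len;
	char *p = linearize(f, 2, &len);

	if (p) {
		printf("%.*s\n", (int)len, p);	/* "hello" via one pointer */
		free(p);
	}
	return 0;
}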
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 4bdea0057171..efb16f69bd2c 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -394,7 +394,7 @@ static int tipc_conn_rcv_from_sock(struct tipc_conn *con)
iov.iov_base = &s;
iov.iov_len = sizeof(s);
msg.msg_name = NULL;
- iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
+ iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, iov.iov_len);
ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
if (ret == -EWOULDBLOCK)
return -EWOULDBLOCK;
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 276edbc04f38..d753e362d2d9 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -489,7 +489,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
iov.iov_base = kaddr + offset;
iov.iov_len = size;
- iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, &iov, 1, size);
+ iov_iter_kvec(&msg_iter, WRITE, &iov, 1, size);
rc = tls_push_data(sk, &msg_iter, size,
flags, TLS_RECORD_TYPE_DATA);
kunmap(page);
@@ -538,7 +538,7 @@ static int tls_device_push_pending_record(struct sock *sk, int flags)
{
struct iov_iter msg_iter;
- iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
+ iov_iter_kvec(&msg_iter, WRITE, NULL, 0, 0);
return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA);
}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 5cd88ba8acd1..7b1af8b59cd2 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -799,7 +799,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
struct crypto_tfm *tfm = crypto_aead_tfm(ctx->aead_send);
bool async_capable = tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
unsigned char record_type = TLS_RECORD_TYPE_DATA;
- bool is_kvec = msg->msg_iter.type & ITER_KVEC;
+ bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
bool eor = !(msg->msg_flags & MSG_MORE);
size_t try_to_copy, copied = 0;
struct sk_msg *msg_pl, *msg_en;
@@ -1457,7 +1457,7 @@ int tls_sw_recvmsg(struct sock *sk,
bool cmsg = false;
int target, err = 0;
long timeo;
- bool is_kvec = msg->msg_iter.type & ITER_KVEC;
+ bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
int num_async = 0;
flags |= nonblock;
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 12b3edf70a7b..1615e503f8e3 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -272,11 +272,11 @@ void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa,
p1 = (u8*)(ht_capa);
p2 = (u8*)(ht_capa_mask);
- for (i = 0; i<sizeof(*ht_capa); i++)
+ for (i = 0; i < sizeof(*ht_capa); i++)
p1[i] &= p2[i];
}
-/* Do a logical ht_capa &= ht_capa_mask. */
+/* Do a logical vht_capa &= vht_capa_mask. */
void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa,
const struct ieee80211_vht_cap *vht_capa_mask)
{
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 744b5851bbf9..8d763725498c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -7870,6 +7870,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
}
memset(&params, 0, sizeof(params));
+ params.beacon_csa.ftm_responder = -1;
if (!info->attrs[NL80211_ATTR_WIPHY_FREQ] ||
!info->attrs[NL80211_ATTR_CH_SWITCH_COUNT])
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index d536b07582f8..f741d8376a46 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -642,11 +642,15 @@ static bool cfg80211_is_all_idle(void)
 * All devices must be idle; otherwise, if you are actively
 * scanning, some new beacon hints could be learned and would
 * count as new regulatory hints.
+ * Also, if there is any other active beaconing interface, we
+ * need not issue a disconnect hint or reset any info such as
+ * chan dfs state, etc.
*/
list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
wdev_lock(wdev);
- if (wdev->conn || wdev->current_bss)
+ if (wdev->conn || wdev->current_bss ||
+ cfg80211_beaconing_iface_active(wdev))
is_all_idle = false;
wdev_unlock(wdev);
}
@@ -1171,6 +1175,8 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
cfg80211_oper_and_ht_capa(&connect->ht_capa_mask,
rdev->wiphy.ht_capa_mod_mask);
+ cfg80211_oper_and_vht_capa(&connect->vht_capa_mask,
+ rdev->wiphy.vht_capa_mod_mask);
if (connkeys && connkeys->def >= 0) {
int idx;
diff --git a/net/wireless/util.c b/net/wireless/util.c
index ef14d80ca03e..d473bd135da8 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1421,6 +1421,8 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
ies[pos + ext],
ext == 2))
pos = skip_ie(ies, ielen, pos);
+ else
+ break;
}
} else {
pos = skip_ie(ies, ielen, pos);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index d49aa79b7997..5121729b8b63 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -100,7 +100,7 @@ int x25_parse_address_block(struct sk_buff *skb,
}
len = *skb->data;
- needed = 1 + (len >> 4) + (len & 0x0f);
+ needed = 1 + ((len >> 4) + (len & 0x0f) + 1) / 2;
if (!pskb_may_pull(skb, needed)) {
/* packet is too short to hold the addresses it claims
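The x25_parse_address_block() fix is pure arithmetic. The length octet carries two digit counts (called address in the high nibble, calling address in the low nibble), and the digits themselves are packed BCD, two per byte. The old code charged a full byte per digit; the correct size is one length octet plus ceil(total_digits / 2). For len = 0x34 that is 3 + 4 = 7 digits, so 1 + (7 + 1) / 2 = 5 bytes, where the old formula demanded 8 and so overstated the pull. A checkable sketch:

#include <stdio.h>

static unsigned int needed_bytes(unsigned char len)
{
	unsigned int digits = (len >> 4) + (len & 0x0f);

	return 1 + (digits + 1) / 2;	/* length octet + packed BCD */
}

int main(void)
{
	printf("%u\n", needed_bytes(0x34));	/* 7 digits -> 5 bytes */
	printf("%u\n", needed_bytes(0x00));	/* 0 digits -> 1 byte  */
	return 0;
}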
@@ -288,7 +288,7 @@ static struct sock *x25_find_listener(struct x25_address *addr,
sk_for_each(s, &x25_list)
if ((!strcmp(addr->x25_addr,
x25_sk(s)->source_addr.x25_addr) ||
- !strcmp(addr->x25_addr,
+ !strcmp(x25_sk(s)->source_addr.x25_addr,
null_x25_address.x25_addr)) &&
s->sk_state == TCP_LISTEN) {
/*
@@ -688,11 +688,15 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
}
- len = strlen(addr->sx25_addr.x25_addr);
- for (i = 0; i < len; i++) {
- if (!isdigit(addr->sx25_addr.x25_addr[i])) {
- rc = -EINVAL;
- goto out;
+ /* check for the null_x25_address */
+ if (strcmp(addr->sx25_addr.x25_addr, null_x25_address.x25_addr)) {
+
+ len = strlen(addr->sx25_addr.x25_addr);
+ for (i = 0; i < len; i++) {
+ if (!isdigit(addr->sx25_addr.x25_addr[i])) {
+ rc = -EINVAL;
+ goto out;
+ }
}
}
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 3c12cae32001..afb26221d8a8 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -142,6 +142,15 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp
sk->sk_state_change(sk);
break;
}
+ case X25_CALL_REQUEST:
+ /* call collision */
+ x25->causediag.cause = 0x01;
+ x25->causediag.diagnostic = 0x48;
+
+ x25_write_internal(sk, X25_CLEAR_REQUEST);
+ x25_disconnect(sk, EISCONN, 0x01, 0x48);
+ break;
+
case X25_CLEAR_REQUEST:
if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2))
goto out_clear;
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 4a9ee2d83158..140270a13d54 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -8,7 +8,6 @@ config XFRM
config XFRM_OFFLOAD
bool
- depends on XFRM
config XFRM_ALGO
tristate
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index b669262682c9..dc4a9f1fb941 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2077,10 +2077,8 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
struct xfrm_mgr *km;
struct xfrm_policy *pol = NULL;
-#ifdef CONFIG_COMPAT
if (in_compat_syscall())
return -EOPNOTSUPP;
-#endif
if (!optval && !optlen) {
xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
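This hunk and the matching one in xfrm_user.c below drop the #ifdef CONFIG_COMPAT guards because in_compat_syscall() is usable unconditionally: when compat support is compiled out, the header supplies a static inline stub returning false that the compiler folds away. The general header-stub idiom, sketched for a made-up CONFIG_FEATURE_X:

/* Header sketch: callers never need #ifdef around feature_x_active(). */
#ifdef CONFIG_FEATURE_X
int feature_x_active(void);	/* real implementation compiled elsewhere */
#else
static inline int feature_x_active(void) { return 0; }	/* folds to 0 */
#endif

/* Call sites stay unconditional and cost nothing when disabled: */
int handle_request(void)
{
	if (feature_x_active())
		return -1;	/* cf. -EOPNOTSUPP in the patch */
	return 0;
}

int main(void)
{
	return handle_request();
}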
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index ca7a207b81a9..c9a84e22f5d5 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2621,10 +2621,8 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
const struct xfrm_link *link;
int type, err;
-#ifdef CONFIG_COMPAT
if (in_compat_syscall())
return -EOPNOTSUPP;
-#endif
type = nlh->nlmsg_type;
if (type > XFRM_MSG_MAX)