summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>2018-07-30 10:08:09 +0200
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2018-07-30 10:08:09 +0200
commitd2fc88a61b4ea99f574bde16e92718e22f312136 (patch)
treef256841ebcbc3b90fc29d7fa751610ba9aac5a2b /net
parente16f4f3e0b7daecd48d4f944ab4147c1a6cb16a8 (diff)
parentacb1872577b346bd15ab3a3f8dff780d6cca4b70 (diff)
downloadlinux-stable-d2fc88a61b4ea99f574bde16e92718e22f312136.tar.gz
linux-stable-d2fc88a61b4ea99f574bde16e92718e22f312136.tar.bz2
linux-stable-d2fc88a61b4ea99f574bde16e92718e22f312136.zip
Merge 4.18-rc7 into driver-core-next
We need the driver core changes in here as well for testing. Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c2
-rw-r--r--net/9p/client.c3
-rw-r--r--net/Makefile4
-rw-r--r--net/appletalk/ddp.c2
-rw-r--r--net/atm/common.c11
-rw-r--r--net/atm/common.h2
-rw-r--r--net/atm/pvc.c2
-rw-r--r--net/atm/svc.c2
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/batman-adv/bat_iv_ogm.c4
-rw-r--r--net/batman-adv/bat_v.c4
-rw-r--r--net/batman-adv/debugfs.c40
-rw-r--r--net/batman-adv/debugfs.h11
-rw-r--r--net/batman-adv/hard-interface.c37
-rw-r--r--net/batman-adv/translation-table.c7
-rw-r--r--net/bluetooth/af_bluetooth.c7
-rw-r--r--net/bluetooth/hci_sock.c2
-rw-r--r--net/bluetooth/l2cap_sock.c2
-rw-r--r--net/bluetooth/rfcomm/sock.c2
-rw-r--r--net/bluetooth/sco.c2
-rw-r--r--net/bpf/test_run.c17
-rw-r--r--net/bpfilter/Kconfig2
-rw-r--r--net/bpfilter/Makefile17
-rw-r--r--net/bpfilter/bpfilter_kern.c11
-rw-r--r--net/bpfilter/bpfilter_umh_blob.S7
-rw-r--r--net/caif/caif_dev.c4
-rw-r--r--net/caif/caif_socket.c12
-rw-r--r--net/can/bcm.c2
-rw-r--r--net/can/raw.c2
-rw-r--r--net/core/datagram.c13
-rw-r--r--net/core/dev_ioctl.c11
-rw-r--r--net/core/fib_rules.c80
-rw-r--r--net/core/filter.c235
-rw-r--r--net/core/gen_stats.c16
-rw-r--r--net/core/page_pool.c2
-rw-r--r--net/core/rtnetlink.c9
-rw-r--r--net/core/skbuff.c14
-rw-r--r--net/core/sock.c13
-rw-r--r--net/dccp/ccids/ccid3.c16
-rw-r--r--net/dccp/dccp.h3
-rw-r--r--net/dccp/ipv4.c2
-rw-r--r--net/dccp/ipv6.c2
-rw-r--r--net/dccp/proto.c13
-rw-r--r--net/decnet/af_decnet.c6
-rw-r--r--net/dns_resolver/dns_key.c28
-rw-r--r--net/ieee802154/6lowpan/core.c6
-rw-r--r--net/ieee802154/socket.c4
-rw-r--r--net/ipv4/af_inet.c8
-rw-r--r--net/ipv4/fib_frontend.c1
-rw-r--r--net/ipv4/fou.c4
-rw-r--r--net/ipv4/gre_offload.c2
-rw-r--r--net/ipv4/igmp.c61
-rw-r--r--net/ipv4/inet_fragment.c2
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/ip_sockglue.c11
-rw-r--r--net/ipv4/netfilter/ip_tables.c1
-rw-r--r--net/ipv4/netfilter/nf_tproxy_ipv4.c18
-rw-r--r--net/ipv4/sysctl_net_ipv4.c23
-rw-r--r--net/ipv4/tcp.c39
-rw-r--r--net/ipv4/tcp_dctcp.c75
-rw-r--r--net/ipv4/tcp_input.c78
-rw-r--r--net/ipv4/tcp_ipv4.c23
-rw-r--r--net/ipv4/tcp_output.c36
-rw-r--r--net/ipv4/udp.c10
-rw-r--r--net/ipv4/udp_offload.c2
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/addrconf.c12
-rw-r--r--net/ipv6/af_inet6.c4
-rw-r--r--net/ipv6/calipso.c9
-rw-r--r--net/ipv6/datagram.c7
-rw-r--r--net/ipv6/exthdrs.c111
-rw-r--r--net/ipv6/icmp.c5
-rw-r--r--net/ipv6/ip6_fib.c156
-rw-r--r--net/ipv6/ip6_gre.c3
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/ipv6_sockglue.c32
-rw-r--r--net/ipv6/mcast.c76
-rw-r--r--net/ipv6/ndisc.c2
-rw-r--r--net/ipv6/netfilter/ip6_tables.c1
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c8
-rw-r--r--net/ipv6/netfilter/nf_tproxy_ipv6.c18
-rw-r--r--net/ipv6/raw.c4
-rw-r--r--net/ipv6/route.c51
-rw-r--r--net/ipv6/seg6_hmac.c2
-rw-r--r--net/ipv6/seg6_iptunnel.c2
-rw-r--r--net/ipv6/tcp_ipv6.c6
-rw-r--r--net/iucv/af_iucv.c7
-rw-r--r--net/kcm/kcmsock.c10
-rw-r--r--net/key/af_key.c2
-rw-r--r--net/l2tp/l2tp_ip.c2
-rw-r--r--net/l2tp/l2tp_ip6.c2
-rw-r--r--net/l2tp/l2tp_ppp.c2
-rw-r--r--net/llc/af_llc.c2
-rw-r--r--net/mac80211/rx.c5
-rw-r--r--net/mac80211/tx.c2
-rw-r--r--net/mac80211/util.c3
-rw-r--r--net/netfilter/Kconfig25
-rw-r--r--net/netfilter/Makefile7
-rw-r--r--net/netfilter/nf_conncount.c52
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/netfilter/nf_conntrack_helper.c5
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c8
-rw-r--r--net/netfilter/nf_log.c13
-rw-r--r--net/netfilter/nf_tables_api.c304
-rw-r--r--net/netfilter/nf_tables_set_core.c28
-rw-r--r--net/netfilter/nfnetlink_queue.c3
-rw-r--r--net/netfilter/nft_compat.c13
-rw-r--r--net/netfilter/nft_immediate.c3
-rw-r--r--net/netfilter/nft_lookup.c13
-rw-r--r--net/netfilter/nft_set_bitmap.c19
-rw-r--r--net/netfilter/nft_set_hash.c30
-rw-r--r--net/netfilter/nft_set_rbtree.c26
-rw-r--r--net/netfilter/xt_TPROXY.c8
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/netrom/af_netrom.c2
-rw-r--r--net/nfc/llcp_commands.c9
-rw-r--r--net/nfc/llcp_sock.c9
-rw-r--r--net/nfc/rawsock.c4
-rw-r--r--net/nsh/nsh.c2
-rw-r--r--net/packet/af_packet.c27
-rw-r--r--net/phonet/socket.c9
-rw-r--r--net/qrtr/qrtr.c15
-rw-r--r--net/rds/connection.c11
-rw-r--r--net/rds/loop.c56
-rw-r--r--net/rds/loop.h2
-rw-r--r--net/rose/af_rose.c2
-rw-r--r--net/rxrpc/af_rxrpc.c10
-rw-r--r--net/sched/act_csum.c6
-rw-r--r--net/sched/act_tunnel_key.c6
-rw-r--r--net/sched/cls_api.c4
-rw-r--r--net/sched/cls_flower.c21
-rw-r--r--net/sched/sch_fq_codel.c25
-rw-r--r--net/sched/sch_hfsc.c4
-rw-r--r--net/sctp/chunk.c4
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/sctp/protocol.c2
-rw-r--r--net/sctp/socket.c4
-rw-r--r--net/sctp/transport.c2
-rw-r--r--net/smc/af_smc.c138
-rw-r--r--net/smc/smc.h8
-rw-r--r--net/smc/smc_clc.c3
-rw-r--r--net/smc/smc_close.c2
-rw-r--r--net/smc/smc_tx.c12
-rw-r--r--net/socket.c50
-rw-r--r--net/strparser/strparser.c22
-rw-r--r--net/tipc/discover.c18
-rw-r--r--net/tipc/net.c17
-rw-r--r--net/tipc/node.c7
-rw-r--r--net/tipc/socket.c14
-rw-r--r--net/tls/tls_main.c2
-rw-r--r--net/tls/tls_sw.c29
-rw-r--r--net/unix/af_unix.c30
-rw-r--r--net/vmw_vsock/af_vsock.c19
-rw-r--r--net/vmw_vsock/virtio_transport.c2
-rw-r--r--net/wireless/nl80211.c60
-rw-r--r--net/wireless/reg.c28
-rw-r--r--net/wireless/trace.h18
-rw-r--r--net/x25/af_x25.c2
-rw-r--r--net/xdp/xsk.c37
-rw-r--r--net/xdp/xsk_queue.h9
160 files changed, 1830 insertions, 1136 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 73a65789271b..8ccee3d01822 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -693,7 +693,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
out_unlock:
rcu_read_unlock();
out:
- NAPI_GRO_CB(skb)->flush |= flush;
+ skb_gro_flush_final(skb, pp, flush);
return pp;
}
diff --git a/net/9p/client.c b/net/9p/client.c
index 18c5271910dc..5c1343195292 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -225,7 +225,8 @@ static int parse_opts(char *opts, struct p9_client *clnt)
}
free_and_return:
- v9fs_put_trans(clnt->trans_mod);
+ if (ret)
+ v9fs_put_trans(clnt->trans_mod);
kfree(tmp_options);
return ret;
}
diff --git a/net/Makefile b/net/Makefile
index 13ec0d5415c7..bdaf53925acd 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -20,11 +20,7 @@ obj-$(CONFIG_TLS) += tls/
obj-$(CONFIG_XFRM) += xfrm/
obj-$(CONFIG_UNIX) += unix/
obj-$(CONFIG_NET) += ipv6/
-ifneq ($(CC_CAN_LINK),y)
-$(warning CC cannot link executables. Skipping bpfilter.)
-else
obj-$(CONFIG_BPFILTER) += bpfilter/
-endif
obj-$(CONFIG_PACKET) += packet/
obj-$(CONFIG_NET_KEY) += key/
obj-$(CONFIG_BRIDGE) += bridge/
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 55fdba05d7d9..9b6bc5abe946 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1869,7 +1869,7 @@ static const struct proto_ops atalk_dgram_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = atalk_getname,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.ioctl = atalk_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = atalk_compat_ioctl,
diff --git a/net/atm/common.c b/net/atm/common.c
index ff5748b2190f..a7a68e509628 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -647,11 +647,16 @@ out:
return error;
}
-__poll_t vcc_poll_mask(struct socket *sock, __poll_t events)
+__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
- struct atm_vcc *vcc = ATM_SD(sock);
- __poll_t mask = 0;
+ struct atm_vcc *vcc;
+ __poll_t mask;
+
+ sock_poll_wait(file, sk_sleep(sk), wait);
+ mask = 0;
+
+ vcc = ATM_SD(sock);
/* exceptional events */
if (sk->sk_err)
diff --git a/net/atm/common.h b/net/atm/common.h
index 526796ad230f..5850649068bb 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -17,7 +17,7 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci);
int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags);
int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len);
-__poll_t vcc_poll_mask(struct socket *sock, __poll_t events);
+__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int vcc_setsockopt(struct socket *sock, int level, int optname,
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 9f75092fe778..2cb10af16afc 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -113,7 +113,7 @@ static const struct proto_ops pvc_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = pvc_getname,
- .poll_mask = vcc_poll_mask,
+ .poll = vcc_poll,
.ioctl = vcc_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = vcc_compat_ioctl,
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 53f4ad7087b1..2f91b766ac42 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -636,7 +636,7 @@ static const struct proto_ops svc_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = svc_accept,
.getname = svc_getname,
- .poll_mask = vcc_poll_mask,
+ .poll = vcc_poll,
.ioctl = svc_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = svc_compat_ioctl,
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index d1d2442ce573..c603d33d5410 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1941,7 +1941,7 @@ static const struct proto_ops ax25_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = ax25_accept,
.getname = ax25_getname,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.ioctl = ax25_ioctl,
.listen = ax25_listen,
.shutdown = ax25_shutdown,
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index be09a9883825..73bf6a93a3cf 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -2732,7 +2732,7 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
{
struct batadv_neigh_ifinfo *router_ifinfo = NULL;
struct batadv_neigh_node *router;
- struct batadv_gw_node *curr_gw;
+ struct batadv_gw_node *curr_gw = NULL;
int ret = 0;
void *hdr;
@@ -2780,6 +2780,8 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
ret = 0;
out:
+ if (curr_gw)
+ batadv_gw_node_put(curr_gw);
if (router_ifinfo)
batadv_neigh_ifinfo_put(router_ifinfo);
if (router)
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index ec93337ee259..6baec4e68898 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -927,7 +927,7 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
{
struct batadv_neigh_ifinfo *router_ifinfo = NULL;
struct batadv_neigh_node *router;
- struct batadv_gw_node *curr_gw;
+ struct batadv_gw_node *curr_gw = NULL;
int ret = 0;
void *hdr;
@@ -995,6 +995,8 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
ret = 0;
out:
+ if (curr_gw)
+ batadv_gw_node_put(curr_gw);
if (router_ifinfo)
batadv_neigh_ifinfo_put(router_ifinfo);
if (router)
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 4229b01ac7b5..87479c60670e 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -19,6 +19,7 @@
#include "debugfs.h"
#include "main.h"
+#include <linux/dcache.h>
#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/errno.h>
@@ -344,6 +345,25 @@ out:
}
/**
+ * batadv_debugfs_rename_hardif() - Fix debugfs path for renamed hardif
+ * @hard_iface: hard interface which was renamed
+ */
+void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface)
+{
+ const char *name = hard_iface->net_dev->name;
+ struct dentry *dir;
+ struct dentry *d;
+
+ dir = hard_iface->debug_dir;
+ if (!dir)
+ return;
+
+ d = debugfs_rename(dir->d_parent, dir, dir->d_parent, name);
+ if (!d)
+ pr_err("Can't rename debugfs dir to %s\n", name);
+}
+
+/**
* batadv_debugfs_del_hardif() - delete the base directory for a hard interface
* in debugfs.
* @hard_iface: hard interface which is deleted.
@@ -414,6 +434,26 @@ out:
}
/**
+ * batadv_debugfs_rename_meshif() - Fix debugfs path for renamed softif
+ * @dev: net_device which was renamed
+ */
+void batadv_debugfs_rename_meshif(struct net_device *dev)
+{
+ struct batadv_priv *bat_priv = netdev_priv(dev);
+ const char *name = dev->name;
+ struct dentry *dir;
+ struct dentry *d;
+
+ dir = bat_priv->debug_dir;
+ if (!dir)
+ return;
+
+ d = debugfs_rename(dir->d_parent, dir, dir->d_parent, name);
+ if (!d)
+ pr_err("Can't rename debugfs dir to %s\n", name);
+}
+
+/**
* batadv_debugfs_del_meshif() - Remove interface dependent debugfs entries
* @dev: netdev struct of the soft interface
*/
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 37b069698b04..08a592ffbee5 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -30,8 +30,10 @@ struct net_device;
void batadv_debugfs_init(void);
void batadv_debugfs_destroy(void);
int batadv_debugfs_add_meshif(struct net_device *dev);
+void batadv_debugfs_rename_meshif(struct net_device *dev);
void batadv_debugfs_del_meshif(struct net_device *dev);
int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface);
+void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface);
void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface);
#else
@@ -49,6 +51,10 @@ static inline int batadv_debugfs_add_meshif(struct net_device *dev)
return 0;
}
+static inline void batadv_debugfs_rename_meshif(struct net_device *dev)
+{
+}
+
static inline void batadv_debugfs_del_meshif(struct net_device *dev)
{
}
@@ -60,6 +66,11 @@ int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface)
}
static inline
+void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface)
+{
+}
+
+static inline
void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface)
{
}
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index c405d15befd6..2f0d42f2f913 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -989,6 +989,32 @@ void batadv_hardif_remove_interfaces(void)
rtnl_unlock();
}
+/**
+ * batadv_hard_if_event_softif() - Handle events for soft interfaces
+ * @event: NETDEV_* event to handle
+ * @net_dev: net_device which generated an event
+ *
+ * Return: NOTIFY_* result
+ */
+static int batadv_hard_if_event_softif(unsigned long event,
+ struct net_device *net_dev)
+{
+ struct batadv_priv *bat_priv;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ batadv_sysfs_add_meshif(net_dev);
+ bat_priv = netdev_priv(net_dev);
+ batadv_softif_create_vlan(bat_priv, BATADV_NO_FLAGS);
+ break;
+ case NETDEV_CHANGENAME:
+ batadv_debugfs_rename_meshif(net_dev);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
static int batadv_hard_if_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -997,12 +1023,8 @@ static int batadv_hard_if_event(struct notifier_block *this,
struct batadv_hard_iface *primary_if = NULL;
struct batadv_priv *bat_priv;
- if (batadv_softif_is_valid(net_dev) && event == NETDEV_REGISTER) {
- batadv_sysfs_add_meshif(net_dev);
- bat_priv = netdev_priv(net_dev);
- batadv_softif_create_vlan(bat_priv, BATADV_NO_FLAGS);
- return NOTIFY_DONE;
- }
+ if (batadv_softif_is_valid(net_dev))
+ return batadv_hard_if_event_softif(event, net_dev);
hard_iface = batadv_hardif_get_by_netdev(net_dev);
if (!hard_iface && (event == NETDEV_REGISTER ||
@@ -1051,6 +1073,9 @@ static int batadv_hard_if_event(struct notifier_block *this,
if (batadv_is_wifi_hardif(hard_iface))
hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS;
break;
+ case NETDEV_CHANGENAME:
+ batadv_debugfs_rename_hardif(hard_iface);
+ break;
default:
break;
}
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 3986551397ca..12a2b7d21376 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1705,7 +1705,9 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
ether_addr_copy(common->addr, tt_addr);
common->vid = vid;
- common->flags = flags;
+ if (!is_multicast_ether_addr(common->addr))
+ common->flags = flags & (~BATADV_TT_SYNC_MASK);
+
tt_global_entry->roam_at = 0;
/* node must store current time in case of roaming. This is
* needed to purge this entry out on timeout (if nobody claims
@@ -1768,7 +1770,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
* TT_CLIENT_TEMP, therefore they have to be copied in the
* client entry
*/
- common->flags |= flags & (~BATADV_TT_SYNC_MASK);
+ if (!is_multicast_ether_addr(common->addr))
+ common->flags |= flags & (~BATADV_TT_SYNC_MASK);
/* If there is the BATADV_TT_CLIENT_ROAM flag set, there is only
* one originator left in the list and we previously received a
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 510ab4f55df5..3264e1873219 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -437,13 +437,16 @@ static inline __poll_t bt_accept_poll(struct sock *parent)
return 0;
}
-__poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events)
+__poll_t bt_sock_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
{
struct sock *sk = sock->sk;
__poll_t mask = 0;
BT_DBG("sock %p, sk %p", sock, sk);
+ poll_wait(file, sk_sleep(sk), wait);
+
if (sk->sk_state == BT_LISTEN)
return bt_accept_poll(sk);
@@ -475,7 +478,7 @@ __poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events)
return mask;
}
-EXPORT_SYMBOL(bt_sock_poll_mask);
+EXPORT_SYMBOL(bt_sock_poll);
int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index d6c099861538..1506e1632394 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1975,7 +1975,7 @@ static const struct proto_ops hci_sock_ops = {
.sendmsg = hci_sock_sendmsg,
.recvmsg = hci_sock_recvmsg,
.ioctl = hci_sock_ioctl,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = hci_sock_setsockopt,
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 742a190034e6..686bdc6b35b0 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1653,7 +1653,7 @@ static const struct proto_ops l2cap_sock_ops = {
.getname = l2cap_sock_getname,
.sendmsg = l2cap_sock_sendmsg,
.recvmsg = l2cap_sock_recvmsg,
- .poll_mask = bt_sock_poll_mask,
+ .poll = bt_sock_poll,
.ioctl = bt_sock_ioctl,
.mmap = sock_no_mmap,
.socketpair = sock_no_socketpair,
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 1cf57622473a..d606e9212291 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -1049,7 +1049,7 @@ static const struct proto_ops rfcomm_sock_ops = {
.setsockopt = rfcomm_sock_setsockopt,
.getsockopt = rfcomm_sock_getsockopt,
.ioctl = rfcomm_sock_ioctl,
- .poll_mask = bt_sock_poll_mask,
+ .poll = bt_sock_poll,
.socketpair = sock_no_socketpair,
.mmap = sock_no_mmap
};
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index d60dbc61d170..413b8ee49fec 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -1197,7 +1197,7 @@ static const struct proto_ops sco_sock_ops = {
.getname = sco_sock_getname,
.sendmsg = sco_sock_sendmsg,
.recvmsg = sco_sock_recvmsg,
- .poll_mask = bt_sock_poll_mask,
+ .poll = bt_sock_poll,
.ioctl = bt_sock_ioctl,
.mmap = sock_no_mmap,
.socketpair = sock_no_socketpair,
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 68c3578343b4..22a78eedf4b1 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -96,6 +96,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
u32 size = kattr->test.data_size_in;
u32 repeat = kattr->test.repeat;
u32 retval, duration;
+ int hh_len = ETH_HLEN;
struct sk_buff *skb;
void *data;
int ret;
@@ -131,12 +132,22 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
skb_reset_network_header(skb);
if (is_l2)
- __skb_push(skb, ETH_HLEN);
+ __skb_push(skb, hh_len);
if (is_direct_pkt_access)
bpf_compute_data_pointers(skb);
retval = bpf_test_run(prog, skb, repeat, &duration);
- if (!is_l2)
- __skb_push(skb, ETH_HLEN);
+ if (!is_l2) {
+ if (skb_headroom(skb) < hh_len) {
+ int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
+
+ if (pskb_expand_head(skb, nhead, 0, GFP_USER)) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+ }
+ memset(__skb_push(skb, hh_len), 0, hh_len);
+ }
+
size = skb->len;
/* bpf program can never convert linear skb to non-linear */
if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig
index a948b072c28f..76deb6615883 100644
--- a/net/bpfilter/Kconfig
+++ b/net/bpfilter/Kconfig
@@ -1,6 +1,5 @@
menuconfig BPFILTER
bool "BPF based packet filtering framework (BPFILTER)"
- default n
depends on NET && BPF && INET
help
This builds experimental bpfilter framework that is aiming to
@@ -9,6 +8,7 @@ menuconfig BPFILTER
if BPFILTER
config BPFILTER_UMH
tristate "bpfilter kernel module with user mode helper"
+ depends on $(success,$(srctree)/scripts/cc-can-link.sh $(CC))
default m
help
This builds bpfilter kernel module with embedded user mode helper
diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
index dd86b022eff0..39c6980b5d99 100644
--- a/net/bpfilter/Makefile
+++ b/net/bpfilter/Makefile
@@ -15,20 +15,7 @@ ifeq ($(CONFIG_BPFILTER_UMH), y)
HOSTLDFLAGS += -static
endif
-# a bit of elf magic to convert bpfilter_umh binary into a binary blob
-# inside bpfilter_umh.o elf file referenced by
-# _binary_net_bpfilter_bpfilter_umh_start symbol
-# which bpfilter_kern.c passes further into umh blob loader at run-time
-quiet_cmd_copy_umh = GEN $@
- cmd_copy_umh = echo ':' > $(obj)/.bpfilter_umh.o.cmd; \
- $(OBJCOPY) -I binary \
- `LC_ALL=C objdump -f net/bpfilter/bpfilter_umh \
- |awk -F' |,' '/file format/{print "-O",$$NF} \
- /^architecture:/{print "-B",$$2}'` \
- --rename-section .data=.init.rodata $< $@
-
-$(obj)/bpfilter_umh.o: $(obj)/bpfilter_umh
- $(call cmd,copy_umh)
+$(obj)/bpfilter_umh_blob.o: $(obj)/bpfilter_umh
obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o
-bpfilter-objs += bpfilter_kern.o bpfilter_umh.o
+bpfilter-objs += bpfilter_kern.o bpfilter_umh_blob.o
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
index 09522573f611..f0fc182d3db7 100644
--- a/net/bpfilter/bpfilter_kern.c
+++ b/net/bpfilter/bpfilter_kern.c
@@ -10,11 +10,8 @@
#include <linux/file.h>
#include "msgfmt.h"
-#define UMH_start _binary_net_bpfilter_bpfilter_umh_start
-#define UMH_end _binary_net_bpfilter_bpfilter_umh_end
-
-extern char UMH_start;
-extern char UMH_end;
+extern char bpfilter_umh_start;
+extern char bpfilter_umh_end;
static struct umh_info info;
/* since ip_getsockopt() can run in parallel, serialize access to umh */
@@ -93,7 +90,9 @@ static int __init load_umh(void)
int err;
/* fork usermode process */
- err = fork_usermode_blob(&UMH_start, &UMH_end - &UMH_start, &info);
+ err = fork_usermode_blob(&bpfilter_umh_start,
+ &bpfilter_umh_end - &bpfilter_umh_start,
+ &info);
if (err)
return err;
pr_info("Loaded bpfilter_umh pid %d\n", info.pid);
diff --git a/net/bpfilter/bpfilter_umh_blob.S b/net/bpfilter/bpfilter_umh_blob.S
new file mode 100644
index 000000000000..40311d10d2f2
--- /dev/null
+++ b/net/bpfilter/bpfilter_umh_blob.S
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+ .section .init.rodata, "a"
+ .global bpfilter_umh_start
+bpfilter_umh_start:
+ .incbin "net/bpfilter/bpfilter_umh"
+ .global bpfilter_umh_end
+bpfilter_umh_end:
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index e0adcd123f48..711d7156efd8 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -131,8 +131,10 @@ static void caif_flow_cb(struct sk_buff *skb)
caifd = caif_get(skb->dev);
WARN_ON(caifd == NULL);
- if (caifd == NULL)
+ if (!caifd) {
+ rcu_read_unlock();
return;
+ }
caifd_hold(caifd);
rcu_read_unlock();
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index c7991867d622..a6fb1b3bcad9 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -934,11 +934,15 @@ static int caif_release(struct socket *sock)
}
/* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */
-static __poll_t caif_poll_mask(struct socket *sock, __poll_t events)
+static __poll_t caif_poll(struct file *file,
+ struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
+ __poll_t mask;
struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
- __poll_t mask = 0;
+
+ sock_poll_wait(file, sk_sleep(sk), wait);
+ mask = 0;
/* exceptional events? */
if (sk->sk_err)
@@ -972,7 +976,7 @@ static const struct proto_ops caif_seqpacket_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll_mask = caif_poll_mask,
+ .poll = caif_poll,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -993,7 +997,7 @@ static const struct proto_ops caif_stream_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll_mask = caif_poll_mask,
+ .poll = caif_poll,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 9393f25df08d..0af8f0db892a 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1660,7 +1660,7 @@ static const struct proto_ops bcm_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.ioctl = can_ioctl, /* use can_ioctl() from af_can.c */
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/can/raw.c b/net/can/raw.c
index fd7e2f49ea6a..1051eee82581 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -843,7 +843,7 @@ static const struct proto_ops raw_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = raw_getname,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.ioctl = can_ioctl, /* use can_ioctl() from af_can.c */
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index f19bf3dc2bd6..9938952c5c78 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -819,8 +819,9 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
/**
* datagram_poll - generic datagram poll
+ * @file: file struct
* @sock: socket
- * @events to wait for
+ * @wait: poll table
*
* Datagram poll: Again totally generic. This also handles
* sequenced packet sockets providing the socket receive queue
@@ -830,10 +831,14 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
* and you use a different write policy from sock_writeable()
* then please supply your own write_space callback.
*/
-__poll_t datagram_poll_mask(struct socket *sock, __poll_t events)
+__poll_t datagram_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
{
struct sock *sk = sock->sk;
- __poll_t mask = 0;
+ __poll_t mask;
+
+ sock_poll_wait(file, sk_sleep(sk), wait);
+ mask = 0;
/* exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
@@ -866,4 +871,4 @@ __poll_t datagram_poll_mask(struct socket *sock, __poll_t events)
return mask;
}
-EXPORT_SYMBOL(datagram_poll_mask);
+EXPORT_SYMBOL(datagram_poll);
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index a04e1e88bf3a..50537ff961a7 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -285,16 +285,9 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
if (ifr->ifr_qlen < 0)
return -EINVAL;
if (dev->tx_queue_len ^ ifr->ifr_qlen) {
- unsigned int orig_len = dev->tx_queue_len;
-
- dev->tx_queue_len = ifr->ifr_qlen;
- err = call_netdevice_notifiers(
- NETDEV_CHANGE_TX_QUEUE_LEN, dev);
- err = notifier_to_errno(err);
- if (err) {
- dev->tx_queue_len = orig_len;
+ err = dev_change_tx_queue_len(dev, ifr->ifr_qlen);
+ if (err)
return err;
- }
}
return 0;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 126ffc5bc630..f64aa13811ea 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -416,6 +416,14 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops,
if (rule->mark && r->mark != rule->mark)
continue;
+ if (rule->suppress_ifgroup != -1 &&
+ r->suppress_ifgroup != rule->suppress_ifgroup)
+ continue;
+
+ if (rule->suppress_prefixlen != -1 &&
+ r->suppress_prefixlen != rule->suppress_prefixlen)
+ continue;
+
if (rule->mark_mask && r->mark_mask != rule->mark_mask)
continue;
@@ -436,6 +444,9 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops,
if (rule->ip_proto && r->ip_proto != rule->ip_proto)
continue;
+ if (rule->proto && r->proto != rule->proto)
+ continue;
+
if (fib_rule_port_range_set(&rule->sport_range) &&
!fib_rule_port_range_compare(&r->sport_range,
&rule->sport_range))
@@ -645,6 +656,73 @@ errout:
return err;
}
+static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
+ struct nlattr **tb, struct fib_rule *rule)
+{
+ struct fib_rule *r;
+
+ list_for_each_entry(r, &ops->rules_list, list) {
+ if (r->action != rule->action)
+ continue;
+
+ if (r->table != rule->table)
+ continue;
+
+ if (r->pref != rule->pref)
+ continue;
+
+ if (memcmp(r->iifname, rule->iifname, IFNAMSIZ))
+ continue;
+
+ if (memcmp(r->oifname, rule->oifname, IFNAMSIZ))
+ continue;
+
+ if (r->mark != rule->mark)
+ continue;
+
+ if (r->suppress_ifgroup != rule->suppress_ifgroup)
+ continue;
+
+ if (r->suppress_prefixlen != rule->suppress_prefixlen)
+ continue;
+
+ if (r->mark_mask != rule->mark_mask)
+ continue;
+
+ if (r->tun_id != rule->tun_id)
+ continue;
+
+ if (r->fr_net != rule->fr_net)
+ continue;
+
+ if (r->l3mdev != rule->l3mdev)
+ continue;
+
+ if (!uid_eq(r->uid_range.start, rule->uid_range.start) ||
+ !uid_eq(r->uid_range.end, rule->uid_range.end))
+ continue;
+
+ if (r->ip_proto != rule->ip_proto)
+ continue;
+
+ if (r->proto != rule->proto)
+ continue;
+
+ if (!fib_rule_port_range_compare(&r->sport_range,
+ &rule->sport_range))
+ continue;
+
+ if (!fib_rule_port_range_compare(&r->dport_range,
+ &rule->dport_range))
+ continue;
+
+ if (!ops->compare(r, frh, tb))
+ continue;
+ return 1;
+ }
+ return 0;
+}
+
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -679,7 +757,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
- rule_find(ops, frh, tb, rule, user_priority)) {
+ rule_exists(ops, frh, tb, rule)) {
err = -EEXIST;
goto errout_free;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index e7f12e9f598c..06da770f543f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -459,11 +459,21 @@ static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp)
(!unaligned_ok && offset >= 0 &&
offset + ip_align >= 0 &&
offset + ip_align % size == 0))) {
+ bool ldx_off_ok = offset <= S16_MAX;
+
*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
*insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
- *insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP, size, 2 + endian);
- *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A, BPF_REG_D,
- offset);
+ *insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
+ size, 2 + endian + (!ldx_off_ok * 2));
+ if (ldx_off_ok) {
+ *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
+ BPF_REG_D, offset);
+ } else {
+ *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_D);
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_TMP, offset);
+ *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
+ BPF_REG_TMP, 0);
+ }
if (endian)
*insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8);
*insn++ = BPF_JMP_A(8);
@@ -1762,6 +1772,37 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
.arg2_type = ARG_ANYTHING,
};
+static inline int sk_skb_try_make_writable(struct sk_buff *skb,
+ unsigned int write_len)
+{
+ int err = __bpf_try_make_writable(skb, write_len);
+
+ bpf_compute_data_end_sk_skb(skb);
+ return err;
+}
+
+BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
+{
+ /* Idea is the following: should the needed direct read/write
+ * test fail during runtime, we can pull in more data and redo
+ * again, since implicitly, we invalidate previous checks here.
+ *
+ * Or, since we know how much we need to make read/writeable,
+ * this can be done once at the program beginning for direct
+ * access case. By this we overcome limitations of only current
+ * headroom being accessible.
+ */
+ return sk_skb_try_make_writable(skb, len ? : skb_headlen(skb));
+}
+
+static const struct bpf_func_proto sk_skb_pull_data_proto = {
+ .func = sk_skb_pull_data,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
u64, from, u64, to, u64, flags)
{
@@ -2779,7 +2820,8 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
static u32 __bpf_skb_max_len(const struct sk_buff *skb)
{
- return skb->dev->mtu + skb->dev->hard_header_len;
+ return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len :
+ SKB_MAX_ALLOC;
}
static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
@@ -2863,8 +2905,8 @@ static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
return __skb_trim_rcsum(skb, new_len);
}
-BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
- u64, flags)
+static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
+ u64 flags)
{
u32 max_len = __bpf_skb_max_len(skb);
u32 min_len = __bpf_skb_min_len(skb);
@@ -2900,6 +2942,13 @@ BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
if (!ret && skb_is_gso(skb))
skb_gso_reset(skb);
}
+ return ret;
+}
+
+BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
+ u64, flags)
+{
+ int ret = __bpf_skb_change_tail(skb, new_len, flags);
bpf_compute_data_pointers(skb);
return ret;
@@ -2914,9 +2963,27 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = {
.arg3_type = ARG_ANYTHING,
};
-BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
+BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
u64, flags)
{
+ int ret = __bpf_skb_change_tail(skb, new_len, flags);
+
+ bpf_compute_data_end_sk_skb(skb);
+ return ret;
+}
+
+static const struct bpf_func_proto sk_skb_change_tail_proto = {
+ .func = sk_skb_change_tail,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
+static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
+ u64 flags)
+{
u32 max_len = __bpf_skb_max_len(skb);
u32 new_len = skb->len + head_room;
int ret;
@@ -2941,8 +3008,16 @@ BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
skb_reset_mac_header(skb);
}
+ return ret;
+}
+
+BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
+ u64, flags)
+{
+ int ret = __bpf_skb_change_head(skb, head_room, flags);
+
bpf_compute_data_pointers(skb);
- return 0;
+ return ret;
}
static const struct bpf_func_proto bpf_skb_change_head_proto = {
@@ -2954,6 +3029,23 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
.arg3_type = ARG_ANYTHING,
};
+BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
+ u64, flags)
+{
+ int ret = __bpf_skb_change_head(skb, head_room, flags);
+
+ bpf_compute_data_end_sk_skb(skb);
+ return ret;
+}
+
+static const struct bpf_func_proto sk_skb_change_head_proto = {
+ .func = sk_skb_change_head,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
{
return xdp_data_meta_unsupported(xdp) ? 0 :
@@ -3046,12 +3138,16 @@ static int __bpf_tx_xdp(struct net_device *dev,
u32 index)
{
struct xdp_frame *xdpf;
- int sent;
+ int err, sent;
if (!dev->netdev_ops->ndo_xdp_xmit) {
return -EOPNOTSUPP;
}
+ err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
+ if (unlikely(err))
+ return err;
+
xdpf = convert_to_xdp_frame(xdp);
if (unlikely(!xdpf))
return -EOVERFLOW;
@@ -3285,7 +3381,8 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
goto err;
}
- if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
+ err = xdp_ok_fwd_dev(fwd, skb->len);
+ if (unlikely(err))
goto err;
skb->dev = fwd;
@@ -4073,8 +4170,9 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
memcpy(params->smac, dev->dev_addr, ETH_ALEN);
params->h_vlan_TCI = 0;
params->h_vlan_proto = 0;
+ params->ifindex = dev->ifindex;
- return dev->ifindex;
+ return 0;
}
#endif
@@ -4098,7 +4196,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
/* verify forwarding is enabled on this interface */
in_dev = __in_dev_get_rcu(dev);
if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
- return 0;
+ return BPF_FIB_LKUP_RET_FWD_DISABLED;
if (flags & BPF_FIB_LOOKUP_OUTPUT) {
fl4.flowi4_iif = 1;
@@ -4123,7 +4221,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
tb = fib_get_table(net, tbid);
if (unlikely(!tb))
- return 0;
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
} else {
@@ -4135,8 +4233,20 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
}
- if (err || res.type != RTN_UNICAST)
- return 0;
+ if (err) {
+ /* map fib lookup errors to RTN_ type */
+ if (err == -EINVAL)
+ return BPF_FIB_LKUP_RET_BLACKHOLE;
+ if (err == -EHOSTUNREACH)
+ return BPF_FIB_LKUP_RET_UNREACHABLE;
+ if (err == -EACCES)
+ return BPF_FIB_LKUP_RET_PROHIBIT;
+
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
+ }
+
+ if (res.type != RTN_UNICAST)
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
if (res.fi->fib_nhs > 1)
fib_select_path(net, &res, &fl4, NULL);
@@ -4144,19 +4254,16 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (check_mtu) {
mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
if (params->tot_len > mtu)
- return 0;
+ return BPF_FIB_LKUP_RET_FRAG_NEEDED;
}
nh = &res.fi->fib_nh[res.nh_sel];
/* do not handle lwt encaps right now */
if (nh->nh_lwtstate)
- return 0;
+ return BPF_FIB_LKUP_RET_UNSUPP_LWT;
dev = nh->nh_dev;
- if (unlikely(!dev))
- return 0;
-
if (nh->nh_gw)
params->ipv4_dst = nh->nh_gw;
@@ -4166,10 +4273,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
* rcu_read_lock_bh is not needed here
*/
neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst);
- if (neigh)
- return bpf_fib_set_fwd_params(params, neigh, dev);
+ if (!neigh)
+ return BPF_FIB_LKUP_RET_NO_NEIGH;
- return 0;
+ return bpf_fib_set_fwd_params(params, neigh, dev);
}
#endif
@@ -4190,7 +4297,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
/* link local addresses are never forwarded */
if (rt6_need_strict(dst) || rt6_need_strict(src))
- return 0;
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
dev = dev_get_by_index_rcu(net, params->ifindex);
if (unlikely(!dev))
@@ -4198,7 +4305,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
idev = __in6_dev_get_safely(dev);
if (unlikely(!idev || !net->ipv6.devconf_all->forwarding))
- return 0;
+ return BPF_FIB_LKUP_RET_FWD_DISABLED;
if (flags & BPF_FIB_LOOKUP_OUTPUT) {
fl6.flowi6_iif = 1;
@@ -4225,7 +4332,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
tb = ipv6_stub->fib6_get_table(net, tbid);
if (unlikely(!tb))
- return 0;
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);
} else {
@@ -4238,11 +4345,23 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
}
if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry))
- return 0;
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
+
+ if (unlikely(f6i->fib6_flags & RTF_REJECT)) {
+ switch (f6i->fib6_type) {
+ case RTN_BLACKHOLE:
+ return BPF_FIB_LKUP_RET_BLACKHOLE;
+ case RTN_UNREACHABLE:
+ return BPF_FIB_LKUP_RET_UNREACHABLE;
+ case RTN_PROHIBIT:
+ return BPF_FIB_LKUP_RET_PROHIBIT;
+ default:
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
+ }
+ }
- if (unlikely(f6i->fib6_flags & RTF_REJECT ||
- f6i->fib6_type != RTN_UNICAST))
- return 0;
+ if (f6i->fib6_type != RTN_UNICAST)
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)
f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6,
@@ -4252,11 +4371,11 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (check_mtu) {
mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);
if (params->tot_len > mtu)
- return 0;
+ return BPF_FIB_LKUP_RET_FRAG_NEEDED;
}
if (f6i->fib6_nh.nh_lwtstate)
- return 0;
+ return BPF_FIB_LKUP_RET_UNSUPP_LWT;
if (f6i->fib6_flags & RTF_GATEWAY)
*dst = f6i->fib6_nh.nh_gw;
@@ -4270,10 +4389,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
*/
neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
ndisc_hashfn, dst, dev);
- if (neigh)
- return bpf_fib_set_fwd_params(params, neigh, dev);
+ if (!neigh)
+ return BPF_FIB_LKUP_RET_NO_NEIGH;
- return 0;
+ return bpf_fib_set_fwd_params(params, neigh, dev);
}
#endif
@@ -4315,7 +4434,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
struct bpf_fib_lookup *, params, int, plen, u32, flags)
{
struct net *net = dev_net(skb->dev);
- int index = -EAFNOSUPPORT;
+ int rc = -EAFNOSUPPORT;
if (plen < sizeof(*params))
return -EINVAL;
@@ -4326,25 +4445,25 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
switch (params->family) {
#if IS_ENABLED(CONFIG_INET)
case AF_INET:
- index = bpf_ipv4_fib_lookup(net, params, flags, false);
+ rc = bpf_ipv4_fib_lookup(net, params, flags, false);
break;
#endif
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
- index = bpf_ipv6_fib_lookup(net, params, flags, false);
+ rc = bpf_ipv6_fib_lookup(net, params, flags, false);
break;
#endif
}
- if (index > 0) {
+ if (!rc) {
struct net_device *dev;
- dev = dev_get_by_index_rcu(net, index);
+ dev = dev_get_by_index_rcu(net, params->ifindex);
if (!is_skb_forwardable(dev, skb))
- index = 0;
+ rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
}
- return index;
+ return rc;
}
static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
@@ -4417,10 +4536,10 @@ static const struct bpf_func_proto bpf_lwt_push_encap_proto = {
.arg4_type = ARG_CONST_SIZE
};
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
const void *, from, u32, len)
{
-#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
void *srh_tlvs, *srh_end, *ptr;
@@ -4446,9 +4565,6 @@ BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
memcpy(skb->data + offset, from, len);
return 0;
-#else /* CONFIG_IPV6_SEG6_BPF */
- return -EOPNOTSUPP;
-#endif
}
static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
@@ -4464,7 +4580,6 @@ static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
u32, action, void *, param, u32, param_len)
{
-#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
struct ipv6_sr_hdr *srh;
@@ -4512,9 +4627,6 @@ BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
default:
return -EINVAL;
}
-#else /* CONFIG_IPV6_SEG6_BPF */
- return -EOPNOTSUPP;
-#endif
}
static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
@@ -4530,7 +4642,6 @@ static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
s32, len)
{
-#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
void *srh_end, *srh_tlvs, *ptr;
@@ -4574,9 +4685,6 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
srh_state->hdrlen += len;
srh_state->valid = 0;
return 0;
-#else /* CONFIG_IPV6_SEG6_BPF */
- return -EOPNOTSUPP;
-#endif
}
static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
@@ -4587,6 +4695,7 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_ANYTHING,
};
+#endif /* CONFIG_IPV6_SEG6_BPF */
bool bpf_helper_changes_pkt_data(void *func)
{
@@ -4595,9 +4704,12 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_skb_store_bytes ||
func == bpf_skb_change_proto ||
func == bpf_skb_change_head ||
+ func == sk_skb_change_head ||
func == bpf_skb_change_tail ||
+ func == sk_skb_change_tail ||
func == bpf_skb_adjust_room ||
func == bpf_skb_pull_data ||
+ func == sk_skb_pull_data ||
func == bpf_clone_redirect ||
func == bpf_l3_csum_replace ||
func == bpf_l4_csum_replace ||
@@ -4605,11 +4717,12 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_xdp_adjust_meta ||
func == bpf_msg_pull_data ||
func == bpf_xdp_adjust_tail ||
- func == bpf_lwt_push_encap ||
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
func == bpf_lwt_seg6_store_bytes ||
func == bpf_lwt_seg6_adjust_srh ||
- func == bpf_lwt_seg6_action
- )
+ func == bpf_lwt_seg6_action ||
+#endif
+ func == bpf_lwt_push_encap)
return true;
return false;
@@ -4849,11 +4962,11 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_load_bytes:
return &bpf_skb_load_bytes_proto;
case BPF_FUNC_skb_pull_data:
- return &bpf_skb_pull_data_proto;
+ return &sk_skb_pull_data_proto;
case BPF_FUNC_skb_change_tail:
- return &bpf_skb_change_tail_proto;
+ return &sk_skb_change_tail_proto;
case BPF_FUNC_skb_change_head:
- return &bpf_skb_change_head_proto;
+ return &sk_skb_change_head_proto;
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_cookie_proto;
case BPF_FUNC_get_socket_uid:
@@ -4944,12 +5057,14 @@ static const struct bpf_func_proto *
lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
case BPF_FUNC_lwt_seg6_store_bytes:
return &bpf_lwt_seg6_store_bytes_proto;
case BPF_FUNC_lwt_seg6_action:
return &bpf_lwt_seg6_action_proto;
case BPF_FUNC_lwt_seg6_adjust_srh:
return &bpf_lwt_seg6_adjust_srh_proto;
+#endif
default:
return lwt_out_func_proto(func_id, prog);
}
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index b2b2323bdc84..188d693cb251 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -77,8 +77,20 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
d->lock = lock;
spin_lock_bh(lock);
}
- if (d->tail)
- return gnet_stats_copy(d, type, NULL, 0, padattr);
+ if (d->tail) {
+ int ret = gnet_stats_copy(d, type, NULL, 0, padattr);
+
+ /* The initial attribute added in gnet_stats_copy() may be
+ * preceded by a padding attribute, in which case d->tail will
+ * end up pointing at the padding instead of the real attribute.
+ * Fix this so gnet_stats_finish_copy() adjusts the length of
+ * the right attribute.
+ */
+ if (ret == 0 && d->tail->nla_type == padattr)
+ d->tail = (struct nlattr *)((char *)d->tail +
+ NLA_ALIGN(d->tail->nla_len));
+ return ret;
+ }
return 0;
}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 68bf07206744..43a932cb609b 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -269,7 +269,7 @@ static void __page_pool_empty_ring(struct page_pool *pool)
struct page *page;
/* Empty recycle ring */
- while ((page = ptr_ring_consume(&pool->ring))) {
+ while ((page = ptr_ring_consume_bh(&pool->ring))) {
/* Verify the refcnt invariant of cached pages */
if (!(page_ref_count(page) == 1))
pr_crit("%s() page_pool refcnt %d violation\n",
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5ef61222fdef..e3f743c141b3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2759,9 +2759,12 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
return err;
}
- dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
-
- __dev_notify_flags(dev, old_flags, ~0U);
+ if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) {
+ __dev_notify_flags(dev, old_flags, 0U);
+ } else {
+ dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
+ __dev_notify_flags(dev, old_flags, ~0U);
+ }
return 0;
}
EXPORT_SYMBOL(rtnl_configure_link);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c642304f178c..fb35b62af272 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -858,6 +858,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
n->cloned = 1;
n->nohdr = 0;
n->peeked = 0;
+ C(pfmemalloc);
n->destructor = NULL;
C(tail);
C(end);
@@ -3719,6 +3720,7 @@ normal:
net_warn_ratelimited(
"skb_segment: too many frags: %u %u\n",
pos, mss);
+ err = -EINVAL;
goto err;
}
@@ -3752,11 +3754,10 @@ skip_fraglist:
perform_csum_check:
if (!csum) {
- if (skb_has_shared_frag(nskb)) {
- err = __skb_linearize(nskb);
- if (err)
- goto err;
- }
+ if (skb_has_shared_frag(nskb) &&
+ __skb_linearize(nskb))
+ goto err;
+
if (!nskb->remcsum_offload)
nskb->ip_summed = CHECKSUM_NONE;
SKB_GSO_CB(nskb)->csum =
@@ -5276,8 +5277,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
if (npages >= 1 << order) {
page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
__GFP_COMP |
- __GFP_NOWARN |
- __GFP_NORETRY,
+ __GFP_NOWARN,
order);
if (page)
goto fill_page;
diff --git a/net/core/sock.c b/net/core/sock.c
index bcc41829a16d..bc2d7a37297f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2277,9 +2277,9 @@ int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
pfrag->offset += use;
sge = sg + sg_curr - 1;
- if (sg_curr > first_coalesce && sg_page(sg) == pfrag->page &&
- sg->offset + sg->length == orig_offset) {
- sg->length += use;
+ if (sg_curr > first_coalesce && sg_page(sge) == pfrag->page &&
+ sge->offset + sge->length == orig_offset) {
+ sge->length += use;
} else {
sge = sg + sg_curr;
sg_unmark_end(sge);
@@ -3243,7 +3243,8 @@ static int req_prot_init(const struct proto *prot)
rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
rsk_prot->obj_size, 0,
- prot->slab_flags, NULL);
+ SLAB_ACCOUNT | prot->slab_flags,
+ NULL);
if (!rsk_prot->slab) {
pr_crit("%s: Can't create request sock SLAB cache!\n",
@@ -3258,7 +3259,8 @@ int proto_register(struct proto *prot, int alloc_slab)
if (alloc_slab) {
prot->slab = kmem_cache_create_usercopy(prot->name,
prot->obj_size, 0,
- SLAB_HWCACHE_ALIGN | prot->slab_flags,
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT |
+ prot->slab_flags,
prot->useroffset, prot->usersize,
NULL);
@@ -3281,6 +3283,7 @@ int proto_register(struct proto *prot, int alloc_slab)
kmem_cache_create(prot->twsk_prot->twsk_slab_name,
prot->twsk_prot->twsk_obj_size,
0,
+ SLAB_ACCOUNT |
prot->slab_flags,
NULL);
if (prot->twsk_prot->twsk_slab == NULL)
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 8b5ba6dffac7..12877a1514e7 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -600,7 +600,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
{
struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
- ktime_t now = ktime_get_real();
+ ktime_t now = ktime_get();
s64 delta = 0;
switch (fbtype) {
@@ -625,15 +625,14 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
case CCID3_FBACK_PERIODIC:
delta = ktime_us_delta(now, hc->rx_tstamp_last_feedback);
if (delta <= 0)
- DCCP_BUG("delta (%ld) <= 0", (long)delta);
- else
- hc->rx_x_recv = scaled_div32(hc->rx_bytes_recv, delta);
+ delta = 1;
+ hc->rx_x_recv = scaled_div32(hc->rx_bytes_recv, delta);
break;
default:
return;
}
- ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n", (long)delta,
+ ccid3_pr_debug("Interval %lldusec, X_recv=%u, 1/p=%u\n", delta,
hc->rx_x_recv, hc->rx_pinv);
hc->rx_tstamp_last_feedback = now;
@@ -680,7 +679,8 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
static u32 ccid3_first_li(struct sock *sk)
{
struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
- u32 x_recv, p, delta;
+ u32 x_recv, p;
+ s64 delta;
u64 fval;
if (hc->rx_rtt == 0) {
@@ -688,7 +688,9 @@ static u32 ccid3_first_li(struct sock *sk)
hc->rx_rtt = DCCP_FALLBACK_RTT;
}
- delta = ktime_to_us(net_timedelta(hc->rx_tstamp_last_feedback));
+ delta = ktime_us_delta(ktime_get(), hc->rx_tstamp_last_feedback);
+ if (delta <= 0)
+ delta = 1;
x_recv = scaled_div32(hc->rx_bytes_recv, delta);
if (x_recv == 0) { /* would also trigger divide-by-zero */
DCCP_WARN("X_recv==0\n");
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 0ea2ee56ac1b..f91e3816806b 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -316,7 +316,8 @@ int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
int flags, int *addr_len);
void dccp_shutdown(struct sock *sk, int how);
int inet_dccp_listen(struct socket *sock, int backlog);
-__poll_t dccp_poll_mask(struct socket *sock, __poll_t events);
+__poll_t dccp_poll(struct file *file, struct socket *sock,
+ poll_table *wait);
int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
void dccp_req_err(struct sock *sk, u64 seq);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index a9e478cd3787..b08feb219b44 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -984,7 +984,7 @@ static const struct proto_ops inet_dccp_ops = {
.accept = inet_accept,
.getname = inet_getname,
/* FIXME: work on tcp_poll to rename it to inet_csk_poll */
- .poll_mask = dccp_poll_mask,
+ .poll = dccp_poll,
.ioctl = inet_ioctl,
/* FIXME: work on inet_listen to rename it to sock_common_listen */
.listen = inet_dccp_listen,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 17fc4e0166ba..6344f1b18a6a 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1070,7 +1070,7 @@ static const struct proto_ops inet6_dccp_ops = {
.socketpair = sock_no_socketpair,
.accept = inet_accept,
.getname = inet6_getname,
- .poll_mask = dccp_poll_mask,
+ .poll = dccp_poll,
.ioctl = inet6_ioctl,
.listen = inet_dccp_listen,
.shutdown = inet_shutdown,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index ca21c1c76da0..0d56e36a6db7 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -312,11 +312,20 @@ int dccp_disconnect(struct sock *sk, int flags)
EXPORT_SYMBOL_GPL(dccp_disconnect);
-__poll_t dccp_poll_mask(struct socket *sock, __poll_t events)
+/*
+ * Wait for a DCCP event.
+ *
+ * Note that we don't need to lock the socket, as the upper poll layers
+ * take care of normal races (between the test and the event) and we don't
+ * go look at any of the socket buffers directly.
+ */
+__poll_t dccp_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
{
__poll_t mask;
struct sock *sk = sock->sk;
+ sock_poll_wait(file, sk_sleep(sk), wait);
if (sk->sk_state == DCCP_LISTEN)
return inet_csk_listen_poll(sk);
@@ -358,7 +367,7 @@ __poll_t dccp_poll_mask(struct socket *sock, __poll_t events)
return mask;
}
-EXPORT_SYMBOL_GPL(dccp_poll_mask);
+EXPORT_SYMBOL_GPL(dccp_poll);
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 9a686d890bfa..7d6ff983ba2c 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1207,11 +1207,11 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer)
}
-static __poll_t dn_poll_mask(struct socket *sock, __poll_t events)
+static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
struct dn_scp *scp = DN_SK(sk);
- __poll_t mask = datagram_poll_mask(sock, events);
+ __poll_t mask = datagram_poll(file, sock, wait);
if (!skb_queue_empty(&scp->other_receive_queue))
mask |= EPOLLRDBAND;
@@ -2331,7 +2331,7 @@ static const struct proto_ops dn_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = dn_accept,
.getname = dn_getname,
- .poll_mask = dn_poll_mask,
+ .poll = dn_poll,
.ioctl = dn_ioctl,
.listen = dn_listen,
.shutdown = dn_shutdown,
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 40c851693f77..0c9478b91fa5 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -86,35 +86,39 @@ dns_resolver_preparse(struct key_preparsed_payload *prep)
opt++;
kdebug("options: '%s'", opt);
do {
+ int opt_len, opt_nlen;
const char *eq;
- int opt_len, opt_nlen, opt_vlen, tmp;
+ char optval[128];
next_opt = memchr(opt, '#', end - opt) ?: end;
opt_len = next_opt - opt;
- if (opt_len <= 0 || opt_len > 128) {
+ if (opt_len <= 0 || opt_len > sizeof(optval)) {
pr_warn_ratelimited("Invalid option length (%d) for dns_resolver key\n",
opt_len);
return -EINVAL;
}
- eq = memchr(opt, '=', opt_len) ?: end;
- opt_nlen = eq - opt;
- eq++;
- opt_vlen = next_opt - eq; /* will be -1 if no value */
+ eq = memchr(opt, '=', opt_len);
+ if (eq) {
+ opt_nlen = eq - opt;
+ eq++;
+ memcpy(optval, eq, next_opt - eq);
+ optval[next_opt - eq] = '\0';
+ } else {
+ opt_nlen = opt_len;
+ optval[0] = '\0';
+ }
- tmp = opt_vlen >= 0 ? opt_vlen : 0;
- kdebug("option '%*.*s' val '%*.*s'",
- opt_nlen, opt_nlen, opt, tmp, tmp, eq);
+ kdebug("option '%*.*s' val '%s'",
+ opt_nlen, opt_nlen, opt, optval);
/* see if it's an error number representing a DNS error
* that's to be recorded as the result in this key */
if (opt_nlen == sizeof(DNS_ERRORNO_OPTION) - 1 &&
memcmp(opt, DNS_ERRORNO_OPTION, opt_nlen) == 0) {
kdebug("dns error number option");
- if (opt_vlen <= 0)
- goto bad_option_value;
- ret = kstrtoul(eq, 10, &derrno);
+ ret = kstrtoul(optval, 10, &derrno);
if (ret < 0)
goto bad_option_value;
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 275449b0d633..3297e7fa9945 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -90,12 +90,18 @@ static int lowpan_neigh_construct(struct net_device *dev, struct neighbour *n)
return 0;
}
+static int lowpan_get_iflink(const struct net_device *dev)
+{
+ return lowpan_802154_dev(dev)->wdev->ifindex;
+}
+
static const struct net_device_ops lowpan_netdev_ops = {
.ndo_init = lowpan_dev_init,
.ndo_start_xmit = lowpan_xmit,
.ndo_open = lowpan_open,
.ndo_stop = lowpan_stop,
.ndo_neigh_construct = lowpan_neigh_construct,
+ .ndo_get_iflink = lowpan_get_iflink,
};
static void lowpan_setup(struct net_device *ldev)
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index a0768d2759b8..a60658c85a9a 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -423,7 +423,7 @@ static const struct proto_ops ieee802154_raw_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.ioctl = ieee802154_sock_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -969,7 +969,7 @@ static const struct proto_ops ieee802154_dgram_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.ioctl = ieee802154_sock_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 15e125558c76..b403499fdabe 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -986,7 +986,7 @@ const struct proto_ops inet_stream_ops = {
.socketpair = sock_no_socketpair,
.accept = inet_accept,
.getname = inet_getname,
- .poll_mask = tcp_poll_mask,
+ .poll = tcp_poll,
.ioctl = inet_ioctl,
.listen = inet_listen,
.shutdown = inet_shutdown,
@@ -1021,7 +1021,7 @@ const struct proto_ops inet_dgram_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = inet_getname,
- .poll_mask = udp_poll_mask,
+ .poll = udp_poll,
.ioctl = inet_ioctl,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
@@ -1042,7 +1042,7 @@ EXPORT_SYMBOL(inet_dgram_ops);
/*
* For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
- * udp_poll_mask
+ * udp_poll
*/
static const struct proto_ops inet_sockraw_ops = {
.family = PF_INET,
@@ -1053,7 +1053,7 @@ static const struct proto_ops inet_sockraw_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = inet_getname,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.ioctl = inet_ioctl,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index b21833651394..e46cdd310e5f 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -300,6 +300,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
struct flowi4 fl4 = {
.flowi4_iif = LOOPBACK_IFINDEX,
+ .flowi4_oif = l3mdev_master_ifindex_rcu(dev),
.daddr = ip_hdr(skb)->saddr,
.flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
.flowi4_scope = scope,
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 1540db65241a..c9ec1603666b 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -448,9 +448,7 @@ next_proto:
out_unlock:
rcu_read_unlock();
out:
- NAPI_GRO_CB(skb)->flush |= flush;
- skb_gro_remcsum_cleanup(skb, &grc);
- skb->remcsum_offload = 0;
+ skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
return pp;
}
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 1859c473b21a..6a7d980105f6 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -223,7 +223,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
out_unlock:
rcu_read_unlock();
out:
- NAPI_GRO_CB(skb)->flush |= flush;
+ skb_gro_flush_final(skb, pp, flush);
return pp;
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 85b617b655bc..28fef7d15959 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1200,13 +1200,13 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
spin_lock_bh(&im->lock);
if (pmc) {
im->interface = pmc->interface;
- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
- im->sfmode = pmc->sfmode;
- if (pmc->sfmode == MCAST_INCLUDE) {
+ if (im->sfmode == MCAST_INCLUDE) {
im->tomb = pmc->tomb;
im->sources = pmc->sources;
for (psf = im->sources; psf; psf = psf->sf_next)
- psf->sf_crcount = im->crcount;
+ psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ } else {
+ im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
}
in_dev_put(pmc->interface);
kfree(pmc);
@@ -1288,7 +1288,7 @@ static void igmp_group_dropped(struct ip_mc_list *im)
#endif
}
-static void igmp_group_added(struct ip_mc_list *im)
+static void igmp_group_added(struct ip_mc_list *im, unsigned int mode)
{
struct in_device *in_dev = im->interface;
#ifdef CONFIG_IP_MULTICAST
@@ -1316,7 +1316,13 @@ static void igmp_group_added(struct ip_mc_list *im)
}
/* else, v3 */
- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ /* Based on RFC3376 5.1, for newly added INCLUDE SSM, we should
+ * not send filter-mode change record as the mode should be from
+ * IN() to IN(A).
+ */
+ if (mode == MCAST_EXCLUDE)
+ im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+
igmp_ifc_event(in_dev);
#endif
}
@@ -1381,8 +1387,7 @@ static void ip_mc_hash_remove(struct in_device *in_dev,
/*
* A socket has joined a multicast group on device dev.
*/
-
-void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
+void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, unsigned int mode)
{
struct ip_mc_list *im;
#ifdef CONFIG_IP_MULTICAST
@@ -1394,7 +1399,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
for_each_pmc_rtnl(in_dev, im) {
if (im->multiaddr == addr) {
im->users++;
- ip_mc_add_src(in_dev, &addr, MCAST_EXCLUDE, 0, NULL, 0);
+ ip_mc_add_src(in_dev, &addr, mode, 0, NULL, 0);
goto out;
}
}
@@ -1408,8 +1413,8 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
in_dev_hold(in_dev);
im->multiaddr = addr;
/* initial mode is (EX, empty) */
- im->sfmode = MCAST_EXCLUDE;
- im->sfcount[MCAST_EXCLUDE] = 1;
+ im->sfmode = mode;
+ im->sfcount[mode] = 1;
refcount_set(&im->refcnt, 1);
spin_lock_init(&im->lock);
#ifdef CONFIG_IP_MULTICAST
@@ -1426,12 +1431,17 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
#ifdef CONFIG_IP_MULTICAST
igmpv3_del_delrec(in_dev, im);
#endif
- igmp_group_added(im);
+ igmp_group_added(im, mode);
if (!in_dev->dead)
ip_rt_multicast_event(in_dev);
out:
return;
}
+
+void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
+{
+ __ip_mc_inc_group(in_dev, addr, MCAST_EXCLUDE);
+}
EXPORT_SYMBOL(ip_mc_inc_group);
static int ip_mc_check_iphdr(struct sk_buff *skb)
@@ -1688,7 +1698,7 @@ void ip_mc_remap(struct in_device *in_dev)
#ifdef CONFIG_IP_MULTICAST
igmpv3_del_delrec(in_dev, pmc);
#endif
- igmp_group_added(pmc);
+ igmp_group_added(pmc, pmc->sfmode);
}
}
@@ -1751,7 +1761,7 @@ void ip_mc_up(struct in_device *in_dev)
#ifdef CONFIG_IP_MULTICAST
igmpv3_del_delrec(in_dev, pmc);
#endif
- igmp_group_added(pmc);
+ igmp_group_added(pmc, pmc->sfmode);
}
}
@@ -2130,8 +2140,8 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc)
/* Join a multicast group
*/
-
-int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
+static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr,
+ unsigned int mode)
{
__be32 addr = imr->imr_multiaddr.s_addr;
struct ip_mc_socklist *iml, *i;
@@ -2172,15 +2182,30 @@ int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
memcpy(&iml->multi, imr, sizeof(*imr));
iml->next_rcu = inet->mc_list;
iml->sflist = NULL;
- iml->sfmode = MCAST_EXCLUDE;
+ iml->sfmode = mode;
rcu_assign_pointer(inet->mc_list, iml);
- ip_mc_inc_group(in_dev, addr);
+ __ip_mc_inc_group(in_dev, addr, mode);
err = 0;
done:
return err;
}
+
+/* Join ASM (Any-Source Multicast) group
+ */
+int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
+{
+ return __ip_mc_join_group(sk, imr, MCAST_EXCLUDE);
+}
EXPORT_SYMBOL(ip_mc_join_group);
+/* Join SSM (Source-Specific Multicast) group
+ */
+int ip_mc_join_group_ssm(struct sock *sk, struct ip_mreqn *imr,
+ unsigned int mode)
+{
+ return __ip_mc_join_group(sk, imr, mode);
+}
+
static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
struct in_device *in_dev)
{
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index c9e35b81d093..1e4cf3ab560f 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -90,7 +90,7 @@ static void inet_frags_free_cb(void *ptr, void *arg)
void inet_frags_exit_net(struct netns_frags *nf)
{
- nf->low_thresh = 0; /* prevent creation of new frags */
+ nf->high_thresh = 0; /* prevent creation of new frags */
rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b3308e9d9762..0e3edd25f881 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -523,6 +523,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->dev = from->dev;
to->mark = from->mark;
+ skb_copy_hash(to, from);
+
/* Copy the flags to each fragment. */
IPCB(to)->flags = IPCB(from)->flags;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index fc32fdbeefa6..c0fe5ad996f2 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -150,15 +150,18 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
{
struct sockaddr_in sin;
const struct iphdr *iph = ip_hdr(skb);
- __be16 *ports = (__be16 *)skb_transport_header(skb);
+ __be16 *ports;
+ int end;
- if (skb_transport_offset(skb) + 4 > (int)skb->len)
+ end = skb_transport_offset(skb) + 4;
+ if (end > 0 && !pskb_may_pull(skb, end))
return;
/* All current transport protocols have the port numbers in the
* first four bytes of the transport header and this function is
* written with this assumption in mind.
*/
+ ports = (__be16 *)skb_transport_header(skb);
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = iph->daddr;
@@ -984,7 +987,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
mreq.imr_address.s_addr = mreqs.imr_interface;
mreq.imr_ifindex = 0;
- err = ip_mc_join_group(sk, &mreq);
+ err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
if (err && err != -EADDRINUSE)
break;
omode = MCAST_INCLUDE;
@@ -1061,7 +1064,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
mreq.imr_multiaddr = psin->sin_addr;
mreq.imr_address.s_addr = 0;
mreq.imr_ifindex = greqs.gsr_interface;
- err = ip_mc_join_group(sk, &mreq);
+ err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
if (err && err != -EADDRINUSE)
break;
greqs.gsr_interface = mreq.imr_ifindex;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index ca0dad90803a..e77872c93c20 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1898,6 +1898,7 @@ static struct xt_match ipt_builtin_mt[] __read_mostly = {
.checkentry = icmp_checkentry,
.proto = IPPROTO_ICMP,
.family = NFPROTO_IPV4,
+ .me = THIS_MODULE,
},
};
diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c
index 805e83ec3ad9..164714104965 100644
--- a/net/ipv4/netfilter/nf_tproxy_ipv4.c
+++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c
@@ -37,7 +37,7 @@ nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
* to a listener socket if there's one */
struct sock *sk2;
- sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
+ sk2 = nf_tproxy_get_sock_v4(net, skb, iph->protocol,
iph->saddr, laddr ? laddr : iph->daddr,
hp->source, lport ? lport : hp->dest,
skb->dev, NF_TPROXY_LOOKUP_LISTENER);
@@ -71,7 +71,7 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
EXPORT_SYMBOL_GPL(nf_tproxy_laddr4);
struct sock *
-nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
+nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb,
const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
@@ -79,16 +79,21 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
- struct tcphdr *tcph;
switch (protocol) {
- case IPPROTO_TCP:
+ case IPPROTO_TCP: {
+ struct tcphdr _hdr, *hp;
+
+ hp = skb_header_pointer(skb, ip_hdrlen(skb),
+ sizeof(struct tcphdr), &_hdr);
+ if (hp == NULL)
+ return NULL;
+
switch (lookup_type) {
case NF_TPROXY_LOOKUP_LISTENER:
- tcph = hp;
sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
ip_hdrlen(skb) +
- __tcp_hdrlen(tcph),
+ __tcp_hdrlen(hp),
saddr, sport,
daddr, dport,
in->ifindex, 0);
@@ -110,6 +115,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
BUG();
}
break;
+ }
case IPPROTO_UDP:
sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d06247ba08b2..5fa335fd3852 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -189,8 +189,9 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
if (write && ret == 0) {
low = make_kgid(user_ns, urange[0]);
high = make_kgid(user_ns, urange[1]);
- if (!gid_valid(low) || !gid_valid(high) ||
- (urange[1] < urange[0]) || gid_lt(high, low)) {
+ if (!gid_valid(low) || !gid_valid(high))
+ return -EINVAL;
+ if (urange[1] < urange[0] || gid_lt(high, low)) {
low = make_kgid(&init_user_ns, 1);
high = make_kgid(&init_user_ns, 0);
}
@@ -265,8 +266,9 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
ipv4.sysctl_tcp_fastopen);
struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
struct tcp_fastopen_context *ctxt;
- int ret;
u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
+ __le32 key[4];
+ int ret, i;
tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
if (!tbl.data)
@@ -275,11 +277,14 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
rcu_read_lock();
ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
if (ctxt)
- memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
+ memcpy(key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
else
- memset(user_key, 0, sizeof(user_key));
+ memset(key, 0, sizeof(key));
rcu_read_unlock();
+ for (i = 0; i < ARRAY_SIZE(key); i++)
+ user_key[i] = le32_to_cpu(key[i]);
+
snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
user_key[0], user_key[1], user_key[2], user_key[3]);
ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
@@ -290,13 +295,17 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
ret = -EINVAL;
goto bad_key;
}
- tcp_fastopen_reset_cipher(net, NULL, user_key,
+
+ for (i = 0; i < ARRAY_SIZE(user_key); i++)
+ key[i] = cpu_to_le32(user_key[i]);
+
+ tcp_fastopen_reset_cipher(net, NULL, key,
TCP_FASTOPEN_KEY_LENGTH);
}
bad_key:
pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
- user_key[0], user_key[1], user_key[2], user_key[3],
+ user_key[0], user_key[1], user_key[2], user_key[3],
(char *)tbl.data, ret);
kfree(tbl.data);
return ret;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 141acd92e58a..4491faf83f4f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -494,21 +494,32 @@ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
}
/*
- * Socket is not locked. We are protected from async events by poll logic and
- * correct handling of state changes made by other threads is impossible in
- * any case.
+ * Wait for a TCP event.
+ *
+ * Note that we don't need to lock the socket, as the upper poll layers
+ * take care of normal races (between the test and the event) and we don't
+ * go look at any of the socket buffers directly.
*/
-__poll_t tcp_poll_mask(struct socket *sock, __poll_t events)
+__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
+ __poll_t mask;
struct sock *sk = sock->sk;
const struct tcp_sock *tp = tcp_sk(sk);
- __poll_t mask = 0;
int state;
+ sock_poll_wait(file, sk_sleep(sk), wait);
+
state = inet_sk_state_load(sk);
if (state == TCP_LISTEN)
return inet_csk_listen_poll(sk);
+ /* Socket is not locked. We are protected from async events
+ * by poll logic and correct handling of state changes
+ * made by other threads is impossible in any case.
+ */
+
+ mask = 0;
+
/*
* EPOLLHUP is certainly not done right. But poll() doesn't
* have a notion of HUP in just one direction, and for a
@@ -589,7 +600,7 @@ __poll_t tcp_poll_mask(struct socket *sock, __poll_t events)
return mask;
}
-EXPORT_SYMBOL(tcp_poll_mask);
+EXPORT_SYMBOL(tcp_poll);
int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
@@ -1987,7 +1998,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
* shouldn't happen.
*/
if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
- "recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n",
+ "TCP recvmsg seq # bug: copied %X, seq %X, rcvnxt %X, fl %X\n",
*seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
flags))
break;
@@ -2002,7 +2013,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto found_fin_ok;
WARN(!(flags & MSG_PEEK),
- "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n",
+ "TCP recvmsg seq # bug 2: copied %X, seq %X, rcvnxt %X, fl %X\n",
*seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags);
}
@@ -2551,6 +2562,8 @@ int tcp_disconnect(struct sock *sk, int flags)
tcp_clear_xmit_timers(sk);
__skb_queue_purge(&sk->sk_receive_queue);
+ tp->copied_seq = tp->rcv_nxt;
+ tp->urg_data = 0;
tcp_write_queue_purge(sk);
tcp_fastopen_active_disable_ofo_check(sk);
skb_rbtree_purge(&tp->out_of_order_queue);
@@ -2810,14 +2823,17 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_REPAIR:
if (!tcp_can_repair_sock(sk))
err = -EPERM;
- else if (val == 1) {
+ else if (val == TCP_REPAIR_ON) {
tp->repair = 1;
sk->sk_reuse = SK_FORCE_REUSE;
tp->repair_queue = TCP_NO_QUEUE;
- } else if (val == 0) {
+ } else if (val == TCP_REPAIR_OFF) {
tp->repair = 0;
sk->sk_reuse = SK_NO_REUSE;
tcp_send_window_probe(sk);
+ } else if (val == TCP_REPAIR_OFF_NO_WP) {
+ tp->repair = 0;
+ sk->sk_reuse = SK_NO_REUSE;
} else
err = -EINVAL;
@@ -3709,8 +3725,7 @@ int tcp_abort(struct sock *sk, int err)
struct request_sock *req = inet_reqsk(sk);
local_bh_disable();
- inet_csk_reqsk_queue_drop_and_put(req->rsk_listener,
- req);
+ inet_csk_reqsk_queue_drop(req->rsk_listener, req);
local_bh_enable();
return 0;
}
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 5f5e5936760e..8b637f9f23a2 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -55,7 +55,6 @@ struct dctcp {
u32 dctcp_alpha;
u32 next_seq;
u32 ce_state;
- u32 delayed_ack_reserved;
u32 loss_cwnd;
};
@@ -96,7 +95,6 @@ static void dctcp_init(struct sock *sk)
ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
- ca->delayed_ack_reserved = 0;
ca->loss_cwnd = 0;
ca->ce_state = 0;
@@ -131,23 +129,14 @@ static void dctcp_ce_state_0_to_1(struct sock *sk)
struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
- /* State has changed from CE=0 to CE=1 and delayed
- * ACK has not sent yet.
- */
- if (!ca->ce_state && ca->delayed_ack_reserved) {
- u32 tmp_rcv_nxt;
-
- /* Save current rcv_nxt. */
- tmp_rcv_nxt = tp->rcv_nxt;
-
- /* Generate previous ack with CE=0. */
- tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
- tp->rcv_nxt = ca->prior_rcv_nxt;
-
- tcp_send_ack(sk);
-
- /* Recover current rcv_nxt. */
- tp->rcv_nxt = tmp_rcv_nxt;
+ if (!ca->ce_state) {
+ /* State has changed from CE=0 to CE=1, force an immediate
+ * ACK to reflect the new CE state. If an ACK was delayed,
+ * send that first to reflect the prior CE state.
+ */
+ if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+ __tcp_send_ack(sk, ca->prior_rcv_nxt);
+ tcp_enter_quickack_mode(sk, 1);
}
ca->prior_rcv_nxt = tp->rcv_nxt;
@@ -161,23 +150,14 @@ static void dctcp_ce_state_1_to_0(struct sock *sk)
struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
- /* State has changed from CE=1 to CE=0 and delayed
- * ACK has not sent yet.
- */
- if (ca->ce_state && ca->delayed_ack_reserved) {
- u32 tmp_rcv_nxt;
-
- /* Save current rcv_nxt. */
- tmp_rcv_nxt = tp->rcv_nxt;
-
- /* Generate previous ack with CE=1. */
- tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
- tp->rcv_nxt = ca->prior_rcv_nxt;
-
- tcp_send_ack(sk);
-
- /* Recover current rcv_nxt. */
- tp->rcv_nxt = tmp_rcv_nxt;
+ if (ca->ce_state) {
+ /* State has changed from CE=1 to CE=0, force an immediate
+ * ACK to reflect the new CE state. If an ACK was delayed,
+ * send that first to reflect the prior CE state.
+ */
+ if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+ __tcp_send_ack(sk, ca->prior_rcv_nxt);
+ tcp_enter_quickack_mode(sk, 1);
}
ca->prior_rcv_nxt = tp->rcv_nxt;
@@ -248,25 +228,6 @@ static void dctcp_state(struct sock *sk, u8 new_state)
}
}
-static void dctcp_update_ack_reserved(struct sock *sk, enum tcp_ca_event ev)
-{
- struct dctcp *ca = inet_csk_ca(sk);
-
- switch (ev) {
- case CA_EVENT_DELAYED_ACK:
- if (!ca->delayed_ack_reserved)
- ca->delayed_ack_reserved = 1;
- break;
- case CA_EVENT_NON_DELAYED_ACK:
- if (ca->delayed_ack_reserved)
- ca->delayed_ack_reserved = 0;
- break;
- default:
- /* Don't care for the rest. */
- break;
- }
-}
-
static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
{
switch (ev) {
@@ -276,10 +237,6 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
case CA_EVENT_ECN_NO_CE:
dctcp_ce_state_1_to_0(sk);
break;
- case CA_EVENT_DELAYED_ACK:
- case CA_EVENT_NON_DELAYED_ACK:
- dctcp_update_ack_reserved(sk, ev);
- break;
default:
/* Don't care for the rest. */
break;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 355d3dffd021..3bcd30a2ba06 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -215,7 +215,7 @@ static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
icsk->icsk_ack.quick = quickacks;
}
-static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
+void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -223,6 +223,7 @@ static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
icsk->icsk_ack.pingpong = 0;
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
+EXPORT_SYMBOL(tcp_enter_quickack_mode);
/* Send ACKs quickly, if "quick" count is not exhausted
* and the session is not interactive.
@@ -265,7 +266,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
* it is probably a retransmit.
*/
if (tp->ecn_flags & TCP_ECN_SEEN)
- tcp_enter_quickack_mode(sk, 1);
+ tcp_enter_quickack_mode(sk, 2);
break;
case INET_ECN_CE:
if (tcp_ca_needs_ecn(sk))
@@ -273,7 +274,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
/* Better not delay acks, sender can have a very low cwnd */
- tcp_enter_quickack_mode(sk, 1);
+ tcp_enter_quickack_mode(sk, 2);
tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
}
tp->ecn_flags |= TCP_ECN_SEEN;
@@ -3181,6 +3182,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
if (tcp_is_reno(tp)) {
tcp_remove_reno_sacks(sk, pkts_acked);
+
+ /* If any of the cumulatively ACKed segments was
+ * retransmitted, non-SACK case cannot confirm that
+ * progress was due to original transmission due to
+ * lack of TCPCB_SACKED_ACKED bits even if some of
+ * the packets may have been never retransmitted.
+ */
+ if (flag & FLAG_RETRANS_DATA_ACKED)
+ flag &= ~FLAG_ORIG_SACK_ACKED;
} else {
int delta;
@@ -4348,6 +4358,23 @@ static bool tcp_try_coalesce(struct sock *sk,
return true;
}
+static bool tcp_ooo_try_coalesce(struct sock *sk,
+ struct sk_buff *to,
+ struct sk_buff *from,
+ bool *fragstolen)
+{
+ bool res = tcp_try_coalesce(sk, to, from, fragstolen);
+
+ /* In case tcp_drop() is called later, update to->gso_segs */
+ if (res) {
+ u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
+ max_t(u16, 1, skb_shinfo(from)->gso_segs);
+
+ skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
+ }
+ return res;
+}
+
static void tcp_drop(struct sock *sk, struct sk_buff *skb)
{
sk_drops_add(sk, skb);
@@ -4471,8 +4498,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
/* In the typical case, we are adding an skb to the end of the list.
* Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
*/
- if (tcp_try_coalesce(sk, tp->ooo_last_skb,
- skb, &fragstolen)) {
+ if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
+ skb, &fragstolen)) {
coalesce_done:
tcp_grow_window(sk, skb);
kfree_skb_partial(skb, fragstolen);
@@ -4500,7 +4527,7 @@ coalesce_done:
/* All the bits are present. Drop. */
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPOFOMERGE);
- __kfree_skb(skb);
+ tcp_drop(sk, skb);
skb = NULL;
tcp_dsack_set(sk, seq, end_seq);
goto add_sack;
@@ -4519,11 +4546,11 @@ coalesce_done:
TCP_SKB_CB(skb1)->end_seq);
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPOFOMERGE);
- __kfree_skb(skb1);
+ tcp_drop(sk, skb1);
goto merge_right;
}
- } else if (tcp_try_coalesce(sk, skb1,
- skb, &fragstolen)) {
+ } else if (tcp_ooo_try_coalesce(sk, skb1,
+ skb, &fragstolen)) {
goto coalesce_done;
}
p = &parent->rb_right;
@@ -4892,6 +4919,7 @@ end:
static void tcp_collapse_ofo_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
+ u32 range_truesize, sum_tiny = 0;
struct sk_buff *skb, *head;
u32 start, end;
@@ -4903,6 +4931,7 @@ new_range:
}
start = TCP_SKB_CB(skb)->seq;
end = TCP_SKB_CB(skb)->end_seq;
+ range_truesize = skb->truesize;
for (head = skb;;) {
skb = skb_rb_next(skb);
@@ -4913,11 +4942,20 @@ new_range:
if (!skb ||
after(TCP_SKB_CB(skb)->seq, end) ||
before(TCP_SKB_CB(skb)->end_seq, start)) {
- tcp_collapse(sk, NULL, &tp->out_of_order_queue,
- head, skb, start, end);
+ /* Do not attempt collapsing tiny skbs */
+ if (range_truesize != head->truesize ||
+ end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
+ tcp_collapse(sk, NULL, &tp->out_of_order_queue,
+ head, skb, start, end);
+ } else {
+ sum_tiny += range_truesize;
+ if (sum_tiny > sk->sk_rcvbuf >> 3)
+ return;
+ }
goto new_range;
}
+ range_truesize += skb->truesize;
if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
start = TCP_SKB_CB(skb)->seq;
if (after(TCP_SKB_CB(skb)->end_seq, end))
@@ -4932,6 +4970,7 @@ new_range:
* 2) not add too big latencies if thousands of packets sit there.
* (But if application shrinks SO_RCVBUF, we could still end up
* freeing whole queue here)
+ * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks.
*
* Return true if queue has shrunk.
*/
@@ -4939,20 +4978,26 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct rb_node *node, *prev;
+ int goal;
if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
return false;
NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+ goal = sk->sk_rcvbuf >> 3;
node = &tp->ooo_last_skb->rbnode;
do {
prev = rb_prev(node);
rb_erase(node, &tp->out_of_order_queue);
+ goal -= rb_to_skb(node)->truesize;
tcp_drop(sk, rb_to_skb(node));
- sk_mem_reclaim(sk);
- if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
- !tcp_under_memory_pressure(sk))
- break;
+ if (!prev || goal <= 0) {
+ sk_mem_reclaim(sk);
+ if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+ !tcp_under_memory_pressure(sk))
+ break;
+ goal = sk->sk_rcvbuf >> 3;
+ }
node = prev;
} while (node);
tp->ooo_last_skb = rb_to_skb(prev);
@@ -4987,6 +5032,9 @@ static int tcp_prune_queue(struct sock *sk)
else if (tcp_under_memory_pressure(sk))
tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
+ if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+ return 0;
+
tcp_collapse_ofo_queue(sk);
if (!skb_queue_empty(&sk->sk_receive_queue))
tcp_collapse(sk, &sk->sk_receive_queue, NULL,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index bea17f1e8302..3b2711e33e4c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -156,11 +156,24 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
*/
if (tcptw->tw_ts_recent_stamp &&
(!twp || (reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
- tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
- if (tp->write_seq == 0)
- tp->write_seq = 1;
- tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
- tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
+ /* In case of repair and re-using TIME-WAIT sockets we still
+ * want to be sure that it is safe as above but honor the
+ * sequence numbers and time stamps set as part of the repair
+ * process.
+ *
+ * Without this check re-using a TIME-WAIT socket with TCP
+ * repair would accumulate a -1 on the repair assigned
+ * sequence number. The first time it is reused the sequence
+ * is -1, the second time -2, etc. This fixes that issue
+ * without appearing to create any others.
+ */
+ if (likely(!tp->repair)) {
+ tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
+ if (tp->write_seq == 0)
+ tp->write_seq = 1;
+ tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
+ tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
+ }
sock_hold(sktw);
return 1;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8e08b409c71e..c4172c1fb198 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -160,7 +160,8 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
}
/* Account for an ACK we sent. */
-static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
+static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
+ u32 rcv_nxt)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -171,6 +172,9 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
__sock_put(sk);
}
+
+ if (unlikely(rcv_nxt != tp->rcv_nxt))
+ return; /* Special ACK sent by DCTCP to reflect ECN */
tcp_dec_quickack_mode(sk, pkts);
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
@@ -1023,8 +1027,8 @@ static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
* We are working here with either a clone of the original
* SKB, or a fresh unique copy made by the retransmit engine.
*/
-static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
- gfp_t gfp_mask)
+static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
+ int clone_it, gfp_t gfp_mask, u32 rcv_nxt)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet;
@@ -1100,7 +1104,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
th->source = inet->inet_sport;
th->dest = inet->inet_dport;
th->seq = htonl(tcb->seq);
- th->ack_seq = htonl(tp->rcv_nxt);
+ th->ack_seq = htonl(rcv_nxt);
*(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
tcb->tcp_flags);
@@ -1141,7 +1145,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
icsk->icsk_af_ops->send_check(sk, skb);
if (likely(tcb->tcp_flags & TCPHDR_ACK))
- tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
+ tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
if (skb->len != tcp_header_size) {
tcp_event_data_sent(tp, sk);
@@ -1178,6 +1182,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
return err;
}
+static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
+ gfp_t gfp_mask)
+{
+ return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask,
+ tcp_sk(sk)->rcv_nxt);
+}
+
/* This routine just queues the buffer for sending.
*
* NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
@@ -3523,8 +3534,6 @@ void tcp_send_delayed_ack(struct sock *sk)
int ato = icsk->icsk_ack.ato;
unsigned long timeout;
- tcp_ca_event(sk, CA_EVENT_DELAYED_ACK);
-
if (ato > TCP_DELACK_MIN) {
const struct tcp_sock *tp = tcp_sk(sk);
int max_ato = HZ / 2;
@@ -3573,7 +3582,7 @@ void tcp_send_delayed_ack(struct sock *sk)
}
/* This routine sends an ack and also updates the window. */
-void tcp_send_ack(struct sock *sk)
+void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
{
struct sk_buff *buff;
@@ -3581,8 +3590,6 @@ void tcp_send_ack(struct sock *sk)
if (sk->sk_state == TCP_CLOSE)
return;
- tcp_ca_event(sk, CA_EVENT_NON_DELAYED_ACK);
-
/* We are not putting this on the write queue, so
* tcp_transmit_skb() will set the ownership to this
* sock.
@@ -3608,9 +3615,14 @@ void tcp_send_ack(struct sock *sk)
skb_set_tcp_pure_ack(buff);
/* Send it off, this clears delayed acks for us. */
- tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0);
+ __tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0, rcv_nxt);
+}
+EXPORT_SYMBOL_GPL(__tcp_send_ack);
+
+void tcp_send_ack(struct sock *sk)
+{
+ __tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
}
-EXPORT_SYMBOL_GPL(tcp_send_ack);
/* This routine sends a packet with an out of date sequence
* number. It assumes the other end will try to ack it.
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9bb27df4dac5..24e116ddae79 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2591,7 +2591,7 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname,
* udp_poll - wait for a UDP event.
* @file - file struct
* @sock - socket
- * @events - events to wait for
+ * @wait - poll table
*
* This is same as datagram poll, except for the special case of
* blocking sockets. If application is using a blocking fd
@@ -2600,23 +2600,23 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname,
* but then block when reading it. Add special case code
* to work around these arguably broken applications.
*/
-__poll_t udp_poll_mask(struct socket *sock, __poll_t events)
+__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
- __poll_t mask = datagram_poll_mask(sock, events);
+ __poll_t mask = datagram_poll(file, sock, wait);
struct sock *sk = sock->sk;
if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
mask |= EPOLLIN | EPOLLRDNORM;
/* Check for false positives due to checksum errors */
- if ((mask & EPOLLRDNORM) && !(sock->file->f_flags & O_NONBLOCK) &&
+ if ((mask & EPOLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
!(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
mask &= ~(EPOLLIN | EPOLLRDNORM);
return mask;
}
-EXPORT_SYMBOL(udp_poll_mask);
+EXPORT_SYMBOL(udp_poll);
int udp_abort(struct sock *sk, int err)
{
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 92dc9e5a7ff3..69c54540d5b4 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -394,7 +394,7 @@ unflush:
out_unlock:
rcu_read_unlock();
out:
- NAPI_GRO_CB(skb)->flush |= flush;
+ skb_gro_flush_final(skb, pp, flush);
return pp;
}
EXPORT_SYMBOL(udp_gro_receive);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 0eff75525da1..b3885ca22d6f 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -108,6 +108,7 @@ config IPV6_MIP6
config IPV6_ILA
tristate "IPv6: Identifier Locator Addressing (ILA)"
depends on NETFILTER
+ select DST_CACHE
select LWTUNNEL
---help---
Support for IPv6 Identifier Locator Addressing (ILA).
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c134286d6a41..f66a1cae3366 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2374,7 +2374,8 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
continue;
if ((rt->fib6_flags & noflags) != 0)
continue;
- fib6_info_hold(rt);
+ if (!fib6_info_hold_safe(rt))
+ continue;
break;
}
out:
@@ -4528,6 +4529,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
unsigned long expires, u32 flags)
{
struct fib6_info *f6i;
+ u32 prio;
f6i = addrconf_get_prefix_route(&ifp->addr,
ifp->prefix_len,
@@ -4536,13 +4538,15 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
if (!f6i)
return -ENOENT;
- if (f6i->fib6_metric != ifp->rt_priority) {
+ prio = ifp->rt_priority ? : IP6_RT_PRIO_ADDRCONF;
+ if (f6i->fib6_metric != prio) {
+ /* delete old one */
+ ip6_del_rt(dev_net(ifp->idev->dev), f6i);
+
/* add new one */
addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
ifp->rt_priority, ifp->idev->dev,
expires, flags, GFP_KERNEL);
- /* delete old one */
- ip6_del_rt(dev_net(ifp->idev->dev), f6i);
} else {
if (!expires)
fib6_clean_expires(f6i);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 74f2a261e8df..9ed0eae91758 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -570,7 +570,7 @@ const struct proto_ops inet6_stream_ops = {
.socketpair = sock_no_socketpair, /* a do nothing */
.accept = inet_accept, /* ok */
.getname = inet6_getname,
- .poll_mask = tcp_poll_mask, /* ok */
+ .poll = tcp_poll, /* ok */
.ioctl = inet6_ioctl, /* must change */
.listen = inet_listen, /* ok */
.shutdown = inet_shutdown, /* ok */
@@ -603,7 +603,7 @@ const struct proto_ops inet6_dgram_ops = {
.socketpair = sock_no_socketpair, /* a do nothing */
.accept = sock_no_accept, /* a do nothing */
.getname = inet6_getname,
- .poll_mask = udp_poll_mask, /* ok */
+ .poll = udp_poll, /* ok */
.ioctl = inet6_ioctl, /* must change */
.listen = sock_no_listen, /* ok */
.shutdown = inet_shutdown, /* ok */
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 1323b9679cf7..1c0bb9fb76e6 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -799,8 +799,7 @@ static int calipso_opt_update(struct sock *sk, struct ipv6_opt_hdr *hop)
{
struct ipv6_txoptions *old = txopt_get(inet6_sk(sk)), *txopts;
- txopts = ipv6_renew_options_kern(sk, old, IPV6_HOPOPTS,
- hop, hop ? ipv6_optlen(hop) : 0);
+ txopts = ipv6_renew_options(sk, old, IPV6_HOPOPTS, hop);
txopt_put(old);
if (IS_ERR(txopts))
return PTR_ERR(txopts);
@@ -1222,8 +1221,7 @@ static int calipso_req_setattr(struct request_sock *req,
if (IS_ERR(new))
return PTR_ERR(new);
- txopts = ipv6_renew_options_kern(sk, req_inet->ipv6_opt, IPV6_HOPOPTS,
- new, new ? ipv6_optlen(new) : 0);
+ txopts = ipv6_renew_options(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, new);
kfree(new);
@@ -1260,8 +1258,7 @@ static void calipso_req_delattr(struct request_sock *req)
if (calipso_opt_del(req_inet->ipv6_opt->hopopt, &new))
return; /* Nothing to do */
- txopts = ipv6_renew_options_kern(sk, req_inet->ipv6_opt, IPV6_HOPOPTS,
- new, new ? ipv6_optlen(new) : 0);
+ txopts = ipv6_renew_options(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, new);
if (!IS_ERR(txopts)) {
txopts = xchg(&req_inet->ipv6_opt, txopts);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 2ee08b6a86a4..1a1f876f8e28 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -700,13 +700,16 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
}
if (np->rxopt.bits.rxorigdstaddr) {
struct sockaddr_in6 sin6;
- __be16 *ports = (__be16 *) skb_transport_header(skb);
+ __be16 *ports;
+ int end;
- if (skb_transport_offset(skb) + 4 <= (int)skb->len) {
+ end = skb_transport_offset(skb) + 4;
+ if (end <= 0 || pskb_may_pull(skb, end)) {
/* All current transport protocols have the port numbers in the
* first four bytes of the transport header and this function is
* written with this assumption in mind.
*/
+ ports = (__be16 *)skb_transport_header(skb);
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = ipv6_hdr(skb)->daddr;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 5bc2bf3733ab..20291c2036fc 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -1015,29 +1015,21 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
}
EXPORT_SYMBOL_GPL(ipv6_dup_options);
-static int ipv6_renew_option(void *ohdr,
- struct ipv6_opt_hdr __user *newopt, int newoptlen,
- int inherit,
- struct ipv6_opt_hdr **hdr,
- char **p)
+static void ipv6_renew_option(int renewtype,
+ struct ipv6_opt_hdr **dest,
+ struct ipv6_opt_hdr *old,
+ struct ipv6_opt_hdr *new,
+ int newtype, char **p)
{
- if (inherit) {
- if (ohdr) {
- memcpy(*p, ohdr, ipv6_optlen((struct ipv6_opt_hdr *)ohdr));
- *hdr = (struct ipv6_opt_hdr *)*p;
- *p += CMSG_ALIGN(ipv6_optlen(*hdr));
- }
- } else {
- if (newopt) {
- if (copy_from_user(*p, newopt, newoptlen))
- return -EFAULT;
- *hdr = (struct ipv6_opt_hdr *)*p;
- if (ipv6_optlen(*hdr) > newoptlen)
- return -EINVAL;
- *p += CMSG_ALIGN(newoptlen);
- }
- }
- return 0;
+ struct ipv6_opt_hdr *src;
+
+ src = (renewtype == newtype ? new : old);
+ if (!src)
+ return;
+
+ memcpy(*p, src, ipv6_optlen(src));
+ *dest = (struct ipv6_opt_hdr *)*p;
+ *p += CMSG_ALIGN(ipv6_optlen(*dest));
}
/**
@@ -1063,13 +1055,11 @@ static int ipv6_renew_option(void *ohdr,
*/
struct ipv6_txoptions *
ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
- int newtype,
- struct ipv6_opt_hdr __user *newopt, int newoptlen)
+ int newtype, struct ipv6_opt_hdr *newopt)
{
int tot_len = 0;
char *p;
struct ipv6_txoptions *opt2;
- int err;
if (opt) {
if (newtype != IPV6_HOPOPTS && opt->hopopt)
@@ -1082,8 +1072,8 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst1opt));
}
- if (newopt && newoptlen)
- tot_len += CMSG_ALIGN(newoptlen);
+ if (newopt)
+ tot_len += CMSG_ALIGN(ipv6_optlen(newopt));
if (!tot_len)
return NULL;
@@ -1098,29 +1088,19 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
opt2->tot_len = tot_len;
p = (char *)(opt2 + 1);
- err = ipv6_renew_option(opt ? opt->hopopt : NULL, newopt, newoptlen,
- newtype != IPV6_HOPOPTS,
- &opt2->hopopt, &p);
- if (err)
- goto out;
-
- err = ipv6_renew_option(opt ? opt->dst0opt : NULL, newopt, newoptlen,
- newtype != IPV6_RTHDRDSTOPTS,
- &opt2->dst0opt, &p);
- if (err)
- goto out;
-
- err = ipv6_renew_option(opt ? opt->srcrt : NULL, newopt, newoptlen,
- newtype != IPV6_RTHDR,
- (struct ipv6_opt_hdr **)&opt2->srcrt, &p);
- if (err)
- goto out;
-
- err = ipv6_renew_option(opt ? opt->dst1opt : NULL, newopt, newoptlen,
- newtype != IPV6_DSTOPTS,
- &opt2->dst1opt, &p);
- if (err)
- goto out;
+ ipv6_renew_option(IPV6_HOPOPTS, &opt2->hopopt,
+ (opt ? opt->hopopt : NULL),
+ newopt, newtype, &p);
+ ipv6_renew_option(IPV6_RTHDRDSTOPTS, &opt2->dst0opt,
+ (opt ? opt->dst0opt : NULL),
+ newopt, newtype, &p);
+ ipv6_renew_option(IPV6_RTHDR,
+ (struct ipv6_opt_hdr **)&opt2->srcrt,
+ (opt ? (struct ipv6_opt_hdr *)opt->srcrt : NULL),
+ newopt, newtype, &p);
+ ipv6_renew_option(IPV6_DSTOPTS, &opt2->dst1opt,
+ (opt ? opt->dst1opt : NULL),
+ newopt, newtype, &p);
opt2->opt_nflen = (opt2->hopopt ? ipv6_optlen(opt2->hopopt) : 0) +
(opt2->dst0opt ? ipv6_optlen(opt2->dst0opt) : 0) +
@@ -1128,37 +1108,6 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
opt2->opt_flen = (opt2->dst1opt ? ipv6_optlen(opt2->dst1opt) : 0);
return opt2;
-out:
- sock_kfree_s(sk, opt2, opt2->tot_len);
- return ERR_PTR(err);
-}
-
-/**
- * ipv6_renew_options_kern - replace a specific ext hdr with a new one.
- *
- * @sk: sock from which to allocate memory
- * @opt: original options
- * @newtype: option type to replace in @opt
- * @newopt: new option of type @newtype to replace (kernel-mem)
- * @newoptlen: length of @newopt
- *
- * See ipv6_renew_options(). The difference is that @newopt is
- * kernel memory, rather than user memory.
- */
-struct ipv6_txoptions *
-ipv6_renew_options_kern(struct sock *sk, struct ipv6_txoptions *opt,
- int newtype, struct ipv6_opt_hdr *newopt,
- int newoptlen)
-{
- struct ipv6_txoptions *ret_val;
- const mm_segment_t old_fs = get_fs();
-
- set_fs(KERNEL_DS);
- ret_val = ipv6_renew_options(sk, opt, newtype,
- (struct ipv6_opt_hdr __user *)newopt,
- newoptlen);
- set_fs(old_fs);
- return ret_val;
}
struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index be491bf6ab6e..ef2505aefc15 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -402,9 +402,10 @@ static int icmp6_iif(const struct sk_buff *skb)
/* for local traffic to local address, skb dev is the loopback
* device. Check if there is a dst attached to the skb and if so
- * get the real device index.
+ * get the real device index. Same is needed for replies to a link
+ * local address on a device enslaved to an L3 master device
*/
- if (unlikely(iif == LOOPBACK_IFINDEX)) {
+ if (unlikely(iif == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
const struct rt6_info *rt6 = skb_rt6_info(skb);
if (rt6)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1fb2f3118d60..d212738e9d10 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -935,20 +935,19 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
{
struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
lockdep_is_held(&rt->fib6_table->tb6_lock));
- enum fib_event_type event = FIB_EVENT_ENTRY_ADD;
- struct fib6_info *iter = NULL, *match = NULL;
+ struct fib6_info *iter = NULL;
struct fib6_info __rcu **ins;
+ struct fib6_info __rcu **fallback_ins = NULL;
int replace = (info->nlh &&
(info->nlh->nlmsg_flags & NLM_F_REPLACE));
- int append = (info->nlh &&
- (info->nlh->nlmsg_flags & NLM_F_APPEND));
int add = (!info->nlh ||
(info->nlh->nlmsg_flags & NLM_F_CREATE));
int found = 0;
+ bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
u16 nlflags = NLM_F_EXCL;
int err;
- if (append)
+ if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_APPEND))
nlflags |= NLM_F_APPEND;
ins = &fn->leaf;
@@ -970,8 +969,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
nlflags &= ~NLM_F_EXCL;
if (replace) {
- found++;
- break;
+ if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
+ found++;
+ break;
+ }
+ if (rt_can_ecmp)
+ fallback_ins = fallback_ins ?: ins;
+ goto next_iter;
}
if (rt6_duplicate_nexthop(iter, rt)) {
@@ -986,51 +990,71 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu);
return -EEXIST;
}
-
- /* first route that matches */
- if (!match)
- match = iter;
+ /* If we have the same destination and the same metric,
+ * but not the same gateway, then the route we try to
+ * add is sibling to this route, increment our counter
+ * of siblings, and later we will add our route to the
+ * list.
+ * Only static routes (which don't have flag
+ * RTF_EXPIRES) are used for ECMPv6.
+ *
+ * To avoid long list, we only had siblings if the
+ * route have a gateway.
+ */
+ if (rt_can_ecmp &&
+ rt6_qualify_for_ecmp(iter))
+ rt->fib6_nsiblings++;
}
if (iter->fib6_metric > rt->fib6_metric)
break;
+next_iter:
ins = &iter->fib6_next;
}
+ if (fallback_ins && !found) {
+ /* No ECMP-able route found, replace first non-ECMP one */
+ ins = fallback_ins;
+ iter = rcu_dereference_protected(*ins,
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
+ found++;
+ }
+
/* Reset round-robin state, if necessary */
if (ins == &fn->leaf)
fn->rr_ptr = NULL;
/* Link this route to others same route. */
- if (append && match) {
+ if (rt->fib6_nsiblings) {
+ unsigned int fib6_nsiblings;
struct fib6_info *sibling, *temp_sibling;
- if (rt->fib6_flags & RTF_REJECT) {
- NL_SET_ERR_MSG(extack,
- "Can not append a REJECT route");
- return -EINVAL;
- } else if (match->fib6_flags & RTF_REJECT) {
- NL_SET_ERR_MSG(extack,
- "Can not append to a REJECT route");
- return -EINVAL;
+ /* Find the first route that have the same metric */
+ sibling = leaf;
+ while (sibling) {
+ if (sibling->fib6_metric == rt->fib6_metric &&
+ rt6_qualify_for_ecmp(sibling)) {
+ list_add_tail(&rt->fib6_siblings,
+ &sibling->fib6_siblings);
+ break;
+ }
+ sibling = rcu_dereference_protected(sibling->fib6_next,
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
}
- event = FIB_EVENT_ENTRY_APPEND;
- rt->fib6_nsiblings = match->fib6_nsiblings;
- list_add_tail(&rt->fib6_siblings, &match->fib6_siblings);
- match->fib6_nsiblings++;
-
/* For each sibling in the list, increment the counter of
* siblings. BUG() if counters does not match, list of siblings
* is broken!
*/
+ fib6_nsiblings = 0;
list_for_each_entry_safe(sibling, temp_sibling,
- &match->fib6_siblings, fib6_siblings) {
+ &rt->fib6_siblings, fib6_siblings) {
sibling->fib6_nsiblings++;
- BUG_ON(sibling->fib6_nsiblings != match->fib6_nsiblings);
+ BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings);
+ fib6_nsiblings++;
}
-
- rt6_multipath_rebalance(match);
+ BUG_ON(fib6_nsiblings != rt->fib6_nsiblings);
+ rt6_multipath_rebalance(temp_sibling);
}
/*
@@ -1043,8 +1067,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
add:
nlflags |= NLM_F_CREATE;
- err = call_fib6_entry_notifiers(info->nl_net, event, rt,
- extack);
+ err = call_fib6_entry_notifiers(info->nl_net,
+ FIB_EVENT_ENTRY_ADD,
+ rt, extack);
if (err)
return err;
@@ -1062,7 +1087,7 @@ add:
}
} else {
- struct fib6_info *tmp;
+ int nsiblings;
if (!found) {
if (add)
@@ -1077,57 +1102,48 @@ add:
if (err)
return err;
- /* if route being replaced has siblings, set tmp to
- * last one, otherwise tmp is current route. this is
- * used to set fib6_next for new route
- */
- if (iter->fib6_nsiblings)
- tmp = list_last_entry(&iter->fib6_siblings,
- struct fib6_info,
- fib6_siblings);
- else
- tmp = iter;
-
- /* insert new route */
atomic_inc(&rt->fib6_ref);
rcu_assign_pointer(rt->fib6_node, fn);
- rt->fib6_next = tmp->fib6_next;
+ rt->fib6_next = iter->fib6_next;
rcu_assign_pointer(*ins, rt);
-
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
if (!(fn->fn_flags & RTN_RTINFO)) {
info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
fn->fn_flags |= RTN_RTINFO;
}
+ nsiblings = iter->fib6_nsiblings;
+ iter->fib6_node = NULL;
+ fib6_purge_rt(iter, fn, info->nl_net);
+ if (rcu_access_pointer(fn->rr_ptr) == iter)
+ fn->rr_ptr = NULL;
+ fib6_info_release(iter);
- /* delete old route */
- rt = iter;
-
- if (rt->fib6_nsiblings) {
- struct fib6_info *tmp;
-
+ if (nsiblings) {
/* Replacing an ECMP route, remove all siblings */
- list_for_each_entry_safe(iter, tmp, &rt->fib6_siblings,
- fib6_siblings) {
- iter->fib6_node = NULL;
- fib6_purge_rt(iter, fn, info->nl_net);
- if (rcu_access_pointer(fn->rr_ptr) == iter)
- fn->rr_ptr = NULL;
- fib6_info_release(iter);
-
- rt->fib6_nsiblings--;
- info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
+ ins = &rt->fib6_next;
+ iter = rcu_dereference_protected(*ins,
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
+ while (iter) {
+ if (iter->fib6_metric > rt->fib6_metric)
+ break;
+ if (rt6_qualify_for_ecmp(iter)) {
+ *ins = iter->fib6_next;
+ iter->fib6_node = NULL;
+ fib6_purge_rt(iter, fn, info->nl_net);
+ if (rcu_access_pointer(fn->rr_ptr) == iter)
+ fn->rr_ptr = NULL;
+ fib6_info_release(iter);
+ nsiblings--;
+ info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
+ } else {
+ ins = &iter->fib6_next;
+ }
+ iter = rcu_dereference_protected(*ins,
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
}
+ WARN_ON(nsiblings != 0);
}
-
- WARN_ON(rt->fib6_nsiblings != 0);
-
- rt->fib6_node = NULL;
- fib6_purge_rt(rt, fn, info->nl_net);
- if (rcu_access_pointer(fn->rr_ptr) == rt)
- fn->rr_ptr = NULL;
- fib6_info_release(rt);
}
return 0;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index c8cf2fdbb13b..cd2cfb04e5d8 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -927,7 +927,6 @@ tx_err:
static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
struct net_device *dev)
{
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct ip6_tnl *t = netdev_priv(dev);
struct dst_entry *dst = skb_dst(skb);
struct net_device_stats *stats;
@@ -1010,6 +1009,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
goto tx_err;
}
} else {
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+
switch (skb->protocol) {
case htons(ETH_P_IP):
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a14fb4fcdf18..3168847c30d1 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -570,6 +570,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->dev = from->dev;
to->mark = from->mark;
+ skb_copy_hash(to, from);
+
#ifdef CONFIG_NET_SCHED
to->tc_index = from->tc_index;
#endif
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 4d780c7f0130..568ca4187cd1 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -398,6 +398,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
case IPV6_DSTOPTS:
{
struct ipv6_txoptions *opt;
+ struct ipv6_opt_hdr *new = NULL;
+
+ /* hop-by-hop / destination options are privileged option */
+ retv = -EPERM;
+ if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
+ break;
/* remove any sticky options header with a zero option
* length, per RFC3542.
@@ -409,17 +415,22 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
else if (optlen < sizeof(struct ipv6_opt_hdr) ||
optlen & 0x7 || optlen > 8 * 255)
goto e_inval;
-
- /* hop-by-hop / destination options are privileged option */
- retv = -EPERM;
- if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
- break;
+ else {
+ new = memdup_user(optval, optlen);
+ if (IS_ERR(new)) {
+ retv = PTR_ERR(new);
+ break;
+ }
+ if (unlikely(ipv6_optlen(new) > optlen)) {
+ kfree(new);
+ goto e_inval;
+ }
+ }
opt = rcu_dereference_protected(np->opt,
lockdep_sock_is_held(sk));
- opt = ipv6_renew_options(sk, opt, optname,
- (struct ipv6_opt_hdr __user *)optval,
- optlen);
+ opt = ipv6_renew_options(sk, opt, optname, new);
+ kfree(new);
if (IS_ERR(opt)) {
retv = PTR_ERR(opt);
break;
@@ -718,8 +729,9 @@ done:
struct sockaddr_in6 *psin6;
psin6 = (struct sockaddr_in6 *)&greqs.gsr_group;
- retv = ipv6_sock_mc_join(sk, greqs.gsr_interface,
- &psin6->sin6_addr);
+ retv = ipv6_sock_mc_join_ssm(sk, greqs.gsr_interface,
+ &psin6->sin6_addr,
+ MCAST_INCLUDE);
/* prior join w/ different source is ok */
if (retv && retv != -EADDRINUSE)
break;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 975021df7c1c..f60f310785fd 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -95,6 +95,8 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
int delta);
static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
struct inet6_dev *idev);
+static int __ipv6_dev_mc_inc(struct net_device *dev,
+ const struct in6_addr *addr, unsigned int mode);
#define MLD_QRV_DEFAULT 2
/* RFC3810, 9.2. Query Interval */
@@ -132,7 +134,8 @@ static int unsolicited_report_interval(struct inet6_dev *idev)
return iv > 0 ? iv : 1;
}
-int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
+static int __ipv6_sock_mc_join(struct sock *sk, int ifindex,
+ const struct in6_addr *addr, unsigned int mode)
{
struct net_device *dev = NULL;
struct ipv6_mc_socklist *mc_lst;
@@ -179,7 +182,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
}
mc_lst->ifindex = dev->ifindex;
- mc_lst->sfmode = MCAST_EXCLUDE;
+ mc_lst->sfmode = mode;
rwlock_init(&mc_lst->sflock);
mc_lst->sflist = NULL;
@@ -187,7 +190,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
* now add/increase the group membership on the device
*/
- err = ipv6_dev_mc_inc(dev, addr);
+ err = __ipv6_dev_mc_inc(dev, addr, mode);
if (err) {
sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
@@ -199,8 +202,19 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
return 0;
}
+
+int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
+{
+ return __ipv6_sock_mc_join(sk, ifindex, addr, MCAST_EXCLUDE);
+}
EXPORT_SYMBOL(ipv6_sock_mc_join);
+int ipv6_sock_mc_join_ssm(struct sock *sk, int ifindex,
+ const struct in6_addr *addr, unsigned int mode)
+{
+ return __ipv6_sock_mc_join(sk, ifindex, addr, mode);
+}
+
/*
* socket leave on multicast group
*/
@@ -646,7 +660,7 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
return rv;
}
-static void igmp6_group_added(struct ifmcaddr6 *mc)
+static void igmp6_group_added(struct ifmcaddr6 *mc, unsigned int mode)
{
struct net_device *dev = mc->idev->dev;
char buf[MAX_ADDR_LEN];
@@ -672,7 +686,13 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
}
/* else v2 */
- mc->mca_crcount = mc->idev->mc_qrv;
+ /* Based on RFC3810 6.1, for newly added INCLUDE SSM, we
+ * should not send filter-mode change record as the mode
+ * should be from IN() to IN(A).
+ */
+ if (mode == MCAST_EXCLUDE)
+ mc->mca_crcount = mc->idev->mc_qrv;
+
mld_ifc_event(mc->idev);
}
@@ -770,13 +790,13 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
spin_lock_bh(&im->mca_lock);
if (pmc) {
im->idev = pmc->idev;
- im->mca_crcount = idev->mc_qrv;
- im->mca_sfmode = pmc->mca_sfmode;
- if (pmc->mca_sfmode == MCAST_INCLUDE) {
+ if (im->mca_sfmode == MCAST_INCLUDE) {
im->mca_tomb = pmc->mca_tomb;
im->mca_sources = pmc->mca_sources;
for (psf = im->mca_sources; psf; psf = psf->sf_next)
- psf->sf_crcount = im->mca_crcount;
+ psf->sf_crcount = idev->mc_qrv;
+ } else {
+ im->mca_crcount = idev->mc_qrv;
}
in6_dev_put(pmc->idev);
kfree(pmc);
@@ -831,7 +851,8 @@ static void ma_put(struct ifmcaddr6 *mc)
}
static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
- const struct in6_addr *addr)
+ const struct in6_addr *addr,
+ unsigned int mode)
{
struct ifmcaddr6 *mc;
@@ -849,9 +870,8 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
refcount_set(&mc->mca_refcnt, 1);
spin_lock_init(&mc->mca_lock);
- /* initial mode is (EX, empty) */
- mc->mca_sfmode = MCAST_EXCLUDE;
- mc->mca_sfcount[MCAST_EXCLUDE] = 1;
+ mc->mca_sfmode = mode;
+ mc->mca_sfcount[mode] = 1;
if (ipv6_addr_is_ll_all_nodes(&mc->mca_addr) ||
IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL)
@@ -863,7 +883,8 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
/*
* device multicast group inc (add if not found)
*/
-int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
+static int __ipv6_dev_mc_inc(struct net_device *dev,
+ const struct in6_addr *addr, unsigned int mode)
{
struct ifmcaddr6 *mc;
struct inet6_dev *idev;
@@ -887,14 +908,13 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
if (ipv6_addr_equal(&mc->mca_addr, addr)) {
mc->mca_users++;
write_unlock_bh(&idev->lock);
- ip6_mc_add_src(idev, &mc->mca_addr, MCAST_EXCLUDE, 0,
- NULL, 0);
+ ip6_mc_add_src(idev, &mc->mca_addr, mode, 0, NULL, 0);
in6_dev_put(idev);
return 0;
}
}
- mc = mca_alloc(idev, addr);
+ mc = mca_alloc(idev, addr, mode);
if (!mc) {
write_unlock_bh(&idev->lock);
in6_dev_put(idev);
@@ -911,11 +931,16 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
write_unlock_bh(&idev->lock);
mld_del_delrec(idev, mc);
- igmp6_group_added(mc);
+ igmp6_group_added(mc, mode);
ma_put(mc);
return 0;
}
+int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
+{
+ return __ipv6_dev_mc_inc(dev, addr, MCAST_EXCLUDE);
+}
+
/*
* device multicast group del
*/
@@ -1751,7 +1776,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
psf_next = psf->sf_next;
- if (!is_in(pmc, psf, type, gdeleted, sdeleted)) {
+ if (!is_in(pmc, psf, type, gdeleted, sdeleted) && !crsend) {
psf_prev = psf;
continue;
}
@@ -2066,7 +2091,7 @@ static void mld_send_initial_cr(struct inet6_dev *idev)
if (pmc->mca_sfcount[MCAST_EXCLUDE])
type = MLD2_CHANGE_TO_EXCLUDE;
else
- type = MLD2_CHANGE_TO_INCLUDE;
+ type = MLD2_ALLOW_NEW_SOURCES;
skb = add_grec(skb, pmc, type, 0, 0, 1);
spin_unlock_bh(&pmc->mca_lock);
}
@@ -2082,7 +2107,8 @@ void ipv6_mc_dad_complete(struct inet6_dev *idev)
mld_send_initial_cr(idev);
idev->mc_dad_count--;
if (idev->mc_dad_count)
- mld_dad_start_timer(idev, idev->mc_maxdelay);
+ mld_dad_start_timer(idev,
+ unsolicited_report_interval(idev));
}
}
@@ -2094,7 +2120,8 @@ static void mld_dad_timer_expire(struct timer_list *t)
if (idev->mc_dad_count) {
idev->mc_dad_count--;
if (idev->mc_dad_count)
- mld_dad_start_timer(idev, idev->mc_maxdelay);
+ mld_dad_start_timer(idev,
+ unsolicited_report_interval(idev));
}
in6_dev_put(idev);
}
@@ -2452,7 +2479,8 @@ static void mld_ifc_timer_expire(struct timer_list *t)
if (idev->mc_ifc_count) {
idev->mc_ifc_count--;
if (idev->mc_ifc_count)
- mld_ifc_start_timer(idev, idev->mc_maxdelay);
+ mld_ifc_start_timer(idev,
+ unsolicited_report_interval(idev));
}
in6_dev_put(idev);
}
@@ -2543,7 +2571,7 @@ void ipv6_mc_up(struct inet6_dev *idev)
ipv6_mc_reset(idev);
for (i = idev->mc_list; i; i = i->next) {
mld_del_delrec(idev, i);
- igmp6_group_added(i);
+ igmp6_group_added(i, i->mca_sfmode);
}
read_unlock_bh(&idev->lock);
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index e640d2f3c55c..0ec273997d1d 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -811,7 +811,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
return;
}
}
- if (ndopts.nd_opts_nonce)
+ if (ndopts.nd_opts_nonce && ndopts.nd_opts_nonce->nd_opt_len == 1)
memcpy(&nonce, (u8 *)(ndopts.nd_opts_nonce + 1), 6);
inc = ipv6_addr_is_multicast(daddr);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7eab959734bc..daf2e9e9193d 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1909,6 +1909,7 @@ static struct xt_match ip6t_builtin_mt[] __read_mostly = {
.checkentry = icmp6_checkentry,
.proto = IPPROTO_ICMPV6,
.family = NFPROTO_IPV6,
+ .me = THIS_MODULE,
},
};
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 5e0332014c17..e4d9e6976d3c 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -107,7 +107,7 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
if (hdr == NULL)
goto err_reg;
- net->nf_frag.sysctl.frags_hdr = hdr;
+ net->nf_frag_frags_hdr = hdr;
return 0;
err_reg:
@@ -121,8 +121,8 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
{
struct ctl_table *table;
- table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg;
- unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr);
+ table = net->nf_frag_frags_hdr->ctl_table_arg;
+ unregister_net_sysctl_table(net->nf_frag_frags_hdr);
if (!net_eq(net, &init_net))
kfree(table);
}
@@ -585,6 +585,8 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
fq->q.meat == fq->q.len &&
nf_ct_frag6_reasm(fq, skb, dev))
ret = 0;
+ else
+ skb_dst_drop(skb);
out_unlock:
spin_unlock_bh(&fq->q.lock);
diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c
index bf1d6c421e3b..5dfd33af6451 100644
--- a/net/ipv6/netfilter/nf_tproxy_ipv6.c
+++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c
@@ -55,7 +55,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
* to a listener socket if there's one */
struct sock *sk2;
- sk2 = nf_tproxy_get_sock_v6(net, skb, thoff, hp, tproto,
+ sk2 = nf_tproxy_get_sock_v6(net, skb, thoff, tproto,
&iph->saddr,
nf_tproxy_laddr6(skb, laddr, &iph->daddr),
hp->source,
@@ -72,7 +72,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
EXPORT_SYMBOL_GPL(nf_tproxy_handle_time_wait6);
struct sock *
-nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
+nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff,
const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
@@ -80,15 +80,20 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
- struct tcphdr *tcph;
switch (protocol) {
- case IPPROTO_TCP:
+ case IPPROTO_TCP: {
+ struct tcphdr _hdr, *hp;
+
+ hp = skb_header_pointer(skb, thoff,
+ sizeof(struct tcphdr), &_hdr);
+ if (hp == NULL)
+ return NULL;
+
switch (lookup_type) {
case NF_TPROXY_LOOKUP_LISTENER:
- tcph = hp;
sk = inet6_lookup_listener(net, &tcp_hashinfo, skb,
- thoff + __tcp_hdrlen(tcph),
+ thoff + __tcp_hdrlen(hp),
saddr, sport,
daddr, ntohs(dport),
in->ifindex, 0);
@@ -110,6 +115,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
BUG();
}
break;
+ }
case IPPROTO_UDP:
sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ce6f0d15b5dd..afc307c89d1a 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1334,7 +1334,7 @@ void raw6_proc_exit(void)
}
#endif /* CONFIG_PROC_FS */
-/* Same as inet6_dgram_ops, sans udp_poll_mask. */
+/* Same as inet6_dgram_ops, sans udp_poll. */
const struct proto_ops inet6_sockraw_ops = {
.family = PF_INET6,
.owner = THIS_MODULE,
@@ -1344,7 +1344,7 @@ const struct proto_ops inet6_sockraw_ops = {
.socketpair = sock_no_socketpair, /* a do nothing */
.accept = sock_no_accept, /* a do nothing */
.getname = inet6_getname,
- .poll_mask = datagram_poll_mask, /* ok */
+ .poll = datagram_poll, /* ok */
.ioctl = inet6_ioctl, /* must change */
.listen = sock_no_listen, /* ok */
.shutdown = inet_shutdown, /* ok */
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 86a0e4333d42..ec18b3ce8b6d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -972,10 +972,10 @@ static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
rt->dst.lastuse = jiffies;
}
+/* Caller must already hold reference to @from */
static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
{
rt->rt6i_flags &= ~RTF_EXPIRES;
- fib6_info_hold(from);
rcu_assign_pointer(rt->from, from);
dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
if (from->fib6_metrics != &dst_default_metrics) {
@@ -984,6 +984,7 @@ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
}
}
+/* Caller must already hold reference to @ort */
static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
{
struct net_device *dev = fib6_info_nh_dev(ort);
@@ -1044,9 +1045,14 @@ static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
struct net_device *dev = rt->fib6_nh.nh_dev;
struct rt6_info *nrt;
+ if (!fib6_info_hold_safe(rt))
+ return NULL;
+
nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
if (nrt)
ip6_rt_copy_init(nrt, rt);
+ else
+ fib6_info_release(rt);
return nrt;
}
@@ -1178,10 +1184,15 @@ static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
* Clone the route.
*/
+ if (!fib6_info_hold_safe(ort))
+ return NULL;
+
dev = ip6_rt_get_dev_rcu(ort);
rt = ip6_dst_alloc(dev_net(dev), dev, 0);
- if (!rt)
+ if (!rt) {
+ fib6_info_release(ort);
return NULL;
+ }
ip6_rt_copy_init(rt, ort);
rt->rt6i_flags |= RTF_CACHE;
@@ -1210,12 +1221,17 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
struct net_device *dev;
struct rt6_info *pcpu_rt;
+ if (!fib6_info_hold_safe(rt))
+ return NULL;
+
rcu_read_lock();
dev = ip6_rt_get_dev_rcu(rt);
pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
rcu_read_unlock();
- if (!pcpu_rt)
+ if (!pcpu_rt) {
+ fib6_info_release(rt);
return NULL;
+ }
ip6_rt_copy_init(pcpu_rt, rt);
pcpu_rt->rt6i_flags |= RTF_PCPU;
return pcpu_rt;
@@ -2486,7 +2502,7 @@ restart:
out:
if (ret)
- dst_hold(&ret->dst);
+ ip6_hold_safe(net, &ret, true);
else
ret = ip6_create_rt_rcu(rt);
@@ -3303,7 +3319,8 @@ static int ip6_route_del(struct fib6_config *cfg,
continue;
if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
continue;
- fib6_info_hold(rt);
+ if (!fib6_info_hold_safe(rt))
+ continue;
rcu_read_unlock();
/* if gateway was specified only delete the one hop */
@@ -3409,6 +3426,9 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
rcu_read_lock();
from = rcu_dereference(rt->from);
+ /* This fib6_info_hold() is safe here because we hold reference to rt
+ * and rt already holds reference to fib6_info.
+ */
fib6_info_hold(from);
rcu_read_unlock();
@@ -3470,7 +3490,8 @@ static struct fib6_info *rt6_get_route_info(struct net *net,
continue;
if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
continue;
- fib6_info_hold(rt);
+ if (!fib6_info_hold_safe(rt))
+ continue;
break;
}
out:
@@ -3530,8 +3551,8 @@ struct fib6_info *rt6_get_dflt_router(struct net *net,
ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
break;
}
- if (rt)
- fib6_info_hold(rt);
+ if (rt && !fib6_info_hold_safe(rt))
+ rt = NULL;
rcu_read_unlock();
return rt;
}
@@ -3579,8 +3600,8 @@ restart:
struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
- (!idev || idev->cnf.accept_ra != 2)) {
- fib6_info_hold(rt);
+ (!idev || idev->cnf.accept_ra != 2) &&
+ fib6_info_hold_safe(rt)) {
rcu_read_unlock();
ip6_del_rt(net, rt);
goto restart;
@@ -3842,7 +3863,7 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
lockdep_is_held(&rt->fib6_table->tb6_lock));
while (iter) {
if (iter->fib6_metric == rt->fib6_metric &&
- iter->fib6_nsiblings)
+ rt6_qualify_for_ecmp(iter))
return iter;
iter = rcu_dereference_protected(iter->fib6_next,
lockdep_is_held(&rt->fib6_table->tb6_lock));
@@ -4388,6 +4409,13 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
rt = NULL;
goto cleanup;
}
+ if (!rt6_qualify_for_ecmp(rt)) {
+ err = -EINVAL;
+ NL_SET_ERR_MSG(extack,
+ "Device only routes can not be added for IPv6 using the multipath API.");
+ fib6_info_release(rt);
+ goto cleanup;
+ }
rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
@@ -4439,7 +4467,6 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
*/
cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
NLM_F_REPLACE);
- cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_APPEND;
nhn++;
}
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index 33fb35cbfac1..558fe8cc6d43 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -373,7 +373,7 @@ static int seg6_hmac_init_algo(void)
return -ENOMEM;
for_each_possible_cpu(cpu) {
- tfm = crypto_alloc_shash(algo->name, 0, GFP_KERNEL);
+ tfm = crypto_alloc_shash(algo->name, 0, 0);
if (IS_ERR(tfm))
return PTR_ERR(tfm);
p_tfm = per_cpu_ptr(algo->tfms, cpu);
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 19ccf0dc996c..a8854dd3e9c5 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -101,7 +101,7 @@ static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
if (do_flowlabel > 0) {
hash = skb_get_hash(skb);
- rol32(hash, 16);
+ hash = rol32(hash, 16);
flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
} else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) {
flowlabel = ip6_flowlabel(inner_hdr);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7efa9fd7e109..03e6b7a2bc53 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -938,7 +938,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
&tcp_hashinfo, NULL, 0,
&ipv6h->saddr,
th->source, &ipv6h->daddr,
- ntohs(th->source), tcp_v6_iif(skb),
+ ntohs(th->source),
+ tcp_v6_iif_l3_slave(skb),
tcp_v6_sdif(skb));
if (!sk1)
goto out;
@@ -1609,7 +1610,8 @@ do_time_wait:
skb, __tcp_hdrlen(th),
&ipv6_hdr(skb)->saddr, th->source,
&ipv6_hdr(skb)->daddr,
- ntohs(th->dest), tcp_v6_iif(skb),
+ ntohs(th->dest),
+ tcp_v6_iif_l3_slave(skb),
sdif);
if (sk2) {
struct inet_timewait_sock *tw = inet_twsk(sk);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 68e86257a549..893a022f9620 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1488,11 +1488,14 @@ static inline __poll_t iucv_accept_poll(struct sock *parent)
return 0;
}
-static __poll_t iucv_sock_poll_mask(struct socket *sock, __poll_t events)
+__poll_t iucv_sock_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
{
struct sock *sk = sock->sk;
__poll_t mask = 0;
+ sock_poll_wait(file, sk_sleep(sk), wait);
+
if (sk->sk_state == IUCV_LISTEN)
return iucv_accept_poll(sk);
@@ -2385,7 +2388,7 @@ static const struct proto_ops iucv_sock_ops = {
.getname = iucv_sock_getname,
.sendmsg = iucv_sock_sendmsg,
.recvmsg = iucv_sock_recvmsg,
- .poll_mask = iucv_sock_poll_mask,
+ .poll = iucv_sock_poll,
.ioctl = sock_no_ioctl,
.mmap = sock_no_mmap,
.socketpair = sock_no_socketpair,
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 84b7d5c6fec8..d3601d421571 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1336,9 +1336,9 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux)
struct list_head *head;
int index = 0;
- /* For SOCK_SEQPACKET sock type, datagram_poll_mask checks the sk_state,
- * so we set sk_state, otherwise epoll_wait always returns right away
- * with EPOLLHUP
+ /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so
+ * we set sk_state, otherwise epoll_wait always returns right away with
+ * EPOLLHUP
*/
kcm->sk.sk_state = TCP_ESTABLISHED;
@@ -1903,7 +1903,7 @@ static const struct proto_ops kcm_dgram_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.ioctl = kcm_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -1924,7 +1924,7 @@ static const struct proto_ops kcm_seqpacket_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.ioctl = kcm_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 8bdc1cbe490a..5e1d2946ffbf 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3751,7 +3751,7 @@ static const struct proto_ops pfkey_ops = {
/* Now the operations that really occur. */
.release = pfkey_release,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.sendmsg = pfkey_sendmsg,
.recvmsg = pfkey_recvmsg,
};
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 181073bf6925..a9c05b2bc1b0 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -613,7 +613,7 @@ static const struct proto_ops l2tp_ip_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = l2tp_ip_getname,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.ioctl = inet_ioctl,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 336e4c00abbc..957369192ca1 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -754,7 +754,7 @@ static const struct proto_ops l2tp_ip6_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = l2tp_ip6_getname,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.ioctl = inet6_ioctl,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 55188382845c..e398797878a9 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1818,7 +1818,7 @@ static const struct proto_ops pppol2tp_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = pppol2tp_getname,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = pppol2tp_setsockopt,
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 804de8490186..1beeea9549fa 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -1192,7 +1192,7 @@ static const struct proto_ops llc_ui_ops = {
.socketpair = sock_no_socketpair,
.accept = llc_ui_accept,
.getname = llc_ui_getname,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.ioctl = llc_ui_ioctl,
.listen = llc_ui_listen,
.shutdown = llc_ui_shutdown,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 0a38cc1cbebc..932985ca4e66 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2254,11 +2254,8 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb,
sdata->control_port_over_nl80211)) {
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
bool noencrypt = status->flag & RX_FLAG_DECRYPTED;
- struct ethhdr *ehdr = eth_hdr(skb);
- cfg80211_rx_control_port(dev, skb->data, skb->len,
- ehdr->h_source,
- be16_to_cpu(skb->protocol), noencrypt);
+ cfg80211_rx_control_port(dev, skb, noencrypt);
dev_kfree_skb(skb);
} else {
/* deliver to local stack */
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 44b5dfe8727d..fa1f1e63a264 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -4845,7 +4845,9 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
skb_reset_network_header(skb);
skb_reset_mac_header(skb);
+ local_bh_disable();
__ieee80211_subif_start_xmit(skb, skb->dev, flags);
+ local_bh_enable();
return 0;
}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 5e2e511c4a6f..d02fbfec3783 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2111,7 +2111,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
if (!sta->uploaded)
continue;
- if (sta->sdata->vif.type != NL80211_IFTYPE_AP)
+ if (sta->sdata->vif.type != NL80211_IFTYPE_AP &&
+ sta->sdata->vif.type != NL80211_IFTYPE_AP_VLAN)
continue;
for (state = IEEE80211_STA_NOTEXIST;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index dbd7d1fad277..f0a1c536ef15 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -460,6 +460,13 @@ config NF_TABLES
if NF_TABLES
+config NF_TABLES_SET
+ tristate "Netfilter nf_tables set infrastructure"
+ help
+ This option enables the nf_tables set infrastructure that allows to
+ look up for elements in a set and to build one-way mappings between
+ matchings and actions.
+
config NF_TABLES_INET
depends on IPV6
select NF_TABLES_IPV4
@@ -493,24 +500,6 @@ config NFT_FLOW_OFFLOAD
This option adds the "flow_offload" expression that you can use to
choose what flows are placed into the hardware.
-config NFT_SET_RBTREE
- tristate "Netfilter nf_tables rbtree set module"
- help
- This option adds the "rbtree" set type (Red Black tree) that is used
- to build interval-based sets.
-
-config NFT_SET_HASH
- tristate "Netfilter nf_tables hash set module"
- help
- This option adds the "hash" set type that is used to build one-way
- mappings between matchings and actions.
-
-config NFT_SET_BITMAP
- tristate "Netfilter nf_tables bitmap set module"
- help
- This option adds the "bitmap" set type that is used to build sets
- whose keys are smaller or equal to 16 bits.
-
config NFT_COUNTER
tristate "Netfilter nf_tables counter module"
help
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 44449389e527..8a76dced974d 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -78,7 +78,11 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \
nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o
+nf_tables_set-objs := nf_tables_set_core.o \
+ nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o
+
obj-$(CONFIG_NF_TABLES) += nf_tables.o
+obj-$(CONFIG_NF_TABLES_SET) += nf_tables_set.o
obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
obj-$(CONFIG_NFT_CONNLIMIT) += nft_connlimit.o
obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
@@ -91,9 +95,6 @@ obj-$(CONFIG_NFT_QUEUE) += nft_queue.o
obj-$(CONFIG_NFT_QUOTA) += nft_quota.o
obj-$(CONFIG_NFT_REJECT) += nft_reject.o
obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o
-obj-$(CONFIG_NFT_SET_RBTREE) += nft_set_rbtree.o
-obj-$(CONFIG_NFT_SET_HASH) += nft_set_hash.o
-obj-$(CONFIG_NFT_SET_BITMAP) += nft_set_bitmap.o
obj-$(CONFIG_NFT_COUNTER) += nft_counter.o
obj-$(CONFIG_NFT_LOG) += nft_log.o
obj-$(CONFIG_NFT_MASQ) += nft_masq.o
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index d8383609fe28..510039862aa9 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -47,6 +47,8 @@ struct nf_conncount_tuple {
struct hlist_node node;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_zone zone;
+ int cpu;
+ u32 jiffies32;
};
struct nf_conncount_rb {
@@ -91,11 +93,42 @@ bool nf_conncount_add(struct hlist_head *head,
return false;
conn->tuple = *tuple;
conn->zone = *zone;
+ conn->cpu = raw_smp_processor_id();
+ conn->jiffies32 = (u32)jiffies;
hlist_add_head(&conn->node, head);
return true;
}
EXPORT_SYMBOL_GPL(nf_conncount_add);
+static const struct nf_conntrack_tuple_hash *
+find_or_evict(struct net *net, struct nf_conncount_tuple *conn)
+{
+ const struct nf_conntrack_tuple_hash *found;
+ unsigned long a, b;
+ int cpu = raw_smp_processor_id();
+ __s32 age;
+
+ found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple);
+ if (found)
+ return found;
+ b = conn->jiffies32;
+ a = (u32)jiffies;
+
+ /* conn might have been added just before by another cpu and
+ * might still be unconfirmed. In this case, nf_conntrack_find()
+ * returns no result. Thus only evict if this cpu added the
+ * stale entry or if the entry is older than two jiffies.
+ */
+ age = a - b;
+ if (conn->cpu == cpu || age >= 2) {
+ hlist_del(&conn->node);
+ kmem_cache_free(conncount_conn_cachep, conn);
+ return ERR_PTR(-ENOENT);
+ }
+
+ return ERR_PTR(-EAGAIN);
+}
+
unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone,
@@ -103,18 +136,27 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
{
const struct nf_conntrack_tuple_hash *found;
struct nf_conncount_tuple *conn;
- struct hlist_node *n;
struct nf_conn *found_ct;
+ struct hlist_node *n;
unsigned int length = 0;
*addit = tuple ? true : false;
/* check the saved connections */
hlist_for_each_entry_safe(conn, n, head, node) {
- found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple);
- if (found == NULL) {
- hlist_del(&conn->node);
- kmem_cache_free(conncount_conn_cachep, conn);
+ found = find_or_evict(net, conn);
+ if (IS_ERR(found)) {
+ /* Not found, but might be about to be confirmed */
+ if (PTR_ERR(found) == -EAGAIN) {
+ length++;
+ if (!tuple)
+ continue;
+
+ if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
+ nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
+ nf_ct_zone_id(zone, zone->dir))
+ *addit = false;
+ }
continue;
}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3465da2a98bd..3d5280425027 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2043,7 +2043,7 @@ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
return -EOPNOTSUPP;
/* On boot, we can set this without any fancy locking. */
- if (!nf_conntrack_htable_size)
+ if (!nf_conntrack_hash)
return param_set_uint(val, kp);
rc = kstrtouint(val, 0, &hashsize);
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 551a1eddf0fa..a75b11c39312 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -465,6 +465,11 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
nf_ct_expect_iterate_destroy(expect_iter_me, NULL);
nf_ct_iterate_destroy(unhelp, me);
+
+ /* Maybe someone has gotten the helper already when unhelp above.
+ * So need to wait it.
+ */
+ synchronize_rcu();
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index abe647d5b8c6..9ce6336d1e55 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -243,14 +243,14 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
* We currently ignore Sync packets
*
* sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
+ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
},
[DCCP_PKT_SYNCACK] = {
/*
* We currently ignore SyncAck packets
*
* sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
+ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
},
},
[CT_DCCP_ROLE_SERVER] = {
@@ -371,14 +371,14 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
* We currently ignore Sync packets
*
* sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
+ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
},
[DCCP_PKT_SYNCACK] = {
/*
* We currently ignore SyncAck packets
*
* sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
+ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
},
},
};
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 426457047578..a61d6df6e5f6 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -424,6 +424,10 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write,
if (write) {
struct ctl_table tmp = *table;
+ /* proc_dostring() can append to existing strings, so we need to
+ * initialize it as an empty string.
+ */
+ buf[0] = '\0';
tmp.data = buf;
r = proc_dostring(&tmp, write, buffer, lenp, ppos);
if (r)
@@ -442,14 +446,17 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write,
rcu_assign_pointer(net->nf.nf_loggers[tindex], logger);
mutex_unlock(&nf_log_mutex);
} else {
+ struct ctl_table tmp = *table;
+
+ tmp.data = buf;
mutex_lock(&nf_log_mutex);
logger = nft_log_dereference(net->nf.nf_loggers[tindex]);
if (!logger)
- table->data = "NONE";
+ strlcpy(buf, "NONE", sizeof(buf));
else
- table->data = logger->name;
- r = proc_dostring(table, write, buffer, lenp, ppos);
+ strlcpy(buf, logger->name, sizeof(buf));
mutex_unlock(&nf_log_mutex);
+ r = proc_dostring(&tmp, write, buffer, lenp, ppos);
}
return r;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 896d4a36081d..f5745e4c6513 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -75,6 +75,7 @@ static void nft_ctx_init(struct nft_ctx *ctx,
{
ctx->net = net;
ctx->family = family;
+ ctx->level = 0;
ctx->table = table;
ctx->chain = chain;
ctx->nla = nla;
@@ -1597,7 +1598,6 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
struct nft_base_chain *basechain;
struct nft_stats *stats = NULL;
struct nft_chain_hook hook;
- const struct nlattr *name;
struct nf_hook_ops *ops;
struct nft_trans *trans;
int err;
@@ -1645,12 +1645,11 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
return PTR_ERR(stats);
}
+ err = -ENOMEM;
trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN,
sizeof(struct nft_trans_chain));
- if (trans == NULL) {
- free_percpu(stats);
- return -ENOMEM;
- }
+ if (trans == NULL)
+ goto err;
nft_trans_chain_stats(trans) = stats;
nft_trans_chain_update(trans) = true;
@@ -1660,19 +1659,37 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
else
nft_trans_chain_policy(trans) = -1;
- name = nla[NFTA_CHAIN_NAME];
- if (nla[NFTA_CHAIN_HANDLE] && name) {
- nft_trans_chain_name(trans) =
- nla_strdup(name, GFP_KERNEL);
- if (!nft_trans_chain_name(trans)) {
- kfree(trans);
- free_percpu(stats);
- return -ENOMEM;
+ if (nla[NFTA_CHAIN_HANDLE] &&
+ nla[NFTA_CHAIN_NAME]) {
+ struct nft_trans *tmp;
+ char *name;
+
+ err = -ENOMEM;
+ name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
+ if (!name)
+ goto err;
+
+ err = -EEXIST;
+ list_for_each_entry(tmp, &ctx->net->nft.commit_list, list) {
+ if (tmp->msg_type == NFT_MSG_NEWCHAIN &&
+ tmp->ctx.table == table &&
+ nft_trans_chain_update(tmp) &&
+ nft_trans_chain_name(tmp) &&
+ strcmp(name, nft_trans_chain_name(tmp)) == 0) {
+ kfree(name);
+ goto err;
+ }
}
+
+ nft_trans_chain_name(trans) = name;
}
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
+err:
+ free_percpu(stats);
+ kfree(trans);
+ return err;
}
static int nf_tables_newchain(struct net *net, struct sock *nlsk,
@@ -2254,6 +2271,39 @@ done:
return skb->len;
}
+static int nf_tables_dump_rules_start(struct netlink_callback *cb)
+{
+ const struct nlattr * const *nla = cb->data;
+ struct nft_rule_dump_ctx *ctx = NULL;
+
+ if (nla[NFTA_RULE_TABLE] || nla[NFTA_RULE_CHAIN]) {
+ ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC);
+ if (!ctx)
+ return -ENOMEM;
+
+ if (nla[NFTA_RULE_TABLE]) {
+ ctx->table = nla_strdup(nla[NFTA_RULE_TABLE],
+ GFP_ATOMIC);
+ if (!ctx->table) {
+ kfree(ctx);
+ return -ENOMEM;
+ }
+ }
+ if (nla[NFTA_RULE_CHAIN]) {
+ ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN],
+ GFP_ATOMIC);
+ if (!ctx->chain) {
+ kfree(ctx->table);
+ kfree(ctx);
+ return -ENOMEM;
+ }
+ }
+ }
+
+ cb->data = ctx;
+ return 0;
+}
+
static int nf_tables_dump_rules_done(struct netlink_callback *cb)
{
struct nft_rule_dump_ctx *ctx = cb->data;
@@ -2283,38 +2333,13 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
+ .start= nf_tables_dump_rules_start,
.dump = nf_tables_dump_rules,
.done = nf_tables_dump_rules_done,
.module = THIS_MODULE,
+ .data = (void *)nla,
};
- if (nla[NFTA_RULE_TABLE] || nla[NFTA_RULE_CHAIN]) {
- struct nft_rule_dump_ctx *ctx;
-
- ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC);
- if (!ctx)
- return -ENOMEM;
-
- if (nla[NFTA_RULE_TABLE]) {
- ctx->table = nla_strdup(nla[NFTA_RULE_TABLE],
- GFP_ATOMIC);
- if (!ctx->table) {
- kfree(ctx);
- return -ENOMEM;
- }
- }
- if (nla[NFTA_RULE_CHAIN]) {
- ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN],
- GFP_ATOMIC);
- if (!ctx->chain) {
- kfree(ctx->table);
- kfree(ctx);
- return -ENOMEM;
- }
- }
- c.data = ctx;
- }
-
return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
}
@@ -2384,6 +2409,9 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
struct nft_rule *rule;
int err;
+ if (ctx->level == NFT_JUMP_STACK_SIZE)
+ return -EMLINK;
+
list_for_each_entry(rule, &chain->rules, list) {
if (!nft_is_active_next(ctx->net, rule))
continue;
@@ -3161,6 +3189,18 @@ done:
return skb->len;
}
+static int nf_tables_dump_sets_start(struct netlink_callback *cb)
+{
+ struct nft_ctx *ctx_dump = NULL;
+
+ ctx_dump = kmemdup(cb->data, sizeof(*ctx_dump), GFP_ATOMIC);
+ if (ctx_dump == NULL)
+ return -ENOMEM;
+
+ cb->data = ctx_dump;
+ return 0;
+}
+
static int nf_tables_dump_sets_done(struct netlink_callback *cb)
{
kfree(cb->data);
@@ -3188,18 +3228,12 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
+ .start = nf_tables_dump_sets_start,
.dump = nf_tables_dump_sets,
.done = nf_tables_dump_sets_done,
+ .data = &ctx,
.module = THIS_MODULE,
};
- struct nft_ctx *ctx_dump;
-
- ctx_dump = kmalloc(sizeof(*ctx_dump), GFP_ATOMIC);
- if (ctx_dump == NULL)
- return -ENOMEM;
-
- *ctx_dump = ctx;
- c.data = ctx_dump;
return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
}
@@ -3849,6 +3883,15 @@ nla_put_failure:
return -ENOSPC;
}
+static int nf_tables_dump_set_start(struct netlink_callback *cb)
+{
+ struct nft_set_dump_ctx *dump_ctx = cb->data;
+
+ cb->data = kmemdup(dump_ctx, sizeof(*dump_ctx), GFP_ATOMIC);
+
+ return cb->data ? 0 : -ENOMEM;
+}
+
static int nf_tables_dump_set_done(struct netlink_callback *cb)
{
kfree(cb->data);
@@ -4002,20 +4045,17 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
+ .start = nf_tables_dump_set_start,
.dump = nf_tables_dump_set,
.done = nf_tables_dump_set_done,
.module = THIS_MODULE,
};
- struct nft_set_dump_ctx *dump_ctx;
-
- dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_ATOMIC);
- if (!dump_ctx)
- return -ENOMEM;
-
- dump_ctx->set = set;
- dump_ctx->ctx = ctx;
+ struct nft_set_dump_ctx dump_ctx = {
+ .set = set,
+ .ctx = ctx,
+ };
- c.data = dump_ctx;
+ c.data = &dump_ctx;
return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
}
@@ -4975,38 +5015,42 @@ done:
return skb->len;
}
-static int nf_tables_dump_obj_done(struct netlink_callback *cb)
+static int nf_tables_dump_obj_start(struct netlink_callback *cb)
{
- struct nft_obj_filter *filter = cb->data;
+ const struct nlattr * const *nla = cb->data;
+ struct nft_obj_filter *filter = NULL;
- if (filter) {
- kfree(filter->table);
- kfree(filter);
+ if (nla[NFTA_OBJ_TABLE] || nla[NFTA_OBJ_TYPE]) {
+ filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
+ if (!filter)
+ return -ENOMEM;
+
+ if (nla[NFTA_OBJ_TABLE]) {
+ filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC);
+ if (!filter->table) {
+ kfree(filter);
+ return -ENOMEM;
+ }
+ }
+
+ if (nla[NFTA_OBJ_TYPE])
+ filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
}
+ cb->data = filter;
return 0;
}
-static struct nft_obj_filter *
-nft_obj_filter_alloc(const struct nlattr * const nla[])
+static int nf_tables_dump_obj_done(struct netlink_callback *cb)
{
- struct nft_obj_filter *filter;
-
- filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
- if (!filter)
- return ERR_PTR(-ENOMEM);
+ struct nft_obj_filter *filter = cb->data;
- if (nla[NFTA_OBJ_TABLE]) {
- filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC);
- if (!filter->table) {
- kfree(filter);
- return ERR_PTR(-ENOMEM);
- }
+ if (filter) {
+ kfree(filter->table);
+ kfree(filter);
}
- if (nla[NFTA_OBJ_TYPE])
- filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
- return filter;
+ return 0;
}
/* called with rcu_read_lock held */
@@ -5027,21 +5071,13 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
+ .start = nf_tables_dump_obj_start,
.dump = nf_tables_dump_obj,
.done = nf_tables_dump_obj_done,
.module = THIS_MODULE,
+ .data = (void *)nla,
};
- if (nla[NFTA_OBJ_TABLE] ||
- nla[NFTA_OBJ_TYPE]) {
- struct nft_obj_filter *filter;
-
- filter = nft_obj_filter_alloc(nla);
- if (IS_ERR(filter))
- return -ENOMEM;
-
- c.data = filter;
- }
return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
}
@@ -5320,8 +5356,6 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
flowtable->ops[i].priv = &flowtable->data;
flowtable->ops[i].hook = flowtable->data.type->hook;
flowtable->ops[i].dev = dev_array[i];
- flowtable->dev_name[i] = kstrdup(dev_array[i]->name,
- GFP_KERNEL);
}
return err;
@@ -5479,10 +5513,8 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
err6:
i = flowtable->ops_len;
err5:
- for (k = i - 1; k >= 0; k--) {
- kfree(flowtable->dev_name[k]);
+ for (k = i - 1; k >= 0; k--)
nf_unregister_net_hook(net, &flowtable->ops[k]);
- }
kfree(flowtable->ops);
err4:
@@ -5581,9 +5613,10 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
goto nla_put_failure;
for (i = 0; i < flowtable->ops_len; i++) {
- if (flowtable->dev_name[i][0] &&
- nla_put_string(skb, NFTA_DEVICE_NAME,
- flowtable->dev_name[i]))
+ const struct net_device *dev = READ_ONCE(flowtable->ops[i].dev);
+
+ if (dev &&
+ nla_put_string(skb, NFTA_DEVICE_NAME, dev->name))
goto nla_put_failure;
}
nla_nest_end(skb, nest_devs);
@@ -5650,37 +5683,39 @@ done:
return skb->len;
}
-static int nf_tables_dump_flowtable_done(struct netlink_callback *cb)
+static int nf_tables_dump_flowtable_start(struct netlink_callback *cb)
{
- struct nft_flowtable_filter *filter = cb->data;
+ const struct nlattr * const *nla = cb->data;
+ struct nft_flowtable_filter *filter = NULL;
- if (!filter)
- return 0;
+ if (nla[NFTA_FLOWTABLE_TABLE]) {
+ filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
+ if (!filter)
+ return -ENOMEM;
- kfree(filter->table);
- kfree(filter);
+ filter->table = nla_strdup(nla[NFTA_FLOWTABLE_TABLE],
+ GFP_ATOMIC);
+ if (!filter->table) {
+ kfree(filter);
+ return -ENOMEM;
+ }
+ }
+ cb->data = filter;
return 0;
}
-static struct nft_flowtable_filter *
-nft_flowtable_filter_alloc(const struct nlattr * const nla[])
+static int nf_tables_dump_flowtable_done(struct netlink_callback *cb)
{
- struct nft_flowtable_filter *filter;
+ struct nft_flowtable_filter *filter = cb->data;
- filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
if (!filter)
- return ERR_PTR(-ENOMEM);
+ return 0;
- if (nla[NFTA_FLOWTABLE_TABLE]) {
- filter->table = nla_strdup(nla[NFTA_FLOWTABLE_TABLE],
- GFP_ATOMIC);
- if (!filter->table) {
- kfree(filter);
- return ERR_PTR(-ENOMEM);
- }
- }
- return filter;
+ kfree(filter->table);
+ kfree(filter);
+
+ return 0;
}
/* called with rcu_read_lock held */
@@ -5700,20 +5735,13 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
+ .start = nf_tables_dump_flowtable_start,
.dump = nf_tables_dump_flowtable,
.done = nf_tables_dump_flowtable_done,
.module = THIS_MODULE,
+ .data = (void *)nla,
};
- if (nla[NFTA_FLOWTABLE_TABLE]) {
- struct nft_flowtable_filter *filter;
-
- filter = nft_flowtable_filter_alloc(nla);
- if (IS_ERR(filter))
- return -ENOMEM;
-
- c.data = filter;
- }
return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
}
@@ -5783,6 +5811,7 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
kfree(flowtable->name);
flowtable->data.type->free(&flowtable->data);
module_put(flowtable->data.type->owner);
+ kfree(flowtable);
}
static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
@@ -5825,7 +5854,6 @@ static void nft_flowtable_event(unsigned long event, struct net_device *dev,
continue;
nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]);
- flowtable->dev_name[i][0] = '\0';
flowtable->ops[i].dev = NULL;
break;
}
@@ -6086,6 +6114,9 @@ static void nft_commit_release(struct nft_trans *trans)
case NFT_MSG_DELTABLE:
nf_tables_table_destroy(&trans->ctx);
break;
+ case NFT_MSG_NEWCHAIN:
+ kfree(nft_trans_chain_name(trans));
+ break;
case NFT_MSG_DELCHAIN:
nf_tables_chain_destroy(&trans->ctx);
break;
@@ -6315,13 +6346,15 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE);
break;
case NFT_MSG_NEWCHAIN:
- if (nft_trans_chain_update(trans))
+ if (nft_trans_chain_update(trans)) {
nft_chain_commit_update(trans);
- else
+ nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
+ /* trans destroyed after rcu grace period */
+ } else {
nft_clear(net, trans->ctx.chain);
-
- nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
- nft_trans_destroy(trans);
+ nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
+ nft_trans_destroy(trans);
+ }
break;
case NFT_MSG_DELCHAIN:
nft_chain_del(trans->ctx.chain);
@@ -6471,7 +6504,7 @@ static int __nf_tables_abort(struct net *net)
case NFT_MSG_NEWCHAIN:
if (nft_trans_chain_update(trans)) {
free_percpu(nft_trans_chain_stats(trans));
-
+ kfree(nft_trans_chain_name(trans));
nft_trans_destroy(trans);
} else {
trans->ctx.table->use--;
@@ -6837,13 +6870,6 @@ int nft_validate_register_store(const struct nft_ctx *ctx,
err = nf_tables_check_loops(ctx, data->verdict.chain);
if (err < 0)
return err;
-
- if (ctx->chain->level + 1 >
- data->verdict.chain->level) {
- if (ctx->chain->level + 1 == NFT_JUMP_STACK_SIZE)
- return -EMLINK;
- data->verdict.chain->level = ctx->chain->level + 1;
- }
}
return 0;
diff --git a/net/netfilter/nf_tables_set_core.c b/net/netfilter/nf_tables_set_core.c
new file mode 100644
index 000000000000..814789644bd3
--- /dev/null
+++ b/net/netfilter/nf_tables_set_core.c
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <net/netfilter/nf_tables_core.h>
+
+static int __init nf_tables_set_module_init(void)
+{
+ nft_register_set(&nft_set_hash_fast_type);
+ nft_register_set(&nft_set_hash_type);
+ nft_register_set(&nft_set_rhash_type);
+ nft_register_set(&nft_set_bitmap_type);
+ nft_register_set(&nft_set_rbtree_type);
+
+ return 0;
+}
+
+static void __exit nf_tables_set_module_exit(void)
+{
+ nft_unregister_set(&nft_set_rbtree_type);
+ nft_unregister_set(&nft_set_bitmap_type);
+ nft_unregister_set(&nft_set_rhash_type);
+ nft_unregister_set(&nft_set_hash_type);
+ nft_unregister_set(&nft_set_hash_fast_type);
+}
+
+module_init(nf_tables_set_module_init);
+module_exit(nf_tables_set_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 4ccd2988f9db..ea4ba551abb2 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1243,6 +1243,9 @@ static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl,
static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
[NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) },
[NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) },
+ [NFQA_CFG_QUEUE_MAXLEN] = { .type = NLA_U32 },
+ [NFQA_CFG_MASK] = { .type = NLA_U32 },
+ [NFQA_CFG_FLAGS] = { .type = NLA_U32 },
};
static const struct nf_queue_handler nfqh = {
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 8d1ff654e5af..32535eea51b2 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -832,10 +832,18 @@ nft_target_select_ops(const struct nft_ctx *ctx,
rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV]));
family = ctx->family;
+ if (strcmp(tg_name, XT_ERROR_TARGET) == 0 ||
+ strcmp(tg_name, XT_STANDARD_TARGET) == 0 ||
+ strcmp(tg_name, "standard") == 0)
+ return ERR_PTR(-EINVAL);
+
/* Re-use the existing target if it's already loaded. */
list_for_each_entry(nft_target, &nft_target_list, head) {
struct xt_target *target = nft_target->ops.data;
+ if (!target->target)
+ continue;
+
if (nft_target_cmp(target, tg_name, rev, family))
return &nft_target->ops;
}
@@ -844,6 +852,11 @@ nft_target_select_ops(const struct nft_ctx *ctx,
if (IS_ERR(target))
return ERR_PTR(-ENOENT);
+ if (!target->target) {
+ err = -EINVAL;
+ goto err;
+ }
+
if (target->targetsize > nla_len(tb[NFTA_TARGET_INFO])) {
err = -EINVAL;
goto err;
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 15adf8ca82c3..0777a93211e2 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -98,6 +98,7 @@ static int nft_immediate_validate(const struct nft_ctx *ctx,
const struct nft_data **d)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ struct nft_ctx *pctx = (struct nft_ctx *)ctx;
const struct nft_data *data;
int err;
@@ -109,9 +110,11 @@ static int nft_immediate_validate(const struct nft_ctx *ctx,
switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
+ pctx->level++;
err = nft_chain_validate(ctx, data->verdict.chain);
if (err < 0)
return err;
+ pctx->level--;
break;
default:
break;
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 42e6fadf1417..c2a1d84cdfc4 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -155,7 +155,9 @@ static int nft_lookup_validate_setelem(const struct nft_ctx *ctx,
struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ struct nft_ctx *pctx = (struct nft_ctx *)ctx;
const struct nft_data *data;
+ int err;
if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
*nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
@@ -165,10 +167,17 @@ static int nft_lookup_validate_setelem(const struct nft_ctx *ctx,
switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- return nft_chain_validate(ctx, data->verdict.chain);
+ pctx->level++;
+ err = nft_chain_validate(ctx, data->verdict.chain);
+ if (err < 0)
+ return err;
+ pctx->level--;
+ break;
default:
- return 0;
+ break;
}
+
+ return 0;
}
static int nft_lookup_validate(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index d6626e01c7ee..128bc16f52dd 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -296,7 +296,7 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
return true;
}
-static struct nft_set_type nft_bitmap_type __read_mostly = {
+struct nft_set_type nft_set_bitmap_type __read_mostly = {
.owner = THIS_MODULE,
.ops = {
.privsize = nft_bitmap_privsize,
@@ -314,20 +314,3 @@ static struct nft_set_type nft_bitmap_type __read_mostly = {
.get = nft_bitmap_get,
},
};
-
-static int __init nft_bitmap_module_init(void)
-{
- return nft_register_set(&nft_bitmap_type);
-}
-
-static void __exit nft_bitmap_module_exit(void)
-{
- nft_unregister_set(&nft_bitmap_type);
-}
-
-module_init(nft_bitmap_module_init);
-module_exit(nft_bitmap_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
-MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 6f9a1365a09f..90c3e7e6cacb 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -387,6 +387,7 @@ static void nft_rhash_destroy(const struct nft_set *set)
struct nft_rhash *priv = nft_set_priv(set);
cancel_delayed_work_sync(&priv->gc_work);
+ rcu_barrier();
rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
(void *)set);
}
@@ -654,7 +655,7 @@ static bool nft_hash_fast_estimate(const struct nft_set_desc *desc, u32 features
return true;
}
-static struct nft_set_type nft_rhash_type __read_mostly = {
+struct nft_set_type nft_set_rhash_type __read_mostly = {
.owner = THIS_MODULE,
.features = NFT_SET_MAP | NFT_SET_OBJECT |
NFT_SET_TIMEOUT | NFT_SET_EVAL,
@@ -677,7 +678,7 @@ static struct nft_set_type nft_rhash_type __read_mostly = {
},
};
-static struct nft_set_type nft_hash_type __read_mostly = {
+struct nft_set_type nft_set_hash_type __read_mostly = {
.owner = THIS_MODULE,
.features = NFT_SET_MAP | NFT_SET_OBJECT,
.ops = {
@@ -697,7 +698,7 @@ static struct nft_set_type nft_hash_type __read_mostly = {
},
};
-static struct nft_set_type nft_hash_fast_type __read_mostly = {
+struct nft_set_type nft_set_hash_fast_type __read_mostly = {
.owner = THIS_MODULE,
.features = NFT_SET_MAP | NFT_SET_OBJECT,
.ops = {
@@ -716,26 +717,3 @@ static struct nft_set_type nft_hash_fast_type __read_mostly = {
.get = nft_hash_get,
},
};
-
-static int __init nft_hash_module_init(void)
-{
- if (nft_register_set(&nft_hash_fast_type) ||
- nft_register_set(&nft_hash_type) ||
- nft_register_set(&nft_rhash_type))
- return 1;
- return 0;
-}
-
-static void __exit nft_hash_module_exit(void)
-{
- nft_unregister_set(&nft_rhash_type);
- nft_unregister_set(&nft_hash_type);
- nft_unregister_set(&nft_hash_fast_type);
-}
-
-module_init(nft_hash_module_init);
-module_exit(nft_hash_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 7f3a9a211034..9873d734b494 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -381,7 +381,7 @@ static void nft_rbtree_gc(struct work_struct *work)
gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
if (!gcb)
- goto out;
+ break;
atomic_dec(&set->nelems);
nft_set_gc_batch_add(gcb, rbe);
@@ -390,10 +390,12 @@ static void nft_rbtree_gc(struct work_struct *work)
rbe = rb_entry(prev, struct nft_rbtree_elem, node);
atomic_dec(&set->nelems);
nft_set_gc_batch_add(gcb, rbe);
+ prev = NULL;
}
node = rb_next(node);
+ if (!node)
+ break;
}
-out:
if (gcb) {
for (i = 0; i < gcb->head.cnt; i++) {
rbe = gcb->elems[i];
@@ -440,6 +442,7 @@ static void nft_rbtree_destroy(const struct nft_set *set)
struct rb_node *node;
cancel_delayed_work_sync(&priv->gc_work);
+ rcu_barrier();
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
@@ -462,7 +465,7 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
return true;
}
-static struct nft_set_type nft_rbtree_type __read_mostly = {
+struct nft_set_type nft_set_rbtree_type __read_mostly = {
.owner = THIS_MODULE,
.features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
.ops = {
@@ -481,20 +484,3 @@ static struct nft_set_type nft_rbtree_type __read_mostly = {
.get = nft_rbtree_get,
},
};
-
-static int __init nft_rbtree_module_init(void)
-{
- return nft_register_set(&nft_rbtree_type);
-}
-
-static void __exit nft_rbtree_module_exit(void)
-{
- nft_unregister_set(&nft_rbtree_type);
-}
-
-module_init(nft_rbtree_module_init);
-module_exit(nft_rbtree_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 58fce4e749a9..d76550a8b642 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -61,7 +61,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
* addresses, this happens if the redirect already happened
* and the current packet belongs to an already established
* connection */
- sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
+ sk = nf_tproxy_get_sock_v4(net, skb, iph->protocol,
iph->saddr, iph->daddr,
hp->source, hp->dest,
skb->dev, NF_TPROXY_LOOKUP_ESTABLISHED);
@@ -77,7 +77,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
else if (!sk)
/* no, there's no established connection, check if
* there's a listener on the redirected addr/port */
- sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
+ sk = nf_tproxy_get_sock_v4(net, skb, iph->protocol,
iph->saddr, laddr,
hp->source, lport,
skb->dev, NF_TPROXY_LOOKUP_LISTENER);
@@ -150,7 +150,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
* addresses, this happens if the redirect already happened
* and the current packet belongs to an already established
* connection */
- sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto,
+ sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, tproto,
&iph->saddr, &iph->daddr,
hp->source, hp->dest,
xt_in(par), NF_TPROXY_LOOKUP_ESTABLISHED);
@@ -171,7 +171,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
else if (!sk)
/* no there's no established connection, check if
* there's a listener on the redirected addr/port */
- sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp,
+ sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff,
tproto, &iph->saddr, laddr,
hp->source, lport,
xt_in(par), NF_TPROXY_LOOKUP_LISTENER);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 1189b84413d5..393573a99a5a 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2658,7 +2658,7 @@ static const struct proto_ops netlink_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = netlink_getname,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.ioctl = netlink_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 93fbcafbf388..03f37c4e64fe 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1355,7 +1355,7 @@ static const struct proto_ops nr_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = nr_accept,
.getname = nr_getname,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.ioctl = nr_ioctl,
.listen = nr_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index 2ceefa183cee..6a196e438b6c 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -752,11 +752,14 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap,
pr_debug("Fragment %zd bytes remaining %zd",
frag_len, remaining_len);
- pdu = nfc_alloc_send_skb(sock->dev, &sock->sk, MSG_DONTWAIT,
+ pdu = nfc_alloc_send_skb(sock->dev, &sock->sk, 0,
frag_len + LLCP_HEADER_SIZE, &err);
if (pdu == NULL) {
- pr_err("Could not allocate PDU\n");
- continue;
+ pr_err("Could not allocate PDU (error=%d)\n", err);
+ len -= remaining_len;
+ if (len == 0)
+ len = err;
+ break;
}
pdu = llcp_add_header(pdu, dsap, ssap, LLCP_PDU_UI);
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index ab5bb14b49af..ea0c0c6f1874 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -548,13 +548,16 @@ static inline __poll_t llcp_accept_poll(struct sock *parent)
return 0;
}
-static __poll_t llcp_sock_poll_mask(struct socket *sock, __poll_t events)
+static __poll_t llcp_sock_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
{
struct sock *sk = sock->sk;
__poll_t mask = 0;
pr_debug("%p\n", sk);
+ sock_poll_wait(file, sk_sleep(sk), wait);
+
if (sk->sk_state == LLCP_LISTEN)
return llcp_accept_poll(sk);
@@ -896,7 +899,7 @@ static const struct proto_ops llcp_sock_ops = {
.socketpair = sock_no_socketpair,
.accept = llcp_sock_accept,
.getname = llcp_sock_getname,
- .poll_mask = llcp_sock_poll_mask,
+ .poll = llcp_sock_poll,
.ioctl = sock_no_ioctl,
.listen = llcp_sock_listen,
.shutdown = sock_no_shutdown,
@@ -916,7 +919,7 @@ static const struct proto_ops llcp_rawsock_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = llcp_sock_getname,
- .poll_mask = llcp_sock_poll_mask,
+ .poll = llcp_sock_poll,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 60c322531c49..e2188deb08dc 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -284,7 +284,7 @@ static const struct proto_ops rawsock_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -304,7 +304,7 @@ static const struct proto_ops rawsock_raw_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
index 9696ef96b719..1a30e165eeb4 100644
--- a/net/nsh/nsh.c
+++ b/net/nsh/nsh.c
@@ -104,7 +104,7 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
__skb_pull(skb, nsh_len);
skb_reset_mac_header(skb);
- skb_reset_mac_len(skb);
+ skb->mac_len = proto == htons(ETH_P_TEB) ? ETH_HLEN : 0;
skb->protocol = proto;
features &= NETIF_F_SG;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 50809748c127..9b27d0cd766d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2262,6 +2262,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
if (po->stats.stats1.tp_drops)
status |= TP_STATUS_LOSING;
}
+
+ if (do_vnet &&
+ virtio_net_hdr_from_skb(skb, h.raw + macoff -
+ sizeof(struct virtio_net_hdr),
+ vio_le(), true, 0))
+ goto drop_n_account;
+
po->stats.stats1.tp_packets++;
if (copy_skb) {
status |= TP_STATUS_COPY;
@@ -2269,15 +2276,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
}
spin_unlock(&sk->sk_receive_queue.lock);
- if (do_vnet) {
- if (virtio_net_hdr_from_skb(skb, h.raw + macoff -
- sizeof(struct virtio_net_hdr),
- vio_le(), true, 0)) {
- spin_lock(&sk->sk_receive_queue.lock);
- goto drop_n_account;
- }
- }
-
skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
@@ -2880,6 +2878,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
goto out_free;
} else if (reserve) {
skb_reserve(skb, -reserve);
+ if (len < reserve)
+ skb_reset_network_header(skb);
}
/* Returns -EFAULT on error */
@@ -4078,11 +4078,12 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
return 0;
}
-static __poll_t packet_poll_mask(struct socket *sock, __poll_t events)
+static __poll_t packet_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
{
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
- __poll_t mask = datagram_poll_mask(sock, events);
+ __poll_t mask = datagram_poll(file, sock, wait);
spin_lock_bh(&sk->sk_receive_queue.lock);
if (po->rx_ring.pg_vec) {
@@ -4424,7 +4425,7 @@ static const struct proto_ops packet_ops_spkt = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = packet_getname_spkt,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.ioctl = packet_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -4445,7 +4446,7 @@ static const struct proto_ops packet_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = packet_getname,
- .poll_mask = packet_poll_mask,
+ .poll = packet_poll,
.ioctl = packet_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index c295c4e20f01..30187990257f 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -340,12 +340,15 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
return sizeof(struct sockaddr_pn);
}
-static __poll_t pn_socket_poll_mask(struct socket *sock, __poll_t events)
+static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
{
struct sock *sk = sock->sk;
struct pep_sock *pn = pep_sk(sk);
__poll_t mask = 0;
+ poll_wait(file, sk_sleep(sk), wait);
+
if (sk->sk_state == TCP_CLOSE)
return EPOLLERR;
if (!skb_queue_empty(&sk->sk_receive_queue))
@@ -445,7 +448,7 @@ const struct proto_ops phonet_dgram_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = pn_socket_getname,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.ioctl = pn_socket_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -470,7 +473,7 @@ const struct proto_ops phonet_stream_ops = {
.socketpair = sock_no_socketpair,
.accept = pn_socket_accept,
.getname = pn_socket_getname,
- .poll_mask = pn_socket_poll_mask,
+ .poll = pn_socket_poll,
.ioctl = pn_socket_ioctl,
.listen = pn_socket_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 1b5025ea5b04..86e1e37eb4e8 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -191,8 +191,13 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
hdr->type = cpu_to_le32(type);
hdr->src_node_id = cpu_to_le32(from->sq_node);
hdr->src_port_id = cpu_to_le32(from->sq_port);
- hdr->dst_node_id = cpu_to_le32(to->sq_node);
- hdr->dst_port_id = cpu_to_le32(to->sq_port);
+ if (to->sq_port == QRTR_PORT_CTRL) {
+ hdr->dst_node_id = cpu_to_le32(node->nid);
+ hdr->dst_port_id = cpu_to_le32(QRTR_NODE_BCAST);
+ } else {
+ hdr->dst_node_id = cpu_to_le32(to->sq_node);
+ hdr->dst_port_id = cpu_to_le32(to->sq_port);
+ }
hdr->size = cpu_to_le32(len);
hdr->confirm_rx = 0;
@@ -764,6 +769,10 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
node = NULL;
if (addr->sq_node == QRTR_NODE_BCAST) {
enqueue_fn = qrtr_bcast_enqueue;
+ if (addr->sq_port != QRTR_PORT_CTRL) {
+ release_sock(sk);
+ return -ENOTCONN;
+ }
} else if (addr->sq_node == ipc->us.sq_node) {
enqueue_fn = qrtr_local_enqueue;
} else {
@@ -1023,7 +1032,7 @@ static const struct proto_ops qrtr_proto_ops = {
.recvmsg = qrtr_recvmsg,
.getname = qrtr_getname,
.ioctl = qrtr_ioctl,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.shutdown = sock_no_shutdown,
.setsockopt = sock_no_setsockopt,
.getsockopt = sock_no_getsockopt,
diff --git a/net/rds/connection.c b/net/rds/connection.c
index abef75da89a7..cfb05953b0e5 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -659,11 +659,19 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
int rds_conn_init(void)
{
+ int ret;
+
+ ret = rds_loop_net_init(); /* register pernet callback */
+ if (ret)
+ return ret;
+
rds_conn_slab = kmem_cache_create("rds_connection",
sizeof(struct rds_connection),
0, 0, NULL);
- if (!rds_conn_slab)
+ if (!rds_conn_slab) {
+ rds_loop_net_exit();
return -ENOMEM;
+ }
rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info);
rds_info_register_func(RDS_INFO_SEND_MESSAGES,
@@ -676,6 +684,7 @@ int rds_conn_init(void)
void rds_conn_exit(void)
{
+ rds_loop_net_exit(); /* unregister pernet callback */
rds_loop_exit();
WARN_ON(!hlist_empty(rds_conn_hash));
diff --git a/net/rds/loop.c b/net/rds/loop.c
index dac6218a460e..feea1f96ee2a 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -33,6 +33,8 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/in.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include "rds_single_path.h"
#include "rds.h"
@@ -40,6 +42,17 @@
static DEFINE_SPINLOCK(loop_conns_lock);
static LIST_HEAD(loop_conns);
+static atomic_t rds_loop_unloading = ATOMIC_INIT(0);
+
+static void rds_loop_set_unloading(void)
+{
+ atomic_set(&rds_loop_unloading, 1);
+}
+
+static bool rds_loop_is_unloading(struct rds_connection *conn)
+{
+ return atomic_read(&rds_loop_unloading) != 0;
+}
/*
* This 'loopback' transport is a special case for flows that originate
@@ -165,6 +178,8 @@ void rds_loop_exit(void)
struct rds_loop_connection *lc, *_lc;
LIST_HEAD(tmp_list);
+ rds_loop_set_unloading();
+ synchronize_rcu();
/* avoid calling conn_destroy with irqs off */
spin_lock_irq(&loop_conns_lock);
list_splice(&loop_conns, &tmp_list);
@@ -177,6 +192,46 @@ void rds_loop_exit(void)
}
}
+static void rds_loop_kill_conns(struct net *net)
+{
+ struct rds_loop_connection *lc, *_lc;
+ LIST_HEAD(tmp_list);
+
+ spin_lock_irq(&loop_conns_lock);
+ list_for_each_entry_safe(lc, _lc, &loop_conns, loop_node) {
+ struct net *c_net = read_pnet(&lc->conn->c_net);
+
+ if (net != c_net)
+ continue;
+ list_move_tail(&lc->loop_node, &tmp_list);
+ }
+ spin_unlock_irq(&loop_conns_lock);
+
+ list_for_each_entry_safe(lc, _lc, &tmp_list, loop_node) {
+ WARN_ON(lc->conn->c_passive);
+ rds_conn_destroy(lc->conn);
+ }
+}
+
+static void __net_exit rds_loop_exit_net(struct net *net)
+{
+ rds_loop_kill_conns(net);
+}
+
+static struct pernet_operations rds_loop_net_ops = {
+ .exit = rds_loop_exit_net,
+};
+
+int rds_loop_net_init(void)
+{
+ return register_pernet_device(&rds_loop_net_ops);
+}
+
+void rds_loop_net_exit(void)
+{
+ unregister_pernet_device(&rds_loop_net_ops);
+}
+
/*
* This is missing .xmit_* because loop doesn't go through generic
* rds_send_xmit() and doesn't call rds_recv_incoming(). .listen_stop and
@@ -194,4 +249,5 @@ struct rds_transport rds_loop_transport = {
.inc_free = rds_loop_inc_free,
.t_name = "loopback",
.t_type = RDS_TRANS_LOOP,
+ .t_unloading = rds_loop_is_unloading,
};
diff --git a/net/rds/loop.h b/net/rds/loop.h
index 469fa4b2da4f..bbc8cdd030df 100644
--- a/net/rds/loop.h
+++ b/net/rds/loop.h
@@ -5,6 +5,8 @@
/* loop.c */
extern struct rds_transport rds_loop_transport;
+int rds_loop_net_init(void);
+void rds_loop_net_exit(void);
void rds_loop_exit(void);
#endif
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index ebe42e7eb456..d00a0ef39a56 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1470,7 +1470,7 @@ static const struct proto_ops rose_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = rose_accept,
.getname = rose_getname,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.ioctl = rose_ioctl,
.listen = rose_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 3b1ac93efee2..2b463047dd7b 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -734,11 +734,15 @@ static int rxrpc_getsockopt(struct socket *sock, int level, int optname,
/*
* permit an RxRPC socket to be polled
*/
-static __poll_t rxrpc_poll_mask(struct socket *sock, __poll_t events)
+static __poll_t rxrpc_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
{
struct sock *sk = sock->sk;
struct rxrpc_sock *rx = rxrpc_sk(sk);
- __poll_t mask = 0;
+ __poll_t mask;
+
+ sock_poll_wait(file, sk_sleep(sk), wait);
+ mask = 0;
/* the socket is readable if there are any messages waiting on the Rx
* queue */
@@ -945,7 +949,7 @@ static const struct proto_ops rxrpc_rpc_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll_mask = rxrpc_poll_mask,
+ .poll = rxrpc_poll,
.ioctl = sock_no_ioctl,
.listen = rxrpc_listen,
.shutdown = rxrpc_shutdown,
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 526a8e491626..6e7124e57918 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -91,7 +91,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
}
params_old = rtnl_dereference(p->params);
- params_new->action = parm->action;
+ p->tcf_action = parm->action;
params_new->update_flags = parm->update_flags;
rcu_assign_pointer(p->params, params_new);
if (params_old)
@@ -561,7 +561,7 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
tcf_lastuse_update(&p->tcf_tm);
bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb);
- action = params->action;
+ action = READ_ONCE(p->tcf_action);
if (unlikely(action == TC_ACT_SHOT))
goto drop_stats;
@@ -599,11 +599,11 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
.index = p->tcf_index,
.refcnt = p->tcf_refcnt - ref,
.bindcnt = p->tcf_bindcnt - bind,
+ .action = p->tcf_action,
};
struct tcf_t t;
params = rtnl_dereference(p->params);
- opt.action = params->action;
opt.update_flags = params->update_flags;
if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt))
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 626dac81a48a..9bc6c2ae98a5 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -36,7 +36,7 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
tcf_lastuse_update(&t->tcf_tm);
bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
- action = params->action;
+ action = READ_ONCE(t->tcf_action);
switch (params->tcft_action) {
case TCA_TUNNEL_KEY_ACT_RELEASE:
@@ -182,7 +182,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
params_old = rtnl_dereference(t->params);
- params_new->action = parm->action;
+ t->tcf_action = parm->action;
params_new->tcft_action = parm->t_action;
params_new->tcft_enc_metadata = metadata;
@@ -254,13 +254,13 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
.index = t->tcf_index,
.refcnt = t->tcf_refcnt - ref,
.bindcnt = t->tcf_bindcnt - bind,
+ .action = t->tcf_action,
};
struct tcf_t tm;
params = rtnl_dereference(t->params);
opt.t_action = params->tcft_action;
- opt.action = params->action;
if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index cdc3c87c53e6..f74513a7c7a8 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1053,7 +1053,7 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
for (tp = rtnl_dereference(chain->filter_chain);
tp; tp = rtnl_dereference(tp->next))
tfilter_notify(net, oskb, n, tp, block,
- q, parent, 0, event, false);
+ q, parent, NULL, event, false);
}
static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1444,7 +1444,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
if (cb->args[1] == 0) {
- if (tcf_fill_node(net, skb, tp, block, q, parent, 0,
+ if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWTFILTER) <= 0)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 2b5be42a9f1c..9e8b26a80fb3 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -66,7 +66,7 @@ struct fl_flow_mask {
struct rhashtable_params filter_ht_params;
struct flow_dissector dissector;
struct list_head filters;
- struct rcu_head rcu;
+ struct rcu_work rwork;
struct list_head list;
};
@@ -203,6 +203,20 @@ static int fl_init(struct tcf_proto *tp)
return rhashtable_init(&head->ht, &mask_ht_params);
}
+static void fl_mask_free(struct fl_flow_mask *mask)
+{
+ rhashtable_destroy(&mask->ht);
+ kfree(mask);
+}
+
+static void fl_mask_free_work(struct work_struct *work)
+{
+ struct fl_flow_mask *mask = container_of(to_rcu_work(work),
+ struct fl_flow_mask, rwork);
+
+ fl_mask_free(mask);
+}
+
static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask,
bool async)
{
@@ -210,12 +224,11 @@ static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask,
return false;
rhashtable_remove_fast(&head->ht, &mask->ht_node, mask_ht_params);
- rhashtable_destroy(&mask->ht);
list_del_rcu(&mask->list);
if (async)
- kfree_rcu(mask, rcu);
+ tcf_queue_work(&mask->rwork, fl_mask_free_work);
else
- kfree(mask);
+ fl_mask_free(mask);
return true;
}
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index cd2e0e342fb6..6c0a9d5dbf94 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -479,24 +479,28 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt,
q->cparams.mtu = psched_mtu(qdisc_dev(sch));
if (opt) {
- int err = fq_codel_change(sch, opt, extack);
+ err = fq_codel_change(sch, opt, extack);
if (err)
- return err;
+ goto init_failure;
}
err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
if (err)
- return err;
+ goto init_failure;
if (!q->flows) {
q->flows = kvcalloc(q->flows_cnt,
sizeof(struct fq_codel_flow),
GFP_KERNEL);
- if (!q->flows)
- return -ENOMEM;
+ if (!q->flows) {
+ err = -ENOMEM;
+ goto init_failure;
+ }
q->backlogs = kvcalloc(q->flows_cnt, sizeof(u32), GFP_KERNEL);
- if (!q->backlogs)
- return -ENOMEM;
+ if (!q->backlogs) {
+ err = -ENOMEM;
+ goto alloc_failure;
+ }
for (i = 0; i < q->flows_cnt; i++) {
struct fq_codel_flow *flow = q->flows + i;
@@ -509,6 +513,13 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt,
else
sch->flags &= ~TCQ_F_CAN_BYPASS;
return 0;
+
+alloc_failure:
+ kvfree(q->flows);
+ q->flows = NULL;
+init_failure:
+ q->flows_cnt = 0;
+ return err;
}
static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3ae9877ea205..3278a76f6861 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1385,8 +1385,8 @@ hfsc_schedule_watchdog(struct Qdisc *sch)
if (next_time == 0 || next_time > q->root.cl_cfmin)
next_time = q->root.cl_cfmin;
}
- WARN_ON(next_time == 0);
- qdisc_watchdog_schedule(&q->watchdog, next_time);
+ if (next_time)
+ qdisc_watchdog_schedule(&q->watchdog, next_time);
}
static int
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 79daa98208c3..bfb9f812e2ef 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -237,7 +237,9 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
/* Account for a different sized first fragment */
if (msg_len >= first_len) {
msg->can_delay = 0;
- SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS);
+ if (msg_len > first_len)
+ SCTP_INC_STATS(sock_net(asoc->base.sk),
+ SCTP_MIB_FRAGUSRMSGS);
} else {
/* Which may be the only one... */
first_len = msg_len;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 7339918a805d..0cd2e764f47f 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -1010,7 +1010,7 @@ static const struct proto_ops inet6_seqpacket_ops = {
.socketpair = sock_no_socketpair,
.accept = inet_accept,
.getname = sctp_getname,
- .poll_mask = sctp_poll_mask,
+ .poll = sctp_poll,
.ioctl = inet6_ioctl,
.listen = sctp_inet_listen,
.shutdown = inet_shutdown,
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 5dffbc493008..67f73d3a1356 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1016,7 +1016,7 @@ static const struct proto_ops inet_seqpacket_ops = {
.socketpair = sock_no_socketpair,
.accept = inet_accept,
.getname = inet_getname, /* Semantics are different. */
- .poll_mask = sctp_poll_mask,
+ .poll = sctp_poll,
.ioctl = inet_ioctl,
.listen = sctp_inet_listen,
.shutdown = inet_shutdown, /* Looks harmless. */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d20f7addee19..ce620e878538 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -7717,12 +7717,14 @@ out:
* here, again, by modeling the current TCP/UDP code. We don't have
* a good way to test with it yet.
*/
-__poll_t sctp_poll_mask(struct socket *sock, __poll_t events)
+__poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
struct sctp_sock *sp = sctp_sk(sk);
__poll_t mask;
+ poll_wait(file, sk_sleep(sk), wait);
+
sock_rps_record_flow(sk);
/* A TCP-style listening socket becomes readable when the accept queue
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 445b7ef61677..12cac85da994 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -282,7 +282,7 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
if (dst) {
/* Re-fetch, as under layers may have a higher minimum size */
- pmtu = SCTP_TRUNC4(dst_mtu(dst));
+ pmtu = sctp_dst_mtu(dst);
change = t->pathmtu != pmtu;
}
t->pathmtu = pmtu;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index da7f02edcd37..05e4ffe5aabd 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -45,6 +45,7 @@ static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group
*/
static void smc_tcp_listen_work(struct work_struct *);
+static void smc_connect_work(struct work_struct *);
static void smc_set_keepalive(struct sock *sk, int val)
{
@@ -122,6 +123,12 @@ static int smc_release(struct socket *sock)
goto out;
smc = smc_sk(sk);
+
+ /* cleanup for a dangling non-blocking connect */
+ flush_work(&smc->connect_work);
+ kfree(smc->connect_info);
+ smc->connect_info = NULL;
+
if (sk->sk_state == SMC_LISTEN)
/* smc_close_non_accepted() is called and acquires
* sock lock for child sockets again
@@ -140,7 +147,8 @@ static int smc_release(struct socket *sock)
smc->clcsock = NULL;
}
if (smc->use_fallback) {
- sock_put(sk); /* passive closing */
+ if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
+ sock_put(sk); /* passive closing */
sk->sk_state = SMC_CLOSED;
sk->sk_state_change(sk);
}
@@ -186,6 +194,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
sk->sk_protocol = protocol;
smc = smc_sk(sk);
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
+ INIT_WORK(&smc->connect_work, smc_connect_work);
INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
INIT_LIST_HEAD(&smc->accept_q);
spin_lock_init(&smc->accept_q_lock);
@@ -409,12 +418,18 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
{
int rc;
- if (reason_code < 0) /* error, fallback is not possible */
+ if (reason_code < 0) { /* error, fallback is not possible */
+ if (smc->sk.sk_state == SMC_INIT)
+ sock_put(&smc->sk); /* passive closing */
return reason_code;
+ }
if (reason_code != SMC_CLC_DECL_REPLY) {
rc = smc_clc_send_decline(smc, reason_code);
- if (rc < 0)
+ if (rc < 0) {
+ if (smc->sk.sk_state == SMC_INIT)
+ sock_put(&smc->sk); /* passive closing */
return rc;
+ }
}
return smc_connect_fallback(smc);
}
@@ -427,8 +442,6 @@ static int smc_connect_abort(struct smc_sock *smc, int reason_code,
smc_lgr_forget(smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
smc_conn_free(&smc->conn);
- if (reason_code < 0 && smc->sk.sk_state == SMC_INIT)
- sock_put(&smc->sk); /* passive closing */
return reason_code;
}
@@ -576,6 +589,35 @@ static int __smc_connect(struct smc_sock *smc)
return 0;
}
+static void smc_connect_work(struct work_struct *work)
+{
+ struct smc_sock *smc = container_of(work, struct smc_sock,
+ connect_work);
+ int rc;
+
+ lock_sock(&smc->sk);
+ rc = kernel_connect(smc->clcsock, &smc->connect_info->addr,
+ smc->connect_info->alen, smc->connect_info->flags);
+ if (smc->clcsock->sk->sk_err) {
+ smc->sk.sk_err = smc->clcsock->sk->sk_err;
+ goto out;
+ }
+ if (rc < 0) {
+ smc->sk.sk_err = -rc;
+ goto out;
+ }
+
+ rc = __smc_connect(smc);
+ if (rc < 0)
+ smc->sk.sk_err = -rc;
+
+out:
+ smc->sk.sk_state_change(&smc->sk);
+ kfree(smc->connect_info);
+ smc->connect_info = NULL;
+ release_sock(&smc->sk);
+}
+
static int smc_connect(struct socket *sock, struct sockaddr *addr,
int alen, int flags)
{
@@ -605,15 +647,32 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
smc_copy_sock_settings_to_clc(smc);
tcp_sk(smc->clcsock->sk)->syn_smc = 1;
- rc = kernel_connect(smc->clcsock, addr, alen, flags);
- if (rc)
- goto out;
+ if (flags & O_NONBLOCK) {
+ if (smc->connect_info) {
+ rc = -EALREADY;
+ goto out;
+ }
+ smc->connect_info = kzalloc(alen + 2 * sizeof(int), GFP_KERNEL);
+ if (!smc->connect_info) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ smc->connect_info->alen = alen;
+ smc->connect_info->flags = flags ^ O_NONBLOCK;
+ memcpy(&smc->connect_info->addr, addr, alen);
+ schedule_work(&smc->connect_work);
+ rc = -EINPROGRESS;
+ } else {
+ rc = kernel_connect(smc->clcsock, addr, alen, flags);
+ if (rc)
+ goto out;
- rc = __smc_connect(smc);
- if (rc < 0)
- goto out;
- else
- rc = 0; /* success cases including fallback */
+ rc = __smc_connect(smc);
+ if (rc < 0)
+ goto out;
+ else
+ rc = 0; /* success cases including fallback */
+ }
out:
release_sock(sk);
@@ -1273,40 +1332,26 @@ static __poll_t smc_accept_poll(struct sock *parent)
return mask;
}
-static __poll_t smc_poll_mask(struct socket *sock, __poll_t events)
+static __poll_t smc_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
{
struct sock *sk = sock->sk;
__poll_t mask = 0;
struct smc_sock *smc;
- int rc;
if (!sk)
return EPOLLNVAL;
smc = smc_sk(sock->sk);
- sock_hold(sk);
- lock_sock(sk);
if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
/* delegate to CLC child sock */
- release_sock(sk);
- mask = smc->clcsock->ops->poll_mask(smc->clcsock, events);
- lock_sock(sk);
+ mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
sk->sk_err = smc->clcsock->sk->sk_err;
- if (sk->sk_err) {
+ if (sk->sk_err)
mask |= EPOLLERR;
- } else {
- /* if non-blocking connect finished ... */
- if (sk->sk_state == SMC_INIT &&
- mask & EPOLLOUT &&
- smc->clcsock->sk->sk_state != TCP_CLOSE) {
- rc = __smc_connect(smc);
- if (rc < 0)
- mask |= EPOLLERR;
- /* success cases including fallback */
- mask |= EPOLLOUT | EPOLLWRNORM;
- }
- }
} else {
+ if (sk->sk_state != SMC_CLOSED)
+ sock_poll_wait(file, sk_sleep(sk), wait);
if (sk->sk_err)
mask |= EPOLLERR;
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
@@ -1332,10 +1377,7 @@ static __poll_t smc_poll_mask(struct socket *sock, __poll_t events)
}
if (smc->conn.urg_state == SMC_URG_VALID)
mask |= EPOLLPRI;
-
}
- release_sock(sk);
- sock_put(sk);
return mask;
}
@@ -1415,7 +1457,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
- get_user(val, (int __user *)optval);
+ if (get_user(val, (int __user *)optval))
+ return -EFAULT;
lock_sock(sk);
switch (optname) {
@@ -1483,10 +1526,13 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
return -EBADF;
return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
}
+ lock_sock(&smc->sk);
switch (cmd) {
case SIOCINQ: /* same as FIONREAD */
- if (smc->sk.sk_state == SMC_LISTEN)
+ if (smc->sk.sk_state == SMC_LISTEN) {
+ release_sock(&smc->sk);
return -EINVAL;
+ }
if (smc->sk.sk_state == SMC_INIT ||
smc->sk.sk_state == SMC_CLOSED)
answ = 0;
@@ -1495,8 +1541,10 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
break;
case SIOCOUTQ:
/* output queue size (not send + not acked) */
- if (smc->sk.sk_state == SMC_LISTEN)
+ if (smc->sk.sk_state == SMC_LISTEN) {
+ release_sock(&smc->sk);
return -EINVAL;
+ }
if (smc->sk.sk_state == SMC_INIT ||
smc->sk.sk_state == SMC_CLOSED)
answ = 0;
@@ -1506,8 +1554,10 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
break;
case SIOCOUTQNSD:
/* output queue size (not send only) */
- if (smc->sk.sk_state == SMC_LISTEN)
+ if (smc->sk.sk_state == SMC_LISTEN) {
+ release_sock(&smc->sk);
return -EINVAL;
+ }
if (smc->sk.sk_state == SMC_INIT ||
smc->sk.sk_state == SMC_CLOSED)
answ = 0;
@@ -1515,8 +1565,10 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
answ = smc_tx_prepared_sends(&smc->conn);
break;
case SIOCATMARK:
- if (smc->sk.sk_state == SMC_LISTEN)
+ if (smc->sk.sk_state == SMC_LISTEN) {
+ release_sock(&smc->sk);
return -EINVAL;
+ }
if (smc->sk.sk_state == SMC_INIT ||
smc->sk.sk_state == SMC_CLOSED) {
answ = 0;
@@ -1532,8 +1584,10 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
}
break;
default:
+ release_sock(&smc->sk);
return -ENOIOCTLCMD;
}
+ release_sock(&smc->sk);
return put_user(answ, (int __user *)arg);
}
@@ -1619,7 +1673,7 @@ static const struct proto_ops smc_sock_ops = {
.socketpair = sock_no_socketpair,
.accept = smc_accept,
.getname = smc_getname,
- .poll_mask = smc_poll_mask,
+ .poll = smc_poll,
.ioctl = smc_ioctl,
.listen = smc_listen,
.shutdown = smc_shutdown,
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 51ae1f10d81a..d7ca26570482 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -187,11 +187,19 @@ struct smc_connection {
struct work_struct close_work; /* peer sent some closing */
};
+struct smc_connect_info {
+ int flags;
+ int alen;
+ struct sockaddr addr;
+};
+
struct smc_sock { /* smc sock container */
struct sock sk;
struct socket *clcsock; /* internal tcp socket */
struct smc_connection conn; /* smc connection */
struct smc_sock *listen_smc; /* listen parent */
+ struct smc_connect_info *connect_info; /* connect address & flags */
+ struct work_struct connect_work; /* handle non-blocking connect*/
struct work_struct tcp_listen_work;/* handle tcp socket accepts */
struct work_struct smc_listen_work;/* prepare new accept socket */
struct list_head accept_q; /* sockets to be accepted */
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 717449b1da0b..ae5d168653ce 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -250,6 +250,7 @@ out:
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type)
{
+ long rcvtimeo = smc->clcsock->sk->sk_rcvtimeo;
struct sock *clc_sk = smc->clcsock->sk;
struct smc_clc_msg_hdr *clcm = buf;
struct msghdr msg = {NULL, 0};
@@ -306,7 +307,6 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
memset(&msg, 0, sizeof(struct msghdr));
iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, datlen);
krflags = MSG_WAITALL;
- smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
len = sock_recvmsg(smc->clcsock, &msg, krflags);
if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) {
smc->sk.sk_err = EPROTO;
@@ -322,6 +322,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
}
out:
+ smc->clcsock->sk->sk_rcvtimeo = rcvtimeo;
return reason_code;
}
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index fa41d9881741..ac961dfb1ea1 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -107,6 +107,8 @@ static void smc_close_active_abort(struct smc_sock *smc)
}
switch (sk->sk_state) {
case SMC_INIT:
+ sk->sk_state = SMC_PEERABORTWAIT;
+ break;
case SMC_ACTIVE:
sk->sk_state = SMC_PEERABORTWAIT;
release_sock(sk);
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index cee666400752..f82886b7d1d8 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -495,7 +495,8 @@ out:
void smc_tx_consumer_update(struct smc_connection *conn, bool force)
{
- union smc_host_cursor cfed, cons;
+ union smc_host_cursor cfed, cons, prod;
+ int sender_free = conn->rmb_desc->len;
int to_confirm;
smc_curs_write(&cons,
@@ -505,11 +506,18 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force)
smc_curs_read(&conn->rx_curs_confirmed, conn),
conn);
to_confirm = smc_curs_diff(conn->rmb_desc->len, &cfed, &cons);
+ if (to_confirm > conn->rmbe_update_limit) {
+ smc_curs_write(&prod,
+ smc_curs_read(&conn->local_rx_ctrl.prod, conn),
+ conn);
+ sender_free = conn->rmb_desc->len -
+ smc_curs_diff(conn->rmb_desc->len, &prod, &cfed);
+ }
if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
force ||
((to_confirm > conn->rmbe_update_limit) &&
- ((to_confirm > (conn->rmb_desc->len / 2)) ||
+ ((sender_free <= (conn->rmb_desc->len / 2)) ||
conn->local_rx_ctrl.prod_flags.write_blocked))) {
if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
conn->alert_token_local) { /* connection healthy */
diff --git a/net/socket.c b/net/socket.c
index 8a109012608a..85633622c94d 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -117,10 +117,8 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
static int sock_mmap(struct file *file, struct vm_area_struct *vma);
static int sock_close(struct inode *inode, struct file *file);
-static struct wait_queue_head *sock_get_poll_head(struct file *file,
- __poll_t events);
-static __poll_t sock_poll_mask(struct file *file, __poll_t);
-static __poll_t sock_poll(struct file *file, struct poll_table_struct *wait);
+static __poll_t sock_poll(struct file *file,
+ struct poll_table_struct *wait);
static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
static long compat_sock_ioctl(struct file *file,
@@ -143,8 +141,6 @@ static const struct file_operations socket_file_ops = {
.llseek = no_llseek,
.read_iter = sock_read_iter,
.write_iter = sock_write_iter,
- .get_poll_head = sock_get_poll_head,
- .poll_mask = sock_poll_mask,
.poll = sock_poll,
.unlocked_ioctl = sock_ioctl,
#ifdef CONFIG_COMPAT
@@ -1130,48 +1126,16 @@ out_release:
}
EXPORT_SYMBOL(sock_create_lite);
-static struct wait_queue_head *sock_get_poll_head(struct file *file,
- __poll_t events)
-{
- struct socket *sock = file->private_data;
-
- if (!sock->ops->poll_mask)
- return NULL;
- sock_poll_busy_loop(sock, events);
- return sk_sleep(sock->sk);
-}
-
-static __poll_t sock_poll_mask(struct file *file, __poll_t events)
-{
- struct socket *sock = file->private_data;
-
- /*
- * We need to be sure we are in sync with the socket flags modification.
- *
- * This memory barrier is paired in the wq_has_sleeper.
- */
- smp_mb();
-
- /* this socket can poll_ll so tell the system call */
- return sock->ops->poll_mask(sock, events) |
- (sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0);
-}
-
/* No kernel lock held - perfect */
static __poll_t sock_poll(struct file *file, poll_table *wait)
{
struct socket *sock = file->private_data;
- __poll_t events = poll_requested_events(wait), mask = 0;
-
- if (sock->ops->poll) {
- sock_poll_busy_loop(sock, events);
- mask = sock->ops->poll(file, sock, wait);
- } else if (sock->ops->poll_mask) {
- sock_poll_wait(file, sock_get_poll_head(file, events), wait);
- mask = sock->ops->poll_mask(sock, events);
- }
+ __poll_t events = poll_requested_events(wait);
- return mask | sock_poll_busy_flag(sock);
+ sock_poll_busy_loop(sock, events);
+ if (!sock->ops->poll)
+ return 0;
+ return sock->ops->poll(file, sock, wait) | sock_poll_busy_flag(sock);
}
static int sock_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index 1a9695183599..625acb27efcc 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -35,7 +35,6 @@ struct _strp_msg {
*/
struct strp_msg strp;
int accum_len;
- int early_eaten;
};
static inline struct _strp_msg *_strp_msg(struct sk_buff *skb)
@@ -115,20 +114,6 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
head = strp->skb_head;
if (head) {
/* Message already in progress */
-
- stm = _strp_msg(head);
- if (unlikely(stm->early_eaten)) {
- /* Already some number of bytes on the receive sock
- * data saved in skb_head, just indicate they
- * are consumed.
- */
- eaten = orig_len <= stm->early_eaten ?
- orig_len : stm->early_eaten;
- stm->early_eaten -= eaten;
-
- return eaten;
- }
-
if (unlikely(orig_offset)) {
/* Getting data with a non-zero offset when a message is
* in progress is not expected. If it does happen, we
@@ -297,9 +282,9 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
}
stm->accum_len += cand_len;
+ eaten += cand_len;
strp->need_bytes = stm->strp.full_len -
stm->accum_len;
- stm->early_eaten = cand_len;
STRP_STATS_ADD(strp->stats.bytes, cand_len);
desc->count = 0; /* Stop reading socket */
break;
@@ -392,7 +377,7 @@ static int strp_read_sock(struct strparser *strp)
/* Lower sock lock held */
void strp_data_ready(struct strparser *strp)
{
- if (unlikely(strp->stopped))
+ if (unlikely(strp->stopped) || strp->paused)
return;
/* This check is needed to synchronize with do_strp_work.
@@ -407,9 +392,6 @@ void strp_data_ready(struct strparser *strp)
return;
}
- if (strp->paused)
- return;
-
if (strp->need_bytes) {
if (strp_peek_len(strp) < strp->need_bytes)
return;
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 9f666e0650e2..2830709957bd 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -133,6 +133,8 @@ static void disc_dupl_alert(struct tipc_bearer *b, u32 node_addr,
}
/* tipc_disc_addr_trial(): - handle an address uniqueness trial from peer
+ * Returns true if message should be dropped by caller, i.e., if it is a
+ * trial message or we are inside trial period. Otherwise false.
*/
static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
struct tipc_media_addr *maddr,
@@ -168,8 +170,9 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
}
+ /* Accept regular link requests/responses only after trial period */
if (mtyp != DSC_TRIAL_MSG)
- return false;
+ return trial;
sugg_addr = tipc_node_try_addr(net, peer_id, src);
if (sugg_addr)
@@ -284,7 +287,6 @@ static void tipc_disc_timeout(struct timer_list *t)
{
struct tipc_discoverer *d = from_timer(d, t, timer);
struct tipc_net *tn = tipc_net(d->net);
- u32 self = tipc_own_addr(d->net);
struct tipc_media_addr maddr;
struct sk_buff *skb = NULL;
struct net *net = d->net;
@@ -298,12 +300,14 @@ static void tipc_disc_timeout(struct timer_list *t)
goto exit;
}
- /* Did we just leave the address trial period ? */
- if (!self && !time_before(jiffies, tn->addr_trial_end)) {
- self = tn->trial_addr;
- tipc_net_finalize(net, self);
- msg_set_prevnode(buf_msg(d->skb), self);
+ /* Trial period over ? */
+ if (!time_before(jiffies, tn->addr_trial_end)) {
+ /* Did we just leave it ? */
+ if (!tipc_own_addr(net))
+ tipc_net_finalize(net, tn->trial_addr);
+
msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
+ msg_set_prevnode(buf_msg(d->skb), tipc_own_addr(net));
}
/* Adjust timeout interval according to discovery phase */
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 4fbaa0464405..a7f6964c3a4b 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -121,12 +121,17 @@ int tipc_net_init(struct net *net, u8 *node_id, u32 addr)
void tipc_net_finalize(struct net *net, u32 addr)
{
- tipc_set_node_addr(net, addr);
- smp_mb();
- tipc_named_reinit(net);
- tipc_sk_reinit(net);
- tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
- TIPC_CLUSTER_SCOPE, 0, addr);
+ struct tipc_net *tn = tipc_net(net);
+
+ spin_lock_bh(&tn->node_list_lock);
+ if (!tipc_own_addr(net)) {
+ tipc_set_node_addr(net, addr);
+ tipc_named_reinit(net);
+ tipc_sk_reinit(net);
+ tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
+ TIPC_CLUSTER_SCOPE, 0, addr);
+ }
+ spin_unlock_bh(&tn->node_list_lock);
}
void tipc_net_stop(struct net *net)
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 6a44eb812baf..0453bd451ce8 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -797,6 +797,7 @@ static u32 tipc_node_suggest_addr(struct net *net, u32 addr)
}
/* tipc_node_try_addr(): Check if addr can be used by peer, suggest other if not
+ * Returns suggested address if any, otherwise 0
*/
u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr)
{
@@ -819,12 +820,14 @@ u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr)
if (n) {
addr = n->addr;
tipc_node_put(n);
+ return addr;
}
- /* Even this node may be in trial phase */
+
+ /* Even this node may be in conflict */
if (tn->trial_addr == addr)
return tipc_node_suggest_addr(net, addr);
- return addr;
+ return 0;
}
void tipc_node_check_dest(struct net *net, u32 addr,
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 14a5d055717d..930852c54d7a 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -692,9 +692,10 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
}
/**
- * tipc_poll - read pollmask
+ * tipc_poll - read and possibly block on pollmask
* @file: file structure associated with the socket
* @sock: socket for which to calculate the poll bits
+ * @wait: ???
*
* Returns pollmask value
*
@@ -708,12 +709,15 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
* imply that the operation will succeed, merely that it should be performed
* and will not block.
*/
-static __poll_t tipc_poll_mask(struct socket *sock, __poll_t events)
+static __poll_t tipc_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
__poll_t revents = 0;
+ sock_poll_wait(file, sk_sleep(sk), wait);
+
if (sk->sk_shutdown & RCV_SHUTDOWN)
revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
@@ -3033,7 +3037,7 @@ static const struct proto_ops msg_ops = {
.socketpair = tipc_socketpair,
.accept = sock_no_accept,
.getname = tipc_getname,
- .poll_mask = tipc_poll_mask,
+ .poll = tipc_poll,
.ioctl = tipc_ioctl,
.listen = sock_no_listen,
.shutdown = tipc_shutdown,
@@ -3054,7 +3058,7 @@ static const struct proto_ops packet_ops = {
.socketpair = tipc_socketpair,
.accept = tipc_accept,
.getname = tipc_getname,
- .poll_mask = tipc_poll_mask,
+ .poll = tipc_poll,
.ioctl = tipc_ioctl,
.listen = tipc_listen,
.shutdown = tipc_shutdown,
@@ -3075,7 +3079,7 @@ static const struct proto_ops stream_ops = {
.socketpair = tipc_socketpair,
.accept = tipc_accept,
.getname = tipc_getname,
- .poll_mask = tipc_poll_mask,
+ .poll = tipc_poll,
.ioctl = tipc_ioctl,
.listen = tipc_listen,
.shutdown = tipc_shutdown,
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index a127d61e8af9..301f22430469 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -712,7 +712,7 @@ static int __init tls_register(void)
build_protos(tls_prots[TLSV4], &tcp_prot);
tls_sw_proto_ops = inet_stream_ops;
- tls_sw_proto_ops.poll_mask = tls_sw_poll_mask;
+ tls_sw_proto_ops.poll = tls_sw_poll;
tls_sw_proto_ops.splice_read = tls_sw_splice_read;
#ifdef CONFIG_TLS_DEVICE
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index f127fac88acf..1f3d9789af30 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -440,7 +440,7 @@ alloc_encrypted:
ret = tls_push_record(sk, msg->msg_flags, record_type);
if (!ret)
continue;
- if (ret == -EAGAIN)
+ if (ret < 0)
goto send_end;
copied -= try_to_copy;
@@ -646,6 +646,9 @@ static struct sk_buff *tls_wait_data(struct sock *sk, int flags,
return NULL;
}
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ return NULL;
+
if (sock_flag(sk, SOCK_DONE))
return NULL;
@@ -701,6 +704,10 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
nsg = skb_to_sgvec(skb, &sgin[1],
rxm->offset + tls_ctx->rx.prepend_size,
rxm->full_len - tls_ctx->rx.prepend_size);
+ if (nsg < 0) {
+ ret = nsg;
+ goto out;
+ }
tls_make_aad(ctx->rx_aad_ciphertext,
rxm->full_len - tls_ctx->rx.overhead_size,
@@ -712,6 +719,7 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
rxm->full_len - tls_ctx->rx.overhead_size,
skb, sk->sk_allocation);
+out:
if (sgin != &sgin_arr[0])
kfree(sgin);
@@ -919,22 +927,23 @@ splice_read_end:
return copied ? : err;
}
-__poll_t tls_sw_poll_mask(struct socket *sock, __poll_t events)
+unsigned int tls_sw_poll(struct file *file, struct socket *sock,
+ struct poll_table_struct *wait)
{
+ unsigned int ret;
struct sock *sk = sock->sk;
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
- __poll_t mask;
- /* Grab EPOLLOUT and EPOLLHUP from the underlying socket */
- mask = ctx->sk_poll_mask(sock, events);
+ /* Grab POLLOUT and POLLHUP from the underlying socket */
+ ret = ctx->sk_poll(file, sock, wait);
- /* Clear EPOLLIN bits, and set based on recv_pkt */
- mask &= ~(EPOLLIN | EPOLLRDNORM);
+ /* Clear POLLIN bits, and set based on recv_pkt */
+ ret &= ~(POLLIN | POLLRDNORM);
if (ctx->recv_pkt)
- mask |= EPOLLIN | EPOLLRDNORM;
+ ret |= POLLIN | POLLRDNORM;
- return mask;
+ return ret;
}
static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
@@ -1191,7 +1200,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
sk->sk_data_ready = tls_data_ready;
write_unlock_bh(&sk->sk_callback_lock);
- sw_ctx_rx->sk_poll_mask = sk->sk_socket->ops->poll_mask;
+ sw_ctx_rx->sk_poll = sk->sk_socket->ops->poll;
strp_check_rcv(&sw_ctx_rx->strp);
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 95b02a71fd47..e5473c03d667 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -638,8 +638,9 @@ static int unix_stream_connect(struct socket *, struct sockaddr *,
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
-static __poll_t unix_poll_mask(struct socket *, __poll_t);
-static __poll_t unix_dgram_poll_mask(struct socket *, __poll_t);
+static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
+static __poll_t unix_dgram_poll(struct file *, struct socket *,
+ poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
@@ -680,7 +681,7 @@ static const struct proto_ops unix_stream_ops = {
.socketpair = unix_socketpair,
.accept = unix_accept,
.getname = unix_getname,
- .poll_mask = unix_poll_mask,
+ .poll = unix_poll,
.ioctl = unix_ioctl,
.listen = unix_listen,
.shutdown = unix_shutdown,
@@ -703,7 +704,7 @@ static const struct proto_ops unix_dgram_ops = {
.socketpair = unix_socketpair,
.accept = sock_no_accept,
.getname = unix_getname,
- .poll_mask = unix_dgram_poll_mask,
+ .poll = unix_dgram_poll,
.ioctl = unix_ioctl,
.listen = sock_no_listen,
.shutdown = unix_shutdown,
@@ -725,7 +726,7 @@ static const struct proto_ops unix_seqpacket_ops = {
.socketpair = unix_socketpair,
.accept = unix_accept,
.getname = unix_getname,
- .poll_mask = unix_dgram_poll_mask,
+ .poll = unix_dgram_poll,
.ioctl = unix_ioctl,
.listen = unix_listen,
.shutdown = unix_shutdown,
@@ -2629,10 +2630,13 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
return err;
}
-static __poll_t unix_poll_mask(struct socket *sock, __poll_t events)
+static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
- __poll_t mask = 0;
+ __poll_t mask;
+
+ sock_poll_wait(file, sk_sleep(sk), wait);
+ mask = 0;
/* exceptional events? */
if (sk->sk_err)
@@ -2661,11 +2665,15 @@ static __poll_t unix_poll_mask(struct socket *sock, __poll_t events)
return mask;
}
-static __poll_t unix_dgram_poll_mask(struct socket *sock, __poll_t events)
+static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
{
struct sock *sk = sock->sk, *other;
- int writable;
- __poll_t mask = 0;
+ unsigned int writable;
+ __poll_t mask;
+
+ sock_poll_wait(file, sk_sleep(sk), wait);
+ mask = 0;
/* exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
@@ -2691,7 +2699,7 @@ static __poll_t unix_dgram_poll_mask(struct socket *sock, __poll_t events)
}
/* No write status requested, avoid expensive OUT tests. */
- if (!(events & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
+ if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
return mask;
writable = unix_writable(sk);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index bb5d5fa68c35..c1076c19b858 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -850,11 +850,18 @@ static int vsock_shutdown(struct socket *sock, int mode)
return err;
}
-static __poll_t vsock_poll_mask(struct socket *sock, __poll_t events)
+static __poll_t vsock_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
{
- struct sock *sk = sock->sk;
- struct vsock_sock *vsk = vsock_sk(sk);
- __poll_t mask = 0;
+ struct sock *sk;
+ __poll_t mask;
+ struct vsock_sock *vsk;
+
+ sk = sock->sk;
+ vsk = vsock_sk(sk);
+
+ poll_wait(file, sk_sleep(sk), wait);
+ mask = 0;
if (sk->sk_err)
/* Signify that there has been an error on this socket. */
@@ -1084,7 +1091,7 @@ static const struct proto_ops vsock_dgram_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = vsock_getname,
- .poll_mask = vsock_poll_mask,
+ .poll = vsock_poll,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = vsock_shutdown,
@@ -1842,7 +1849,7 @@ static const struct proto_ops vsock_stream_ops = {
.socketpair = sock_no_socketpair,
.accept = vsock_accept,
.getname = vsock_getname,
- .poll_mask = vsock_poll_mask,
+ .poll = vsock_poll,
.ioctl = sock_no_ioctl,
.listen = vsock_listen,
.shutdown = vsock_shutdown,
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 8e03bd3f3668..5d3cce9e8744 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -201,7 +201,7 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt)
return -ENODEV;
}
- if (le32_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid)
+ if (le64_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid)
return virtio_transport_send_pkt_loopback(vsock, pkt);
if (pkt->reply)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c7bbe5f0aae8..80bc986c79e5 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4409,6 +4409,7 @@ static int parse_station_flags(struct genl_info *info,
params->sta_flags_mask = BIT(NL80211_STA_FLAG_AUTHENTICATED) |
BIT(NL80211_STA_FLAG_MFP) |
BIT(NL80211_STA_FLAG_AUTHORIZED);
+ break;
default:
return -EINVAL;
}
@@ -6231,7 +6232,7 @@ do { \
nl80211_check_s32);
/*
* Check HT operation mode based on
- * IEEE 802.11 2012 8.4.2.59 HT Operation element.
+ * IEEE 802.11-2016 9.4.2.57 HT Operation element.
*/
if (tb[NL80211_MESHCONF_HT_OPMODE]) {
ht_opmode = nla_get_u16(tb[NL80211_MESHCONF_HT_OPMODE]);
@@ -6241,22 +6242,9 @@ do { \
IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
return -EINVAL;
- if ((ht_opmode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT) &&
- (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
- return -EINVAL;
+ /* NON_HT_STA bit is reserved, but some programs set it */
+ ht_opmode &= ~IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT;
- switch (ht_opmode & IEEE80211_HT_OP_MODE_PROTECTION) {
- case IEEE80211_HT_OP_MODE_PROTECTION_NONE:
- case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ:
- if (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)
- return -EINVAL;
- break;
- case IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER:
- case IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED:
- if (!(ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
- return -EINVAL;
- break;
- }
cfg->ht_opmode = ht_opmode;
mask |= (1 << (NL80211_MESHCONF_HT_OPMODE - 1));
}
@@ -10962,9 +10950,12 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
rem) {
u8 *mask_pat;
- nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
- nl80211_packet_pattern_policy,
- info->extack);
+ err = nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
+ nl80211_packet_pattern_policy,
+ info->extack);
+ if (err)
+ goto error;
+
err = -EINVAL;
if (!pat_tb[NL80211_PKTPAT_MASK] ||
!pat_tb[NL80211_PKTPAT_PATTERN])
@@ -11213,8 +11204,11 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
rem) {
u8 *mask_pat;
- nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
- nl80211_packet_pattern_policy, NULL);
+ err = nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
+ nl80211_packet_pattern_policy, NULL);
+ if (err)
+ return err;
+
if (!pat_tb[NL80211_PKTPAT_MASK] ||
!pat_tb[NL80211_PKTPAT_PATTERN])
return -EINVAL;
@@ -14930,20 +14924,24 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
static int __nl80211_rx_control_port(struct net_device *dev,
- const u8 *buf, size_t len,
- const u8 *addr, u16 proto,
+ struct sk_buff *skb,
bool unencrypted, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ struct ethhdr *ehdr = eth_hdr(skb);
+ const u8 *addr = ehdr->h_source;
+ u16 proto = be16_to_cpu(skb->protocol);
struct sk_buff *msg;
void *hdr;
+ struct nlattr *frame;
+
u32 nlportid = READ_ONCE(wdev->conn_owner_nlportid);
if (!nlportid)
return -ENOENT;
- msg = nlmsg_new(100 + len, gfp);
+ msg = nlmsg_new(100 + skb->len, gfp);
if (!msg)
return -ENOMEM;
@@ -14957,13 +14955,17 @@ static int __nl80211_rx_control_port(struct net_device *dev,
nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
NL80211_ATTR_PAD) ||
- nla_put(msg, NL80211_ATTR_FRAME, len, buf) ||
nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
nla_put_u16(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE, proto) ||
(unencrypted && nla_put_flag(msg,
NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT)))
goto nla_put_failure;
+ frame = nla_reserve(msg, NL80211_ATTR_FRAME, skb->len);
+ if (!frame)
+ goto nla_put_failure;
+
+ skb_copy_bits(skb, 0, nla_data(frame), skb->len);
genlmsg_end(msg, hdr);
return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
@@ -14974,14 +14976,12 @@ static int __nl80211_rx_control_port(struct net_device *dev,
}
bool cfg80211_rx_control_port(struct net_device *dev,
- const u8 *buf, size_t len,
- const u8 *addr, u16 proto, bool unencrypted)
+ struct sk_buff *skb, bool unencrypted)
{
int ret;
- trace_cfg80211_rx_control_port(dev, buf, len, addr, proto, unencrypted);
- ret = __nl80211_rx_control_port(dev, buf, len, addr, proto,
- unencrypted, GFP_ATOMIC);
+ trace_cfg80211_rx_control_port(dev, skb, unencrypted);
+ ret = __nl80211_rx_control_port(dev, skb, unencrypted, GFP_ATOMIC);
trace_cfg80211_return_bool(ret == 0);
return ret == 0;
}
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index bbe6298e4bb9..4fc66a117b7d 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2240,7 +2240,9 @@ static void wiphy_update_regulatory(struct wiphy *wiphy,
* as some drivers used this to restore its orig_* reg domain.
*/
if (initiator == NL80211_REGDOM_SET_BY_CORE &&
- wiphy->regulatory_flags & REGULATORY_CUSTOM_REG)
+ wiphy->regulatory_flags & REGULATORY_CUSTOM_REG &&
+ !(wiphy->regulatory_flags &
+ REGULATORY_WIPHY_SELF_MANAGED))
reg_call_notifier(wiphy, lr);
return;
}
@@ -2787,26 +2789,6 @@ static void notify_self_managed_wiphys(struct regulatory_request *request)
}
}
-static bool reg_only_self_managed_wiphys(void)
-{
- struct cfg80211_registered_device *rdev;
- struct wiphy *wiphy;
- bool self_managed_found = false;
-
- ASSERT_RTNL();
-
- list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
- wiphy = &rdev->wiphy;
- if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED)
- self_managed_found = true;
- else
- return false;
- }
-
- /* make sure at least one self-managed wiphy exists */
- return self_managed_found;
-}
-
/*
* Processes regulatory hints, this is all the NL80211_REGDOM_SET_BY_*
* Regulatory hints come on a first come first serve basis and we
@@ -2839,10 +2821,6 @@ static void reg_process_pending_hints(void)
spin_unlock(&reg_requests_lock);
notify_self_managed_wiphys(reg_request);
- if (reg_only_self_managed_wiphys()) {
- reg_free_request(reg_request);
- return;
- }
reg_process_hint(reg_request);
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 2b417a2fe63f..7c73510b161f 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2627,23 +2627,25 @@ TRACE_EVENT(cfg80211_mgmt_tx_status,
);
TRACE_EVENT(cfg80211_rx_control_port,
- TP_PROTO(struct net_device *netdev, const u8 *buf, size_t len,
- const u8 *addr, u16 proto, bool unencrypted),
- TP_ARGS(netdev, buf, len, addr, proto, unencrypted),
+ TP_PROTO(struct net_device *netdev, struct sk_buff *skb,
+ bool unencrypted),
+ TP_ARGS(netdev, skb, unencrypted),
TP_STRUCT__entry(
NETDEV_ENTRY
- MAC_ENTRY(addr)
+ __field(int, len)
+ MAC_ENTRY(from)
__field(u16, proto)
__field(bool, unencrypted)
),
TP_fast_assign(
NETDEV_ASSIGN;
- MAC_ASSIGN(addr, addr);
- __entry->proto = proto;
+ __entry->len = skb->len;
+ MAC_ASSIGN(from, eth_hdr(skb)->h_source);
+ __entry->proto = be16_to_cpu(skb->protocol);
__entry->unencrypted = unencrypted;
),
- TP_printk(NETDEV_PR_FMT ", " MAC_PR_FMT " proto: 0x%x, unencrypted: %s",
- NETDEV_PR_ARG, MAC_PR_ARG(addr),
+ TP_printk(NETDEV_PR_FMT ", len=%d, " MAC_PR_FMT ", proto: 0x%x, unencrypted: %s",
+ NETDEV_PR_ARG, __entry->len, MAC_PR_ARG(from),
__entry->proto, BOOL_TO_STR(__entry->unencrypted))
);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index f93365ae0fdd..d49aa79b7997 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1750,7 +1750,7 @@ static const struct proto_ops x25_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = x25_accept,
.getname = x25_getname,
- .poll_mask = datagram_poll_mask,
+ .poll = datagram_poll,
.ioctl = x25_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_x25_ioctl,
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 3b3410ada097..72335c2e8108 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -199,8 +199,11 @@ static void xsk_destruct_skb(struct sk_buff *skb)
{
u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
struct xdp_sock *xs = xdp_sk(skb->sk);
+ unsigned long flags;
+ spin_lock_irqsave(&xs->tx_completion_lock, flags);
WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr));
+ spin_unlock_irqrestore(&xs->tx_completion_lock, flags);
sock_wfree(skb);
}
@@ -215,9 +218,6 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
struct sk_buff *skb;
int err = 0;
- if (unlikely(!xs->tx))
- return -ENOBUFS;
-
mutex_lock(&xs->mutex);
while (xskq_peek_desc(xs->tx, &desc)) {
@@ -230,22 +230,13 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
goto out;
}
- if (xskq_reserve_addr(xs->umem->cq)) {
- err = -EAGAIN;
- goto out;
- }
-
- len = desc.len;
- if (unlikely(len > xs->dev->mtu)) {
- err = -EMSGSIZE;
+ if (xskq_reserve_addr(xs->umem->cq))
goto out;
- }
- if (xs->queue_id >= xs->dev->real_num_tx_queues) {
- err = -ENXIO;
+ if (xs->queue_id >= xs->dev->real_num_tx_queues)
goto out;
- }
+ len = desc.len;
skb = sock_alloc_send_skb(sk, len, 1, &err);
if (unlikely(!skb)) {
err = -EAGAIN;
@@ -268,15 +259,15 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
skb->destructor = xsk_destruct_skb;
err = dev_direct_xmit(skb, xs->queue_id);
+ xskq_discard_desc(xs->tx);
/* Ignore NET_XMIT_CN as packet might have been sent */
if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) {
- err = -EAGAIN;
- /* SKB consumed by dev_direct_xmit() */
+ /* SKB completed but not sent */
+ err = -EBUSY;
goto out;
}
sent_frame = true;
- xskq_discard_desc(xs->tx);
}
out:
@@ -297,15 +288,18 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
return -ENXIO;
if (unlikely(!(xs->dev->flags & IFF_UP)))
return -ENETDOWN;
+ if (unlikely(!xs->tx))
+ return -ENOBUFS;
if (need_wait)
return -EOPNOTSUPP;
return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len);
}
-static __poll_t xsk_poll_mask(struct socket *sock, __poll_t events)
+static unsigned int xsk_poll(struct file *file, struct socket *sock,
+ struct poll_table_struct *wait)
{
- __poll_t mask = datagram_poll_mask(sock, events);
+ unsigned int mask = datagram_poll(file, sock, wait);
struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk);
@@ -696,7 +690,7 @@ static const struct proto_ops xsk_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll_mask = xsk_poll_mask,
+ .poll = xsk_poll,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -754,6 +748,7 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
xs = xdp_sk(sk);
mutex_init(&xs->mutex);
+ spin_lock_init(&xs->tx_completion_lock);
local_bh_disable();
sock_prot_inuse_add(net, &xsk_proto, 1);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index ef6a6f0ec949..52ecaf770642 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -62,14 +62,9 @@ static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt)
return (entries > dcnt) ? dcnt : entries;
}
-static inline u32 xskq_nb_free_lazy(struct xsk_queue *q, u32 producer)
-{
- return q->nentries - (producer - q->cons_tail);
-}
-
static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
{
- u32 free_entries = xskq_nb_free_lazy(q, producer);
+ u32 free_entries = q->nentries - (producer - q->cons_tail);
if (free_entries >= dcnt)
return free_entries;
@@ -129,7 +124,7 @@ static inline int xskq_produce_addr(struct xsk_queue *q, u64 addr)
{
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
- if (xskq_nb_free(q, q->prod_tail, LAZY_UPDATE_THRESHOLD) == 0)
+ if (xskq_nb_free(q, q->prod_tail, 1) == 0)
return -ENOSPC;
ring->desc[q->prod_tail++ & q->ring_mask] = addr;