Diffstat (limited to 'net')
-rw-r--r--  net/6lowpan/core.c | 1
-rw-r--r--  net/atm/mpc.c | 1
-rw-r--r--  net/bluetooth/hci_core.c | 7
-rw-r--r--  net/bluetooth/hci_event.c | 13
-rw-r--r--  net/bluetooth/hci_sync.c | 7
-rw-r--r--  net/bluetooth/l2cap_core.c | 8
-rw-r--r--  net/bluetooth/mgmt.c | 4
-rw-r--r--  net/bluetooth/rfcomm/core.c | 2
-rw-r--r--  net/bridge/br_netfilter_hooks.c | 96
-rw-r--r--  net/bridge/br_switchdev.c | 84
-rw-r--r--  net/bridge/netfilter/nf_conntrack_bridge.c | 30
-rw-r--r--  net/can/j1939/j1939-priv.h | 3
-rw-r--r--  net/can/j1939/main.c | 2
-rw-r--r--  net/can/j1939/socket.c | 46
-rw-r--r--  net/ceph/messenger_v1.c | 33
-rw-r--r--  net/ceph/messenger_v2.c | 7
-rw-r--r--  net/ceph/osd_client.c | 27
-rw-r--r--  net/core/datagram.c | 2
-rw-r--r--  net/core/dev.c | 27
-rw-r--r--  net/core/page_pool_user.c | 3
-rw-r--r--  net/core/rtnetlink.c | 30
-rw-r--r--  net/core/skmsg.c | 7
-rw-r--r--  net/core/sock.c | 23
-rw-r--r--  net/devlink/core.c | 24
-rw-r--r--  net/devlink/port.c | 2
-rw-r--r--  net/handshake/handshake-test.c | 5
-rw-r--r--  net/hsr/hsr_forward.c | 4
-rw-r--r--  net/ipv4/af_inet.c | 6
-rw-r--r--  net/ipv4/ah4.c | 1
-rw-r--r--  net/ipv4/arp.c | 3
-rw-r--r--  net/ipv4/devinet.c | 21
-rw-r--r--  net/ipv4/esp4.c | 1
-rw-r--r--  net/ipv4/inet_hashtables.c | 25
-rw-r--r--  net/ipv4/ip_gre.c | 1
-rw-r--r--  net/ipv4/ip_output.c | 13
-rw-r--r--  net/ipv4/ip_tunnel.c | 29
-rw-r--r--  net/ipv4/ip_tunnel_core.c | 2
-rw-r--r--  net/ipv4/ip_vti.c | 1
-rw-r--r--  net/ipv4/ipip.c | 1
-rw-r--r--  net/ipv4/tcp.c | 3
-rw-r--r--  net/ipv4/tunnel4.c | 1
-rw-r--r--  net/ipv4/udp.c | 7
-rw-r--r--  net/ipv4/udp_tunnel_core.c | 1
-rw-r--r--  net/ipv4/xfrm4_tunnel.c | 1
-rw-r--r--  net/ipv6/addrconf.c | 28
-rw-r--r--  net/ipv6/ah6.c | 1
-rw-r--r--  net/ipv6/esp6.c | 1
-rw-r--r--  net/ipv6/exthdrs.c | 10
-rw-r--r--  net/ipv6/ip6_output.c | 13
-rw-r--r--  net/ipv6/ip6_udp_tunnel.c | 1
-rw-r--r--  net/ipv6/mip6.c | 1
-rw-r--r--  net/ipv6/route.c | 21
-rw-r--r--  net/ipv6/seg6.c | 20
-rw-r--r--  net/ipv6/sit.c | 1
-rw-r--r--  net/ipv6/tunnel6.c | 1
-rw-r--r--  net/ipv6/xfrm6_tunnel.c | 1
-rw-r--r--  net/iucv/iucv.c | 4
-rw-r--r--  net/key/af_key.c | 1
-rw-r--r--  net/l2tp/l2tp_ip6.c | 2
-rw-r--r--  net/mac80211/cfg.c | 16
-rw-r--r--  net/mac80211/debugfs_netdev.c | 4
-rw-r--r--  net/mac80211/debugfs_netdev.h | 5
-rw-r--r--  net/mac80211/iface.c | 2
-rw-r--r--  net/mac80211/mlme.c | 114
-rw-r--r--  net/mac80211/rate.c | 3
-rw-r--r--  net/mac80211/scan.c | 30
-rw-r--r--  net/mac80211/tx.c | 12
-rw-r--r--  net/mac80211/wbrf.c | 2
-rw-r--r--  net/mctp/route.c | 12
-rw-r--r--  net/mptcp/diag.c | 11
-rw-r--r--  net/mptcp/fastopen.c | 6
-rw-r--r--  net/mptcp/options.c | 11
-rw-r--r--  net/mptcp/pm_netlink.c | 69
-rw-r--r--  net/mptcp/pm_userspace.c | 38
-rw-r--r--  net/mptcp/protocol.c | 85
-rw-r--r--  net/mptcp/protocol.h | 52
-rw-r--r--  net/mptcp/subflow.c | 86
-rw-r--r--  net/netfilter/ipset/ip_set_core.c | 2
-rw-r--r--  net/netfilter/ipset/ip_set_hash_gen.h | 4
-rw-r--r--  net/netfilter/nf_conntrack_core.c | 1
-rw-r--r--  net/netfilter/nf_conntrack_h323_asn1.c | 4
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c | 12
-rw-r--r--  net/netfilter/nf_flow_table_core.c | 17
-rw-r--r--  net/netfilter/nf_nat_core.c | 5
-rw-r--r--  net/netfilter/nf_tables_api.c | 92
-rw-r--r--  net/netfilter/nfnetlink_queue.c | 13
-rw-r--r--  net/netfilter/nft_compat.c | 37
-rw-r--r--  net/netfilter/nft_ct.c | 14
-rw-r--r--  net/netfilter/nft_flow_offload.c | 1
-rw-r--r--  net/netfilter/nft_set_hash.c | 8
-rw-r--r--  net/netfilter/nft_set_pipapo.c | 128
-rw-r--r--  net/netfilter/nft_set_pipapo.h | 22
-rw-r--r--  net/netfilter/nft_set_pipapo_avx2.c | 19
-rw-r--r--  net/netfilter/nft_set_rbtree.c | 17
-rw-r--r--  net/netlink/af_netlink.c | 2
-rw-r--r--  net/netrom/af_netrom.c | 14
-rw-r--r--  net/netrom/nr_dev.c | 2
-rw-r--r--  net/netrom/nr_in.c | 6
-rw-r--r--  net/netrom/nr_out.c | 2
-rw-r--r--  net/netrom/nr_route.c | 8
-rw-r--r--  net/netrom/nr_subr.c | 5
-rw-r--r--  net/openvswitch/flow_netlink.c | 49
-rw-r--r--  net/phonet/datagram.c | 4
-rw-r--r--  net/phonet/pep.c | 41
-rw-r--r--  net/rds/rdma.c | 3
-rw-r--r--  net/rds/recv.c | 13
-rw-r--r--  net/rds/send.c | 6
-rw-r--r--  net/rxrpc/ar-internal.h | 37
-rw-r--r--  net/rxrpc/call_event.c | 12
-rw-r--r--  net/rxrpc/call_object.c | 1
-rw-r--r--  net/rxrpc/conn_event.c | 10
-rw-r--r--  net/rxrpc/input.c | 115
-rw-r--r--  net/rxrpc/output.c | 8
-rw-r--r--  net/rxrpc/proc.c | 2
-rw-r--r--  net/rxrpc/rxkad.c | 4
-rw-r--r--  net/sched/act_mirred.c | 38
-rw-r--r--  net/sched/cls_flower.c | 5
-rw-r--r--  net/sched/em_canid.c | 1
-rw-r--r--  net/sched/em_cmp.c | 1
-rw-r--r--  net/sched/em_meta.c | 1
-rw-r--r--  net/sched/em_nbyte.c | 1
-rw-r--r--  net/sched/em_text.c | 1
-rw-r--r--  net/sched/em_u32.c | 1
-rw-r--r--  net/sctp/inqueue.c | 14
-rw-r--r--  net/smc/af_smc.c | 1
-rw-r--r--  net/switchdev/switchdev.c | 73
-rw-r--r--  net/tipc/bearer.c | 6
-rw-r--r--  net/tls/tls_main.c | 2
-rw-r--r--  net/tls/tls_sw.c | 193
-rw-r--r--  net/unix/af_unix.c | 19
-rw-r--r--  net/unix/garbage.c | 10
-rw-r--r--  net/wireless/core.c | 3
-rw-r--r--  net/wireless/nl80211.c | 2
-rw-r--r--  net/wireless/scan.c | 59
-rw-r--r--  net/xdp/xsk.c | 3
-rw-r--r--  net/xfrm/xfrm_algo.c | 1
-rw-r--r--  net/xfrm/xfrm_device.c | 2
-rw-r--r--  net/xfrm/xfrm_output.c | 6
-rw-r--r--  net/xfrm/xfrm_policy.c | 6
-rw-r--r--  net/xfrm/xfrm_user.c | 4
140 files changed, 1684 insertions(+), 788 deletions(-)
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
index 7b3341cef926..850d4a185f55 100644
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -179,4 +179,5 @@ static void __exit lowpan_module_exit(void)
module_init(lowpan_module_init);
module_exit(lowpan_module_exit);
+MODULE_DESCRIPTION("IPv6 over Low-Power Wireless Personal Area Network core module");
MODULE_LICENSE("GPL");
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 033871e718a3..324e3ab96bb3 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -1532,4 +1532,5 @@ static void __exit atm_mpoa_cleanup(void)
module_init(atm_mpoa_init);
module_exit(atm_mpoa_cleanup);
+MODULE_DESCRIPTION("Multi-Protocol Over ATM (MPOA) driver");
MODULE_LICENSE("GPL");
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 65601aa52e0d..2821a42cefdc 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1049,6 +1049,7 @@ static void hci_error_reset(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset);
+ hci_dev_hold(hdev);
BT_DBG("%s", hdev->name);
if (hdev->hw_error)
@@ -1056,10 +1057,10 @@ static void hci_error_reset(struct work_struct *work)
else
bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code);
- if (hci_dev_do_close(hdev))
- return;
+ if (!hci_dev_do_close(hdev))
+ hci_dev_do_open(hdev);
- hci_dev_do_open(hdev);
+ hci_dev_put(hdev);
}
void hci_uuids_clear(struct hci_dev *hdev)
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index ef8c3bed7361..2a5f5a7d2412 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5329,9 +5329,12 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
- if (!conn || !hci_conn_ssp_enabled(conn))
+ if (!conn || !hci_dev_test_flag(hdev, HCI_SSP_ENABLED))
goto unlock;
+ /* Assume remote supports SSP since it has triggered this event */
+ set_bit(HCI_CONN_SSP_ENABLED, &conn->flags);
+
hci_conn_hold(conn);
if (!hci_dev_test_flag(hdev, HCI_MGMT))
@@ -6794,6 +6797,10 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data,
return send_conn_param_neg_reply(hdev, handle,
HCI_ERROR_UNKNOWN_CONN_ID);
+ if (max > hcon->le_conn_max_interval)
+ return send_conn_param_neg_reply(hdev, handle,
+ HCI_ERROR_INVALID_LL_PARAMS);
+
if (hci_check_conn_params(min, max, latency, timeout))
return send_conn_param_neg_reply(hdev, handle,
HCI_ERROR_INVALID_LL_PARAMS);
@@ -7420,10 +7427,10 @@ static void hci_store_wake_reason(struct hci_dev *hdev, u8 event,
* keep track of the bdaddr of the connection event that woke us up.
*/
if (event == HCI_EV_CONN_REQUEST) {
- bacpy(&hdev->wake_addr, &conn_complete->bdaddr);
+ bacpy(&hdev->wake_addr, &conn_request->bdaddr);
hdev->wake_addr_type = BDADDR_BREDR;
} else if (event == HCI_EV_CONN_COMPLETE) {
- bacpy(&hdev->wake_addr, &conn_request->bdaddr);
+ bacpy(&hdev->wake_addr, &conn_complete->bdaddr);
hdev->wake_addr_type = BDADDR_BREDR;
} else if (event == HCI_EV_LE_META) {
struct hci_ev_le_meta *le_ev = (void *)skb->data;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index a6fc8a2a5c67..5716345a26df 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -2206,8 +2206,11 @@ static int hci_le_add_accept_list_sync(struct hci_dev *hdev,
/* During suspend, only wakeable devices can be in acceptlist */
if (hdev->suspended &&
- !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP))
+ !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) {
+ hci_le_del_accept_list_sync(hdev, &params->addr,
+ params->addr_type);
return 0;
+ }
/* Select filter policy to accept all advertising */
if (*num_entries >= hdev->le_accept_list_size)
@@ -5559,7 +5562,7 @@ static int hci_inquiry_sync(struct hci_dev *hdev, u8 length)
bt_dev_dbg(hdev, "");
- if (hci_dev_test_flag(hdev, HCI_INQUIRY))
+ if (test_bit(HCI_INQUIRY, &hdev->flags))
return 0;
hci_dev_lock(hdev);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 60298975d5c4..656f49b299d2 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -5613,7 +5613,13 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn,
memset(&rsp, 0, sizeof(rsp));
- err = hci_check_conn_params(min, max, latency, to_multiplier);
+ if (max > hcon->le_conn_max_interval) {
+ BT_DBG("requested connection interval exceeds current bounds.");
+ err = -EINVAL;
+ } else {
+ err = hci_check_conn_params(min, max, latency, to_multiplier);
+ }
+
if (err)
rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED);
else
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index bb72ff6eb22f..ee3b4aad8bd8 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1045,6 +1045,8 @@ static void rpa_expired(struct work_struct *work)
hci_cmd_sync_queue(hdev, rpa_expired_sync, NULL, NULL);
}
+static int set_discoverable_sync(struct hci_dev *hdev, void *data);
+
static void discov_off(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev,
@@ -1063,7 +1065,7 @@ static void discov_off(struct work_struct *work)
hci_dev_clear_flag(hdev, HCI_DISCOVERABLE);
hdev->discov_timeout = 0;
- hci_update_discoverable(hdev);
+ hci_cmd_sync_queue(hdev, set_discoverable_sync, NULL, NULL);
mgmt_new_settings(hdev);
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 053ef8f25fae..1d34d8497033 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1941,7 +1941,7 @@ static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s)
/* Get data directly from socket receive queue without copying it. */
while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
skb_orphan(skb);
- if (!skb_linearize(skb)) {
+ if (!skb_linearize(skb) && sk->sk_state != BT_CLOSED) {
s = rfcomm_recv_frame(s, skb);
if (!s)
break;
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index ed1720890757..35e10c5a766d 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -43,6 +43,10 @@
#include <linux/sysctl.h>
#endif
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack_core.h>
+#endif
+
static unsigned int brnf_net_id __read_mostly;
struct brnf_net {
@@ -553,6 +557,90 @@ static unsigned int br_nf_pre_routing(void *priv,
return NF_STOLEN;
}
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+/* conntracks' nf_confirm logic cannot handle cloned skbs referencing
+ * the same nf_conn entry, which will happen for multicast (broadcast)
+ * Frames on bridges.
+ *
+ * Example:
+ * macvlan0
+ * br0
+ * ethX ethY
+ *
+ * ethX (or Y) receives multicast or broadcast packet containing
+ * an IP packet, not yet in conntrack table.
+ *
+ * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting.
+ * -> skb->_nfct now references a unconfirmed entry
+ * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge
+ * interface.
+ * 3. skb gets passed up the stack.
+ * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb
+ * and schedules a work queue to send them out on the lower devices.
+ *
+ * The clone skb->_nfct is not a copy, it is the same entry as the
+ * original skb. The macvlan rx handler then returns RX_HANDLER_PASS.
+ * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb.
+ *
+ * The Macvlan broadcast worker and normal confirm path will race.
+ *
+ * This race will not happen if step 2 already confirmed a clone. In that
+ * case later steps perform skb_clone() with skb->_nfct already confirmed (in
+ * hash table). This works fine.
+ *
+ * But such confirmation won't happen when eb/ip/nftables rules dropped the
+ * packets before they reached the nf_confirm step in postrouting.
+ *
+ * Work around this problem by explicit confirmation of the entry at
+ * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed
+ * entry.
+ *
+ */
+static unsigned int br_nf_local_in(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nf_conntrack *nfct = skb_nfct(skb);
+ const struct nf_ct_hook *ct_hook;
+ struct nf_conn *ct;
+ int ret;
+
+ if (!nfct || skb->pkt_type == PACKET_HOST)
+ return NF_ACCEPT;
+
+ ct = container_of(nfct, struct nf_conn, ct_general);
+ if (likely(nf_ct_is_confirmed(ct)))
+ return NF_ACCEPT;
+
+ WARN_ON_ONCE(skb_shared(skb));
+ WARN_ON_ONCE(refcount_read(&nfct->use) != 1);
+
+ /* We can't call nf_confirm here, it would create a dependency
+ * on nf_conntrack module.
+ */
+ ct_hook = rcu_dereference(nf_ct_hook);
+ if (!ct_hook) {
+ skb->_nfct = 0ul;
+ nf_conntrack_put(nfct);
+ return NF_ACCEPT;
+ }
+
+ nf_bridge_pull_encap_header(skb);
+ ret = ct_hook->confirm(skb);
+ switch (ret & NF_VERDICT_MASK) {
+ case NF_STOLEN:
+ return NF_STOLEN;
+ default:
+ nf_bridge_push_encap_header(skb);
+ break;
+ }
+
+ ct = container_of(nfct, struct nf_conn, ct_general);
+ WARN_ON_ONCE(!nf_ct_is_confirmed(ct));
+
+ return ret;
+}
+#endif
/* PF_BRIDGE/FORWARD *************************************************/
static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -964,6 +1052,14 @@ static const struct nf_hook_ops br_nf_ops[] = {
.hooknum = NF_BR_PRE_ROUTING,
.priority = NF_BR_PRI_BRNF,
},
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ {
+ .hook = br_nf_local_in,
+ .pf = NFPROTO_BRIDGE,
+ .hooknum = NF_BR_LOCAL_IN,
+ .priority = NF_BR_PRI_LAST,
+ },
+#endif
{
.hook = br_nf_forward,
.pf = NFPROTO_BRIDGE,
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index ee84e783e1df..7b41ee8740cb 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -595,21 +595,40 @@ br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
}
static int br_switchdev_mdb_queue_one(struct list_head *mdb_list,
+ struct net_device *dev,
+ unsigned long action,
enum switchdev_obj_id id,
const struct net_bridge_mdb_entry *mp,
struct net_device *orig_dev)
{
- struct switchdev_obj_port_mdb *mdb;
+ struct switchdev_obj_port_mdb mdb = {
+ .obj = {
+ .id = id,
+ .orig_dev = orig_dev,
+ },
+ };
+ struct switchdev_obj_port_mdb *pmdb;
- mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC);
- if (!mdb)
- return -ENOMEM;
+ br_switchdev_mdb_populate(&mdb, mp);
+
+ if (action == SWITCHDEV_PORT_OBJ_ADD &&
+ switchdev_port_obj_act_is_deferred(dev, action, &mdb.obj)) {
+ /* This event is already in the deferred queue of
+ * events, so this replay must be elided, lest the
+ * driver receives duplicate events for it. This can
+ * only happen when replaying additions, since
+ * modifications are always immediately visible in
+ * br->mdb_list, whereas actual event delivery may be
+ * delayed.
+ */
+ return 0;
+ }
- mdb->obj.id = id;
- mdb->obj.orig_dev = orig_dev;
- br_switchdev_mdb_populate(mdb, mp);
- list_add_tail(&mdb->obj.list, mdb_list);
+ pmdb = kmemdup(&mdb, sizeof(mdb), GFP_ATOMIC);
+ if (!pmdb)
+ return -ENOMEM;
+ list_add_tail(&pmdb->obj.list, mdb_list);
return 0;
}
@@ -677,51 +696,50 @@ br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev,
if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
return 0;
- /* We cannot walk over br->mdb_list protected just by the rtnl_mutex,
- * because the write-side protection is br->multicast_lock. But we
- * need to emulate the [ blocking ] calling context of a regular
- * switchdev event, so since both br->multicast_lock and RCU read side
- * critical sections are atomic, we have no choice but to pick the RCU
- * read side lock, queue up all our events, leave the critical section
- * and notify switchdev from blocking context.
+ if (adding)
+ action = SWITCHDEV_PORT_OBJ_ADD;
+ else
+ action = SWITCHDEV_PORT_OBJ_DEL;
+
+ /* br_switchdev_mdb_queue_one() will take care to not queue a
+ * replay of an event that is already pending in the switchdev
+ * deferred queue. In order to safely determine that, there
+ * must be no new deferred MDB notifications enqueued for the
+ * duration of the MDB scan. Therefore, grab the write-side
+ * lock to avoid racing with any concurrent IGMP/MLD snooping.
*/
- rcu_read_lock();
+ spin_lock_bh(&br->multicast_lock);
- hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
+ hlist_for_each_entry(mp, &br->mdb_list, mdb_node) {
struct net_bridge_port_group __rcu * const *pp;
const struct net_bridge_port_group *p;
if (mp->host_joined) {
- err = br_switchdev_mdb_queue_one(&mdb_list,
+ err = br_switchdev_mdb_queue_one(&mdb_list, dev, action,
SWITCHDEV_OBJ_ID_HOST_MDB,
mp, br_dev);
if (err) {
- rcu_read_unlock();
+ spin_unlock_bh(&br->multicast_lock);
goto out_free_mdb;
}
}
- for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
+ for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
if (p->key.port->dev != dev)
continue;
- err = br_switchdev_mdb_queue_one(&mdb_list,
+ err = br_switchdev_mdb_queue_one(&mdb_list, dev, action,
SWITCHDEV_OBJ_ID_PORT_MDB,
mp, dev);
if (err) {
- rcu_read_unlock();
+ spin_unlock_bh(&br->multicast_lock);
goto out_free_mdb;
}
}
}
- rcu_read_unlock();
-
- if (adding)
- action = SWITCHDEV_PORT_OBJ_ADD;
- else
- action = SWITCHDEV_PORT_OBJ_DEL;
+ spin_unlock_bh(&br->multicast_lock);
list_for_each_entry(obj, &mdb_list, list) {
err = br_switchdev_mdb_replay_one(nb, dev,
@@ -786,6 +804,16 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p,
br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
br_switchdev_vlan_replay(br_dev, ctx, false, blocking_nb, NULL);
+
+ /* Make sure that the device leaving this bridge has seen all
+ * relevant events before it is disassociated. In the normal
+ * case, when the device is directly attached to the bridge,
+ * this is covered by del_nbp(). If the association was indirect
+ * however, e.g. via a team or bond, and the device is leaving
+ * that intermediate device, then the bridge port remains in
+ * place.
+ */
+ switchdev_deferred_process();
}
/* Let the bridge know that this port is offloaded, so that it can assign a
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index abb090f94ed2..6f877e31709b 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -291,6 +291,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
return nf_conntrack_in(skb, &bridge_state);
}
+static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ if (skb->pkt_type == PACKET_HOST)
+ return NF_ACCEPT;
+
+ /* nf_conntrack_confirm() cannot handle concurrent clones,
+ * this happens for broad/multicast frames with e.g. macvlan on top
+ * of the bridge device.
+ */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
+ return NF_ACCEPT;
+
+ /* let inet prerouting call conntrack again */
+ skb->_nfct = 0;
+ nf_ct_put(ct);
+
+ return NF_ACCEPT;
+}
+
static void nf_ct_bridge_frag_save(struct sk_buff *skb,
struct nf_bridge_frag_data *data)
{
@@ -386,6 +410,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
.priority = NF_IP_PRI_CONNTRACK,
},
{
+ .hook = nf_ct_bridge_in,
+ .pf = NFPROTO_BRIDGE,
+ .hooknum = NF_BR_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
+ },
+ {
.hook = nf_ct_bridge_post,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_POST_ROUTING,
diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h
index 16af1a7f80f6..31a93cae5111 100644
--- a/net/can/j1939/j1939-priv.h
+++ b/net/can/j1939/j1939-priv.h
@@ -86,7 +86,7 @@ struct j1939_priv {
unsigned int tp_max_packet_size;
/* lock for j1939_socks list */
- spinlock_t j1939_socks_lock;
+ rwlock_t j1939_socks_lock;
struct list_head j1939_socks;
struct kref rx_kref;
@@ -301,6 +301,7 @@ struct j1939_sock {
int ifindex;
struct j1939_addr addr;
+ spinlock_t filters_lock;
struct j1939_filter *filters;
int nfilters;
pgn_t pgn_rx_filter;
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index ecff1c947d68..a6fb89fa6278 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -274,7 +274,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
return ERR_PTR(-ENOMEM);
j1939_tp_init(priv);
- spin_lock_init(&priv->j1939_socks_lock);
+ rwlock_init(&priv->j1939_socks_lock);
INIT_LIST_HEAD(&priv->j1939_socks);
mutex_lock(&j1939_netdev_lock);
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 14c431663233..305dd72c844c 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -80,16 +80,16 @@ static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk)
jsk->state |= J1939_SOCK_BOUND;
j1939_priv_get(priv);
- spin_lock_bh(&priv->j1939_socks_lock);
+ write_lock_bh(&priv->j1939_socks_lock);
list_add_tail(&jsk->list, &priv->j1939_socks);
- spin_unlock_bh(&priv->j1939_socks_lock);
+ write_unlock_bh(&priv->j1939_socks_lock);
}
static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk)
{
- spin_lock_bh(&priv->j1939_socks_lock);
+ write_lock_bh(&priv->j1939_socks_lock);
list_del_init(&jsk->list);
- spin_unlock_bh(&priv->j1939_socks_lock);
+ write_unlock_bh(&priv->j1939_socks_lock);
j1939_priv_put(priv);
jsk->state &= ~J1939_SOCK_BOUND;
@@ -262,12 +262,17 @@ static bool j1939_sk_match_dst(struct j1939_sock *jsk,
static bool j1939_sk_match_filter(struct j1939_sock *jsk,
const struct j1939_sk_buff_cb *skcb)
{
- const struct j1939_filter *f = jsk->filters;
- int nfilter = jsk->nfilters;
+ const struct j1939_filter *f;
+ int nfilter;
+
+ spin_lock_bh(&jsk->filters_lock);
+
+ f = jsk->filters;
+ nfilter = jsk->nfilters;
if (!nfilter)
/* receive all when no filters are assigned */
- return true;
+ goto filter_match_found;
for (; nfilter; ++f, --nfilter) {
if ((skcb->addr.pgn & f->pgn_mask) != f->pgn)
@@ -276,9 +281,15 @@ static bool j1939_sk_match_filter(struct j1939_sock *jsk,
continue;
if ((skcb->addr.src_name & f->name_mask) != f->name)
continue;
- return true;
+ goto filter_match_found;
}
+
+ spin_unlock_bh(&jsk->filters_lock);
return false;
+
+filter_match_found:
+ spin_unlock_bh(&jsk->filters_lock);
+ return true;
}
static bool j1939_sk_recv_match_one(struct j1939_sock *jsk,
@@ -329,13 +340,13 @@ bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb)
struct j1939_sock *jsk;
bool match = false;
- spin_lock_bh(&priv->j1939_socks_lock);
+ read_lock_bh(&priv->j1939_socks_lock);
list_for_each_entry(jsk, &priv->j1939_socks, list) {
match = j1939_sk_recv_match_one(jsk, skcb);
if (match)
break;
}
- spin_unlock_bh(&priv->j1939_socks_lock);
+ read_unlock_bh(&priv->j1939_socks_lock);
return match;
}
@@ -344,11 +355,11 @@ void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb)
{
struct j1939_sock *jsk;
- spin_lock_bh(&priv->j1939_socks_lock);
+ read_lock_bh(&priv->j1939_socks_lock);
list_for_each_entry(jsk, &priv->j1939_socks, list) {
j1939_sk_recv_one(jsk, skb);
}
- spin_unlock_bh(&priv->j1939_socks_lock);
+ read_unlock_bh(&priv->j1939_socks_lock);
}
static void j1939_sk_sock_destruct(struct sock *sk)
@@ -401,6 +412,7 @@ static int j1939_sk_init(struct sock *sk)
atomic_set(&jsk->skb_pending, 0);
spin_lock_init(&jsk->sk_session_queue_lock);
INIT_LIST_HEAD(&jsk->sk_session_queue);
+ spin_lock_init(&jsk->filters_lock);
/* j1939_sk_sock_destruct() depends on SOCK_RCU_FREE flag */
sock_set_flag(sk, SOCK_RCU_FREE);
@@ -703,9 +715,11 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname,
}
lock_sock(&jsk->sk);
+ spin_lock_bh(&jsk->filters_lock);
ofilters = jsk->filters;
jsk->filters = filters;
jsk->nfilters = count;
+ spin_unlock_bh(&jsk->filters_lock);
release_sock(&jsk->sk);
kfree(ofilters);
return 0;
@@ -1080,12 +1094,12 @@ void j1939_sk_errqueue(struct j1939_session *session,
}
/* spread RX notifications to all sockets subscribed to this session */
- spin_lock_bh(&priv->j1939_socks_lock);
+ read_lock_bh(&priv->j1939_socks_lock);
list_for_each_entry(jsk, &priv->j1939_socks, list) {
if (j1939_sk_recv_match_one(jsk, &session->skcb))
__j1939_sk_errqueue(session, &jsk->sk, type);
}
- spin_unlock_bh(&priv->j1939_socks_lock);
+ read_unlock_bh(&priv->j1939_socks_lock);
};
void j1939_sk_send_loop_abort(struct sock *sk, int err)
@@ -1273,7 +1287,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
struct j1939_sock *jsk;
int error_code = ENETDOWN;
- spin_lock_bh(&priv->j1939_socks_lock);
+ read_lock_bh(&priv->j1939_socks_lock);
list_for_each_entry(jsk, &priv->j1939_socks, list) {
jsk->sk.sk_err = error_code;
if (!sock_flag(&jsk->sk, SOCK_DEAD))
@@ -1281,7 +1295,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
j1939_sk_queue_drop_all(priv, jsk, error_code);
}
- spin_unlock_bh(&priv->j1939_socks_lock);
+ read_unlock_bh(&priv->j1939_socks_lock);
}
static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd,
diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c
index f9a50d7f0d20..0cb61c76b9b8 100644
--- a/net/ceph/messenger_v1.c
+++ b/net/ceph/messenger_v1.c
@@ -160,8 +160,9 @@ static size_t sizeof_footer(struct ceph_connection *con)
static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
{
/* Initialize data cursor if it's not a sparse read */
- if (!msg->sparse_read)
- ceph_msg_data_cursor_init(&msg->cursor, msg, data_len);
+ u64 len = msg->sparse_read_total ? : data_len;
+
+ ceph_msg_data_cursor_init(&msg->cursor, msg, len);
}
/*
@@ -991,7 +992,7 @@ static inline int read_partial_message_section(struct ceph_connection *con,
return read_partial_message_chunk(con, section, sec_len, crc);
}
-static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
+static int read_partial_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
{
struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE);
@@ -1026,7 +1027,7 @@ static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
return 1;
}
-static int read_sparse_msg_data(struct ceph_connection *con)
+static int read_partial_sparse_msg_data(struct ceph_connection *con)
{
struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
@@ -1036,31 +1037,31 @@ static int read_sparse_msg_data(struct ceph_connection *con)
if (do_datacrc)
crc = con->in_data_crc;
- do {
+ while (cursor->total_resid) {
if (con->v1.in_sr_kvec.iov_base)
ret = read_partial_message_chunk(con,
&con->v1.in_sr_kvec,
con->v1.in_sr_len,
&crc);
else if (cursor->sr_resid > 0)
- ret = read_sparse_msg_extent(con, &crc);
-
- if (ret <= 0) {
- if (do_datacrc)
- con->in_data_crc = crc;
- return ret;
- }
+ ret = read_partial_sparse_msg_extent(con, &crc);
+ if (ret <= 0)
+ break;
memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec));
ret = con->ops->sparse_read(con, cursor,
(char **)&con->v1.in_sr_kvec.iov_base);
+ if (ret <= 0) {
+ ret = ret ? ret : 1; /* must return > 0 to indicate success */
+ break;
+ }
con->v1.in_sr_len = ret;
- } while (ret > 0);
+ }
if (do_datacrc)
con->in_data_crc = crc;
- return ret < 0 ? ret : 1; /* must return > 0 to indicate success */
+ return ret;
}
static int read_partial_msg_data(struct ceph_connection *con)
@@ -1253,8 +1254,8 @@ static int read_partial_message(struct ceph_connection *con)
if (!m->num_data_items)
return -EIO;
- if (m->sparse_read)
- ret = read_sparse_msg_data(con);
+ if (m->sparse_read_total)
+ ret = read_partial_sparse_msg_data(con);
else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
ret = read_partial_msg_data_bounce(con);
else
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index f8ec60e1aba3..bd608ffa0627 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -1128,7 +1128,7 @@ static int decrypt_tail(struct ceph_connection *con)
struct sg_table enc_sgt = {};
struct sg_table sgt = {};
struct page **pages = NULL;
- bool sparse = con->in_msg->sparse_read;
+ bool sparse = !!con->in_msg->sparse_read_total;
int dpos = 0;
int tail_len;
int ret;
@@ -2034,6 +2034,9 @@ static int prepare_sparse_read_data(struct ceph_connection *con)
if (!con_secure(con))
con->in_data_crc = -1;
+ ceph_msg_data_cursor_init(&con->v2.in_cursor, msg,
+ msg->sparse_read_total);
+
reset_in_kvecs(con);
con->v2.in_state = IN_S_PREPARE_SPARSE_DATA_CONT;
con->v2.data_len_remain = data_len(msg);
@@ -2060,7 +2063,7 @@ static int prepare_read_tail_plain(struct ceph_connection *con)
}
if (data_len(msg)) {
- if (msg->sparse_read)
+ if (msg->sparse_read_total)
con->v2.in_state = IN_S_PREPARE_SPARSE_DATA;
else
con->v2.in_state = IN_S_PREPARE_READ_DATA;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 625622016f57..9d078b37fe0b 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -5510,7 +5510,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
}
m = ceph_msg_get(req->r_reply);
- m->sparse_read = (bool)srlen;
+ m->sparse_read_total = srlen;
dout("get_reply tid %lld %p\n", tid, m);
@@ -5777,11 +5777,8 @@ static int prep_next_sparse_read(struct ceph_connection *con,
}
if (o->o_sparse_op_idx < 0) {
- u64 srlen = sparse_data_requested(req);
-
- dout("%s: [%d] starting new sparse read req. srlen=0x%llx\n",
- __func__, o->o_osd, srlen);
- ceph_msg_data_cursor_init(cursor, con->in_msg, srlen);
+ dout("%s: [%d] starting new sparse read req\n",
+ __func__, o->o_osd);
} else {
u64 end;
@@ -5857,8 +5854,8 @@ static int osd_sparse_read(struct ceph_connection *con,
struct ceph_osd *o = con->private;
struct ceph_sparse_read *sr = &o->o_sparse_read;
u32 count = sr->sr_count;
- u64 eoff, elen;
- int ret;
+ u64 eoff, elen, len = 0;
+ int i, ret;
switch (sr->sr_state) {
case CEPH_SPARSE_READ_HDR:
@@ -5903,8 +5900,20 @@ next_op:
convert_extent_map(sr);
ret = sizeof(sr->sr_datalen);
*pbuf = (char *)&sr->sr_datalen;
- sr->sr_state = CEPH_SPARSE_READ_DATA;
+ sr->sr_state = CEPH_SPARSE_READ_DATA_PRE;
break;
+ case CEPH_SPARSE_READ_DATA_PRE:
+ /* Convert sr_datalen to host-endian */
+ sr->sr_datalen = le32_to_cpu((__force __le32)sr->sr_datalen);
+ for (i = 0; i < count; i++)
+ len += sr->sr_extent[i].len;
+ if (sr->sr_datalen != len) {
+ pr_warn_ratelimited("data len %u != extent len %llu\n",
+ sr->sr_datalen, len);
+ return -EREMOTEIO;
+ }
+ sr->sr_state = CEPH_SPARSE_READ_DATA;
+ fallthrough;
case CEPH_SPARSE_READ_DATA:
if (sr->sr_index >= count) {
sr->sr_state = CEPH_SPARSE_READ_HDR;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 103d46fa0eeb..a8b625abe242 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -751,7 +751,7 @@ size_t memcpy_to_iter_csum(void *iter_to, size_t progress,
size_t len, void *from, void *priv2)
{
__wsum *csum = priv2;
- __wsum next = csum_partial_copy_nocheck(from, iter_to, len);
+ __wsum next = csum_partial_copy_nocheck(from + progress, iter_to, len);
*csum = csum_block_add(*csum, next, progress);
return 0;
diff --git a/net/core/dev.c b/net/core/dev.c
index cb2dab0feee0..76e6438f4858 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -336,7 +336,7 @@ int netdev_name_node_alt_create(struct net_device *dev, const char *name)
return -ENOMEM;
netdev_name_node_add(net, name_node);
/* The node that holds dev->name acts as a head of per-device list. */
- list_add_tail(&name_node->list, &dev->name_node->list);
+ list_add_tail_rcu(&name_node->list, &dev->name_node->list);
return 0;
}
@@ -9074,28 +9074,6 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b)
}
EXPORT_SYMBOL(netdev_port_same_parent_id);
-static void netdev_dpll_pin_assign(struct net_device *dev, struct dpll_pin *dpll_pin)
-{
-#if IS_ENABLED(CONFIG_DPLL)
- rtnl_lock();
- dev->dpll_pin = dpll_pin;
- rtnl_unlock();
-#endif
-}
-
-void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin)
-{
- WARN_ON(!dpll_pin);
- netdev_dpll_pin_assign(dev, dpll_pin);
-}
-EXPORT_SYMBOL(netdev_dpll_pin_set);
-
-void netdev_dpll_pin_clear(struct net_device *dev)
-{
- netdev_dpll_pin_assign(dev, NULL);
-}
-EXPORT_SYMBOL(netdev_dpll_pin_clear);
-
/**
* dev_change_proto_down - set carrier according to proto_down.
*
@@ -11652,11 +11630,12 @@ static void __init net_dev_struct_check(void)
CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 160);
/* TXRX read-mostly hotpath */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, lstats);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, flags);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, hard_header_len);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, features);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, ip6_ptr);
- CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 30);
+ CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 38);
/* RX read-mostly hotpath */
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ptype_specific);
diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c
index ffe5244e5597..278294aca66a 100644
--- a/net/core/page_pool_user.c
+++ b/net/core/page_pool_user.c
@@ -94,11 +94,12 @@ netdev_nl_page_pool_get_dump(struct sk_buff *skb, struct netlink_callback *cb,
state->pp_id = pool->user.id;
err = fill(skb, pool, info);
if (err)
- break;
+ goto out;
}
state->pp_id = 0;
}
+out:
mutex_unlock(&page_pools_lock);
rtnl_unlock();
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f6f29eb03ec2..bd50e9fe3234 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1020,14 +1020,17 @@ static size_t rtnl_xdp_size(void)
static size_t rtnl_prop_list_size(const struct net_device *dev)
{
struct netdev_name_node *name_node;
- size_t size;
+ unsigned int cnt = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(name_node, &dev->name_node->list, list)
+ cnt++;
+ rcu_read_unlock();
- if (list_empty(&dev->name_node->list))
+ if (!cnt)
return 0;
- size = nla_total_size(0);
- list_for_each_entry(name_node, &dev->name_node->list, list)
- size += nla_total_size(ALTIFNAMSIZ);
- return size;
+
+ return nla_total_size(0) + cnt * nla_total_size(ALTIFNAMSIZ);
}
static size_t rtnl_proto_down_size(const struct net_device *dev)
@@ -1054,7 +1057,7 @@ static size_t rtnl_dpll_pin_size(const struct net_device *dev)
{
size_t size = nla_total_size(0); /* nest IFLA_DPLL_PIN */
- size += dpll_msg_pin_handle_size(netdev_dpll_pin(dev));
+ size += dpll_netdev_pin_handle_size(dev);
return size;
}
@@ -1789,7 +1792,7 @@ static int rtnl_fill_dpll_pin(struct sk_buff *skb,
if (!dpll_pin_nest)
return -EMSGSIZE;
- ret = dpll_msg_add_pin_handle(skb, netdev_dpll_pin(dev));
+ ret = dpll_netdev_add_pin_handle(skb, dev);
if (ret < 0)
goto nest_cancel;
@@ -5166,10 +5169,9 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
struct net_device *dev;
- struct nlattr *br_spec, *attr = NULL;
+ struct nlattr *br_spec, *attr, *br_flags_attr = NULL;
int rem, err = -EOPNOTSUPP;
u16 flags = 0;
- bool have_flags = false;
if (nlmsg_len(nlh) < sizeof(*ifm))
return -EINVAL;
@@ -5187,11 +5189,11 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
if (br_spec) {
nla_for_each_nested(attr, br_spec, rem) {
- if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) {
+ if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !br_flags_attr) {
if (nla_len(attr) < sizeof(flags))
return -EINVAL;
- have_flags = true;
+ br_flags_attr = attr;
flags = nla_get_u16(attr);
}
@@ -5235,8 +5237,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}
- if (have_flags)
- memcpy(nla_data(attr), &flags, sizeof(flags));
+ if (br_flags_attr)
+ memcpy(nla_data(br_flags_attr), &flags, sizeof(flags));
out:
return err;
}
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 93ecfceac1bc..4d75ef9d24bf 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1226,8 +1226,11 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
rcu_read_lock();
psock = sk_psock(sk);
- if (psock)
- psock->saved_data_ready(sk);
+ if (psock) {
+ read_lock_bh(&sk->sk_callback_lock);
+ sk_psock_data_ready(sk, psock);
+ read_unlock_bh(&sk->sk_callback_lock);
+ }
rcu_read_unlock();
}
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 0a7f46c37f0c..5e78798456fd 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1188,6 +1188,17 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
*/
WRITE_ONCE(sk->sk_txrehash, (u8)val);
return 0;
+ case SO_PEEK_OFF:
+ {
+ int (*set_peek_off)(struct sock *sk, int val);
+
+ set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
+ if (set_peek_off)
+ ret = set_peek_off(sk, val);
+ else
+ ret = -EOPNOTSUPP;
+ return ret;
+ }
}
sockopt_lock_sock(sk);
@@ -1430,18 +1441,6 @@ set_sndbuf:
sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
break;
- case SO_PEEK_OFF:
- {
- int (*set_peek_off)(struct sock *sk, int val);
-
- set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
- if (set_peek_off)
- ret = set_peek_off(sk, val);
- else
- ret = -EOPNOTSUPP;
- break;
- }
-
case SO_NOFCS:
sock_valbool_flag(sk, SOCK_NOFCS, valbool);
break;
diff --git a/net/devlink/core.c b/net/devlink/core.c
index 4275a2bc6d8e..7f0b093208d7 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -46,7 +46,7 @@ struct devlink_rel {
u32 obj_index;
devlink_rel_notify_cb_t *notify_cb;
devlink_rel_cleanup_cb_t *cleanup_cb;
- struct work_struct notify_work;
+ struct delayed_work notify_work;
} nested_in;
};
@@ -70,7 +70,7 @@ static void __devlink_rel_put(struct devlink_rel *rel)
static void devlink_rel_nested_in_notify_work(struct work_struct *work)
{
struct devlink_rel *rel = container_of(work, struct devlink_rel,
- nested_in.notify_work);
+ nested_in.notify_work.work);
struct devlink *devlink;
devlink = devlinks_xa_get(rel->nested_in.devlink_index);
@@ -96,13 +96,13 @@ rel_put:
return;
reschedule_work:
- schedule_work(&rel->nested_in.notify_work);
+ schedule_delayed_work(&rel->nested_in.notify_work, 1);
}
static void devlink_rel_nested_in_notify_work_schedule(struct devlink_rel *rel)
{
__devlink_rel_get(rel);
- schedule_work(&rel->nested_in.notify_work);
+ schedule_delayed_work(&rel->nested_in.notify_work, 0);
}
static struct devlink_rel *devlink_rel_alloc(void)
@@ -123,8 +123,8 @@ static struct devlink_rel *devlink_rel_alloc(void)
}
refcount_set(&rel->refcount, 1);
- INIT_WORK(&rel->nested_in.notify_work,
- &devlink_rel_nested_in_notify_work);
+ INIT_DELAYED_WORK(&rel->nested_in.notify_work,
+ &devlink_rel_nested_in_notify_work);
return rel;
}
@@ -529,14 +529,20 @@ static int __init devlink_init(void)
{
int err;
- err = genl_register_family(&devlink_nl_family);
- if (err)
- goto out;
err = register_pernet_subsys(&devlink_pernet_ops);
if (err)
goto out;
+ err = genl_register_family(&devlink_nl_family);
+ if (err)
+ goto out_unreg_pernet_subsys;
err = register_netdevice_notifier(&devlink_port_netdevice_nb);
+ if (!err)
+ return 0;
+
+ genl_unregister_family(&devlink_nl_family);
+out_unreg_pernet_subsys:
+ unregister_pernet_subsys(&devlink_pernet_ops);
out:
WARN_ON(err);
return err;
diff --git a/net/devlink/port.c b/net/devlink/port.c
index 78592912f657..4b2d46ccfe48 100644
--- a/net/devlink/port.c
+++ b/net/devlink/port.c
@@ -583,7 +583,7 @@ devlink_nl_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
xa_for_each_start(&devlink->ports, port_index, devlink_port, state->idx) {
err = devlink_nl_port_fill(msg, devlink_port,
- DEVLINK_CMD_NEW,
+ DEVLINK_CMD_PORT_NEW,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, flags,
cb->extack);
diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c
index 16ed7bfd29e4..34fd1d9b2db8 100644
--- a/net/handshake/handshake-test.c
+++ b/net/handshake/handshake-test.c
@@ -471,7 +471,10 @@ static void handshake_req_destroy_test1(struct kunit *test)
handshake_req_cancel(sock->sk);
/* Act */
- fput(filp);
+ /* Ensure the close/release/put process has run to
+ * completion before checking the result.
+ */
+ __fput_sync(filp);
/* Assert */
KUNIT_EXPECT_PTR_EQ(test, handshake_req_destroy_test, req);
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 80cdc6f6b34c..5d68cb181695 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -83,7 +83,7 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb)
return false;
/* Get next tlv */
- total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tag->tlv.HSR_TLV_length;
+ total_length += hsr_sup_tag->tlv.HSR_TLV_length;
if (!pskb_may_pull(skb, total_length))
return false;
skb_pull(skb, total_length);
@@ -435,7 +435,7 @@ static void hsr_forward_do(struct hsr_frame_info *frame)
continue;
/* Don't send frame over port where it has been sent before.
- * Also fro SAN, this shouldn't be done.
+ * Also for SAN, this shouldn't be done.
*/
if (!frame->is_from_san &&
hsr_register_frame_out(port, frame->node_src,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 4e635dd3d3c8..a5a820ee2026 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1628,10 +1628,12 @@ EXPORT_SYMBOL(inet_current_timestamp);
int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
{
- if (sk->sk_family == AF_INET)
+ unsigned int family = READ_ONCE(sk->sk_family);
+
+ if (family == AF_INET)
return ip_recv_error(sk, msg, len, addr_len);
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
+ if (family == AF_INET6)
return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len);
#endif
return -EINVAL;
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index a2e6e1fdf82b..64aec3dff8ec 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -597,5 +597,6 @@ static void __exit ah4_fini(void)
module_init(ah4_init);
module_exit(ah4_fini);
+MODULE_DESCRIPTION("IPv4 AH transformation library");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_AH);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 9456f5bb35e5..0d0d725b46ad 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1125,7 +1125,8 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
if (neigh) {
if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) {
read_lock_bh(&neigh->lock);
- memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len);
+ memcpy(r->arp_ha.sa_data, neigh->ha,
+ min(dev->addr_len, sizeof(r->arp_ha.sa_data_min)));
r->arp_flags = arp_state_to_flags(neigh);
read_unlock_bh(&neigh->lock);
r->arp_ha.sa_family = dev->type;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ca0ff15dc8fa..bc74f131fe4d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1825,6 +1825,21 @@ done:
return err;
}
+/* Combine dev_addr_genid and dev_base_seq to detect changes.
+ */
+static u32 inet_base_seq(const struct net *net)
+{
+ u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
+ net->dev_base_seq;
+
+ /* Must not return 0 (see nl_dump_check_consistent()).
+ * Chose a value far away from 0.
+ */
+ if (!res)
+ res = 0x80000000;
+ return res;
+}
+
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = cb->nlh;
@@ -1876,8 +1891,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
idx = 0;
head = &tgt_net->dev_index_head[h];
rcu_read_lock();
- cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
- tgt_net->dev_base_seq;
+ cb->seq = inet_base_seq(tgt_net);
hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
@@ -2278,8 +2292,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
idx = 0;
head = &net->dev_index_head[h];
rcu_read_lock();
- cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
- net->dev_base_seq;
+ cb->seq = inet_base_seq(net);
hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 4ccfc104f13a..4dd9e5040672 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -1247,5 +1247,6 @@ static void __exit esp4_fini(void)
module_init(esp4_init);
module_exit(esp4_fini);
+MODULE_DESCRIPTION("IPv4 ESP transformation library");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_ESP);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 93e9193df544..308ff34002ea 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -1130,10 +1130,33 @@ ok:
return 0;
error:
+ if (sk_hashed(sk)) {
+ spinlock_t *lock = inet_ehash_lockp(hinfo, sk->sk_hash);
+
+ sock_prot_inuse_add(net, sk->sk_prot, -1);
+
+ spin_lock(lock);
+ sk_nulls_del_node_init_rcu(sk);
+ spin_unlock(lock);
+
+ sk->sk_hash = 0;
+ inet_sk(sk)->inet_sport = 0;
+ inet_sk(sk)->inet_num = 0;
+
+ if (tw)
+ inet_twsk_bind_unhash(tw, hinfo);
+ }
+
spin_unlock(&head2->lock);
if (tb_created)
inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
- spin_unlock_bh(&head->lock);
+ spin_unlock(&head->lock);
+
+ if (tw)
+ inet_twsk_deschedule_put(tw);
+
+ local_bh_enable();
+
return -ENOMEM;
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 5169c3c72cff..6b9cf5a24c19 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1793,6 +1793,7 @@ static void __exit ipgre_fini(void)
module_init(ipgre_init);
module_exit(ipgre_fini);
+MODULE_DESCRIPTION("IPv4 GRE tunnels over IP library");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 41537d18eecf..67d846622365 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -972,8 +972,8 @@ static int __ip_append_data(struct sock *sk,
unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
int csummode = CHECKSUM_NONE;
struct rtable *rt = (struct rtable *)cork->dst;
+ bool paged, hold_tskey, extra_uref = false;
unsigned int wmem_alloc_delta = 0;
- bool paged, extra_uref = false;
u32 tskey = 0;
skb = skb_peek_tail(queue);
@@ -982,10 +982,6 @@ static int __ip_append_data(struct sock *sk,
mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
paged = !!cork->gso_size;
- if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
- READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
- tskey = atomic_inc_return(&sk->sk_tskey) - 1;
-
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
@@ -1052,6 +1048,11 @@ static int __ip_append_data(struct sock *sk,
cork->length += length;
+ hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
+ if (hold_tskey)
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+
/* So, what's going on in the loop below?
*
* We use calculated fragment length to generate chained skb,
@@ -1274,6 +1275,8 @@ error:
cork->length -= length;
IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
+ if (hold_tskey)
+ atomic_dec(&sk->sk_tskey);
return err;
}
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index beeae624c412..1b6981de3f29 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -554,6 +554,20 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
return 0;
}
+static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
+{
+ /* we must cap headroom to some upperlimit, else pskb_expand_head
+ * will overflow header offsets in skb_headers_offset_update().
+ */
+ static const unsigned int max_allowed = 512;
+
+ if (headroom > max_allowed)
+ headroom = max_allowed;
+
+ if (headroom > READ_ONCE(dev->needed_headroom))
+ WRITE_ONCE(dev->needed_headroom, headroom);
+}
+
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
u8 proto, int tunnel_hlen)
{
@@ -632,13 +646,13 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
- if (headroom > READ_ONCE(dev->needed_headroom))
- WRITE_ONCE(dev->needed_headroom, headroom);
-
- if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
+ if (skb_cow_head(skb, headroom)) {
ip_rt_put(rt);
goto tx_dropped;
}
+
+ ip_tunnel_adj_headroom(dev, headroom);
+
iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
df, !net_eq(tunnel->net, dev_net(dev)));
return;
@@ -818,16 +832,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
- if (max_headroom > READ_ONCE(dev->needed_headroom))
- WRITE_ONCE(dev->needed_headroom, max_headroom);
- if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
+ if (skb_cow_head(skb, max_headroom)) {
ip_rt_put(rt);
DEV_STATS_INC(dev, tx_dropped);
kfree_skb(skb);
return;
}
+ ip_tunnel_adj_headroom(dev, max_headroom);
+
iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
df, !net_eq(tunnel->net, dev_net(dev)));
return;
@@ -1298,4 +1312,5 @@ void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);
+MODULE_DESCRIPTION("IPv4 tunnel implementation library");
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 586b1b3e35b8..80ccd6661aa3 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -332,7 +332,7 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu)
};
skb_reset_network_header(skb);
- csum = csum_partial(icmp6h, len, 0);
+ csum = skb_checksum(skb, skb_transport_offset(skb), len, 0);
icmp6h->icmp6_cksum = csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, len,
IPPROTO_ICMPV6, csum);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 9ab9b3ebe0cd..d1d6bb28ed6e 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -721,6 +721,7 @@ static void __exit vti_fini(void)
module_init(vti_init);
module_exit(vti_fini);
+MODULE_DESCRIPTION("Virtual (secure) IP tunneling library");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("vti");
MODULE_ALIAS_NETDEV("ip_vti0");
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 27b8f83c6ea2..03afa3871efc 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -658,6 +658,7 @@ static void __exit ipip_fini(void)
module_init(ipip_init);
module_exit(ipip_fini);
+MODULE_DESCRIPTION("IP/IP protocol decoder library");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("ipip");
MODULE_ALIAS_NETDEV("tunl0");
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7e2481b9eae1..c82dc42f57c6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4615,7 +4615,8 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, prr_out);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, lost_out);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, sacked_out);
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 31);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, scaling_ratio);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 32);
/* RX read-mostly hotpath cache lines */
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, copied_seq);
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 5048c47c79b2..4c1f836aae38 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -294,4 +294,5 @@ static void __exit tunnel4_fini(void)
module_init(tunnel4_init);
module_exit(tunnel4_fini);
+MODULE_DESCRIPTION("IPv4 XFRM tunnel library");
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f631b0a21af4..e474b201900f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1589,12 +1589,7 @@ int udp_init_sock(struct sock *sk)
void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
{
- if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) {
- bool slow = lock_sock_fast(sk);
-
- sk_peek_offset_bwd(sk, len);
- unlock_sock_fast(sk, slow);
- }
+ sk_peek_offset_bwd(sk, len);
if (!skb_unref(skb))
return;
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index a87defb2b167..860aff5f8599 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -253,4 +253,5 @@ struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb,
}
EXPORT_SYMBOL_GPL(udp_tunnel_dst_lookup);
+MODULE_DESCRIPTION("IPv4 Foo over UDP tunnel driver");
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 8489fa106583..8cb266af1393 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -114,5 +114,6 @@ static void __exit ipip_fini(void)
module_init(ipip_init);
module_exit(ipip_fini);
+MODULE_DESCRIPTION("IPv4 XFRM tunnel driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_IPIP);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 733ace18806c..055230b669cf 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -708,6 +708,22 @@ errout:
return err;
}
+/* Combine dev_addr_genid and dev_base_seq to detect changes.
+ */
+static u32 inet6_base_seq(const struct net *net)
+{
+ u32 res = atomic_read(&net->ipv6.dev_addr_genid) +
+ net->dev_base_seq;
+
+ /* Must not return 0 (see nl_dump_check_consistent()).
+ * Chose a value far away from 0.
+ */
+ if (!res)
+ res = 0x80000000;
+ return res;
+}
+
+
static int inet6_netconf_dump_devconf(struct sk_buff *skb,
struct netlink_callback *cb)
{
@@ -741,8 +757,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb,
idx = 0;
head = &net->dev_index_head[h];
rcu_read_lock();
- cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^
- net->dev_base_seq;
+ cb->seq = inet6_base_seq(net);
hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
@@ -5362,7 +5377,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
}
rcu_read_lock();
- cb->seq = atomic_read(&tgt_net->ipv6.dev_addr_genid) ^ tgt_net->dev_base_seq;
+ cb->seq = inet6_base_seq(tgt_net);
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
head = &tgt_net->dev_index_head[h];
@@ -5494,9 +5509,10 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
}
addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
- if (!addr)
- return -EINVAL;
-
+ if (!addr) {
+ err = -EINVAL;
+ goto errout;
+ }
ifm = nlmsg_data(nlh);
if (ifm->ifa_index)
dev = dev_get_by_index(tgt_net, ifm->ifa_index);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 2016e90e6e1d..eb474f0987ae 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -800,5 +800,6 @@ static void __exit ah6_fini(void)
module_init(ah6_init);
module_exit(ah6_fini);
+MODULE_DESCRIPTION("IPv6 AH transformation helpers");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_AH);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 2cc1a45742d8..6e6efe026cdc 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -1301,5 +1301,6 @@ static void __exit esp6_fini(void)
module_init(esp6_init);
module_exit(esp6_fini);
+MODULE_DESCRIPTION("IPv6 ESP transformation helpers");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ESP);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 4952ae792450..02e9ffb63af1 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -177,6 +177,8 @@ static bool ip6_parse_tlv(bool hopbyhop,
case IPV6_TLV_IOAM:
if (!ipv6_hop_ioam(skb, off))
return false;
+
+ nh = skb_network_header(skb);
break;
case IPV6_TLV_JUMBO:
if (!ipv6_hop_jumbo(skb, off))
@@ -943,6 +945,14 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
if (!skb_valid_dst(skb))
ip6_route_input(skb);
+ /* About to mangle packet header */
+ if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len))
+ goto drop;
+
+ /* Trace pointer may have changed */
+ trace = (struct ioam6_trace_hdr *)(skb_network_header(skb)
+ + optoff + sizeof(*hdr));
+
ioam6_fill_trace_data(skb, ns, trace, true);
break;
default:
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a722a43dd668..31b86fe661aa 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1424,11 +1424,11 @@ static int __ip6_append_data(struct sock *sk,
bool zc = false;
u32 tskey = 0;
struct rt6_info *rt = (struct rt6_info *)cork->dst;
+ bool paged, hold_tskey, extra_uref = false;
struct ipv6_txoptions *opt = v6_cork->opt;
int csummode = CHECKSUM_NONE;
unsigned int maxnonfragsize, headersize;
unsigned int wmem_alloc_delta = 0;
- bool paged, extra_uref = false;
skb = skb_peek_tail(queue);
if (!skb) {
@@ -1440,10 +1440,6 @@ static int __ip6_append_data(struct sock *sk,
mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
orig_mtu = mtu;
- if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
- READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
- tskey = atomic_inc_return(&sk->sk_tskey) - 1;
-
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
@@ -1538,6 +1534,11 @@ emsgsize:
flags &= ~MSG_SPLICE_PAGES;
}
+ hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
+ if (hold_tskey)
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+
/*
* Let's try using as much space as possible.
* Use MTU if total length of the message fits into the MTU.
@@ -1794,6 +1795,8 @@ error:
cork->length -= length;
IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
+ if (hold_tskey)
+ atomic_dec(&sk->sk_tskey);
return err;
}
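
The ip6_output change moves the sk_tskey increment past the last early return and remembers whether it was taken, so the error path can undo it. A small sketch of that reserve-and-roll-back pattern with a plain C11 atomic counter, illustrative only:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_uint tskey_counter;

/* Take an id from the shared counter only once the failure paths below
 * can give it back, mirroring the hold_tskey/atomic_dec pairing above.
 */
static int append_data(bool want_key, bool fail)
{
	bool hold_key = want_key;
	unsigned int key = 0;

	if (hold_key)
		key = atomic_fetch_add(&tskey_counter, 1);

	if (fail) {
		if (hold_key)
			atomic_fetch_sub(&tskey_counter, 1);	/* undo the reservation */
		return -1;
	}

	printf("using key %u\n", key);
	return 0;
}

int main(void)
{
	append_data(true, true);	/* fails, counter rolled back */
	append_data(true, false);	/* still gets key 0 */
	return 0;
}
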
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index a7bf0327b380..c99053189ea8 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -182,4 +182,5 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb,
}
EXPORT_SYMBOL_GPL(udp_tunnel6_dst_lookup);
+MODULE_DESCRIPTION("IPv6 Foo over UDP tunnel driver");
MODULE_LICENSE("GPL");
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 83d2a8be263f..6a16a5bd0d91 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -405,6 +405,7 @@ static void __exit mip6_fini(void)
module_init(mip6_init);
module_exit(mip6_fini);
+MODULE_DESCRIPTION("IPv6 Mobility driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_DSTOPTS);
MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ROUTING);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ea1dec8448fc..ef815ba583a8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5332,19 +5332,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
err_nh = NULL;
list_for_each_entry(nh, &rt6_nh_list, next) {
err = __ip6_ins_rt(nh->fib6_info, info, extack);
- fib6_info_release(nh->fib6_info);
-
- if (!err) {
- /* save reference to last route successfully inserted */
- rt_last = nh->fib6_info;
-
- /* save reference to first route for notification */
- if (!rt_notif)
- rt_notif = nh->fib6_info;
- }
- /* nh->fib6_info is used or freed at this point, reset to NULL*/
- nh->fib6_info = NULL;
if (err) {
if (replace && nhn)
NL_SET_ERR_MSG_MOD(extack,
@@ -5352,6 +5340,12 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
err_nh = nh;
goto add_errout;
}
+ /* save reference to last route successfully inserted */
+ rt_last = nh->fib6_info;
+
+ /* save reference to first route for notification */
+ if (!rt_notif)
+ rt_notif = nh->fib6_info;
/* Because each route is added like a single route we remove
* these flags after the first nexthop: if there is a collision,
@@ -5412,8 +5406,7 @@ add_errout:
cleanup:
list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
- if (nh->fib6_info)
- fib6_info_release(nh->fib6_info);
+ fib6_info_release(nh->fib6_info);
list_del(&nh->next);
kfree(nh);
}
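
The route.c hunks keep the fib6_info reference alive until after the pointer is last read and fold the release into the common cleanup loop, instead of saving a pointer that was already released. A generic refcounting sketch of the same ordering, with made-up obj helpers:

#include <stdlib.h>

struct obj {
	int refs;
	int data;
};

static struct obj *obj_get(int data)
{
	struct obj *o = calloc(1, sizeof(*o));

	if (o) {
		o->refs = 1;
		o->data = data;
	}
	return o;
}

static void obj_put(struct obj *o)
{
	if (o && --o->refs == 0)
		free(o);
}

int main(void)
{
	struct obj *o = obj_get(42);
	struct obj *last;

	if (!o)
		return 1;

	last = o;		/* safe only because the reference is still held */
	(void)last->data;

	obj_put(o);		/* single release point, after the last use */
	return 0;
}
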
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 29346a6eec9f..35508abd76f4 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -512,22 +512,24 @@ int __init seg6_init(void)
{
int err;
- err = genl_register_family(&seg6_genl_family);
+ err = register_pernet_subsys(&ip6_segments_ops);
if (err)
goto out;
- err = register_pernet_subsys(&ip6_segments_ops);
+ err = genl_register_family(&seg6_genl_family);
if (err)
- goto out_unregister_genl;
+ goto out_unregister_pernet;
#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
err = seg6_iptunnel_init();
if (err)
- goto out_unregister_pernet;
+ goto out_unregister_genl;
err = seg6_local_init();
- if (err)
- goto out_unregister_pernet;
+ if (err) {
+ seg6_iptunnel_exit();
+ goto out_unregister_genl;
+ }
#endif
#ifdef CONFIG_IPV6_SEG6_HMAC
@@ -548,11 +550,11 @@ out_unregister_iptun:
#endif
#endif
#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
-out_unregister_pernet:
- unregister_pernet_subsys(&ip6_segments_ops);
-#endif
out_unregister_genl:
genl_unregister_family(&seg6_genl_family);
+#endif
+out_unregister_pernet:
+ unregister_pernet_subsys(&ip6_segments_ops);
goto out;
}
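
The seg6 init-path reshuffle restores the usual rule that error unwinding tears resources down in the reverse order of their setup, so nothing is unregistered while something registered later is still live. A standalone sketch of that goto-based unwind shape:

#include <stdio.h>

static int setup_a(void) { puts("A registered"); return 0; }
static int setup_b(void) { return -1; }		/* pretend B fails */
static void teardown_a(void) { puts("A unregistered"); }

static int init_all(void)
{
	int err;

	err = setup_a();
	if (err)
		goto out;

	err = setup_b();
	if (err)
		goto undo_a;	/* undo only what was set up before B */

	return 0;

undo_a:
	teardown_a();
out:
	return err;
}

int main(void)
{
	return init_all() ? 1 : 0;
}
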
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index cc24cefdb85c..5e9f625b76e3 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1956,6 +1956,7 @@ xfrm_tunnel_failed:
module_init(sit_init);
module_exit(sit_cleanup);
+MODULE_DESCRIPTION("IPv6-in-IPv4 tunnel SIT driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("sit");
MODULE_ALIAS_NETDEV("sit0");
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index 00e8d8b1c9a7..dc4ea9b11794 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -302,4 +302,5 @@ static void __exit tunnel6_fini(void)
module_init(tunnel6_init);
module_exit(tunnel6_fini);
+MODULE_DESCRIPTION("IP-in-IPv6 tunnel driver");
MODULE_LICENSE("GPL");
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 1323f2f6928e..f6cb94f82cc3 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -401,5 +401,6 @@ static void __exit xfrm6_tunnel_fini(void)
module_init(xfrm6_tunnel_init);
module_exit(xfrm6_tunnel_fini);
+MODULE_DESCRIPTION("IPv6 XFRM tunnel driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_IPV6);
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 6334f64f04d5..b0b3e9c5af44 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -156,7 +156,7 @@ static char iucv_error_pathid[16] = "INVALID PATHID";
static LIST_HEAD(iucv_handler_list);
/*
- * iucv_path_table: an array of iucv_path structures.
+ * iucv_path_table: array of pointers to iucv_path structures.
*/
static struct iucv_path **iucv_path_table;
static unsigned long iucv_max_pathid;
@@ -544,7 +544,7 @@ static int iucv_enable(void)
cpus_read_lock();
rc = -ENOMEM;
- alloc_size = iucv_max_pathid * sizeof(struct iucv_path);
+ alloc_size = iucv_max_pathid * sizeof(*iucv_path_table);
iucv_path_table = kzalloc(alloc_size, GFP_KERNEL);
if (!iucv_path_table)
goto out;
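
The iucv fix sizes the allocation from the destination pointer's pointed-to type, which is an array of pointers rather than an array of structures. The same idiom in a self-contained form, where struct path is a stand-in, not the kernel type:

#include <stdlib.h>

struct path {
	int id;
	char name[32];
};

int main(void)
{
	struct path **table;
	size_t max = 128;

	/* Size the buffer from what 'table' points at: an array of pointers,
	 * not an array of struct path.
	 */
	table = calloc(max, sizeof(*table));
	if (!table)
		return 1;

	free(table);
	return 0;
}
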
diff --git a/net/key/af_key.c b/net/key/af_key.c
index d68d01804dc7..f79fb99271ed 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3924,5 +3924,6 @@ out_unregister_key_proto:
module_init(ipsec_pfkey_init);
module_exit(ipsec_pfkey_exit);
+MODULE_DESCRIPTION("PF_KEY socket helpers");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_KEY);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index dd3153966173..7bf14cf9ffaa 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -627,7 +627,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
back_from_confirm:
lock_sock(sk);
- ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0;
+ ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0);
err = ip6_append_data(sk, ip_generic_getfrag, msg,
ulen, transhdrlen, &ipc6,
&fl6, (struct rt6_info *)dst,
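
The l2tp_ip6 one-liner is a precedence fix: '+' binds more tightly than '?:', so without the parentheses the expression selected between transhdrlen and 0 based on (len + queue_empty) instead of adding the header length conditionally. A tiny demonstration:

#include <stdio.h>

int main(void)
{
	int len = 100, transhdrlen = 8;
	int queue_empty = 1;

	/* '+' binds tighter than '?:', so this evaluates
	 * (len + queue_empty) ? transhdrlen : 0 and yields 8.
	 */
	int wrong = len + queue_empty ? transhdrlen : 0;

	/* The parenthesized form adds the header length conditionally: 108. */
	int right = len + (queue_empty ? transhdrlen : 0);

	printf("wrong=%d right=%d\n", wrong, right);
	return 0;
}
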
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 489dd97f5172..327682995c92 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -987,7 +987,8 @@ static int
ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata,
struct cfg80211_unsol_bcast_probe_resp *params,
struct ieee80211_link_data *link,
- struct ieee80211_bss_conf *link_conf)
+ struct ieee80211_bss_conf *link_conf,
+ u64 *changed)
{
struct unsol_bcast_probe_resp_data *new, *old = NULL;
@@ -1011,7 +1012,8 @@ ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata,
RCU_INIT_POINTER(link->u.ap.unsol_bcast_probe_resp, NULL);
}
- return BSS_CHANGED_UNSOL_BCAST_PROBE_RESP;
+ *changed |= BSS_CHANGED_UNSOL_BCAST_PROBE_RESP;
+ return 0;
}
static int ieee80211_set_ftm_responder_params(
@@ -1450,10 +1452,9 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
err = ieee80211_set_unsol_bcast_probe_resp(sdata,
&params->unsol_bcast_probe_resp,
- link, link_conf);
+ link, link_conf, &changed);
if (err < 0)
goto error;
- changed |= err;
err = drv_start_ap(sdata->local, sdata, link_conf);
if (err) {
@@ -1525,10 +1526,9 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
err = ieee80211_set_unsol_bcast_probe_resp(sdata,
&params->unsol_bcast_probe_resp,
- link, link_conf);
+ link, link_conf, &changed);
if (err < 0)
return err;
- changed |= err;
if (beacon->he_bss_color_valid &&
beacon->he_bss_color.enabled != link_conf->he_bss_color.enabled) {
@@ -1869,6 +1869,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
sband->band);
}
+ ieee80211_sta_set_rx_nss(link_sta);
+
return ret;
}
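
The cfg.c refactor makes ieee80211_set_unsol_bcast_probe_resp() report its changed-flags through an out parameter and keep the return value for 0 or -errno, so callers can no longer OR a negative error code into a bitmask. A sketch of that API shape; the flag value and names are illustrative, not the mac80211 ones:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define CHANGED_PROBE_RESP	0x4	/* illustrative flag, not the mac80211 value */

static int set_probe_resp(int valid, uint64_t *changed)
{
	if (!valid)
		return -EINVAL;	/* return value carries only 0 or -errno */

	*changed |= CHANGED_PROBE_RESP;
	return 0;
}

int main(void)
{
	uint64_t changed = 0;
	int err = set_probe_resp(1, &changed);

	if (err < 0)
		return 1;

	printf("changed=%#llx\n", (unsigned long long)changed);
	return 0;
}
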
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index dce5606ed66d..68596ef78b15 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -997,8 +997,8 @@ static void add_link_files(struct ieee80211_link_data *link,
}
}
-void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata,
- bool mld_vif)
+static void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata,
+ bool mld_vif)
{
char buf[10+IFNAMSIZ];
diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h
index b226b1aae88a..a02ec0a413f6 100644
--- a/net/mac80211/debugfs_netdev.h
+++ b/net/mac80211/debugfs_netdev.h
@@ -11,8 +11,6 @@
#include "ieee80211_i.h"
#ifdef CONFIG_MAC80211_DEBUGFS
-void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata,
- bool mld_vif);
void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata);
void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata);
void ieee80211_debugfs_recreate_netdev(struct ieee80211_sub_if_data *sdata,
@@ -24,9 +22,6 @@ void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link);
void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link);
void ieee80211_link_debugfs_drv_remove(struct ieee80211_link_data *link);
#else
-static inline void ieee80211_debugfs_add_netdev(
- struct ieee80211_sub_if_data *sdata, bool mld_vif)
-{}
static inline void ieee80211_debugfs_remove_netdev(
struct ieee80211_sub_if_data *sdata)
{}
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index e4e7c0b38cb6..11c4caa4748e 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1783,7 +1783,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
/* need to do this after the switch so vif.type is correct */
ieee80211_link_setup(&sdata->deflink);
- ieee80211_debugfs_add_netdev(sdata, false);
+ ieee80211_debugfs_recreate_netdev(sdata, false);
}
static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 073105deb424..2022a26eb881 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -8,7 +8,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2023 Intel Corporation
+ * Copyright (C) 2018 - 2024 Intel Corporation
*/
#include <linux/delay.h>
@@ -2918,6 +2918,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
/* other links will be destroyed */
sdata->deflink.u.mgd.bss = NULL;
+ sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
netif_carrier_off(sdata->dev);
@@ -5045,9 +5046,6 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
if (!link)
return 0;
- /* will change later if needed */
- link->smps_mode = IEEE80211_SMPS_OFF;
-
/*
* If this fails (possibly due to channel context sharing
* on incompatible channels, e.g. 80+80 and 160 sharing the
@@ -7096,6 +7094,7 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link)
link->u.mgd.p2p_noa_index = -1;
link->u.mgd.conn_flags = 0;
link->conf->bssid = link->u.mgd.bssid;
+ link->smps_mode = IEEE80211_SMPS_OFF;
wiphy_work_init(&link->u.mgd.request_smps_work,
ieee80211_request_smps_mgd_work);
@@ -7309,6 +7308,75 @@ out_err:
return err;
}
+static bool ieee80211_mgd_csa_present(struct ieee80211_sub_if_data *sdata,
+ const struct cfg80211_bss_ies *ies,
+ u8 cur_channel, bool ignore_ecsa)
+{
+ const struct element *csa_elem, *ecsa_elem;
+ struct ieee80211_channel_sw_ie *csa = NULL;
+ struct ieee80211_ext_chansw_ie *ecsa = NULL;
+
+ if (!ies)
+ return false;
+
+ csa_elem = cfg80211_find_elem(WLAN_EID_CHANNEL_SWITCH,
+ ies->data, ies->len);
+ if (csa_elem && csa_elem->datalen == sizeof(*csa))
+ csa = (void *)csa_elem->data;
+
+ ecsa_elem = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+ ies->data, ies->len);
+ if (ecsa_elem && ecsa_elem->datalen == sizeof(*ecsa))
+ ecsa = (void *)ecsa_elem->data;
+
+ if (csa && csa->count == 0)
+ csa = NULL;
+ if (csa && !csa->mode && csa->new_ch_num == cur_channel)
+ csa = NULL;
+
+ if (ecsa && ecsa->count == 0)
+ ecsa = NULL;
+ if (ecsa && !ecsa->mode && ecsa->new_ch_num == cur_channel)
+ ecsa = NULL;
+
+ if (ignore_ecsa && ecsa) {
+ sdata_info(sdata,
+ "Ignoring ECSA in probe response - was considered stuck!\n");
+ return csa;
+ }
+
+ return csa || ecsa;
+}
+
+static bool ieee80211_mgd_csa_in_process(struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_bss *bss)
+{
+ u8 cur_channel;
+ bool ret;
+
+ cur_channel = ieee80211_frequency_to_channel(bss->channel->center_freq);
+
+ rcu_read_lock();
+ if (ieee80211_mgd_csa_present(sdata,
+ rcu_dereference(bss->beacon_ies),
+ cur_channel, false)) {
+ ret = true;
+ goto out;
+ }
+
+ if (ieee80211_mgd_csa_present(sdata,
+ rcu_dereference(bss->proberesp_ies),
+ cur_channel, bss->proberesp_ecsa_stuck)) {
+ ret = true;
+ goto out;
+ }
+
+ ret = false;
+out:
+ rcu_read_unlock();
+ return ret;
+}
+
/* config hooks */
int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
struct cfg80211_auth_request *req)
@@ -7317,7 +7385,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_mgd_auth_data *auth_data;
struct ieee80211_link_data *link;
- const struct element *csa_elem, *ecsa_elem;
u16 auth_alg;
int err;
bool cont_auth;
@@ -7360,21 +7427,10 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
if (ifmgd->assoc_data)
return -EBUSY;
- rcu_read_lock();
- csa_elem = ieee80211_bss_get_elem(req->bss, WLAN_EID_CHANNEL_SWITCH);
- ecsa_elem = ieee80211_bss_get_elem(req->bss,
- WLAN_EID_EXT_CHANSWITCH_ANN);
- if ((csa_elem &&
- csa_elem->datalen == sizeof(struct ieee80211_channel_sw_ie) &&
- ((struct ieee80211_channel_sw_ie *)csa_elem->data)->count != 0) ||
- (ecsa_elem &&
- ecsa_elem->datalen == sizeof(struct ieee80211_ext_chansw_ie) &&
- ((struct ieee80211_ext_chansw_ie *)ecsa_elem->data)->count != 0)) {
- rcu_read_unlock();
+ if (ieee80211_mgd_csa_in_process(sdata, req->bss)) {
sdata_info(sdata, "AP is in CSA process, reject auth\n");
return -EINVAL;
}
- rcu_read_unlock();
auth_data = kzalloc(sizeof(*auth_data) + req->auth_data_len +
req->ie_len, GFP_KERNEL);
@@ -7684,7 +7740,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_mgd_assoc_data *assoc_data;
- const struct element *ssid_elem, *csa_elem, *ecsa_elem;
+ const struct element *ssid_elem;
struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg;
ieee80211_conn_flags_t conn_flags = 0;
struct ieee80211_link_data *link;
@@ -7707,23 +7763,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
cbss = req->link_id < 0 ? req->bss : req->links[req->link_id].bss;
- rcu_read_lock();
- ssid_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_SSID);
- if (!ssid_elem || ssid_elem->datalen > sizeof(assoc_data->ssid)) {
- rcu_read_unlock();
+ if (ieee80211_mgd_csa_in_process(sdata, cbss)) {
+ sdata_info(sdata, "AP is in CSA process, reject assoc\n");
kfree(assoc_data);
return -EINVAL;
}
- csa_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_CHANNEL_SWITCH);
- ecsa_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_EXT_CHANSWITCH_ANN);
- if ((csa_elem &&
- csa_elem->datalen == sizeof(struct ieee80211_channel_sw_ie) &&
- ((struct ieee80211_channel_sw_ie *)csa_elem->data)->count != 0) ||
- (ecsa_elem &&
- ecsa_elem->datalen == sizeof(struct ieee80211_ext_chansw_ie) &&
- ((struct ieee80211_ext_chansw_ie *)ecsa_elem->data)->count != 0)) {
- sdata_info(sdata, "AP is in CSA process, reject assoc\n");
+ rcu_read_lock();
+ ssid_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_SSID);
+ if (!ssid_elem || ssid_elem->datalen > sizeof(assoc_data->ssid)) {
rcu_read_unlock();
kfree(assoc_data);
return -EINVAL;
@@ -7998,8 +8046,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
rcu_read_lock();
beacon_ies = rcu_dereference(req->bss->beacon_ies);
-
- if (beacon_ies) {
+ if (!beacon_ies) {
/*
* Wait up to one beacon interval ...
* should this be more if we miss one?
@@ -8080,6 +8127,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
ieee80211_report_disconnect(sdata, frame_buf,
sizeof(frame_buf), true,
req->reason_code, false);
+ drv_mgd_complete_tx(sdata->local, sdata, &info);
return 0;
}
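
ieee80211_mgd_csa_present() only casts an element payload to a structure after checking that datalen matches the expected size, and treats anything else as absent. A minimal version of that validate-then-cast pattern, using a hypothetical chansw_ie layout:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical 3-byte channel-switch element body, for illustration. */
struct chansw_ie {
	uint8_t mode;
	uint8_t new_ch_num;
	uint8_t count;
};

static const struct chansw_ie *parse_csa(const uint8_t *data, size_t datalen)
{
	if (datalen != sizeof(struct chansw_ie))
		return NULL;	/* truncated or oversized element: treat as absent */

	return (const struct chansw_ie *)data;
}

int main(void)
{
	uint8_t payload[3] = { 0, 36, 5 };
	const struct chansw_ie *csa = parse_csa(payload, sizeof(payload));

	if (csa)
		printf("switch to channel %u in %u beacons\n",
		       csa->new_ch_num, csa->count);
	return 0;
}
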
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index d5ea5f5bcf3a..9d33fd2377c8 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -119,7 +119,8 @@ void rate_control_rate_update(struct ieee80211_local *local,
rcu_read_unlock();
}
- drv_sta_rc_update(local, sta->sdata, &sta->sta, changed);
+ if (sta->uploaded)
+ drv_sta_rc_update(local, sta->sdata, &sta->sta, changed);
}
int ieee80211_rate_control_register(const struct rate_control_ops *ops)
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 645355e5f1bc..f9d5842601fa 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
* Copyright 2016-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/if_arp.h>
@@ -237,14 +237,18 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
}
static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_channel *channel,
u32 scan_flags, const u8 *da)
{
if (!sdata)
return false;
- /* accept broadcast for OCE */
- if (scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP &&
- is_broadcast_ether_addr(da))
+
+ /* accept broadcast on 6 GHz and for OCE */
+ if (is_broadcast_ether_addr(da) &&
+ (channel->band == NL80211_BAND_6GHZ ||
+ scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP))
return true;
+
if (scan_flags & NL80211_SCAN_FLAG_RANDOM_ADDR)
return true;
return ether_addr_equal(da, sdata->vif.addr);
@@ -293,6 +297,12 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0);
}
+ channel = ieee80211_get_channel_khz(local->hw.wiphy,
+ ieee80211_rx_status_to_khz(rx_status));
+
+ if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
+ return;
+
if (ieee80211_is_probe_resp(mgmt->frame_control)) {
struct cfg80211_scan_request *scan_req;
struct cfg80211_sched_scan_request *sched_scan_req;
@@ -310,19 +320,15 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
/* ignore ProbeResp to foreign address or non-bcast (OCE)
* unless scanning with randomised address
*/
- if (!ieee80211_scan_accept_presp(sdata1, scan_req_flags,
+ if (!ieee80211_scan_accept_presp(sdata1, channel,
+ scan_req_flags,
mgmt->da) &&
- !ieee80211_scan_accept_presp(sdata2, sched_scan_req_flags,
+ !ieee80211_scan_accept_presp(sdata2, channel,
+ sched_scan_req_flags,
mgmt->da))
return;
}
- channel = ieee80211_get_channel_khz(local->hw.wiphy,
- ieee80211_rx_status_to_khz(rx_status));
-
- if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
- return;
-
bss = ieee80211_bss_info_update(local, rx_status,
mgmt, skb->len,
channel);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 68a48abc7287..6fbb15b65902 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -5,7 +5,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*
* Transmit and frame generation functions.
*/
@@ -3100,10 +3100,11 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
/* DA SA BSSID */
build.da_offs = offsetof(struct ieee80211_hdr, addr1);
build.sa_offs = offsetof(struct ieee80211_hdr, addr2);
+ rcu_read_lock();
link = rcu_dereference(sdata->link[tdls_link_id]);
- if (WARN_ON_ONCE(!link))
- break;
- memcpy(hdr->addr3, link->u.mgd.bssid, ETH_ALEN);
+ if (!WARN_ON_ONCE(!link))
+ memcpy(hdr->addr3, link->u.mgd.bssid, ETH_ALEN);
+ rcu_read_unlock();
build.hdr_len = 24;
break;
}
@@ -3926,6 +3927,7 @@ begin:
goto begin;
skb = __skb_dequeue(&tx.skbs);
+ info = IEEE80211_SKB_CB(skb);
if (!skb_queue_empty(&tx.skbs)) {
spin_lock_bh(&fq->lock);
@@ -3970,7 +3972,7 @@ begin:
}
encap_out:
- IEEE80211_SKB_CB(skb)->control.vif = vif;
+ info->control.vif = vif;
if (tx.sta &&
wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
diff --git a/net/mac80211/wbrf.c b/net/mac80211/wbrf.c
index a05c5b971789..3a8612309137 100644
--- a/net/mac80211/wbrf.c
+++ b/net/mac80211/wbrf.c
@@ -23,8 +23,6 @@ void ieee80211_check_wbrf_support(struct ieee80211_local *local)
return;
local->wbrf_supported = acpi_amd_wbrf_supported_producer(dev);
- dev_dbg(dev, "WBRF is %s supported\n",
- local->wbrf_supported ? "" : "not");
}
static void get_chan_freq_boundary(u32 center_freq, u32 bandwidth, u64 *start, u64 *end)
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 7a47a58aa54b..ceee44ea09d9 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -663,7 +663,7 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
spin_unlock_irqrestore(&mns->keys_lock, flags);
if (!tagbits) {
- kfree(key);
+ mctp_key_unref(key);
return ERR_PTR(-EBUSY);
}
@@ -888,7 +888,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex);
if (!dev) {
rcu_read_unlock();
- return rc;
+ goto out_free;
}
rt->dev = __mctp_dev_get(dev);
rcu_read_unlock();
@@ -903,7 +903,8 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
rt->mtu = 0;
} else {
- return -EINVAL;
+ rc = -EINVAL;
+ goto out_free;
}
spin_lock_irqsave(&rt->dev->addrs_lock, flags);
@@ -966,12 +967,17 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
rc = mctp_do_fragment_route(rt, skb, mtu, tag);
}
+ /* route output functions consume the skb, even on error */
+ skb = NULL;
+
out_release:
if (!ext_rt)
mctp_route_release(rt);
mctp_dev_put(tmp_rt.dev);
+out_free:
+ kfree_skb(skb);
return rc;
}
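
The mctp change makes every output path either hand the skb off or free it exactly once: after the consuming call the local pointer is cleared, so the shared free at the end is safe on all exits. The same ownership discipline in plain C, where route_output is a stand-in that consumes its buffer:

#include <stdlib.h>

/* Stand-in for the route output helpers: consumes the buffer, even on error. */
static int route_output(void *buf)
{
	free(buf);
	return 0;
}

static int local_output(int fail_early)
{
	void *buf = malloc(64);
	int rc = -1;

	if (!buf)
		return -1;

	if (fail_early)
		goto out_free;

	rc = route_output(buf);
	buf = NULL;		/* handed off: the shared cleanup must not free it */

out_free:
	free(buf);		/* no-op once ownership has moved */
	return rc;
}

int main(void)
{
	local_output(1);
	return local_output(0);
}
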
diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c
index a536586742f2..7017dd60659d 100644
--- a/net/mptcp/diag.c
+++ b/net/mptcp/diag.c
@@ -13,17 +13,22 @@
#include <uapi/linux/mptcp.h>
#include "protocol.h"
-static int subflow_get_info(const struct sock *sk, struct sk_buff *skb)
+static int subflow_get_info(struct sock *sk, struct sk_buff *skb)
{
struct mptcp_subflow_context *sf;
struct nlattr *start;
u32 flags = 0;
+ bool slow;
int err;
+ if (inet_sk_state_load(sk) == TCP_LISTEN)
+ return 0;
+
start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP);
if (!start)
return -EMSGSIZE;
+ slow = lock_sock_fast(sk);
rcu_read_lock();
sf = rcu_dereference(inet_csk(sk)->icsk_ulp_data);
if (!sf) {
@@ -63,17 +68,19 @@ static int subflow_get_info(const struct sock *sk, struct sk_buff *skb)
sf->map_data_len) ||
nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) ||
nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) ||
- nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, sf->local_id)) {
+ nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, subflow_get_local_id(sf))) {
err = -EMSGSIZE;
goto nla_failure;
}
rcu_read_unlock();
+ unlock_sock_fast(sk, slow);
nla_nest_end(skb, start);
return 0;
nla_failure:
rcu_read_unlock();
+ unlock_sock_fast(sk, slow);
nla_nest_cancel(skb, start);
return err;
}
diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c
index 74698582a285..ad28da655f8b 100644
--- a/net/mptcp/fastopen.c
+++ b/net/mptcp/fastopen.c
@@ -59,13 +59,12 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
mptcp_data_unlock(sk);
}
-void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt)
+void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
{
struct sock *sk = (struct sock *)msk;
struct sk_buff *skb;
- mptcp_data_lock(sk);
skb = skb_peek_tail(&sk->sk_receive_queue);
if (skb) {
WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq);
@@ -77,5 +76,4 @@ void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_
}
pr_debug("msk=%p ack_seq=%llx", msk, msk->ack_seq);
- mptcp_data_unlock(sk);
}
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index d2527d189a79..63fc0758c22d 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -962,9 +962,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
/* subflows are fully established as soon as we get any
* additional ack, including ADD_ADDR.
*/
- subflow->fully_established = 1;
- WRITE_ONCE(msk->fully_established, true);
- goto check_notify;
+ goto set_fully_established;
}
/* If the first established packet does not contain MP_CAPABLE + data
@@ -983,10 +981,13 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (mp_opt->deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
-set_fully_established:
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
- mptcp_subflow_fully_established(subflow, mp_opt);
+
+set_fully_established:
+ mptcp_data_lock((struct sock *)msk);
+ __mptcp_subflow_fully_established(msk, subflow, mp_opt);
+ mptcp_data_unlock((struct sock *)msk);
check_notify:
/* if the subflow is not already linked into the conn_list, we can't
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 287a60381eae..58d17d9604e7 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -396,19 +396,6 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
}
}
-static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr,
- const struct mptcp_addr_info *addr)
-{
- int i;
-
- for (i = 0; i < nr; i++) {
- if (addrs[i].id == addr->id)
- return true;
- }
-
- return false;
-}
-
/* Fill all the remote addresses into the array addrs[],
* and return the array size.
*/
@@ -440,18 +427,34 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk,
msk->pm.subflows++;
addrs[i++] = remote;
} else {
+ DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
+
+ /* Forbid creation of new subflows matching existing
+ * ones, possibly already created by incoming ADD_ADDR
+ */
+ bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
+ mptcp_for_each_subflow(msk, subflow)
+ if (READ_ONCE(subflow->local_id) == local->id)
+ __set_bit(subflow->remote_id, unavail_id);
+
mptcp_for_each_subflow(msk, subflow) {
ssk = mptcp_subflow_tcp_sock(subflow);
remote_address((struct sock_common *)ssk, &addrs[i]);
- addrs[i].id = subflow->remote_id;
+ addrs[i].id = READ_ONCE(subflow->remote_id);
if (deny_id0 && !addrs[i].id)
continue;
+ if (test_bit(addrs[i].id, unavail_id))
+ continue;
+
if (!mptcp_pm_addr_families_match(sk, local, &addrs[i]))
continue;
- if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
- msk->pm.subflows < subflows_max) {
+ if (msk->pm.subflows < subflows_max) {
+				/* forbid creating multiple addresses towards
+ * this id
+ */
+ __set_bit(addrs[i].id, unavail_id);
msk->pm.subflows++;
i++;
}
@@ -799,18 +802,18 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
mptcp_for_each_subflow_safe(msk, subflow, tmp) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ u8 remote_id = READ_ONCE(subflow->remote_id);
int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
- u8 id = subflow->local_id;
+ u8 id = subflow_get_local_id(subflow);
- if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id)
+ if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id)
continue;
if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id))
continue;
pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u",
rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow",
- i, rm_id, subflow->local_id, subflow->remote_id,
- msk->mpc_endpoint_id);
+ i, rm_id, id, remote_id, msk->mpc_endpoint_id);
spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, how);
@@ -901,7 +904,8 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
}
static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
- struct mptcp_pm_addr_entry *entry)
+ struct mptcp_pm_addr_entry *entry,
+ bool needs_id)
{
struct mptcp_pm_addr_entry *cur, *del_entry = NULL;
unsigned int addr_max;
@@ -949,7 +953,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
}
}
- if (!entry->addr.id) {
+ if (!entry->addr.id && needs_id) {
find_next:
entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
MPTCP_PM_MAX_ADDR_ID + 1,
@@ -960,7 +964,7 @@ find_next:
}
}
- if (!entry->addr.id)
+ if (!entry->addr.id && needs_id)
goto out;
__set_bit(entry->addr.id, pernet->id_bitmap);
@@ -1092,7 +1096,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc
entry->ifindex = 0;
entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT;
entry->lsk = NULL;
- ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
+ ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true);
if (ret < 0)
kfree(entry);
@@ -1285,6 +1289,18 @@ next:
return 0;
}
+static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr,
+ struct genl_info *info)
+{
+ struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1];
+
+ if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr,
+ mptcp_pm_address_nl_policy, info->extack) &&
+ tb[MPTCP_PM_ADDR_ATTR_ID])
+ return true;
+ return false;
+}
+
int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
@@ -1326,7 +1342,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
goto out_free;
}
}
- ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
+ ret = mptcp_pm_nl_append_new_local_addr(pernet, entry,
+ !mptcp_pm_has_addr_attr_id(attr, info));
if (ret < 0) {
GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret);
goto out_free;
@@ -1980,7 +1997,7 @@ static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk)
if (WARN_ON_ONCE(!sf))
return -EINVAL;
- if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id))
+ if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf)))
return -EMSGSIZE;
if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id))
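
fill_remote_addresses_vec() now tracks already-used remote ids in a bitmap and skips them instead of rescanning the output array, which also forbids creating a subflow that an earlier incoming ADD_ADDR already produced. A standalone sketch of that bitmap de-duplication:

#include <stdio.h>

#define MAX_ID 256

int main(void)
{
	unsigned char used[MAX_ID / 8] = { 0 };
	int candidates[] = { 3, 7, 3, 9, 7 };
	int out[8];
	int n = 0;

	for (size_t i = 0; i < sizeof(candidates) / sizeof(candidates[0]); i++) {
		int id = candidates[i];

		if (used[id / 8] & (1u << (id % 8)))
			continue;	/* an entry towards this id already exists */

		used[id / 8] |= 1u << (id % 8);
		out[n++] = id;
	}

	for (int i = 0; i < n; i++)
		printf("%d ", out[i]);
	printf("\n");
	return 0;
}
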
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index efecbe3cf415..bc97cc30f013 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -26,7 +26,8 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk)
}
static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
- struct mptcp_pm_addr_entry *entry)
+ struct mptcp_pm_addr_entry *entry,
+ bool needs_id)
{
DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
struct mptcp_pm_addr_entry *match = NULL;
@@ -41,7 +42,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
spin_lock_bh(&msk->pm.lock);
list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) {
addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true);
- if (addr_match && entry->addr.id == 0)
+ if (addr_match && entry->addr.id == 0 && needs_id)
entry->addr.id = e->addr.id;
id_match = (e->addr.id == entry->addr.id);
if (addr_match && id_match) {
@@ -64,7 +65,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
}
*e = *entry;
- if (!e->addr.id)
+ if (!e->addr.id && needs_id)
e->addr.id = find_next_zero_bit(id_bitmap,
MPTCP_PM_MAX_ADDR_ID + 1,
1);
@@ -130,10 +131,21 @@ int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
struct mptcp_addr_info *skc)
{
- struct mptcp_pm_addr_entry new_entry;
+ struct mptcp_pm_addr_entry *entry = NULL, *e, new_entry;
__be16 msk_sport = ((struct inet_sock *)
inet_sk((struct sock *)msk))->inet_sport;
+ spin_lock_bh(&msk->pm.lock);
+ list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (mptcp_addresses_equal(&e->addr, skc, false)) {
+ entry = e;
+ break;
+ }
+ }
+ spin_unlock_bh(&msk->pm.lock);
+ if (entry)
+ return entry->addr.id;
+
memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry));
new_entry.addr = *skc;
new_entry.addr.id = 0;
@@ -142,7 +154,7 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
if (new_entry.addr.port == msk_sport)
new_entry.addr.port = 0;
- return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry);
+ return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true);
}
int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
@@ -187,7 +199,7 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
goto announce_err;
}
- err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val);
+ err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val, false);
if (err < 0) {
GENL_SET_ERR_MSG(info, "did not match address and id");
goto announce_err;
@@ -222,7 +234,7 @@ static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk,
lock_sock(sk);
mptcp_for_each_subflow(msk, subflow) {
- if (subflow->local_id == 0) {
+ if (READ_ONCE(subflow->local_id) == 0) {
has_id_0 = true;
break;
}
@@ -367,7 +379,7 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
}
local.addr = addr_l;
- err = mptcp_userspace_pm_append_new_local_addr(msk, &local);
+ err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false);
if (err < 0) {
GENL_SET_ERR_MSG(info, "did not match address and id");
goto create_err;
@@ -483,6 +495,16 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info
goto destroy_err;
}
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (addr_l.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) {
+ ipv6_addr_set_v4mapped(addr_l.addr.s_addr, &addr_l.addr6);
+ addr_l.family = AF_INET6;
+ }
+ if (addr_r.family == AF_INET && ipv6_addr_v4mapped(&addr_l.addr6)) {
+ ipv6_addr_set_v4mapped(addr_r.addr.s_addr, &addr_r.addr6);
+ addr_r.family = AF_INET6;
+ }
+#endif
if (addr_l.family != addr_r.family) {
GENL_SET_ERR_MSG(info, "address families do not match");
err = -EINVAL;
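
The subflow_destroy hunk normalizes mixed address families by lifting an IPv4 endpoint to its IPv4-mapped IPv6 form before the family comparison. A user-space sketch that builds the ::ffff:a.b.c.d mapping by hand:

#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	struct in_addr v4;
	struct in6_addr mapped;
	char buf[INET6_ADDRSTRLEN];

	inet_pton(AF_INET, "192.0.2.1", &v4);

	/* Build ::ffff:192.0.2.1, the IPv4-mapped IPv6 form of the address. */
	memset(&mapped, 0, sizeof(mapped));
	mapped.s6_addr[10] = 0xff;
	mapped.s6_addr[11] = 0xff;
	memcpy(&mapped.s6_addr[12], &v4, 4);

	printf("%s\n", inet_ntop(AF_INET6, &mapped, buf, sizeof(buf)));
	return 0;
}
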
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 028e8b473626..7833a49f6214 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -85,7 +85,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
subflow->subflow_id = msk->subflow_id++;
/* This is the first subflow, always with id 0 */
- subflow->local_id_valid = 1;
+ WRITE_ONCE(subflow->local_id, 0);
mptcp_sock_graft(msk->first, sk->sk_socket);
iput(SOCK_INODE(ssock));
@@ -1260,6 +1260,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
mpext = mptcp_get_ext(skb);
if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) {
TCP_SKB_CB(skb)->eor = 1;
+ tcp_mark_push(tcp_sk(ssk), skb);
goto alloc_skb;
}
@@ -1505,8 +1506,11 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
void mptcp_check_and_set_pending(struct sock *sk)
{
- if (mptcp_send_head(sk))
- mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
+ if (mptcp_send_head(sk)) {
+ mptcp_data_lock(sk);
+ mptcp_sk(sk)->cb_flags |= BIT(MPTCP_PUSH_PENDING);
+ mptcp_data_unlock(sk);
+ }
}
static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
@@ -1960,6 +1964,9 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
if (copied <= 0)
return;
+ if (!msk->rcvspace_init)
+ mptcp_rcv_space_init(msk, msk->first);
+
msk->rcvq_space.copied += copied;
mstamp = div_u64(tcp_clock_ns(), NSEC_PER_USEC);
@@ -3142,7 +3149,6 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE);
WRITE_ONCE(msk->flags, 0);
msk->cb_flags = 0;
- msk->push_pending = 0;
msk->recovery = false;
msk->can_ack = false;
msk->fully_established = false;
@@ -3158,6 +3164,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
msk->bytes_received = 0;
msk->bytes_sent = 0;
msk->bytes_retrans = 0;
+ msk->rcvspace_init = 0;
WRITE_ONCE(sk->sk_shutdown, 0);
sk_error_report(sk);
@@ -3171,8 +3178,50 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
+
+static void mptcp_copy_ip6_options(struct sock *newsk, const struct sock *sk)
+{
+ const struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
+ struct ipv6_pinfo *newnp;
+
+ newnp = inet6_sk(newsk);
+
+ rcu_read_lock();
+ opt = rcu_dereference(np->opt);
+ if (opt) {
+ opt = ipv6_dup_options(newsk, opt);
+ if (!opt)
+ net_warn_ratelimited("%s: Failed to copy ip6 options\n", __func__);
+ }
+ RCU_INIT_POINTER(newnp->opt, opt);
+ rcu_read_unlock();
+}
#endif
+static void mptcp_copy_ip_options(struct sock *newsk, const struct sock *sk)
+{
+ struct ip_options_rcu *inet_opt, *newopt = NULL;
+ const struct inet_sock *inet = inet_sk(sk);
+ struct inet_sock *newinet;
+
+ newinet = inet_sk(newsk);
+
+ rcu_read_lock();
+ inet_opt = rcu_dereference(inet->inet_opt);
+ if (inet_opt) {
+ newopt = sock_kmalloc(newsk, sizeof(*inet_opt) +
+ inet_opt->opt.optlen, GFP_ATOMIC);
+ if (newopt)
+ memcpy(newopt, inet_opt, sizeof(*inet_opt) +
+ inet_opt->opt.optlen);
+ else
+ net_warn_ratelimited("%s: Failed to copy ip options\n", __func__);
+ }
+ RCU_INIT_POINTER(newinet->inet_opt, newopt);
+ rcu_read_unlock();
+}
+
struct sock *mptcp_sk_clone_init(const struct sock *sk,
const struct mptcp_options_received *mp_opt,
struct sock *ssk,
@@ -3180,6 +3229,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
+ struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk;
if (!nsk)
@@ -3192,6 +3242,13 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
__mptcp_init_sock(nsk);
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (nsk->sk_family == AF_INET6)
+ mptcp_copy_ip6_options(nsk, sk);
+ else
+#endif
+ mptcp_copy_ip_options(nsk, sk);
+
msk = mptcp_sk(nsk);
msk->local_key = subflow_req->local_key;
msk->token = subflow_req->token;
@@ -3203,7 +3260,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
msk->write_seq = subflow_req->idsn + 1;
msk->snd_nxt = msk->write_seq;
msk->snd_una = msk->write_seq;
- msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
+ msk->wnd_end = msk->snd_nxt + tcp_sk(ssk)->snd_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
mptcp_init_sched(msk, mptcp_sk(sk)->sched);
@@ -3220,7 +3277,8 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
/* The msk maintain a ref to each subflow in the connections list */
WRITE_ONCE(msk->first, ssk);
- list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list);
+ subflow = mptcp_subflow_ctx(ssk);
+ list_add(&subflow->node, &msk->conn_list);
sock_hold(ssk);
/* new mpc subflow takes ownership of the newly
@@ -3235,6 +3293,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
__mptcp_propagate_sndbuf(nsk, ssk);
mptcp_rcv_space_init(msk, ssk);
+
+ if (mp_opt->suboptions & OPTION_MPTCP_MPC_ACK)
+ __mptcp_subflow_fully_established(msk, subflow, mp_opt);
bh_unlock_sock(nsk);
/* note: the newly allocated socket refcount is 2 now */
@@ -3245,6 +3306,7 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
{
const struct tcp_sock *tp = tcp_sk(ssk);
+ msk->rcvspace_init = 1;
msk->rcvq_space.copied = 0;
msk->rcvq_space.rtt_us = 0;
@@ -3255,8 +3317,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
TCP_INIT_CWND * tp->advmss);
if (msk->rcvq_space.space == 0)
msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT;
-
- WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);
}
void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
@@ -3330,8 +3390,7 @@ static void mptcp_release_cb(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(sk);
for (;;) {
- unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) |
- msk->push_pending;
+ unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED);
struct list_head join_list;
if (!flags)
@@ -3347,7 +3406,6 @@ static void mptcp_release_cb(struct sock *sk)
 * datapath acquires the msk socket spinlock while holding
* the subflow socket lock
*/
- msk->push_pending = 0;
msk->cb_flags &= ~flags;
spin_unlock_bh(&sk->sk_lock.slock);
@@ -3475,13 +3533,8 @@ void mptcp_finish_connect(struct sock *ssk)
* accessing the field below
*/
WRITE_ONCE(msk->local_key, subflow->local_key);
- WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
- WRITE_ONCE(msk->snd_nxt, msk->write_seq);
- WRITE_ONCE(msk->snd_una, msk->write_seq);
mptcp_pm_new_connection(msk, ssk, 0);
-
- mptcp_rcv_space_init(msk, ssk);
}
void mptcp_sock_graft(struct sock *sk, struct socket *parent)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 3517f2d24a22..07f6242afc1a 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -286,7 +286,6 @@ struct mptcp_sock {
int rmem_released;
unsigned long flags;
unsigned long cb_flags;
- unsigned long push_pending;
bool recovery; /* closing subflow write queue reinjected */
bool can_ack;
bool fully_established;
@@ -305,7 +304,8 @@ struct mptcp_sock {
nodelay:1,
fastopening:1,
in_accept_queue:1,
- free_first:1;
+ free_first:1,
+ rcvspace_init:1;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
@@ -491,10 +491,9 @@ struct mptcp_subflow_context {
remote_key_valid : 1, /* received the peer key from */
disposable : 1, /* ctx can be free at ulp release time */
stale : 1, /* unable to snd/rcv data, do not use for xmit */
- local_id_valid : 1, /* local_id is correctly initialized */
valid_csum_seen : 1, /* at least one csum validated */
is_mptfo : 1, /* subflow is doing TFO */
- __unused : 9;
+ __unused : 10;
bool data_avail;
bool scheduled;
u32 remote_nonce;
@@ -505,7 +504,7 @@ struct mptcp_subflow_context {
u8 hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */
u64 iasn; /* initial ack sequence number, MPC subflows only */
};
- u8 local_id;
+ s16 local_id; /* if negative not initialized yet */
u8 remote_id;
u8 reset_seen:1;
u8 reset_transient:1;
@@ -556,6 +555,7 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
{
memset(&subflow->reset, 0, sizeof(subflow->reset));
subflow->request_mptcp = 1;
+ WRITE_ONCE(subflow->local_id, -1);
}
static inline u64
@@ -622,8 +622,9 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
unsigned int mptcp_close_timeout(const struct sock *sk);
int mptcp_get_pm_type(const struct net *net);
const char *mptcp_get_scheduler(const struct net *net);
-void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt);
+void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
void mptcp_check_and_set_pending(struct sock *sk);
void __mptcp_push_pending(struct sock *sk, unsigned int flags);
@@ -789,6 +790,16 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
}
+static inline void mptcp_write_space(struct sock *sk)
+{
+ if (sk_stream_is_writeable(sk)) {
+ /* pairs with memory barrier in mptcp_poll */
+ smp_mb();
+ if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
+ sk_stream_write_space(sk);
+ }
+}
+
static inline void __mptcp_sync_sndbuf(struct sock *sk)
{
struct mptcp_subflow_context *subflow;
@@ -807,6 +818,7 @@ static inline void __mptcp_sync_sndbuf(struct sock *sk)
/* the msk max wmem limit is <nr_subflows> * tcp wmem[2] */
WRITE_ONCE(sk->sk_sndbuf, new_sndbuf);
+ mptcp_write_space(sk);
}
/* The called held both the msk socket and the subflow socket locks,
@@ -837,16 +849,6 @@ static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
local_bh_enable();
}
-static inline void mptcp_write_space(struct sock *sk)
-{
- if (sk_stream_is_writeable(sk)) {
- /* pairs with memory barrier in mptcp_poll */
- smp_mb();
- if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
- sk_stream_write_space(sk);
- }
-}
-
void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags);
#define MPTCP_TOKEN_MAX_RETRIES 4
@@ -952,8 +954,8 @@ void mptcp_event_pm_listener(const struct sock *ssk,
enum mptcp_event_type event);
bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
-void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt);
+void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt);
void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
struct request_sock *req);
@@ -1021,6 +1023,15 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
+static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow)
+{
+ int local_id = READ_ONCE(subflow->local_id);
+
+ if (local_id < 0)
+ return 0;
+ return local_id;
+}
+
void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_work(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
@@ -1128,7 +1139,8 @@ static inline bool subflow_simultaneous_connect(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1) &&
+ return (1 << sk->sk_state) &
+ (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING) &&
is_active_ssk(subflow) &&
!subflow->conn_finished;
}
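
protocol.h widens local_id to a signed type so -1 can mean "not assigned yet", and subflow_get_local_id() clamps that to 0 for consumers that only deal in valid ids. The accessor pattern in isolation; the field width and names here are illustrative:

#include <stdio.h>

struct subflow {
	short local_id;		/* signed so -1 can mean "not assigned yet" */
};

static unsigned char get_local_id(const struct subflow *sf)
{
	int id = sf->local_id;

	return id < 0 ? 0 : (unsigned char)id;
}

int main(void)
{
	struct subflow sf = { .local_id = -1 };

	printf("%u\n", get_local_id(&sf));	/* 0 until an id is assigned */
	sf.local_id = 5;
	printf("%u\n", get_local_id(&sf));
	return 0;
}
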
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 0dcb721c89d1..71ba86246ff8 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -421,29 +421,26 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc
void __mptcp_sync_state(struct sock *sk, int state)
{
+ struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
+ struct sock *ssk = msk->first;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ __mptcp_propagate_sndbuf(sk, ssk);
+ if (!msk->rcvspace_init)
+ mptcp_rcv_space_init(msk, ssk);
- __mptcp_propagate_sndbuf(sk, msk->first);
if (sk->sk_state == TCP_SYN_SENT) {
+	/* subflow->idsn is always available in TCP_SYN_SENT state,
+ * even for the FASTOPEN scenarios
+ */
+ WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
+ WRITE_ONCE(msk->snd_nxt, msk->write_seq);
mptcp_set_state(sk, state);
sk->sk_state_change(sk);
}
}
-static void mptcp_propagate_state(struct sock *sk, struct sock *ssk)
-{
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- mptcp_data_lock(sk);
- if (!sock_owned_by_user(sk)) {
- __mptcp_sync_state(sk, ssk->sk_state);
- } else {
- msk->pending_state = ssk->sk_state;
- __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
- }
- mptcp_data_unlock(sk);
-}
-
static void subflow_set_remote_key(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt)
@@ -465,6 +462,31 @@ static void subflow_set_remote_key(struct mptcp_sock *msk,
atomic64_set(&msk->rcv_wnd_sent, subflow->iasn);
}
+static void mptcp_propagate_state(struct sock *sk, struct sock *ssk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ mptcp_data_lock(sk);
+ if (mp_opt) {
+ /* Options are available only in the non fallback cases
+ * avoid updating rx path fields otherwise
+ */
+ WRITE_ONCE(msk->snd_una, subflow->idsn + 1);
+ WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd);
+ subflow_set_remote_key(msk, subflow, mp_opt);
+ }
+
+ if (!sock_owned_by_user(sk)) {
+ __mptcp_sync_state(sk, ssk->sk_state);
+ } else {
+ msk->pending_state = ssk->sk_state;
+ __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
+ }
+ mptcp_data_unlock(sk);
+}
+
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -499,10 +521,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (mp_opt.deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
subflow->mp_capable = 1;
- subflow_set_remote_key(msk, subflow, &mp_opt);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
mptcp_finish_connect(sk);
- mptcp_propagate_state(parent, sk);
+ mptcp_propagate_state(parent, sk, subflow, &mp_opt);
} else if (subflow->request_join) {
u8 hmac[SHA256_DIGEST_SIZE];
@@ -514,7 +535,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->backup = mp_opt.backup;
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
- subflow->remote_id = mp_opt.join_id;
+ WRITE_ONCE(subflow->remote_id, mp_opt.join_id);
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
subflow, subflow->thmac, subflow->remote_nonce,
subflow->backup);
@@ -545,8 +566,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
}
} else if (mptcp_check_fallback(sk)) {
fallback:
- mptcp_rcv_space_init(msk, sk);
- mptcp_propagate_state(parent, sk);
+ mptcp_propagate_state(parent, sk, subflow, NULL);
}
return;
@@ -557,8 +577,8 @@ do_reset:
static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id)
{
- subflow->local_id = local_id;
- subflow->local_id_valid = 1;
+ WARN_ON_ONCE(local_id < 0 || local_id > 255);
+ WRITE_ONCE(subflow->local_id, local_id);
}
static int subflow_chk_local_id(struct sock *sk)
@@ -567,7 +587,7 @@ static int subflow_chk_local_id(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
int err;
- if (likely(subflow->local_id_valid))
+ if (likely(subflow->local_id >= 0))
return 0;
err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
@@ -731,17 +751,16 @@ void mptcp_subflow_drop_ctx(struct sock *ssk)
kfree_rcu(ctx, rcu);
}
-void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt)
+void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
{
- struct mptcp_sock *msk = mptcp_sk(subflow->conn);
-
subflow_set_remote_key(msk, subflow, mp_opt);
subflow->fully_established = 1;
WRITE_ONCE(msk->fully_established, true);
if (subflow->is_mptfo)
- mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
+ __mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
}
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
@@ -834,7 +853,6 @@ create_child:
* mpc option
*/
if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) {
- mptcp_subflow_fully_established(ctx, &mp_opt);
mptcp_pm_fully_established(owner, child);
ctx->pm_notified = 1;
}
@@ -1549,7 +1567,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
remote_token, local_id, remote_id);
subflow->remote_token = remote_token;
- subflow->remote_id = remote_id;
+ WRITE_ONCE(subflow->remote_id, remote_id);
subflow->request_join = 1;
subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
subflow->subflow_id = msk->subflow_id++;
@@ -1713,6 +1731,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
pr_debug("subflow=%p", ctx);
ctx->tcp_sock = sk;
+ WRITE_ONCE(ctx->local_id, -1);
return ctx;
}
@@ -1744,10 +1763,9 @@ static void subflow_state_change(struct sock *sk)
msk = mptcp_sk(parent);
if (subflow_simultaneous_connect(sk)) {
mptcp_do_fallback(sk);
- mptcp_rcv_space_init(msk, sk);
pr_fallback(msk);
subflow->conn_finished = 1;
- mptcp_propagate_state(parent, sk);
+ mptcp_propagate_state(parent, sk, subflow, NULL);
}
/* as recvmsg() does not acquire the subflow socket for ssk selection
@@ -1949,14 +1967,14 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->idsn = subflow_req->idsn;
/* this is the first subflow, id is always 0 */
- new_ctx->local_id_valid = 1;
+ subflow_set_local_id(new_ctx, 0);
} else if (subflow_req->mp_join) {
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->mp_join = 1;
new_ctx->fully_established = 1;
new_ctx->remote_key_valid = 1;
new_ctx->backup = subflow_req->backup;
- new_ctx->remote_id = subflow_req->remote_id;
+ WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id);
new_ctx->token = subflow_req->token;
new_ctx->thmac = subflow_req->thmac;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index bcaad9c009fe..3184cc6be4c9 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1154,6 +1154,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info,
return ret;
cleanup:
+ set->variant->cancel_gc(set);
set->variant->destroy(set);
put_out:
module_put(set->type->me);
@@ -2378,6 +2379,7 @@ ip_set_net_exit(struct net *net)
set = ip_set(inst, i);
if (set) {
ip_set(inst, i) = NULL;
+ set->variant->cancel_gc(set);
ip_set_destroy_set(set);
}
}
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 1136510521a8..cf3ce72c3de6 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -432,7 +432,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
u32 i;
for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = __ipset_dereference(hbucket(t, i));
+ n = (__force struct hbucket *)hbucket(t, i);
if (!n)
continue;
if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
@@ -452,7 +452,7 @@ mtype_destroy(struct ip_set *set)
struct htype *h = set->data;
struct list_head *l, *lt;
- mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true);
+ mtype_ahash_destroy(set, (__force struct htable *)h->table, true);
list_for_each_safe(l, lt, &h->ad) {
list_del(l);
kfree(l);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 2e5f3864d353..5b876fa7f9af 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2756,6 +2756,7 @@ static const struct nf_ct_hook nf_conntrack_hook = {
.get_tuple_skb = nf_conntrack_get_tuple_skb,
.attach = nf_conntrack_attach,
.set_closing = nf_conntrack_set_closing,
+ .confirm = __nf_conntrack_confirm,
};
void nf_conntrack_init_end(void)
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index e697a824b001..540d97715bd2 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -533,6 +533,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
/* Get fields bitmap */
if (nf_h323_error_boundary(bs, 0, f->sz))
return H323_ERROR_BOUND;
+ if (f->sz > 32)
+ return H323_ERROR_RANGE;
bmp = get_bitmap(bs, f->sz);
if (base)
*(unsigned int *)base = bmp;
@@ -589,6 +591,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
bmp2_len = get_bits(bs, 7) + 1;
if (nf_h323_error_boundary(bs, 0, bmp2_len))
return H323_ERROR_BOUND;
+ if (bmp2_len > 32)
+ return H323_ERROR_RANGE;
bmp2 = get_bitmap(bs, bmp2_len);
bmp |= bmp2 >> f->sz;
if (base)
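
The H.323 decoder now rejects field sizes wider than the 32-bit bitmap they are read into, before extracting any bits. A self-contained sketch of that bounds check; the bit packing here is simplified and is not the kernel's get_bitmap():

#include <stdint.h>
#include <stdio.h>

#define ERROR_RANGE (-2)

/* Simplified MSB-first bit extraction with the added width check. */
static int get_bitmap_checked(const uint8_t *buf, unsigned int nbits, uint32_t *out)
{
	uint32_t bmp = 0;

	if (nbits > 32)
		return ERROR_RANGE;	/* would not fit in the 32-bit accumulator */

	for (unsigned int i = 0; i < nbits; i++)
		bmp |= (uint32_t)((buf[i / 8] >> (7 - i % 8)) & 1) << (31 - i);

	*out = bmp;
	return 0;
}

int main(void)
{
	uint8_t data[8] = { 0xA5, 0x5A };
	uint32_t bmp;

	if (get_bitmap_checked(data, 12, &bmp) == 0)
		printf("%#x\n", bmp);
	if (get_bitmap_checked(data, 64, &bmp) == ERROR_RANGE)
		printf("rejected oversized field\n");
	return 0;
}
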
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 0c22a02c2035..3b846cbdc050 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -876,6 +876,7 @@ struct ctnetlink_filter_u32 {
struct ctnetlink_filter {
u8 family;
+ bool zone_filter;
u_int32_t orig_flags;
u_int32_t reply_flags;
@@ -992,9 +993,12 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
if (err)
goto err_filter;
- err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone);
- if (err < 0)
- goto err_filter;
+ if (cda[CTA_ZONE]) {
+ err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone);
+ if (err < 0)
+ goto err_filter;
+ filter->zone_filter = true;
+ }
if (!cda[CTA_FILTER])
return filter;
@@ -1148,7 +1152,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
if (filter->family && nf_ct_l3num(ct) != filter->family)
goto ignore_entry;
- if (filter->zone.id != NF_CT_DEFAULT_ZONE_ID &&
+ if (filter->zone_filter &&
!nf_ct_zone_equal_any(ct, &filter->zone))
goto ignore_entry;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 920a5a29ae1d..a0571339239c 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -87,12 +87,22 @@ static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
return 0;
}
+static struct dst_entry *nft_route_dst_fetch(struct nf_flow_route *route,
+ enum flow_offload_tuple_dir dir)
+{
+ struct dst_entry *dst = route->tuple[dir].dst;
+
+ route->tuple[dir].dst = NULL;
+
+ return dst;
+}
+
static int flow_offload_fill_route(struct flow_offload *flow,
- const struct nf_flow_route *route,
+ struct nf_flow_route *route,
enum flow_offload_tuple_dir dir)
{
struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
- struct dst_entry *dst = route->tuple[dir].dst;
+ struct dst_entry *dst = nft_route_dst_fetch(route, dir);
int i, j = 0;
switch (flow_tuple->l3proto) {
@@ -122,6 +132,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
ETH_ALEN);
flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
+ dst_release(dst);
break;
case FLOW_OFFLOAD_XMIT_XFRM:
case FLOW_OFFLOAD_XMIT_NEIGH:
@@ -146,7 +157,7 @@ static void nft_flow_dst_release(struct flow_offload *flow,
}
void flow_offload_route_init(struct flow_offload *flow,
- const struct nf_flow_route *route)
+ struct nf_flow_route *route)
{
flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
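nft_route_dst_fetch() hands the dst reference over to the caller and clears the route's pointer, so each reference is dropped exactly once: by dst_release() in the direct-xmit case above, or later when the flow is torn down. A rough sketch of that take-then-release pattern with illustrative names:

#include <net/dst.h>

/* Illustrative holder of a cached route reference. */
struct demo_route {
        struct dst_entry *dst;
};

/* Move the reference out; the caller now owns it. */
static struct dst_entry *demo_dst_fetch(struct demo_route *route)
{
        struct dst_entry *dst = route->dst;

        route->dst = NULL;
        return dst;
}

static void demo_fill_direct(struct demo_route *route)
{
        struct dst_entry *dst = demo_dst_fetch(route);

        /* ... copy interface indexes etc. out of dst ... */
        dst_release(dst);       /* direct xmit keeps no route reference */
}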
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index c3d7ecbc777c..016c816d91cb 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -551,8 +551,11 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
find_free_id:
if (range->flags & NF_NAT_RANGE_PROTO_OFFSET)
off = (ntohs(*keyptr) - ntohs(range->base_proto.all));
- else
+ else if ((range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL) ||
+ maniptype != NF_NAT_MANIP_DST)
off = get_random_u16();
+ else
+ off = 0;
attempts = range_size;
if (attempts > NF_NAT_MAX_ATTEMPTS)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index fc016befb46f..1683dc196b59 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -684,15 +684,16 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
return err;
}
-static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
- struct nft_flowtable *flowtable)
+static struct nft_trans *
+nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_flowtable *flowtable)
{
struct nft_trans *trans;
trans = nft_trans_alloc(ctx, msg_type,
sizeof(struct nft_trans_flowtable));
if (trans == NULL)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
if (msg_type == NFT_MSG_NEWFLOWTABLE)
nft_activate_next(ctx->net, flowtable);
@@ -701,22 +702,22 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
nft_trans_flowtable(trans) = flowtable;
nft_trans_commit_list_add_tail(ctx->net, trans);
- return 0;
+ return trans;
}
static int nft_delflowtable(struct nft_ctx *ctx,
struct nft_flowtable *flowtable)
{
- int err;
+ struct nft_trans *trans;
- err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable);
- if (err < 0)
- return err;
+ trans = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
nft_deactivate_next(ctx->net, flowtable);
nft_use_dec(&ctx->table->use);
- return err;
+ return 0;
}
static void __nft_reg_track_clobber(struct nft_regs_track *track, u8 dreg)
@@ -1251,6 +1252,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
return 0;
err_register_hooks:
+ ctx->table->flags |= NFT_TABLE_F_DORMANT;
nft_trans_destroy(trans);
return ret;
}
@@ -2080,7 +2082,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
struct nft_hook *hook;
int err;
- hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT);
+ hook = kzalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT);
if (!hook) {
err = -ENOMEM;
goto err_hook_alloc;
@@ -2503,19 +2505,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
RCU_INIT_POINTER(chain->blob_gen_0, blob);
RCU_INIT_POINTER(chain->blob_gen_1, blob);
- err = nf_tables_register_hook(net, table, chain);
- if (err < 0)
- goto err_destroy_chain;
-
if (!nft_use_inc(&table->use)) {
err = -EMFILE;
- goto err_use;
+ goto err_destroy_chain;
}
trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
- goto err_unregister_hook;
+ goto err_trans;
}
nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET;
@@ -2523,17 +2521,22 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
nft_trans_chain_policy(trans) = policy;
err = nft_chain_add(table, chain);
- if (err < 0) {
- nft_trans_destroy(trans);
- goto err_unregister_hook;
- }
+ if (err < 0)
+ goto err_chain_add;
+
+ /* This must be LAST to ensure no packets are walking over this chain. */
+ err = nf_tables_register_hook(net, table, chain);
+ if (err < 0)
+ goto err_register_hook;
return 0;
-err_unregister_hook:
+err_register_hook:
+ nft_chain_del(chain);
+err_chain_add:
+ nft_trans_destroy(trans);
+err_trans:
nft_use_dec_restore(&table->use);
-err_use:
- nf_tables_unregister_hook(net, table, chain);
err_destroy_chain:
nf_tables_chain_destroy(ctx);
@@ -4998,6 +5001,12 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) ==
(NFT_SET_EVAL | NFT_SET_OBJECT))
return -EOPNOTSUPP;
+ if ((flags & (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT | NFT_SET_EVAL)) ==
+ (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT))
+ return -EOPNOTSUPP;
+ if ((flags & (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) ==
+ (NFT_SET_CONSTANT | NFT_SET_TIMEOUT))
+ return -EOPNOTSUPP;
}
desc.dtype = 0;
@@ -5421,6 +5430,7 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) {
list_del_rcu(&set->list);
+ set->dead = 1;
if (event)
nf_tables_set_notify(ctx, set, NFT_MSG_DELSET,
GFP_KERNEL);
@@ -8455,9 +8465,9 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
u8 family = info->nfmsg->nfgen_family;
const struct nf_flowtable_type *type;
struct nft_flowtable *flowtable;
- struct nft_hook *hook, *next;
struct net *net = info->net;
struct nft_table *table;
+ struct nft_trans *trans;
struct nft_ctx ctx;
int err;
@@ -8537,34 +8547,34 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
err = nft_flowtable_parse_hook(&ctx, nla, &flowtable_hook, flowtable,
extack, true);
if (err < 0)
- goto err4;
+ goto err_flowtable_parse_hooks;
list_splice(&flowtable_hook.list, &flowtable->hook_list);
flowtable->data.priority = flowtable_hook.priority;
flowtable->hooknum = flowtable_hook.num;
+ trans = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ goto err_flowtable_trans;
+ }
+
+ /* This must be LAST to ensure no packets are walking over this flowtable. */
err = nft_register_flowtable_net_hooks(ctx.net, table,
&flowtable->hook_list,
flowtable);
- if (err < 0) {
- nft_hooks_destroy(&flowtable->hook_list);
- goto err4;
- }
-
- err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
if (err < 0)
- goto err5;
+ goto err_flowtable_hooks;
list_add_tail_rcu(&flowtable->list, &table->flowtables);
return 0;
-err5:
- list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
- nft_unregister_flowtable_hook(net, flowtable, hook);
- list_del_rcu(&hook->list);
- kfree_rcu(hook, rcu);
- }
-err4:
+
+err_flowtable_hooks:
+ nft_trans_destroy(trans);
+err_flowtable_trans:
+ nft_hooks_destroy(&flowtable->hook_list);
+err_flowtable_parse_hooks:
flowtable->data.type->free(&flowtable->data);
err3:
module_put(type->owner);
@@ -9827,6 +9837,7 @@ dead_elem:
struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
{
struct nft_set_elem_catchall *catchall, *next;
+ u64 tstamp = nft_net_tstamp(gc->net);
const struct nft_set *set = gc->set;
struct nft_elem_priv *elem_priv;
struct nft_set_ext *ext;
@@ -9836,7 +9847,7 @@ struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_set_elem_expired(ext))
+ if (!__nft_set_elem_expired(ext, tstamp))
continue;
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
@@ -10622,6 +10633,7 @@ static bool nf_tables_valid_genid(struct net *net, u32 genid)
bool genid_ok;
mutex_lock(&nft_net->commit_mutex);
+ nft_net->tstamp = get_jiffies_64();
genid_ok = genid == 0 || nft_net->base_seq == genid;
if (!genid_ok)
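Taking nft_net->tstamp once under the commit mutex and passing it into every __nft_set_elem_expired() call gives the whole transaction a single, consistent notion of "now"; sketched generically (names are illustrative):

#include <linux/jiffies.h>
#include <linux/types.h>

/* Expiry test against a timestamp captured once per transaction. */
static bool demo_elem_expired(u64 expires_at, u64 tstamp)
{
        return time_after64(tstamp, expires_at);
}

static int demo_walk_transaction(const u64 *expires_at, int n)
{
        u64 tstamp = get_jiffies_64();  /* captured once, under the lock */
        int i, expired = 0;

        for (i = 0; i < n; i++)
                if (demo_elem_expired(expires_at[i], tstamp))
                        expired++;      /* would be queued for sync GC */

        return expired;
}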
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 171d1f52d3dd..5cf38fc0a366 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -232,18 +232,25 @@ static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
if (verdict == NF_ACCEPT ||
verdict == NF_REPEAT ||
verdict == NF_STOP) {
+ unsigned int ct_verdict = verdict;
+
rcu_read_lock();
ct_hook = rcu_dereference(nf_ct_hook);
if (ct_hook)
- verdict = ct_hook->update(entry->state.net, entry->skb);
+ ct_verdict = ct_hook->update(entry->state.net, entry->skb);
rcu_read_unlock();
- switch (verdict & NF_VERDICT_MASK) {
+ switch (ct_verdict & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ /* follow userspace verdict, could be REPEAT */
+ break;
case NF_STOLEN:
nf_queue_entry_free(entry);
return;
+ default:
+ verdict = ct_verdict & NF_VERDICT_MASK;
+ break;
}
-
}
nf_reinject(entry, verdict);
}
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index f0eeda97bfcd..d3d11dede545 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -135,7 +135,7 @@ static void nft_target_eval_bridge(const struct nft_expr *expr,
static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = {
[NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING },
- [NFTA_TARGET_REV] = { .type = NLA_U32 },
+ [NFTA_TARGET_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_TARGET_INFO] = { .type = NLA_BINARY },
};
@@ -200,6 +200,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1]
static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
{
struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1];
+ u32 l4proto;
u32 flags;
int err;
@@ -212,12 +213,18 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
return -EINVAL;
flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS]));
- if (flags & ~NFT_RULE_COMPAT_F_MASK)
+ if (flags & NFT_RULE_COMPAT_F_UNUSED ||
+ flags & ~NFT_RULE_COMPAT_F_MASK)
return -EINVAL;
if (flags & NFT_RULE_COMPAT_F_INV)
*inv = true;
- *proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
+ l4proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
+ if (l4proto > U16_MAX)
+ return -EINVAL;
+
+ *proto = l4proto;
+
return 0;
}
@@ -352,10 +359,20 @@ static int nft_target_validate(const struct nft_ctx *ctx,
if (ctx->family != NFPROTO_IPV4 &&
ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET &&
ctx->family != NFPROTO_BRIDGE &&
ctx->family != NFPROTO_ARP)
return -EOPNOTSUPP;
+ ret = nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING));
+ if (ret)
+ return ret;
+
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
@@ -419,7 +436,7 @@ static void nft_match_eval(const struct nft_expr *expr,
static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
[NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING },
- [NFTA_MATCH_REV] = { .type = NLA_U32 },
+ [NFTA_MATCH_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_MATCH_INFO] = { .type = NLA_BINARY },
};
@@ -603,10 +620,20 @@ static int nft_match_validate(const struct nft_ctx *ctx,
if (ctx->family != NFPROTO_IPV4 &&
ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET &&
ctx->family != NFPROTO_BRIDGE &&
ctx->family != NFPROTO_ARP)
return -EOPNOTSUPP;
+ ret = nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING));
+ if (ret)
+ return ret;
+
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
@@ -724,7 +751,7 @@ out_put:
static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = {
[NFTA_COMPAT_NAME] = { .type = NLA_NUL_STRING,
.len = NFT_COMPAT_NAME_MAX-1 },
- [NFTA_COMPAT_REV] = { .type = NLA_U32 },
+ [NFTA_COMPAT_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_COMPAT_TYPE] = { .type = NLA_U32 },
};
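Moving the bound into the policy with NLA_POLICY_MAX(NLA_BE32, 255) lets the netlink core reject oversized revision numbers before any handler runs; a minimal illustrative policy using the same construct:

#include <net/netlink.h>

/* Illustrative attribute set; only the policy construct matters here. */
enum {
        DEMO_ATTR_UNSPEC,
        DEMO_ATTR_NAME,
        DEMO_ATTR_REV,
        __DEMO_ATTR_MAX
};
#define DEMO_ATTR_MAX   (__DEMO_ATTR_MAX - 1)

static const struct nla_policy demo_policy[DEMO_ATTR_MAX + 1] = {
        [DEMO_ATTR_NAME]        = { .type = NLA_NUL_STRING },
        [DEMO_ATTR_REV]         = NLA_POLICY_MAX(NLA_BE32, 255),
};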
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index aac98a3c966e..255640013ab8 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -476,6 +476,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
break;
#endif
case NFT_CT_ID:
+ if (tb[NFTA_CT_DIRECTION])
+ return -EINVAL;
+
len = sizeof(u32);
break;
default:
@@ -1253,14 +1256,13 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx,
switch (priv->l3num) {
case NFPROTO_IPV4:
case NFPROTO_IPV6:
- if (priv->l3num != ctx->family)
- return -EINVAL;
+ if (priv->l3num == ctx->family || ctx->family == NFPROTO_INET)
+ break;
- fallthrough;
- case NFPROTO_INET:
- break;
+ return -EINVAL;
+ case NFPROTO_INET: /* tuple.src.l3num supports NFPROTO_IPV4/6 only */
default:
- return -EOPNOTSUPP;
+ return -EAFNOSUPPORT;
}
priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]);
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 397351fa4d5f..ab9576098701 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -361,6 +361,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
}
+ __set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags);
ret = flow_offload_add(flowtable, flow);
if (ret < 0)
goto err_flow_add;
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 6c2061bfdae6..6968a3b34236 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -36,6 +36,7 @@ struct nft_rhash_cmp_arg {
const struct nft_set *set;
const u32 *key;
u8 genmask;
+ u64 tstamp;
};
static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed)
@@ -62,7 +63,7 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
return 1;
if (nft_set_elem_is_dead(&he->ext))
return 1;
- if (nft_set_elem_expired(&he->ext))
+ if (__nft_set_elem_expired(&he->ext, x->tstamp))
return 1;
if (!nft_set_elem_active(&he->ext, x->genmask))
return 1;
@@ -87,6 +88,7 @@ bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_cur(net),
.set = set,
.key = key,
+ .tstamp = get_jiffies_64(),
};
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
@@ -106,6 +108,7 @@ nft_rhash_get(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_cur(net),
.set = set,
.key = elem->key.val.data,
+ .tstamp = get_jiffies_64(),
};
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
@@ -131,6 +134,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
.genmask = NFT_GENMASK_ANY,
.set = set,
.key = key,
+ .tstamp = get_jiffies_64(),
};
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
@@ -175,6 +179,7 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_next(net),
.set = set,
.key = elem->key.val.data,
+ .tstamp = nft_net_tstamp(net),
};
struct nft_rhash_elem *prev;
@@ -216,6 +221,7 @@ nft_rhash_deactivate(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_next(net),
.set = set,
.key = elem->key.val.data,
+ .tstamp = nft_net_tstamp(net),
};
rcu_read_lock();
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index efd523496be4..aa1d9e93a9a0 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -342,9 +342,6 @@
#include "nft_set_pipapo_avx2.h"
#include "nft_set_pipapo.h"
-/* Current working bitmap index, toggled between field matches */
-static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index);
-
/**
* pipapo_refill() - For each set bit, set bits from selected mapping table item
* @map: Bitmap to be scanned for set bits
@@ -412,6 +409,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_scratch *scratch;
unsigned long *res_map, *fill_map;
u8 genmask = nft_genmask_cur(net);
const u8 *rp = (const u8 *)key;
@@ -422,15 +420,17 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
local_bh_disable();
- map_index = raw_cpu_read(nft_pipapo_scratch_index);
-
m = rcu_dereference(priv->match);
if (unlikely(!m || !*raw_cpu_ptr(m->scratch)))
goto out;
- res_map = *raw_cpu_ptr(m->scratch) + (map_index ? m->bsize_max : 0);
- fill_map = *raw_cpu_ptr(m->scratch) + (map_index ? 0 : m->bsize_max);
+ scratch = *raw_cpu_ptr(m->scratch);
+
+ map_index = scratch->map_index;
+
+ res_map = scratch->map + (map_index ? m->bsize_max : 0);
+ fill_map = scratch->map + (map_index ? 0 : m->bsize_max);
memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
@@ -460,7 +460,7 @@ next_match:
b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt,
last);
if (b < 0) {
- raw_cpu_write(nft_pipapo_scratch_index, map_index);
+ scratch->map_index = map_index;
local_bh_enable();
return false;
@@ -477,7 +477,7 @@ next_match:
* current inactive bitmap is clean and can be reused as
* *next* bitmap (not initial) for the next packet.
*/
- raw_cpu_write(nft_pipapo_scratch_index, map_index);
+ scratch->map_index = map_index;
local_bh_enable();
return true;
@@ -504,6 +504,7 @@ out:
* @set: nftables API set representation
* @data: Key data to be matched against existing elements
* @genmask: If set, check that element is active in given genmask
+ * @tstamp: timestamp to check for expired elements
*
* This is essentially the same as the lookup function, except that it matches
* key data against the uncommitted copy and doesn't use preallocated maps for
@@ -513,7 +514,8 @@ out:
*/
static struct nft_pipapo_elem *pipapo_get(const struct net *net,
const struct nft_set *set,
- const u8 *data, u8 genmask)
+ const u8 *data, u8 genmask,
+ u64 tstamp)
{
struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT);
struct nft_pipapo *priv = nft_set_priv(set);
@@ -566,7 +568,7 @@ next_match:
goto out;
if (last) {
- if (nft_set_elem_expired(&f->mt[b].e->ext))
+ if (__nft_set_elem_expired(&f->mt[b].e->ext, tstamp))
goto next_match;
if ((genmask &&
!nft_set_elem_active(&f->mt[b].e->ext, genmask)))
@@ -603,10 +605,10 @@ static struct nft_elem_priv *
nft_pipapo_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
- static struct nft_pipapo_elem *e;
+ struct nft_pipapo_elem *e;
e = pipapo_get(net, set, (const u8 *)elem->key.val.data,
- nft_genmask_cur(net));
+ nft_genmask_cur(net), get_jiffies_64());
if (IS_ERR(e))
return ERR_CAST(e);
@@ -1109,6 +1111,25 @@ static void pipapo_map(struct nft_pipapo_match *m,
}
/**
+ * pipapo_free_scratch() - Free per-CPU map at original (not aligned) address
+ * @m: Matching data
+ * @cpu: CPU number
+ */
+static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu)
+{
+ struct nft_pipapo_scratch *s;
+ void *mem;
+
+ s = *per_cpu_ptr(m->scratch, cpu);
+ if (!s)
+ return;
+
+ mem = s;
+ mem -= s->align_off;
+ kfree(mem);
+}
+
+/**
* pipapo_realloc_scratch() - Reallocate scratch maps for partial match results
* @clone: Copy of matching data with pending insertions and deletions
* @bsize_max: Maximum bucket size, scratch maps cover two buckets
@@ -1121,12 +1142,13 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
int i;
for_each_possible_cpu(i) {
- unsigned long *scratch;
+ struct nft_pipapo_scratch *scratch;
#ifdef NFT_PIPAPO_ALIGN
- unsigned long *scratch_aligned;
+ void *scratch_aligned;
+ u32 align_off;
#endif
-
- scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 +
+ scratch = kzalloc_node(struct_size(scratch, map,
+ bsize_max * 2) +
NFT_PIPAPO_ALIGN_HEADROOM,
GFP_KERNEL, cpu_to_node(i));
if (!scratch) {
@@ -1140,14 +1162,25 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
return -ENOMEM;
}
- kfree(*per_cpu_ptr(clone->scratch, i));
-
- *per_cpu_ptr(clone->scratch, i) = scratch;
+ pipapo_free_scratch(clone, i);
#ifdef NFT_PIPAPO_ALIGN
- scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch);
- *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned;
+ /* Align &scratch->map (not the struct itself): the extra
+ * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node()
+ * above guarantee we can waste up to those bytes in order
+ * to align the map field regardless of its offset within
+ * the struct.
+ */
+ BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM);
+
+ scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map);
+ scratch_aligned -= offsetof(struct nft_pipapo_scratch, map);
+ align_off = scratch_aligned - (void *)scratch;
+
+ scratch = scratch_aligned;
+ scratch->align_off = align_off;
#endif
+ *per_cpu_ptr(clone->scratch, i) = scratch;
}
return 0;
@@ -1173,6 +1206,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
struct nft_pipapo_match *m = priv->clone;
u8 genmask = nft_genmask_next(net);
struct nft_pipapo_elem *e, *dup;
+ u64 tstamp = nft_net_tstamp(net);
struct nft_pipapo_field *f;
const u8 *start_p, *end_p;
int i, bsize_max, err = 0;
@@ -1182,7 +1216,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
else
end = start;
- dup = pipapo_get(net, set, start, genmask);
+ dup = pipapo_get(net, set, start, genmask, tstamp);
if (!IS_ERR(dup)) {
/* Check if we already have the same exact entry */
const struct nft_data *dup_key, *dup_end;
@@ -1204,7 +1238,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
if (PTR_ERR(dup) == -ENOENT) {
/* Look for partially overlapping entries */
- dup = pipapo_get(net, set, end, nft_genmask_next(net));
+ dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp);
}
if (PTR_ERR(dup) != -ENOENT) {
@@ -1301,11 +1335,6 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
if (!new->scratch)
goto out_scratch;
-#ifdef NFT_PIPAPO_ALIGN
- new->scratch_aligned = alloc_percpu(*new->scratch_aligned);
- if (!new->scratch_aligned)
- goto out_scratch;
-#endif
for_each_possible_cpu(i)
*per_cpu_ptr(new->scratch, i) = NULL;
@@ -1357,10 +1386,7 @@ out_lt:
}
out_scratch_realloc:
for_each_possible_cpu(i)
- kfree(*per_cpu_ptr(new->scratch, i));
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(new->scratch_aligned);
-#endif
+ pipapo_free_scratch(new, i);
out_scratch:
free_percpu(new->scratch);
kfree(new);
@@ -1560,6 +1586,7 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct net *net = read_pnet(&set->net);
+ u64 tstamp = nft_net_tstamp(net);
int rules_f0, first_rule = 0;
struct nft_pipapo_elem *e;
struct nft_trans_gc *gc;
@@ -1594,7 +1621,7 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
/* synchronous gc never fails, there is no need to set on
* NFT_SET_ELEM_DEAD_BIT.
*/
- if (nft_set_elem_expired(&e->ext)) {
+ if (__nft_set_elem_expired(&e->ext, tstamp)) {
priv->dirty = true;
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
@@ -1640,13 +1667,9 @@ static void pipapo_free_match(struct nft_pipapo_match *m)
int i;
for_each_possible_cpu(i)
- kfree(*per_cpu_ptr(m->scratch, i));
+ pipapo_free_scratch(m, i);
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(m->scratch_aligned);
-#endif
free_percpu(m->scratch);
-
pipapo_free_fields(m);
kfree(m);
@@ -1769,7 +1792,7 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set,
{
struct nft_pipapo_elem *e;
- e = pipapo_get(net, set, data, nft_genmask_next(net));
+ e = pipapo_get(net, set, data, nft_genmask_next(net), nft_net_tstamp(net));
if (IS_ERR(e))
return NULL;
@@ -2132,7 +2155,7 @@ static int nft_pipapo_init(const struct nft_set *set,
m->field_count = field_count;
m->bsize_max = 0;
- m->scratch = alloc_percpu(unsigned long *);
+ m->scratch = alloc_percpu(struct nft_pipapo_scratch *);
if (!m->scratch) {
err = -ENOMEM;
goto out_scratch;
@@ -2140,16 +2163,6 @@ static int nft_pipapo_init(const struct nft_set *set,
for_each_possible_cpu(i)
*per_cpu_ptr(m->scratch, i) = NULL;
-#ifdef NFT_PIPAPO_ALIGN
- m->scratch_aligned = alloc_percpu(unsigned long *);
- if (!m->scratch_aligned) {
- err = -ENOMEM;
- goto out_free;
- }
- for_each_possible_cpu(i)
- *per_cpu_ptr(m->scratch_aligned, i) = NULL;
-#endif
-
rcu_head_init(&m->rcu);
nft_pipapo_for_each_field(f, i, m) {
@@ -2180,9 +2193,6 @@ static int nft_pipapo_init(const struct nft_set *set,
return 0;
out_free:
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(m->scratch_aligned);
-#endif
free_percpu(m->scratch);
out_scratch:
kfree(m);
@@ -2236,11 +2246,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
nft_set_pipapo_match_destroy(ctx, set, m);
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(m->scratch_aligned);
-#endif
for_each_possible_cpu(cpu)
- kfree(*per_cpu_ptr(m->scratch, cpu));
+ pipapo_free_scratch(m, cpu);
free_percpu(m->scratch);
pipapo_free_fields(m);
kfree(m);
@@ -2253,11 +2260,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
if (priv->dirty)
nft_set_pipapo_match_destroy(ctx, set, m);
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(priv->clone->scratch_aligned);
-#endif
for_each_possible_cpu(cpu)
- kfree(*per_cpu_ptr(priv->clone->scratch, cpu));
+ pipapo_free_scratch(priv->clone, cpu);
free_percpu(priv->clone->scratch);
pipapo_free_fields(priv->clone);
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index 1040223da5fa..3842c7341a9f 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -131,20 +131,28 @@ struct nft_pipapo_field {
};
/**
+ * struct nft_pipapo_scratch - percpu data used for lookup and matching
+ * @map_index: Current working bitmap index, toggled between field matches
+ * @align_off: Offset to get the originally allocated address
+ * @map: store partial matching results during lookup
+ */
+struct nft_pipapo_scratch {
+ u8 map_index;
+ u32 align_off;
+ unsigned long map[];
+};
+
+/**
* struct nft_pipapo_match - Data used for lookup and matching
- * @field_count Amount of fields in set
+ * @field_count: Amount of fields in set
* @scratch: Preallocated per-CPU maps for partial matching results
- * @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes
* @bsize_max: Maximum lookup table bucket size of all fields, in longs
- * @rcu Matching data is swapped on commits
+ * @rcu: Matching data is swapped on commits
* @f: Fields, with lookup and mapping tables
*/
struct nft_pipapo_match {
int field_count;
-#ifdef NFT_PIPAPO_ALIGN
- unsigned long * __percpu *scratch_aligned;
-#endif
- unsigned long * __percpu *scratch;
+ struct nft_pipapo_scratch * __percpu *scratch;
size_t bsize_max;
struct rcu_head rcu;
struct nft_pipapo_field f[] __counted_by(field_count);
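The new align_off field exists so the free path can undo the alignment shift and hand kfree() the pointer the allocator originally returned; the same over-allocate, align, record-offset trick in isolation (alignment value and names are illustrative):

#include <linux/align.h>
#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/stddef.h>

#define DEMO_ALIGN      32
#define DEMO_HEADROOM   (DEMO_ALIGN - 1)

struct demo_scratch {
        u32 align_off;          /* distance back to the kzalloc'd address */
        unsigned long map[];
};

static struct demo_scratch *demo_scratch_alloc(size_t map_longs)
{
        struct demo_scratch *s;
        void *mem, *aligned;

        mem = kzalloc(struct_size(s, map, map_longs) + DEMO_HEADROOM,
                      GFP_KERNEL);
        if (!mem)
                return NULL;

        /* Shift the whole struct so that map[] lands on a DEMO_ALIGN boundary. */
        s = mem;
        aligned = PTR_ALIGN((void *)s->map, DEMO_ALIGN);
        s = aligned - offsetof(struct demo_scratch, map);
        s->align_off = (void *)s - mem;
        return s;
}

static void demo_scratch_free(struct demo_scratch *s)
{
        if (s)
                kfree((void *)s - s->align_off);
}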
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index 52e0d026d30a..a3a8ddca9918 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -57,7 +57,7 @@
/* Jump to label if @reg is zero */
#define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label) \
- asm_volatile_goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \
+ asm goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \
"je %l[" #label "]" : : : : label)
/* Store 256 bits from YMM register into memory. Contrary to bucket load
@@ -71,9 +71,6 @@
#define NFT_PIPAPO_AVX2_ZERO(reg) \
asm volatile("vpxor %ymm" #reg ", %ymm" #reg ", %ymm" #reg)
-/* Current working bitmap index, toggled between field matches */
-static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index);
-
/**
* nft_pipapo_avx2_prepare() - Prepare before main algorithm body
*
@@ -1120,11 +1117,12 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
struct nft_pipapo *priv = nft_set_priv(set);
- unsigned long *res, *fill, *scratch;
+ struct nft_pipapo_scratch *scratch;
u8 genmask = nft_genmask_cur(net);
const u8 *rp = (const u8 *)key;
struct nft_pipapo_match *m;
struct nft_pipapo_field *f;
+ unsigned long *res, *fill;
bool map_index;
int i, ret = 0;
@@ -1141,15 +1139,16 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
*/
kernel_fpu_begin_mask(0);
- scratch = *raw_cpu_ptr(m->scratch_aligned);
+ scratch = *raw_cpu_ptr(m->scratch);
if (unlikely(!scratch)) {
kernel_fpu_end();
return false;
}
- map_index = raw_cpu_read(nft_pipapo_avx2_scratch_index);
- res = scratch + (map_index ? m->bsize_max : 0);
- fill = scratch + (map_index ? 0 : m->bsize_max);
+ map_index = scratch->map_index;
+
+ res = scratch->map + (map_index ? m->bsize_max : 0);
+ fill = scratch->map + (map_index ? 0 : m->bsize_max);
/* Starting map doesn't need to be set for this implementation */
@@ -1221,7 +1220,7 @@ next_match:
out:
if (i % 2)
- raw_cpu_write(nft_pipapo_avx2_scratch_index, !map_index);
+ scratch->map_index = !map_index;
kernel_fpu_end();
return ret >= 0;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index baa3fea4fe65..9944fe479e53 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -234,7 +234,7 @@ static void nft_rbtree_gc_elem_remove(struct net *net, struct nft_set *set,
static const struct nft_rbtree_elem *
nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
- struct nft_rbtree_elem *rbe, u8 genmask)
+ struct nft_rbtree_elem *rbe)
{
struct nft_set *set = (struct nft_set *)__set;
struct rb_node *prev = rb_prev(&rbe->node);
@@ -253,7 +253,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
while (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
if (nft_rbtree_interval_end(rbe_prev) &&
- nft_set_elem_active(&rbe_prev->ext, genmask))
+ nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY))
break;
prev = rb_prev(prev);
@@ -313,6 +313,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree *priv = nft_set_priv(set);
u8 cur_genmask = nft_genmask_cur(net);
u8 genmask = nft_genmask_next(net);
+ u64 tstamp = nft_net_tstamp(net);
int d;
/* Descend the tree to search for an existing element greater than the
@@ -360,11 +361,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
/* perform garbage collection to avoid bogus overlap reports
* but skip new elements in this transaction.
*/
- if (nft_set_elem_expired(&rbe->ext) &&
+ if (__nft_set_elem_expired(&rbe->ext, tstamp) &&
nft_set_elem_active(&rbe->ext, cur_genmask)) {
const struct nft_rbtree_elem *removed_end;
- removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask);
+ removed_end = nft_rbtree_gc_elem(set, priv, rbe);
if (IS_ERR(removed_end))
return PTR_ERR(removed_end);
@@ -551,6 +552,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
const struct nft_rbtree *priv = nft_set_priv(set);
const struct rb_node *parent = priv->root.rb_node;
u8 genmask = nft_genmask_next(net);
+ u64 tstamp = nft_net_tstamp(net);
int d;
while (parent != NULL) {
@@ -571,7 +573,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
nft_rbtree_interval_end(this)) {
parent = parent->rb_right;
continue;
- } else if (nft_set_elem_expired(&rbe->ext)) {
+ } else if (__nft_set_elem_expired(&rbe->ext, tstamp)) {
break;
} else if (!nft_set_elem_active(&rbe->ext, genmask)) {
parent = parent->rb_left;
@@ -624,9 +626,10 @@ static void nft_rbtree_gc(struct nft_set *set)
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe, *rbe_end = NULL;
+ struct net *net = read_pnet(&set->net);
+ u64 tstamp = nft_net_tstamp(net);
struct rb_node *node, *next;
struct nft_trans_gc *gc;
- struct net *net;
set = nft_set_container_of(priv);
net = read_pnet(&set->net);
@@ -648,7 +651,7 @@ static void nft_rbtree_gc(struct nft_set *set)
rbe_end = rbe;
continue;
}
- if (!nft_set_elem_expired(&rbe->ext))
+ if (!__nft_set_elem_expired(&rbe->ext, tstamp))
continue;
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 9c962347cf85..ff315351269f 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -167,7 +167,7 @@ static inline u32 netlink_group_mask(u32 group)
static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
gfp_t gfp_mask)
{
- unsigned int len = skb_end_offset(skb);
+ unsigned int len = skb->len;
struct sk_buff *new;
new = alloc_skb(len, gfp_mask);
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 0eed00184adf..104a80b75477 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -453,16 +453,16 @@ static int nr_create(struct net *net, struct socket *sock, int protocol,
nr_init_timers(sk);
nr->t1 =
- msecs_to_jiffies(sysctl_netrom_transport_timeout);
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_timeout));
nr->t2 =
- msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay);
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_acknowledge_delay));
nr->n2 =
- msecs_to_jiffies(sysctl_netrom_transport_maximum_tries);
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries));
nr->t4 =
- msecs_to_jiffies(sysctl_netrom_transport_busy_delay);
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay));
nr->idle =
- msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout);
- nr->window = sysctl_netrom_transport_requested_window_size;
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_no_activity_timeout));
+ nr->window = READ_ONCE(sysctl_netrom_transport_requested_window_size);
nr->bpqext = 1;
nr->state = NR_STATE_0;
@@ -954,7 +954,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
* G8PZT's Xrouter which is sending packets with command type 7
* as an extension of the protocol.
*/
- if (sysctl_netrom_reset_circuit &&
+ if (READ_ONCE(sysctl_netrom_reset_circuit) &&
(frametype != NR_RESET || flags != 0))
nr_transmit_reset(skb, 1);
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 3aaac4a22b38..2c34389c3ce6 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -81,7 +81,7 @@ static int nr_header(struct sk_buff *skb, struct net_device *dev,
buff[6] |= AX25_SSSID_SPARE;
buff += AX25_ADDR_LEN;
- *buff++ = sysctl_netrom_network_ttl_initialiser;
+ *buff++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
*buff++ = NR_PROTO_IP;
*buff++ = NR_PROTO_IP;
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 2f084b6f69d7..97944db6b5ac 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -97,7 +97,7 @@ static int nr_state1_machine(struct sock *sk, struct sk_buff *skb,
break;
case NR_RESET:
- if (sysctl_netrom_reset_circuit)
+ if (READ_ONCE(sysctl_netrom_reset_circuit))
nr_disconnect(sk, ECONNRESET);
break;
@@ -128,7 +128,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb,
break;
case NR_RESET:
- if (sysctl_netrom_reset_circuit)
+ if (READ_ONCE(sysctl_netrom_reset_circuit))
nr_disconnect(sk, ECONNRESET);
break;
@@ -262,7 +262,7 @@ static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype
break;
case NR_RESET:
- if (sysctl_netrom_reset_circuit)
+ if (READ_ONCE(sysctl_netrom_reset_circuit))
nr_disconnect(sk, ECONNRESET);
break;
diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c
index 44929657f5b7..5e531394a724 100644
--- a/net/netrom/nr_out.c
+++ b/net/netrom/nr_out.c
@@ -204,7 +204,7 @@ void nr_transmit_buffer(struct sock *sk, struct sk_buff *skb)
dptr[6] |= AX25_SSSID_SPARE;
dptr += AX25_ADDR_LEN;
- *dptr++ = sysctl_netrom_network_ttl_initialiser;
+ *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
if (!nr_route_frame(skb, NULL)) {
kfree_skb(skb);
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index baea3cbd76ca..70480869ad1c 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -153,7 +153,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
nr_neigh->digipeat = NULL;
nr_neigh->ax25 = NULL;
nr_neigh->dev = dev;
- nr_neigh->quality = sysctl_netrom_default_path_quality;
+ nr_neigh->quality = READ_ONCE(sysctl_netrom_default_path_quality);
nr_neigh->locked = 0;
nr_neigh->count = 0;
nr_neigh->number = nr_neigh_no++;
@@ -728,7 +728,7 @@ void nr_link_failed(ax25_cb *ax25, int reason)
nr_neigh->ax25 = NULL;
ax25_cb_put(ax25);
- if (++nr_neigh->failed < sysctl_netrom_link_fails_count) {
+ if (++nr_neigh->failed < READ_ONCE(sysctl_netrom_link_fails_count)) {
nr_neigh_put(nr_neigh);
return;
}
@@ -766,7 +766,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25)
if (ax25 != NULL) {
ret = nr_add_node(nr_src, "", &ax25->dest_addr, ax25->digipeat,
ax25->ax25_dev->dev, 0,
- sysctl_netrom_obsolescence_count_initialiser);
+ READ_ONCE(sysctl_netrom_obsolescence_count_initialiser));
if (ret)
return ret;
}
@@ -780,7 +780,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25)
return ret;
}
- if (!sysctl_netrom_routing_control && ax25 != NULL)
+ if (!READ_ONCE(sysctl_netrom_routing_control) && ax25 != NULL)
return 0;
/* Its Time-To-Live has expired */
diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c
index e2d2af924cff..c3bbd5880850 100644
--- a/net/netrom/nr_subr.c
+++ b/net/netrom/nr_subr.c
@@ -182,7 +182,8 @@ void nr_write_internal(struct sock *sk, int frametype)
*dptr++ = nr->my_id;
*dptr++ = frametype;
*dptr++ = nr->window;
- if (nr->bpqext) *dptr++ = sysctl_netrom_network_ttl_initialiser;
+ if (nr->bpqext)
+ *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
break;
case NR_DISCREQ:
@@ -236,7 +237,7 @@ void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags)
dptr[6] |= AX25_SSSID_SPARE;
dptr += AX25_ADDR_LEN;
- *dptr++ = sysctl_netrom_network_ttl_initialiser;
+ *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
if (mine) {
*dptr++ = 0;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 88965e2068ac..ebc5728aab4e 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -48,6 +48,7 @@ struct ovs_len_tbl {
#define OVS_ATTR_NESTED -1
#define OVS_ATTR_VARIABLE -2
+#define OVS_COPY_ACTIONS_MAX_DEPTH 16
static bool actions_may_change_flow(const struct nlattr *actions)
{
@@ -2545,13 +2546,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log);
+ u32 mpls_label_count, bool log,
+ u32 depth);
static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log, bool last)
+ u32 mpls_label_count, bool log, bool last,
+ u32 depth)
{
const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
const struct nlattr *probability, *actions;
@@ -2602,7 +2605,8 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
return err;
err = __ovs_nla_copy_actions(net, actions, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
+ eth_type, vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;
@@ -2617,7 +2621,8 @@ static int validate_and_copy_dec_ttl(struct net *net,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log)
+ u32 mpls_label_count, bool log,
+ u32 depth)
{
const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1];
int start, action_start, err, rem;
@@ -2660,7 +2665,8 @@ static int validate_and_copy_dec_ttl(struct net *net,
return action_start;
err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type,
- vlan_tci, mpls_label_count, log);
+ vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;
@@ -2674,7 +2680,8 @@ static int validate_and_copy_clone(struct net *net,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log, bool last)
+ u32 mpls_label_count, bool log, bool last,
+ u32 depth)
{
int start, err;
u32 exec;
@@ -2694,7 +2701,8 @@ static int validate_and_copy_clone(struct net *net,
return err;
err = __ovs_nla_copy_actions(net, attr, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
+ eth_type, vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;
@@ -3063,7 +3071,7 @@ static int validate_and_copy_check_pkt_len(struct net *net,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
u32 mpls_label_count,
- bool log, bool last)
+ bool log, bool last, u32 depth)
{
const struct nlattr *acts_if_greater, *acts_if_lesser_eq;
struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1];
@@ -3111,7 +3119,8 @@ static int validate_and_copy_check_pkt_len(struct net *net,
return nested_acts_start;
err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
+ eth_type, vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;
@@ -3124,7 +3133,8 @@ static int validate_and_copy_check_pkt_len(struct net *net,
return nested_acts_start;
err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
+ eth_type, vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;
@@ -3152,12 +3162,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log)
+ u32 mpls_label_count, bool log,
+ u32 depth)
{
u8 mac_proto = ovs_key_mac_proto(key);
const struct nlattr *a;
int rem, err;
+ if (depth > OVS_COPY_ACTIONS_MAX_DEPTH)
+ return -EOVERFLOW;
+
nla_for_each_nested(a, attr, rem) {
/* Expected argument lengths, (u32)-1 for variable length. */
static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
@@ -3355,7 +3369,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
err = validate_and_copy_sample(net, a, key, sfa,
eth_type, vlan_tci,
mpls_label_count,
- log, last);
+ log, last, depth);
if (err)
return err;
skip_copy = true;
@@ -3426,7 +3440,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
err = validate_and_copy_clone(net, a, key, sfa,
eth_type, vlan_tci,
mpls_label_count,
- log, last);
+ log, last, depth);
if (err)
return err;
skip_copy = true;
@@ -3440,7 +3454,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
eth_type,
vlan_tci,
mpls_label_count,
- log, last);
+ log, last,
+ depth);
if (err)
return err;
skip_copy = true;
@@ -3450,7 +3465,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
case OVS_ACTION_ATTR_DEC_TTL:
err = validate_and_copy_dec_ttl(net, a, key, sfa,
eth_type, vlan_tci,
- mpls_label_count, log);
+ mpls_label_count, log,
+ depth);
if (err)
return err;
skip_copy = true;
@@ -3495,7 +3511,8 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
(*sfa)->orig_len = nla_len(attr);
err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
- key->eth.vlan.tci, mpls_label_count, log);
+ key->eth.vlan.tci, mpls_label_count, log,
+ 0);
if (err)
ovs_nla_free_flow_actions(*sfa);
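Threading an explicit depth through the recursive action-copy helpers and failing past OVS_COPY_ACTIONS_MAX_DEPTH bounds how deeply nested actions can recurse; the bare pattern on a generic nested-attribute walk (names are illustrative):

#include <linux/errno.h>
#include <net/netlink.h>

#define DEMO_MAX_DEPTH          16
#define DEMO_ATTR_NESTED        1       /* illustrative attribute type */

static int demo_walk_nested(const struct nlattr *attr, u32 depth)
{
        const struct nlattr *a;
        int rem, err;

        if (depth > DEMO_MAX_DEPTH)
                return -EOVERFLOW;

        nla_for_each_nested(a, attr, rem) {
                if (nla_type(a) != DEMO_ATTR_NESTED)
                        continue;

                err = demo_walk_nested(a, depth + 1);
                if (err)
                        return err;
        }
        return 0;
}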
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 3aa50dc7535b..976fe250b509 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -34,10 +34,10 @@ static int pn_ioctl(struct sock *sk, int cmd, int *karg)
switch (cmd) {
case SIOCINQ:
- lock_sock(sk);
+ spin_lock_bh(&sk->sk_receive_queue.lock);
skb = skb_peek(&sk->sk_receive_queue);
*karg = skb ? skb->len : 0;
- release_sock(sk);
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
return 0;
case SIOCPNADDRESOURCE:
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index faba31f2eff2..3dd5f52bc1b5 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -917,6 +917,37 @@ static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len)
return 0;
}
+static unsigned int pep_first_packet_length(struct sock *sk)
+{
+ struct pep_sock *pn = pep_sk(sk);
+ struct sk_buff_head *q;
+ struct sk_buff *skb;
+ unsigned int len = 0;
+ bool found = false;
+
+ if (sock_flag(sk, SOCK_URGINLINE)) {
+ q = &pn->ctrlreq_queue;
+ spin_lock_bh(&q->lock);
+ skb = skb_peek(q);
+ if (skb) {
+ len = skb->len;
+ found = true;
+ }
+ spin_unlock_bh(&q->lock);
+ }
+
+ if (likely(!found)) {
+ q = &sk->sk_receive_queue;
+ spin_lock_bh(&q->lock);
+ skb = skb_peek(q);
+ if (skb)
+ len = skb->len;
+ spin_unlock_bh(&q->lock);
+ }
+
+ return len;
+}
+
static int pep_ioctl(struct sock *sk, int cmd, int *karg)
{
struct pep_sock *pn = pep_sk(sk);
@@ -929,15 +960,7 @@ static int pep_ioctl(struct sock *sk, int cmd, int *karg)
break;
}
- lock_sock(sk);
- if (sock_flag(sk, SOCK_URGINLINE) &&
- !skb_queue_empty(&pn->ctrlreq_queue))
- *karg = skb_peek(&pn->ctrlreq_queue)->len;
- else if (!skb_queue_empty(&sk->sk_receive_queue))
- *karg = skb_peek(&sk->sk_receive_queue)->len;
- else
- *karg = 0;
- release_sock(sk);
+ *karg = pep_first_packet_length(sk);
ret = 0;
break;
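Both phonet changes answer SIOCINQ by peeking the head of a queue under that queue's own spinlock rather than lock_sock(); the generic shape of the helper looks roughly like this (name is illustrative):

#include <linux/skbuff.h>

/* Length of the first queued skb, read under the queue lock only. */
static unsigned int demo_first_packet_len(struct sk_buff_head *q)
{
        struct sk_buff *skb;
        unsigned int len = 0;

        spin_lock_bh(&q->lock);
        skb = skb_peek(q);
        if (skb)
                len = skb->len;
        spin_unlock_bh(&q->lock);

        return len;
}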
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index fba82d36593a..a4e3c5de998b 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -301,6 +301,9 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
kfree(sg);
}
ret = PTR_ERR(trans_private);
+ /* Trigger connection so that it's ready for the next retry */
+ if (ret == -ENODEV)
+ rds_conn_connect_if_down(cp->cp_conn);
goto out;
}
diff --git a/net/rds/recv.c b/net/rds/recv.c
index c71b923764fd..5627f80013f8 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -425,6 +425,7 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc,
struct sock *sk = rds_rs_to_sk(rs);
int ret = 0;
unsigned long flags;
+ struct rds_incoming *to_drop = NULL;
write_lock_irqsave(&rs->rs_recv_lock, flags);
if (!list_empty(&inc->i_item)) {
@@ -435,11 +436,14 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc,
-be32_to_cpu(inc->i_hdr.h_len),
inc->i_hdr.h_dport);
list_del_init(&inc->i_item);
- rds_inc_put(inc);
+ to_drop = inc;
}
}
write_unlock_irqrestore(&rs->rs_recv_lock, flags);
+ if (to_drop)
+ rds_inc_put(to_drop);
+
rdsdebug("inc %p rs %p still %d dropped %d\n", inc, rs, ret, drop);
return ret;
}
@@ -758,16 +762,21 @@ void rds_clear_recv_queue(struct rds_sock *rs)
struct sock *sk = rds_rs_to_sk(rs);
struct rds_incoming *inc, *tmp;
unsigned long flags;
+ LIST_HEAD(to_drop);
write_lock_irqsave(&rs->rs_recv_lock, flags);
list_for_each_entry_safe(inc, tmp, &rs->rs_recv_queue, i_item) {
rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong,
-be32_to_cpu(inc->i_hdr.h_len),
inc->i_hdr.h_dport);
+ list_move(&inc->i_item, &to_drop);
+ }
+ write_unlock_irqrestore(&rs->rs_recv_lock, flags);
+
+ list_for_each_entry_safe(inc, tmp, &to_drop, i_item) {
list_del_init(&inc->i_item);
rds_inc_put(inc);
}
- write_unlock_irqrestore(&rs->rs_recv_lock, flags);
}
/*
diff --git a/net/rds/send.c b/net/rds/send.c
index 5e57a1581dc6..2899def23865 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1313,12 +1313,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
/* Parse any control messages the user may have included. */
ret = rds_cmsg_send(rs, rm, msg, &allocated_mr, &vct);
- if (ret) {
- /* Trigger connection so that its ready for the next retry */
- if (ret == -EAGAIN)
- rds_conn_connect_if_down(conn);
+ if (ret)
goto out;
- }
if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) {
printk_ratelimited(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n",
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index dbeb75c29857..7818aae1be8e 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -199,11 +199,19 @@ struct rxrpc_host_header {
*/
struct rxrpc_skb_priv {
struct rxrpc_connection *conn; /* Connection referred to (poke packet) */
- u16 offset; /* Offset of data */
- u16 len; /* Length of data */
- u8 flags;
+ union {
+ struct {
+ u16 offset; /* Offset of data */
+ u16 len; /* Length of data */
+ u8 flags;
#define RXRPC_RX_VERIFIED 0x01
-
+ };
+ struct {
+ rxrpc_seq_t first_ack; /* First packet in acks table */
+ u8 nr_acks; /* Number of acks+nacks */
+ u8 nr_nacks; /* Number of nacks */
+ };
+ };
struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */
};
@@ -510,7 +518,7 @@ struct rxrpc_connection {
enum rxrpc_call_completion completion; /* Completion condition */
s32 abort_code; /* Abort code of connection abort */
int debug_id; /* debug ID for printks */
- atomic_t serial; /* packet serial number counter */
+ rxrpc_serial_t tx_serial; /* Outgoing packet serial number counter */
unsigned int hi_serial; /* highest serial number received */
u32 service_id; /* Service ID, possibly upgraded */
u32 security_level; /* Security level selected */
@@ -692,11 +700,11 @@ struct rxrpc_call {
u8 cong_dup_acks; /* Count of ACKs showing missing packets */
u8 cong_cumul_acks; /* Cumulative ACK count */
ktime_t cong_tstamp; /* Last time cwnd was changed */
+ struct sk_buff *cong_last_nack; /* Last ACK with nacks received */
/* Receive-phase ACK management (ACKs we send). */
u8 ackr_reason; /* reason to ACK */
u16 ackr_sack_base; /* Starting slot in SACK table ring */
- rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
rxrpc_seq_t ackr_window; /* Base of SACK window */
rxrpc_seq_t ackr_wtop; /* Base of SACK window */
unsigned int ackr_nr_unacked; /* Number of unacked packets */
@@ -730,7 +738,8 @@ struct rxrpc_call {
struct rxrpc_ack_summary {
u16 nr_acks; /* Number of ACKs in packet */
u16 nr_new_acks; /* Number of new ACKs in packet */
- u16 nr_rot_new_acks; /* Number of rotated new ACKs */
+ u16 nr_new_nacks; /* Number of new nacks in packet */
+ u16 nr_retained_nacks; /* Number of nacks retained between ACKs */
u8 ack_reason;
bool saw_nacks; /* Saw NACKs in packet */
bool new_low_nack; /* T if new low NACK found */
@@ -823,6 +832,20 @@ static inline bool rxrpc_sending_to_client(const struct rxrpc_txbuf *txb)
#include <trace/events/rxrpc.h>
/*
+ * Allocate the next serial number on a connection. 0 must be skipped.
+ */
+static inline rxrpc_serial_t rxrpc_get_next_serial(struct rxrpc_connection *conn)
+{
+ rxrpc_serial_t serial;
+
+ serial = conn->tx_serial;
+ if (serial == 0)
+ serial = 1;
+ conn->tx_serial = serial + 1;
+ return serial;
+}
+
+/*
* af_rxrpc.c
*/
extern atomic_t rxrpc_n_rx_skbs;
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index e363f21a2014..0f78544d043b 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -43,8 +43,6 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial,
unsigned long expiry = rxrpc_soft_ack_delay;
unsigned long now = jiffies, ack_at;
- call->ackr_serial = serial;
-
if (rxrpc_soft_ack_delay < expiry)
expiry = rxrpc_soft_ack_delay;
if (call->peer->srtt_us != 0)
@@ -114,6 +112,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
{
struct rxrpc_ackpacket *ack = NULL;
+ struct rxrpc_skb_priv *sp;
struct rxrpc_txbuf *txb;
unsigned long resend_at;
rxrpc_seq_t transmitted = READ_ONCE(call->tx_transmitted);
@@ -141,14 +140,15 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
* explicitly NAK'd packets.
*/
if (ack_skb) {
+ sp = rxrpc_skb(ack_skb);
ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header);
- for (i = 0; i < ack->nAcks; i++) {
+ for (i = 0; i < sp->nr_acks; i++) {
rxrpc_seq_t seq;
if (ack->acks[i] & 1)
continue;
- seq = ntohl(ack->firstPacket) + i;
+ seq = sp->first_ack + i;
if (after(txb->seq, transmitted))
break;
if (after(txb->seq, seq))
@@ -373,7 +373,6 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call)
bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
{
unsigned long now, next, t;
- rxrpc_serial_t ackr_serial;
bool resend = false, expired = false;
s32 abort_code;
@@ -423,8 +422,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
if (time_after_eq(now, t)) {
trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now);
cmpxchg(&call->delay_ack_at, t, now + MAX_JIFFY_OFFSET);
- ackr_serial = xchg(&call->ackr_serial, 0);
- rxrpc_send_ACK(call, RXRPC_ACK_DELAY, ackr_serial,
+ rxrpc_send_ACK(call, RXRPC_ACK_DELAY, 0,
rxrpc_propose_ack_ping_for_lost_ack);
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 0943e54370ba..9fc9a6c3f685 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -686,6 +686,7 @@ static void rxrpc_destroy_call(struct work_struct *work)
del_timer_sync(&call->timer);
+ rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
rxrpc_cleanup_ring(call);
while ((txb = list_first_entry_or_null(&call->tx_sendmsg,
struct rxrpc_txbuf, call_link))) {
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 95f4bc206b3d..1f251d758cb9 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -95,6 +95,14 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
_enter("%d", conn->debug_id);
+ if (sp && sp->hdr.type == RXRPC_PACKET_TYPE_ACK) {
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ &pkt.ack, sizeof(pkt.ack)) < 0)
+ return;
+ if (pkt.ack.reason == RXRPC_ACK_PING_RESPONSE)
+ return;
+ }
+
chan = &conn->channels[channel];
/* If the last call got moved on whilst we were waiting to run, just
@@ -117,7 +125,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
iov[2].iov_base = &ack_info;
iov[2].iov_len = sizeof(ack_info);
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
pkt.whdr.epoch = htonl(conn->proto.epoch);
pkt.whdr.cid = htonl(conn->proto.cid | channel);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 92495e73b869..9691de00ade7 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -45,11 +45,9 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
}
cumulative_acks += summary->nr_new_acks;
- cumulative_acks += summary->nr_rot_new_acks;
if (cumulative_acks > 255)
cumulative_acks = 255;
- summary->mode = call->cong_mode;
summary->cwnd = call->cong_cwnd;
summary->ssthresh = call->cong_ssthresh;
summary->cumulative_acks = cumulative_acks;
@@ -151,6 +149,7 @@ out_no_clear_ca:
cwnd = RXRPC_TX_MAX_WINDOW;
call->cong_cwnd = cwnd;
call->cong_cumul_acks = cumulative_acks;
+ summary->mode = call->cong_mode;
trace_rxrpc_congest(call, summary, acked_serial, change);
if (resend)
rxrpc_resend(call, skb);
@@ -213,7 +212,6 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
list_for_each_entry_rcu(txb, &call->tx_buffer, call_link, false) {
if (before_eq(txb->seq, call->acks_hard_ack))
continue;
- summary->nr_rot_new_acks++;
if (test_bit(RXRPC_TXBUF_LAST, &txb->flags)) {
set_bit(RXRPC_CALL_TX_LAST, &call->flags);
rot_last = true;
@@ -254,6 +252,11 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
{
ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags));
+ if (unlikely(call->cong_last_nack)) {
+ rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
+ call->cong_last_nack = NULL;
+ }
+
switch (__rxrpc_call_state(call)) {
case RXRPC_CALL_CLIENT_SEND_REQUEST:
case RXRPC_CALL_CLIENT_AWAIT_REPLY:
@@ -703,6 +706,43 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
}
/*
+ * Determine how many nacks from the previous ACK have now been satisfied.
+ */
+static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ rxrpc_seq_t seq)
+{
+ struct sk_buff *skb = call->cong_last_nack;
+ struct rxrpc_ackpacket ack;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ unsigned int i, new_acks = 0, retained_nacks = 0;
+ rxrpc_seq_t old_seq = sp->first_ack;
+ u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(ack);
+
+ if (after_eq(seq, old_seq + sp->nr_acks)) {
+ summary->nr_new_acks += sp->nr_nacks;
+ summary->nr_new_acks += seq - (old_seq + sp->nr_acks);
+ summary->nr_retained_nacks = 0;
+ } else if (seq == old_seq) {
+ summary->nr_retained_nacks = sp->nr_nacks;
+ } else {
+ for (i = 0; i < sp->nr_acks; i++) {
+ if (acks[i] == RXRPC_ACK_TYPE_NACK) {
+ if (before(old_seq + i, seq))
+ new_acks++;
+ else
+ retained_nacks++;
+ }
+ }
+
+ summary->nr_new_acks += new_acks;
+ summary->nr_retained_nacks = retained_nacks;
+ }
+
+ return old_seq + sp->nr_acks;
+}
+
+/*
* Process individual soft ACKs.
*
* Each ACK in the array corresponds to one packet and can be either an ACK or
@@ -711,25 +751,51 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
* the timer on the basis that the peer might just not have processed them at
* the time the ACK was sent.
*/
-static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
- rxrpc_seq_t seq, int nr_acks,
- struct rxrpc_ack_summary *summary)
+static void rxrpc_input_soft_acks(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ struct sk_buff *skb,
+ rxrpc_seq_t seq,
+ rxrpc_seq_t since)
{
- unsigned int i;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ unsigned int i, old_nacks = 0;
+ rxrpc_seq_t lowest_nak = seq + sp->nr_acks;
+ u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);
- for (i = 0; i < nr_acks; i++) {
+ for (i = 0; i < sp->nr_acks; i++) {
if (acks[i] == RXRPC_ACK_TYPE_ACK) {
summary->nr_acks++;
- summary->nr_new_acks++;
+ if (after_eq(seq, since))
+ summary->nr_new_acks++;
} else {
- if (!summary->saw_nacks &&
- call->acks_lowest_nak != seq + i) {
- call->acks_lowest_nak = seq + i;
- summary->new_low_nack = true;
- }
summary->saw_nacks = true;
+ if (before(seq, since)) {
+ /* Overlap with previous ACK */
+ old_nacks++;
+ } else {
+ summary->nr_new_nacks++;
+ sp->nr_nacks++;
+ }
+
+ if (before(seq, lowest_nak))
+ lowest_nak = seq;
}
+ seq++;
+ }
+
+ if (lowest_nak != call->acks_lowest_nak) {
+ call->acks_lowest_nak = lowest_nak;
+ summary->new_low_nack = true;
}
+
+ /* We *can* have more nacks than we did - the peer is permitted to drop
+ * packets it has soft-acked and re-request them. Further, it is
+ * possible for the nack distribution to change whilst the number of
+ * nacks stays the same or goes down.
+ */
+ if (old_nacks < summary->nr_retained_nacks)
+ summary->nr_new_acks += summary->nr_retained_nacks - old_nacks;
+ summary->nr_retained_nacks = old_nacks;
}
/*
@@ -773,7 +839,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_ackinfo info;
rxrpc_serial_t ack_serial, acked_serial;
- rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt;
+ rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt, since;
int nr_acks, offset, ioffset;
_enter("");
@@ -789,6 +855,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
prev_pkt = ntohl(ack.previousPacket);
hard_ack = first_soft_ack - 1;
nr_acks = ack.nAcks;
+ sp->first_ack = first_soft_ack;
+ sp->nr_acks = nr_acks;
summary.ack_reason = (ack.reason < RXRPC_ACK__INVALID ?
ack.reason : RXRPC_ACK__INVALID);
@@ -858,6 +926,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
if (nr_acks > 0)
skb_condense(skb);
+ if (call->cong_last_nack) {
+ since = rxrpc_input_check_prev_ack(call, &summary, first_soft_ack);
+ rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
+ call->cong_last_nack = NULL;
+ } else {
+ summary.nr_new_acks = first_soft_ack - call->acks_first_seq;
+ call->acks_lowest_nak = first_soft_ack + nr_acks;
+ since = first_soft_ack;
+ }
+
call->acks_latest_ts = skb->tstamp;
call->acks_first_seq = first_soft_ack;
call->acks_prev_seq = prev_pkt;
@@ -866,7 +944,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
case RXRPC_ACK_PING:
break;
default:
- if (after(acked_serial, call->acks_highest_serial))
+ if (acked_serial && after(acked_serial, call->acks_highest_serial))
call->acks_highest_serial = acked_serial;
break;
}
@@ -905,8 +983,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
if (nr_acks > 0) {
if (offset > (int)skb->len - nr_acks)
return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack);
- rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack,
- nr_acks, &summary);
+ rxrpc_input_soft_acks(call, &summary, skb, first_soft_ack, since);
+ rxrpc_get_skb(skb, rxrpc_skb_get_last_nack);
+ call->cong_last_nack = skb;
}
if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) &&
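The input.c changes above keep the last soft-ACK packet in call->cong_last_nack and, when the next ACK arrives, work out how many previously NACK'd packets have since been acked and how many NACKs remain. The sketch below is a simplified userspace model of that bookkeeping, not the kernel code: sequence numbers are compared with plain integer arithmetic here, whereas rxrpc uses the wrap-safe before()/after_eq() helpers, and all names are illustrative.

#include <stdio.h>

#define SACK_ACK  1
#define SACK_NACK 0

/* Model: entries in the old soft-ACK table cover sequence numbers
 * old_seq .. old_seq + nr_acks - 1.  A NACK'd packet that now falls below
 * the new first_soft_ack counts as newly acked; the rest are retained.
 */
static void check_prev_ack(unsigned int old_seq, const unsigned char *acks,
			   unsigned int nr_acks, unsigned int new_seq,
			   unsigned int *new_acks, unsigned int *retained)
{
	*new_acks = 0;
	*retained = 0;

	for (unsigned int i = 0; i < nr_acks; i++) {
		if (acks[i] != SACK_NACK)
			continue;
		if (old_seq + i < new_seq)
			(*new_acks)++;
		else
			(*retained)++;
	}
}

int main(void)
{
	/* Old table covered seq 10..14 with NACKs at 11, 13 and 14. */
	const unsigned char old_table[] = {
		SACK_ACK, SACK_NACK, SACK_ACK, SACK_NACK, SACK_NACK
	};
	unsigned int new_acks, retained;

	check_prev_ack(10, old_table, 5, 13, &new_acks, &retained);
	printf("newly acked: %u, retained nacks: %u\n", new_acks, retained);
	return 0;
}

With first_soft_ack now at 13, the NACK on seq 11 has been satisfied (one new ack) while the NACKs on 13 and 14 are retained, which is the distinction the new nr_new_acks/nr_retained_nacks fields carry into congestion management.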
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index a0906145e829..4a292f860ae3 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -216,7 +216,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
iov[0].iov_len = sizeof(txb->wire) + sizeof(txb->ack) + n;
len = iov[0].iov_len;
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
txb->wire.serial = htonl(serial);
trace_rxrpc_tx_ack(call->debug_id, serial,
ntohl(txb->ack.firstPacket),
@@ -302,7 +302,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
iov[0].iov_base = &pkt;
iov[0].iov_len = sizeof(pkt);
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
pkt.whdr.serial = htonl(serial);
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt));
@@ -334,7 +334,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
_enter("%x,{%d}", txb->seq, txb->len);
/* Each transmission of a Tx packet needs a new serial number */
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
txb->wire.serial = htonl(serial);
if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
@@ -558,7 +558,7 @@ void rxrpc_send_conn_abort(struct rxrpc_connection *conn)
len = iov[0].iov_len + iov[1].iov_len;
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
whdr.serial = htonl(serial);
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 6c86cbb98d1d..26dc2f26d92d 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -181,7 +181,7 @@ print:
atomic_read(&conn->active),
state,
key_serial(conn->key),
- atomic_read(&conn->serial),
+ conn->tx_serial,
conn->hi_serial,
conn->channels[0].call_id,
conn->channels[1].call_id,
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index b52dedcebce0..6b32d61d4cdc 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -664,7 +664,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
len = iov[0].iov_len + iov[1].iov_len;
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
whdr.serial = htonl(serial);
ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len);
@@ -721,7 +721,7 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len;
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
whdr.serial = htonl(serial);
rxrpc_local_dont_fragment(conn->local, false);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 12386f590b0f..6faa7d00da09 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -232,18 +232,14 @@ release_idr:
return err;
}
-static bool is_mirred_nested(void)
-{
- return unlikely(__this_cpu_read(mirred_nest_level) > 1);
-}
-
-static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
+static int
+tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb)
{
int err;
if (!want_ingress)
err = tcf_dev_queue_xmit(skb, dev_queue_xmit);
- else if (is_mirred_nested())
+ else if (!at_ingress)
err = netif_rx(skb);
else
err = netif_receive_skb(skb);
@@ -270,8 +266,7 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) {
net_notice_ratelimited("tc mirred to Houston: device %s is down\n",
dev->name);
- err = -ENODEV;
- goto out;
+ goto err_cant_do;
}
/* we could easily avoid the clone only if called by ingress and clsact;
@@ -283,10 +278,8 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
tcf_mirred_can_reinsert(retval);
if (!dont_clone) {
skb_to_send = skb_clone(skb, GFP_ATOMIC);
- if (!skb_to_send) {
- err = -ENOMEM;
- goto out;
- }
+ if (!skb_to_send)
+ goto err_cant_do;
}
want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
@@ -319,19 +312,20 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress);
- err = tcf_mirred_forward(want_ingress, skb_to_send);
+ err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send);
} else {
- err = tcf_mirred_forward(want_ingress, skb_to_send);
+ err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send);
}
-
- if (err) {
-out:
+ if (err)
tcf_action_inc_overlimit_qstats(&m->common);
- if (is_redirect)
- retval = TC_ACT_SHOT;
- }
return retval;
+
+err_cant_do:
+ if (is_redirect)
+ retval = TC_ACT_SHOT;
+ tcf_action_inc_overlimit_qstats(&m->common);
+ return retval;
}
static int tcf_blockcast_redir(struct sk_buff *skb, struct tcf_mirred *m,
@@ -533,8 +527,6 @@ static int mirred_device_event(struct notifier_block *unused,
* net_device are already rcu protected.
*/
RCU_INIT_POINTER(m->tcfm_dev, NULL);
- } else if (m->tcfm_blockid) {
- m->tcfm_blockid = 0;
}
spin_unlock_bh(&m->tcf_lock);
}
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index efb9d2811b73..6ee7064c82fc 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -2460,8 +2460,11 @@ unbind_filter:
}
errout_idr:
- if (!fold)
+ if (!fold) {
+ spin_lock(&tp->lock);
idr_remove(&head->handle_idr, fnew->handle);
+ spin_unlock(&tp->lock);
+ }
__fl_put(fnew);
errout_tb:
kfree(tb);
diff --git a/net/sched/em_canid.c b/net/sched/em_canid.c
index 5ea84decec19..5337bc462755 100644
--- a/net/sched/em_canid.c
+++ b/net/sched/em_canid.c
@@ -222,6 +222,7 @@ static void __exit exit_em_canid(void)
tcf_em_unregister(&em_canid_ops);
}
+MODULE_DESCRIPTION("ematch classifier to match CAN IDs embedded in skb CAN frames");
MODULE_LICENSE("GPL");
module_init(init_em_canid);
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c
index f17b049ea530..c90ad7ea26b4 100644
--- a/net/sched/em_cmp.c
+++ b/net/sched/em_cmp.c
@@ -87,6 +87,7 @@ static void __exit exit_em_cmp(void)
tcf_em_unregister(&em_cmp_ops);
}
+MODULE_DESCRIPTION("ematch classifier for basic data types(8/16/32 bit) against skb data");
MODULE_LICENSE("GPL");
module_init(init_em_cmp);
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 09d8afd04a2a..8996c73c9779 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -1006,6 +1006,7 @@ static void __exit exit_em_meta(void)
tcf_em_unregister(&em_meta_ops);
}
+MODULE_DESCRIPTION("ematch classifier for various internal kernel metadata, skb metadata and sk metadata");
MODULE_LICENSE("GPL");
module_init(init_em_meta);
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index a83b237cbeb0..4f9f21a05d5e 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -68,6 +68,7 @@ static void __exit exit_em_nbyte(void)
tcf_em_unregister(&em_nbyte_ops);
}
+MODULE_DESCRIPTION("ematch classifier for arbitrary skb multi-bytes");
MODULE_LICENSE("GPL");
module_init(init_em_nbyte);
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index f176afb70559..420c66203b17 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -147,6 +147,7 @@ static void __exit exit_em_text(void)
tcf_em_unregister(&em_text_ops);
}
+MODULE_DESCRIPTION("ematch classifier for embedded text in skbs");
MODULE_LICENSE("GPL");
module_init(init_em_text);
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c
index 71b070da0437..fdec4db5ec89 100644
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -52,6 +52,7 @@ static void __exit exit_em_u32(void)
tcf_em_unregister(&em_u32_ops);
}
+MODULE_DESCRIPTION("ematch skb classifier using 32 bit chunks of data");
MODULE_LICENSE("GPL");
module_init(init_em_u32);
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 7182c5a450fb..5c1652181805 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -38,6 +38,14 @@ void sctp_inq_init(struct sctp_inq *queue)
INIT_WORK(&queue->immediate, NULL);
}
+/* Properly release the chunk which is being worked on. */
+static inline void sctp_inq_chunk_free(struct sctp_chunk *chunk)
+{
+ if (chunk->head_skb)
+ chunk->skb = chunk->head_skb;
+ sctp_chunk_free(chunk);
+}
+
/* Release the memory associated with an SCTP inqueue. */
void sctp_inq_free(struct sctp_inq *queue)
{
@@ -53,7 +61,7 @@ void sctp_inq_free(struct sctp_inq *queue)
* free it as well.
*/
if (queue->in_progress) {
- sctp_chunk_free(queue->in_progress);
+ sctp_inq_chunk_free(queue->in_progress);
queue->in_progress = NULL;
}
}
@@ -130,9 +138,7 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
goto new_skb;
}
- if (chunk->head_skb)
- chunk->skb = chunk->head_skb;
- sctp_chunk_free(chunk);
+ sctp_inq_chunk_free(chunk);
chunk = queue->in_progress = NULL;
} else {
/* Nothing to do. Next chunk in the packet, please. */
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index a2cb30af46cb..0f53a5c6fd9d 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -924,6 +924,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
smc->clcsock->file->private_data = smc->clcsock;
smc->clcsock->wq.fasync_list =
smc->sk.sk_socket->wq.fasync_list;
+ smc->sk.sk_socket->wq.fasync_list = NULL;
/* There might be some wait entries remaining
* in smc sk->sk_wq and they should be woken up
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 5b045284849e..c9189a970eec 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -19,6 +19,35 @@
#include <linux/rtnetlink.h>
#include <net/switchdev.h>
+static bool switchdev_obj_eq(const struct switchdev_obj *a,
+ const struct switchdev_obj *b)
+{
+ const struct switchdev_obj_port_vlan *va, *vb;
+ const struct switchdev_obj_port_mdb *ma, *mb;
+
+ if (a->id != b->id || a->orig_dev != b->orig_dev)
+ return false;
+
+ switch (a->id) {
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ va = SWITCHDEV_OBJ_PORT_VLAN(a);
+ vb = SWITCHDEV_OBJ_PORT_VLAN(b);
+ return va->flags == vb->flags &&
+ va->vid == vb->vid &&
+ va->changed == vb->changed;
+ case SWITCHDEV_OBJ_ID_PORT_MDB:
+ case SWITCHDEV_OBJ_ID_HOST_MDB:
+ ma = SWITCHDEV_OBJ_PORT_MDB(a);
+ mb = SWITCHDEV_OBJ_PORT_MDB(b);
+ return ma->vid == mb->vid &&
+ ether_addr_equal(ma->addr, mb->addr);
+ default:
+ break;
+ }
+
+ BUG();
+}
+
static LIST_HEAD(deferred);
static DEFINE_SPINLOCK(deferred_lock);
@@ -307,6 +336,50 @@ int switchdev_port_obj_del(struct net_device *dev,
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
+/**
+ * switchdev_port_obj_act_is_deferred - Is object action pending?
+ *
+ * @dev: port device
+ * @nt: type of action; add or delete
+ * @obj: object to test
+ *
+ * Returns true if a deferred item equivalent to performing
+ * action @nt on object @obj is pending.
+ *
+ * rtnl_lock must be held.
+ */
+bool switchdev_port_obj_act_is_deferred(struct net_device *dev,
+ enum switchdev_notifier_type nt,
+ const struct switchdev_obj *obj)
+{
+ struct switchdev_deferred_item *dfitem;
+ bool found = false;
+
+ ASSERT_RTNL();
+
+ spin_lock_bh(&deferred_lock);
+
+ list_for_each_entry(dfitem, &deferred, list) {
+ if (dfitem->dev != dev)
+ continue;
+
+ if ((dfitem->func == switchdev_port_obj_add_deferred &&
+ nt == SWITCHDEV_PORT_OBJ_ADD) ||
+ (dfitem->func == switchdev_port_obj_del_deferred &&
+ nt == SWITCHDEV_PORT_OBJ_DEL)) {
+ if (switchdev_obj_eq((const void *)dfitem->data, obj)) {
+ found = true;
+ break;
+ }
+ }
+ }
+
+ spin_unlock_bh(&deferred_lock);
+
+ return found;
+}
+EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred);
+
static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain);
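switchdev_port_obj_act_is_deferred() scans the deferred list under deferred_lock for a queued add or delete whose object compares equal to the one passed in; a caller such as the bridge can use it to avoid replaying an action that is already pending. The sketch below is a small userspace model of that idea (a queue of pending actions plus an equality scan over one object type); the types and helpers are illustrative and are not the switchdev API.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

enum action { OBJ_ADD, OBJ_DEL };

/* Illustrative object: a port VLAN identified by vid + flags. */
struct vlan_obj {
	unsigned short vid;
	unsigned short flags;
};

/* One queued-but-not-yet-applied action. */
struct deferred_item {
	const char *dev;
	enum action act;
	struct vlan_obj obj;
};

static bool act_is_deferred(const struct deferred_item *q, size_t n,
			    const char *dev, enum action act,
			    const struct vlan_obj *obj)
{
	for (size_t i = 0; i < n; i++) {
		if (strcmp(q[i].dev, dev) != 0 || q[i].act != act)
			continue;
		if (q[i].obj.vid == obj->vid && q[i].obj.flags == obj->flags)
			return true;	/* equivalent action still queued */
	}
	return false;
}

int main(void)
{
	struct deferred_item queue[] = {
		{ "swp1", OBJ_DEL, { .vid = 10, .flags = 0 } },
	};
	struct vlan_obj probe = { .vid = 10, .flags = 0 };

	printf("VLAN 10 deletion deferred on swp1: %s\n",
	       act_is_deferred(queue, 1, "swp1", OBJ_DEL, &probe) ? "yes" : "no");
	return 0;
}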
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 2cde375477e3..878415c43527 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -1086,6 +1086,12 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
#ifdef CONFIG_TIPC_MEDIA_UDP
if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) {
+ if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) {
+ rtnl_unlock();
+ NL_SET_ERR_MSG(info->extack, "UDP option is unsupported");
+ return -EINVAL;
+ }
+
err = tipc_udp_nl_bearer_add(b,
attrs[TIPC_NLA_BEARER_UDP_OPTS]);
if (err) {
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 1c2c6800949d..b4674f03d71a 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -1003,7 +1003,7 @@ static u16 tls_user_config(struct tls_context *ctx, bool tx)
return 0;
}
-static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
+static int tls_get_info(struct sock *sk, struct sk_buff *skb)
{
u16 version, cipher_type;
struct tls_context *ctx;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 31e8a94dfc11..211f57164cb6 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -52,6 +52,7 @@ struct tls_decrypt_arg {
struct_group(inargs,
bool zc;
bool async;
+ bool async_done;
u8 tail;
);
@@ -63,6 +64,7 @@ struct tls_decrypt_ctx {
u8 iv[TLS_MAX_IV_SIZE];
u8 aad[TLS_MAX_AAD_SIZE];
u8 tail;
+ bool free_sgout;
struct scatterlist sg[];
};
@@ -187,7 +189,6 @@ static void tls_decrypt_done(void *data, int err)
struct aead_request *aead_req = data;
struct crypto_aead *aead = crypto_aead_reqtfm(aead_req);
struct scatterlist *sgout = aead_req->dst;
- struct scatterlist *sgin = aead_req->src;
struct tls_sw_context_rx *ctx;
struct tls_decrypt_ctx *dctx;
struct tls_context *tls_ctx;
@@ -196,6 +197,17 @@ static void tls_decrypt_done(void *data, int err)
struct sock *sk;
int aead_size;
+ /* If requests get too backlogged crypto API returns -EBUSY and calls
+ * ->complete(-EINPROGRESS) immediately followed by ->complete(0)
+ * to make waiting for backlog to flush with crypto_wait_req() easier.
+ * First wait converts -EBUSY -> -EINPROGRESS, and the second one
+ * -EINPROGRESS -> 0.
+ * We have a single struct crypto_async_request per direction, this
+ * scheme doesn't help us, so just ignore the first ->complete().
+ */
+ if (err == -EINPROGRESS)
+ return;
+
aead_size = sizeof(*aead_req) + crypto_aead_reqsize(aead);
aead_size = ALIGN(aead_size, __alignof__(*dctx));
dctx = (void *)((u8 *)aead_req + aead_size);
@@ -213,7 +225,7 @@ static void tls_decrypt_done(void *data, int err)
}
/* Free the destination pages if skb was not decrypted inplace */
- if (sgout != sgin) {
+ if (dctx->free_sgout) {
/* Skip the first S/G entry as it points to AAD */
for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) {
if (!sg)
@@ -224,10 +236,17 @@ static void tls_decrypt_done(void *data, int err)
kfree(aead_req);
- spin_lock_bh(&ctx->decrypt_compl_lock);
- if (!atomic_dec_return(&ctx->decrypt_pending))
+ if (atomic_dec_and_test(&ctx->decrypt_pending))
complete(&ctx->async_wait.completion);
- spin_unlock_bh(&ctx->decrypt_compl_lock);
+}
+
+static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx)
+{
+ if (!atomic_dec_and_test(&ctx->decrypt_pending))
+ crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ atomic_inc(&ctx->decrypt_pending);
+
+ return ctx->async_wait.err;
}
static int tls_do_decryption(struct sock *sk,
@@ -253,20 +272,33 @@ static int tls_do_decryption(struct sock *sk,
aead_request_set_callback(aead_req,
CRYPTO_TFM_REQ_MAY_BACKLOG,
tls_decrypt_done, aead_req);
+ DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->decrypt_pending) < 1);
atomic_inc(&ctx->decrypt_pending);
} else {
+ DECLARE_CRYPTO_WAIT(wait);
+
aead_request_set_callback(aead_req,
CRYPTO_TFM_REQ_MAY_BACKLOG,
- crypto_req_done, &ctx->async_wait);
+ crypto_req_done, &wait);
+ ret = crypto_aead_decrypt(aead_req);
+ if (ret == -EINPROGRESS || ret == -EBUSY)
+ ret = crypto_wait_req(ret, &wait);
+ return ret;
}
ret = crypto_aead_decrypt(aead_req);
- if (ret == -EINPROGRESS) {
- if (darg->async)
- return 0;
+ if (ret == -EINPROGRESS)
+ return 0;
- ret = crypto_wait_req(ret, &ctx->async_wait);
+ if (ret == -EBUSY) {
+ ret = tls_decrypt_async_wait(ctx);
+ darg->async_done = true;
+ /* all completions have run, we're not doing async anymore */
+ darg->async = false;
+ return ret;
}
+
+ atomic_dec(&ctx->decrypt_pending);
darg->async = false;
return ret;
@@ -439,9 +471,10 @@ static void tls_encrypt_done(void *data, int err)
struct tls_rec *rec = data;
struct scatterlist *sge;
struct sk_msg *msg_en;
- bool ready = false;
struct sock *sk;
- int pending;
+
+ if (err == -EINPROGRESS) /* see the comment in tls_decrypt_done() */
+ return;
msg_en = &rec->msg_encrypted;
@@ -476,23 +509,25 @@ static void tls_encrypt_done(void *data, int err)
/* If received record is at head of tx_list, schedule tx */
first_rec = list_first_entry(&ctx->tx_list,
struct tls_rec, list);
- if (rec == first_rec)
- ready = true;
+ if (rec == first_rec) {
+ /* Schedule the transmission */
+ if (!test_and_set_bit(BIT_TX_SCHEDULED,
+ &ctx->tx_bitmask))
+ schedule_delayed_work(&ctx->tx_work.work, 1);
+ }
}
- spin_lock_bh(&ctx->encrypt_compl_lock);
- pending = atomic_dec_return(&ctx->encrypt_pending);
-
- if (!pending && ctx->async_notify)
+ if (atomic_dec_and_test(&ctx->encrypt_pending))
complete(&ctx->async_wait.completion);
- spin_unlock_bh(&ctx->encrypt_compl_lock);
+}
- if (!ready)
- return;
+static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx)
+{
+ if (!atomic_dec_and_test(&ctx->encrypt_pending))
+ crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ atomic_inc(&ctx->encrypt_pending);
- /* Schedule the transmission */
- if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
- schedule_delayed_work(&ctx->tx_work.work, 1);
+ return ctx->async_wait.err;
}
static int tls_do_encryption(struct sock *sk,
@@ -541,9 +576,14 @@ static int tls_do_encryption(struct sock *sk,
/* Add the record in tx_list */
list_add_tail((struct list_head *)&rec->list, &ctx->tx_list);
+ DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->encrypt_pending) < 1);
atomic_inc(&ctx->encrypt_pending);
rc = crypto_aead_encrypt(aead_req);
+ if (rc == -EBUSY) {
+ rc = tls_encrypt_async_wait(ctx);
+ rc = rc ?: -EINPROGRESS;
+ }
if (!rc || rc != -EINPROGRESS) {
atomic_dec(&ctx->encrypt_pending);
sge->offset -= prot->prepend_size;
@@ -984,7 +1024,6 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg,
int num_zc = 0;
int orig_size;
int ret = 0;
- int pending;
if (!eor && (msg->msg_flags & MSG_EOR))
return -EINVAL;
@@ -1163,24 +1202,12 @@ trim_sgl:
if (!num_async) {
goto send_end;
} else if (num_zc) {
- /* Wait for pending encryptions to get completed */
- spin_lock_bh(&ctx->encrypt_compl_lock);
- ctx->async_notify = true;
-
- pending = atomic_read(&ctx->encrypt_pending);
- spin_unlock_bh(&ctx->encrypt_compl_lock);
- if (pending)
- crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
- else
- reinit_completion(&ctx->async_wait.completion);
-
- /* There can be no concurrent accesses, since we have no
- * pending encrypt operations
- */
- WRITE_ONCE(ctx->async_notify, false);
+ int err;
- if (ctx->async_wait.err) {
- ret = ctx->async_wait.err;
+ /* Wait for pending encryptions to get completed */
+ err = tls_encrypt_async_wait(ctx);
+ if (err) {
+ ret = err;
copied = 0;
}
}
@@ -1229,7 +1256,6 @@ void tls_sw_splice_eof(struct socket *sock)
ssize_t copied = 0;
bool retrying = false;
int ret = 0;
- int pending;
if (!ctx->open_rec)
return;
@@ -1264,22 +1290,7 @@ retry:
}
/* Wait for pending encryptions to get completed */
- spin_lock_bh(&ctx->encrypt_compl_lock);
- ctx->async_notify = true;
-
- pending = atomic_read(&ctx->encrypt_pending);
- spin_unlock_bh(&ctx->encrypt_compl_lock);
- if (pending)
- crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
- else
- reinit_completion(&ctx->async_wait.completion);
-
- /* There can be no concurrent accesses, since we have no pending
- * encrypt operations
- */
- WRITE_ONCE(ctx->async_notify, false);
-
- if (ctx->async_wait.err)
+ if (tls_encrypt_async_wait(ctx))
goto unlock;
/* Transmit if any encryptions have completed */
@@ -1581,12 +1592,16 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
} else if (out_sg) {
memcpy(sgout, out_sg, n_sgout * sizeof(*sgout));
}
+ dctx->free_sgout = !!pages;
/* Prepare and submit AEAD request */
err = tls_do_decryption(sk, sgin, sgout, dctx->iv,
data_len + prot->tail_size, aead_req, darg);
- if (err)
+ if (err) {
+ if (darg->async_done)
+ goto exit_free_skb;
goto exit_free_pages;
+ }
darg->skb = clear_skb ?: tls_strp_msg(ctx);
clear_skb = NULL;
@@ -1598,6 +1613,9 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
return err;
}
+ if (unlikely(darg->async_done))
+ return 0;
+
if (prot->tail_size)
darg->tail = dctx->tail;
@@ -1769,7 +1787,8 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
u8 *control,
size_t skip,
size_t len,
- bool is_peek)
+ bool is_peek,
+ bool *more)
{
struct sk_buff *skb = skb_peek(&ctx->rx_list);
struct tls_msg *tlm;
@@ -1782,7 +1801,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
err = tls_record_content_type(msg, tlm, control);
if (err <= 0)
- goto out;
+ goto more;
if (skip < rxm->full_len)
break;
@@ -1800,12 +1819,12 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
err = tls_record_content_type(msg, tlm, control);
if (err <= 0)
- goto out;
+ goto more;
err = skb_copy_datagram_msg(skb, rxm->offset + skip,
msg, chunk);
if (err < 0)
- goto out;
+ goto more;
len = len - chunk;
copied = copied + chunk;
@@ -1841,6 +1860,10 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
out:
return copied ? : err;
+more:
+ if (more)
+ *more = true;
+ goto out;
}
static bool
@@ -1940,10 +1963,12 @@ int tls_sw_recvmsg(struct sock *sk,
struct strp_msg *rxm;
struct tls_msg *tlm;
ssize_t copied = 0;
+ ssize_t peeked = 0;
bool async = false;
int target, err;
bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
bool is_peek = flags & MSG_PEEK;
+ bool rx_more = false;
bool released = true;
bool bpf_strp_enabled;
bool zc_capable;
@@ -1963,12 +1988,12 @@ int tls_sw_recvmsg(struct sock *sk,
goto end;
/* Process pending decrypted records. It must be non-zero-copy */
- err = process_rx_list(ctx, msg, &control, 0, len, is_peek);
+ err = process_rx_list(ctx, msg, &control, 0, len, is_peek, &rx_more);
if (err < 0)
goto end;
copied = err;
- if (len <= copied)
+ if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA) || rx_more)
goto end;
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
@@ -2061,6 +2086,8 @@ put_on_rx_list:
decrypted += chunk;
len -= chunk;
__skb_queue_tail(&ctx->rx_list, skb);
+ if (unlikely(control != TLS_RECORD_TYPE_DATA))
+ break;
continue;
}
@@ -2084,8 +2111,10 @@ put_on_rx_list:
if (err < 0)
goto put_on_rx_list_err;
- if (is_peek)
+ if (is_peek) {
+ peeked += chunk;
goto put_on_rx_list;
+ }
if (partially_consumed) {
rxm->offset += chunk;
@@ -2109,16 +2138,10 @@ put_on_rx_list:
recv_end:
if (async) {
- int ret, pending;
+ int ret;
/* Wait for all previously submitted records to be decrypted */
- spin_lock_bh(&ctx->decrypt_compl_lock);
- reinit_completion(&ctx->async_wait.completion);
- pending = atomic_read(&ctx->decrypt_pending);
- spin_unlock_bh(&ctx->decrypt_compl_lock);
- ret = 0;
- if (pending)
- ret = crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ ret = tls_decrypt_async_wait(ctx);
__skb_queue_purge(&ctx->async_hold);
if (ret) {
@@ -2130,12 +2153,11 @@ recv_end:
/* Drain records from the rx_list & copy if required */
if (is_peek || is_kvec)
- err = process_rx_list(ctx, msg, &control, copied,
- decrypted, is_peek);
+ err = process_rx_list(ctx, msg, &control, copied + peeked,
+ decrypted - peeked, is_peek, NULL);
else
err = process_rx_list(ctx, msg, &control, 0,
- async_copy_bytes, is_peek);
- decrypted += max(err, 0);
+ async_copy_bytes, is_peek, NULL);
}
copied += decrypted;
@@ -2435,16 +2457,9 @@ void tls_sw_release_resources_tx(struct sock *sk)
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
struct tls_rec *rec, *tmp;
- int pending;
/* Wait for any pending async encryptions to complete */
- spin_lock_bh(&ctx->encrypt_compl_lock);
- ctx->async_notify = true;
- pending = atomic_read(&ctx->encrypt_pending);
- spin_unlock_bh(&ctx->encrypt_compl_lock);
-
- if (pending)
- crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ tls_encrypt_async_wait(ctx);
tls_tx_records(sk, -1);
@@ -2607,7 +2622,7 @@ static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct soc
}
crypto_init_wait(&sw_ctx_tx->async_wait);
- spin_lock_init(&sw_ctx_tx->encrypt_compl_lock);
+ atomic_set(&sw_ctx_tx->encrypt_pending, 1);
INIT_LIST_HEAD(&sw_ctx_tx->tx_list);
INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler);
sw_ctx_tx->tx_work.sk = sk;
@@ -2628,7 +2643,7 @@ static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx)
}
crypto_init_wait(&sw_ctx_rx->async_wait);
- spin_lock_init(&sw_ctx_rx->decrypt_compl_lock);
+ atomic_set(&sw_ctx_rx->decrypt_pending, 1);
init_waitqueue_head(&sw_ctx_rx->wq);
skb_queue_head_init(&sw_ctx_rx->rx_list);
skb_queue_head_init(&sw_ctx_rx->async_hold);
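The tls_sw.c changes above retire encrypt_compl_lock, decrypt_compl_lock and the async_notify flag in favour of a pending counter initialised to 1: every async crypto request bumps the counter before submission, every completion drops it, and a waiter temporarily gives up its bias of 1 so the final decrement signals the completion (then takes the bias back). The standalone sketch below models that scheme with C11 atomics and a condition variable standing in for crypto_wait_req(); all names are illustrative and this is not the kernel code.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

/* Biased pending counter: starts at 1 (the submitter's bias), one extra
 * count per in-flight request, each completion drops a count, the waiter
 * drops and later restores the bias.  Only the decrement that reaches
 * zero signals.
 */
struct waiter {
	atomic_int pending;
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int done;
};

static void complete(struct waiter *w)
{
	pthread_mutex_lock(&w->lock);
	w->done = 1;
	pthread_cond_signal(&w->cond);
	pthread_mutex_unlock(&w->lock);
}

static void *async_completion(void *arg)
{
	struct waiter *w = arg;

	usleep(1000);				/* pretend the crypto ran */
	if (atomic_fetch_sub(&w->pending, 1) == 1)
		complete(w);			/* last count: wake the waiter */
	return NULL;
}

static void wait_for_pending(struct waiter *w)
{
	if (atomic_fetch_sub(&w->pending, 1) != 1) {	/* drop our bias */
		pthread_mutex_lock(&w->lock);
		while (!w->done)
			pthread_cond_wait(&w->cond, &w->lock);
		pthread_mutex_unlock(&w->lock);
	}
	atomic_fetch_add(&w->pending, 1);		/* restore the bias */
}

int main(void)
{
	struct waiter w = {
		.pending = 1,
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.cond = PTHREAD_COND_INITIALIZER,
		.done = 0,
	};
	pthread_t t[3];

	for (int i = 0; i < 3; i++) {
		atomic_fetch_add(&w.pending, 1);	/* account before submit */
		pthread_create(&t[i], NULL, async_completion, &w);
	}
	wait_for_pending(&w);
	printf("all completions done, pending back to %d\n",
	       atomic_load(&w.pending));
	for (int i = 0; i < 3; i++)
		pthread_join(t[i], NULL);
	return 0;
}

In this model the bias is what lets a plain atomic_set(..., 1) at init time replace the old spinlock-plus-flag handshake: a completion can only take the counter to zero while a waiter has dropped its bias and is actually waiting.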
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 30b178ebba60..0748e7ea5210 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -782,19 +782,6 @@ static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
int);
-static int unix_set_peek_off(struct sock *sk, int val)
-{
- struct unix_sock *u = unix_sk(sk);
-
- if (mutex_lock_interruptible(&u->iolock))
- return -EINTR;
-
- WRITE_ONCE(sk->sk_peek_off, val);
- mutex_unlock(&u->iolock);
-
- return 0;
-}
-
#ifdef CONFIG_PROC_FS
static int unix_count_nr_fds(struct sock *sk)
{
@@ -862,7 +849,7 @@ static const struct proto_ops unix_stream_ops = {
.read_skb = unix_stream_read_skb,
.mmap = sock_no_mmap,
.splice_read = unix_stream_splice_read,
- .set_peek_off = unix_set_peek_off,
+ .set_peek_off = sk_set_peek_off,
.show_fdinfo = unix_show_fdinfo,
};
@@ -886,7 +873,7 @@ static const struct proto_ops unix_dgram_ops = {
.read_skb = unix_read_skb,
.recvmsg = unix_dgram_recvmsg,
.mmap = sock_no_mmap,
- .set_peek_off = unix_set_peek_off,
+ .set_peek_off = sk_set_peek_off,
.show_fdinfo = unix_show_fdinfo,
};
@@ -909,7 +896,7 @@ static const struct proto_ops unix_seqpacket_ops = {
.sendmsg = unix_seqpacket_sendmsg,
.recvmsg = unix_seqpacket_recvmsg,
.mmap = sock_no_mmap,
- .set_peek_off = unix_set_peek_off,
+ .set_peek_off = sk_set_peek_off,
.show_fdinfo = unix_show_fdinfo,
};
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 2405f0f9af31..2a81880dac7b 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -284,9 +284,17 @@ void unix_gc(void)
* which are creating the cycle(s).
*/
skb_queue_head_init(&hitlist);
- list_for_each_entry(u, &gc_candidates, link)
+ list_for_each_entry(u, &gc_candidates, link) {
scan_children(&u->sk, inc_inflight, &hitlist);
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (u->oob_skb) {
+ kfree_skb(u->oob_skb);
+ u->oob_skb = NULL;
+ }
+#endif
+ }
+
/* not_cycle_list contains those sockets which do not make up a
* cycle. Restore these to the inflight list.
*/
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 409d74c57ca0..3fb1b637352a 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -1661,6 +1661,7 @@ void wiphy_delayed_work_queue(struct wiphy *wiphy,
unsigned long delay)
{
if (!delay) {
+ del_timer(&dwork->timer);
wiphy_work_queue(wiphy, &dwork->work);
return;
}
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b09700400d09..bd54a928bab4 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4197,6 +4197,8 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
if (ntype != NL80211_IFTYPE_MESH_POINT)
return -EINVAL;
+ if (otype != NL80211_IFTYPE_MESH_POINT)
+ return -EINVAL;
if (netif_running(dev))
return -EBUSY;
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 2249b1a89d1c..389a52c29bfc 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1731,6 +1731,61 @@ static void cfg80211_update_hidden_bsses(struct cfg80211_internal_bss *known,
}
}
+static void cfg80211_check_stuck_ecsa(struct cfg80211_registered_device *rdev,
+ struct cfg80211_internal_bss *known,
+ const struct cfg80211_bss_ies *old)
+{
+ const struct ieee80211_ext_chansw_ie *ecsa;
+ const struct element *elem_new, *elem_old;
+ const struct cfg80211_bss_ies *new, *bcn;
+
+ if (known->pub.proberesp_ecsa_stuck)
+ return;
+
+ new = rcu_dereference_protected(known->pub.proberesp_ies,
+ lockdep_is_held(&rdev->bss_lock));
+ if (WARN_ON(!new))
+ return;
+
+ if (new->tsf - old->tsf < USEC_PER_SEC)
+ return;
+
+ elem_old = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+ old->data, old->len);
+ if (!elem_old)
+ return;
+
+ elem_new = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+ new->data, new->len);
+ if (!elem_new)
+ return;
+
+ bcn = rcu_dereference_protected(known->pub.beacon_ies,
+ lockdep_is_held(&rdev->bss_lock));
+ if (bcn &&
+ cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+ bcn->data, bcn->len))
+ return;
+
+ if (elem_new->datalen != elem_old->datalen)
+ return;
+ if (elem_new->datalen < sizeof(struct ieee80211_ext_chansw_ie))
+ return;
+ if (memcmp(elem_new->data, elem_old->data, elem_new->datalen))
+ return;
+
+ ecsa = (void *)elem_new->data;
+
+ if (!ecsa->mode)
+ return;
+
+ if (ecsa->new_ch_num !=
+ ieee80211_frequency_to_channel(known->pub.channel->center_freq))
+ return;
+
+ known->pub.proberesp_ecsa_stuck = 1;
+}
+
static bool
cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
struct cfg80211_internal_bss *known,
@@ -1750,8 +1805,10 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
/* Override possible earlier Beacon frame IEs */
rcu_assign_pointer(known->pub.ies,
new->pub.proberesp_ies);
- if (old)
+ if (old) {
+ cfg80211_check_stuck_ecsa(rdev, known, old);
kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
+ }
}
if (rcu_access_pointer(new->pub.beacon_ies)) {
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 1eadfac03cc4..b78c0e095e22 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -722,7 +722,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
memcpy(vaddr, buffer, len);
kunmap_local(vaddr);
- skb_add_rx_frag(skb, nr_frags, page, 0, len, 0);
+ skb_add_rx_frag(skb, nr_frags, page, 0, len, PAGE_SIZE);
+ refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc);
}
if (first_frag && desc->options & XDP_TX_METADATA) {
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 41533c631431..e6da7e8495c9 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -858,4 +858,5 @@ int xfrm_count_pfkey_enc_supported(void)
}
EXPORT_SYMBOL_GPL(xfrm_count_pfkey_enc_supported);
+MODULE_DESCRIPTION("XFRM Algorithm interface");
MODULE_LICENSE("GPL");
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 3784534c9185..653e51ae3964 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -407,7 +407,7 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
struct net_device *dev = x->xso.dev;
- if (!x->type_offload || x->encap)
+ if (!x->type_offload)
return false;
if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET ||
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 662c83beb345..e5722c95b8bb 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -704,9 +704,13 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
{
struct net *net = dev_net(skb_dst(skb)->dev);
struct xfrm_state *x = skb_dst(skb)->xfrm;
+ int family;
int err;
- switch (x->outer_mode.family) {
+ family = (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) ? x->outer_mode.family
+ : skb_dst(skb)->ops->family;
+
+ switch (family) {
case AF_INET:
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 1b7e75159727..da6ecc6b3e15 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2694,7 +2694,9 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m)
mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
- family = xfrm[i]->props.family;
+ if (xfrm[i]->xso.type != XFRM_DEV_OFFLOAD_PACKET)
+ family = xfrm[i]->props.family;
+
oif = fl->flowi_oif ? : fl->flowi_l3mdev;
dst = xfrm_dst_lookup(xfrm[i], tos, oif,
&saddr, &daddr, family, mark);
@@ -3416,7 +3418,7 @@ decode_session4(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reve
}
fl4->flowi4_proto = flkeys->basic.ip_proto;
- fl4->flowi4_tos = flkeys->ip.tos;
+ fl4->flowi4_tos = flkeys->ip.tos & ~INET_ECN_MASK;
}
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index ad01997c3aa9..912c1189ba41 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2017,6 +2017,9 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb)
if (xp->xfrm_nr == 0)
return 0;
+ if (xp->xfrm_nr > XFRM_MAX_DEPTH)
+ return -ENOBUFS;
+
for (i = 0; i < xp->xfrm_nr; i++) {
struct xfrm_user_tmpl *up = &vec[i];
struct xfrm_tmpl *kp = &xp->xfrm_vec[i];
@@ -3888,5 +3891,6 @@ static void __exit xfrm_user_exit(void)
module_init(xfrm_user_init);
module_exit(xfrm_user_exit);
+MODULE_DESCRIPTION("XFRM User interface");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_XFRM);