summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/bluetooth/hci_core.c7
-rw-r--r--net/bluetooth/hci_event.c13
-rw-r--r--net/bluetooth/hci_sync.c7
-rw-r--r--net/bluetooth/l2cap_core.c8
-rw-r--r--net/bluetooth/mgmt.c4
-rw-r--r--net/bluetooth/rfcomm/core.c2
-rw-r--r--net/bridge/br_netfilter_hooks.c96
-rw-r--r--net/bridge/br_switchdev.c84
-rw-r--r--net/bridge/netfilter/nf_conntrack_bridge.c30
-rw-r--r--net/ceph/messenger_v2.c3
-rw-r--r--net/core/dev.c22
-rw-r--r--net/core/page_pool_user.c3
-rw-r--r--net/core/rtnetlink.c15
-rw-r--r--net/core/skmsg.c7
-rw-r--r--net/core/sock.c23
-rw-r--r--net/devlink/core.c12
-rw-r--r--net/devlink/port.c2
-rw-r--r--net/hsr/hsr_forward.c4
-rw-r--r--net/ipv4/arp.c3
-rw-r--r--net/ipv4/devinet.c21
-rw-r--r--net/ipv4/inet_hashtables.c25
-rw-r--r--net/ipv4/ip_tunnel.c28
-rw-r--r--net/ipv4/udp.c7
-rw-r--r--net/ipv6/addrconf.c28
-rw-r--r--net/ipv6/exthdrs.c10
-rw-r--r--net/ipv6/route.c21
-rw-r--r--net/ipv6/seg6.c20
-rw-r--r--net/iucv/iucv.c4
-rw-r--r--net/l2tp/l2tp_ip6.c2
-rw-r--r--net/mac80211/rate.c3
-rw-r--r--net/mctp/route.c12
-rw-r--r--net/mptcp/diag.c11
-rw-r--r--net/mptcp/options.c2
-rw-r--r--net/mptcp/pm_netlink.c69
-rw-r--r--net/mptcp/pm_userspace.c25
-rw-r--r--net/mptcp/protocol.c54
-rw-r--r--net/mptcp/protocol.h36
-rw-r--r--net/mptcp/subflow.c15
-rw-r--r--net/netfilter/nf_conntrack_core.c1
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c4
-rw-r--r--net/netfilter/nf_flow_table_core.c17
-rw-r--r--net/netfilter/nf_tables_api.c88
-rw-r--r--net/netfilter/nft_compat.c20
-rw-r--r--net/netfilter/nft_ct.c11
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/netrom/af_netrom.c14
-rw-r--r--net/netrom/nr_dev.c2
-rw-r--r--net/netrom/nr_in.c6
-rw-r--r--net/netrom/nr_out.c2
-rw-r--r--net/netrom/nr_route.c8
-rw-r--r--net/netrom/nr_subr.c5
-rw-r--r--net/phonet/datagram.c4
-rw-r--r--net/phonet/pep.c41
-rw-r--r--net/rds/rdma.c3
-rw-r--r--net/rds/send.c6
-rw-r--r--net/sched/act_mirred.c36
-rw-r--r--net/sched/cls_flower.c5
-rw-r--r--net/switchdev/switchdev.c73
-rw-r--r--net/tls/tls_main.c2
-rw-r--r--net/tls/tls_sw.c62
-rw-r--r--net/unix/af_unix.c19
-rw-r--r--net/unix/garbage.c22
-rw-r--r--net/wireless/nl80211.c2
-rw-r--r--net/xdp/xsk.c3
-rw-r--r--net/xfrm/xfrm_device.c2
-rw-r--r--net/xfrm/xfrm_output.c6
-rw-r--r--net/xfrm/xfrm_policy.c6
-rw-r--r--net/xfrm/xfrm_user.c3
68 files changed, 852 insertions, 361 deletions
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 65601aa52e0d..2821a42cefdc 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1049,6 +1049,7 @@ static void hci_error_reset(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset);
+ hci_dev_hold(hdev);
BT_DBG("%s", hdev->name);
if (hdev->hw_error)
@@ -1056,10 +1057,10 @@ static void hci_error_reset(struct work_struct *work)
else
bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code);
- if (hci_dev_do_close(hdev))
- return;
+ if (!hci_dev_do_close(hdev))
+ hci_dev_do_open(hdev);
- hci_dev_do_open(hdev);
+ hci_dev_put(hdev);
}
void hci_uuids_clear(struct hci_dev *hdev)
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index ef8c3bed7361..2a5f5a7d2412 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5329,9 +5329,12 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
- if (!conn || !hci_conn_ssp_enabled(conn))
+ if (!conn || !hci_dev_test_flag(hdev, HCI_SSP_ENABLED))
goto unlock;
+ /* Assume remote supports SSP since it has triggered this event */
+ set_bit(HCI_CONN_SSP_ENABLED, &conn->flags);
+
hci_conn_hold(conn);
if (!hci_dev_test_flag(hdev, HCI_MGMT))
@@ -6794,6 +6797,10 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data,
return send_conn_param_neg_reply(hdev, handle,
HCI_ERROR_UNKNOWN_CONN_ID);
+ if (max > hcon->le_conn_max_interval)
+ return send_conn_param_neg_reply(hdev, handle,
+ HCI_ERROR_INVALID_LL_PARAMS);
+
if (hci_check_conn_params(min, max, latency, timeout))
return send_conn_param_neg_reply(hdev, handle,
HCI_ERROR_INVALID_LL_PARAMS);
@@ -7420,10 +7427,10 @@ static void hci_store_wake_reason(struct hci_dev *hdev, u8 event,
* keep track of the bdaddr of the connection event that woke us up.
*/
if (event == HCI_EV_CONN_REQUEST) {
- bacpy(&hdev->wake_addr, &conn_complete->bdaddr);
+ bacpy(&hdev->wake_addr, &conn_request->bdaddr);
hdev->wake_addr_type = BDADDR_BREDR;
} else if (event == HCI_EV_CONN_COMPLETE) {
- bacpy(&hdev->wake_addr, &conn_request->bdaddr);
+ bacpy(&hdev->wake_addr, &conn_complete->bdaddr);
hdev->wake_addr_type = BDADDR_BREDR;
} else if (event == HCI_EV_LE_META) {
struct hci_ev_le_meta *le_ev = (void *)skb->data;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index a6fc8a2a5c67..5716345a26df 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -2206,8 +2206,11 @@ static int hci_le_add_accept_list_sync(struct hci_dev *hdev,
/* During suspend, only wakeable devices can be in acceptlist */
if (hdev->suspended &&
- !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP))
+ !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) {
+ hci_le_del_accept_list_sync(hdev, &params->addr,
+ params->addr_type);
return 0;
+ }
/* Select filter policy to accept all advertising */
if (*num_entries >= hdev->le_accept_list_size)
@@ -5559,7 +5562,7 @@ static int hci_inquiry_sync(struct hci_dev *hdev, u8 length)
bt_dev_dbg(hdev, "");
- if (hci_dev_test_flag(hdev, HCI_INQUIRY))
+ if (test_bit(HCI_INQUIRY, &hdev->flags))
return 0;
hci_dev_lock(hdev);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 60298975d5c4..656f49b299d2 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -5613,7 +5613,13 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn,
memset(&rsp, 0, sizeof(rsp));
- err = hci_check_conn_params(min, max, latency, to_multiplier);
+ if (max > hcon->le_conn_max_interval) {
+ BT_DBG("requested connection interval exceeds current bounds.");
+ err = -EINVAL;
+ } else {
+ err = hci_check_conn_params(min, max, latency, to_multiplier);
+ }
+
if (err)
rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED);
else
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index bb72ff6eb22f..ee3b4aad8bd8 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1045,6 +1045,8 @@ static void rpa_expired(struct work_struct *work)
hci_cmd_sync_queue(hdev, rpa_expired_sync, NULL, NULL);
}
+static int set_discoverable_sync(struct hci_dev *hdev, void *data);
+
static void discov_off(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev,
@@ -1063,7 +1065,7 @@ static void discov_off(struct work_struct *work)
hci_dev_clear_flag(hdev, HCI_DISCOVERABLE);
hdev->discov_timeout = 0;
- hci_update_discoverable(hdev);
+ hci_cmd_sync_queue(hdev, set_discoverable_sync, NULL, NULL);
mgmt_new_settings(hdev);
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 053ef8f25fae..1d34d8497033 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1941,7 +1941,7 @@ static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s)
/* Get data directly from socket receive queue without copying it. */
while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
skb_orphan(skb);
- if (!skb_linearize(skb)) {
+ if (!skb_linearize(skb) && sk->sk_state != BT_CLOSED) {
s = rfcomm_recv_frame(s, skb);
if (!s)
break;
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index ed1720890757..35e10c5a766d 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -43,6 +43,10 @@
#include <linux/sysctl.h>
#endif
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack_core.h>
+#endif
+
static unsigned int brnf_net_id __read_mostly;
struct brnf_net {
@@ -553,6 +557,90 @@ static unsigned int br_nf_pre_routing(void *priv,
return NF_STOLEN;
}
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+/* conntracks' nf_confirm logic cannot handle cloned skbs referencing
+ * the same nf_conn entry, which will happen for multicast (broadcast)
+ * Frames on bridges.
+ *
+ * Example:
+ * macvlan0
+ * br0
+ * ethX ethY
+ *
+ * ethX (or Y) receives multicast or broadcast packet containing
+ * an IP packet, not yet in conntrack table.
+ *
+ * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting.
+ * -> skb->_nfct now references a unconfirmed entry
+ * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge
+ * interface.
+ * 3. skb gets passed up the stack.
+ * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb
+ * and schedules a work queue to send them out on the lower devices.
+ *
+ * The clone skb->_nfct is not a copy, it is the same entry as the
+ * original skb. The macvlan rx handler then returns RX_HANDLER_PASS.
+ * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb.
+ *
+ * The Macvlan broadcast worker and normal confirm path will race.
+ *
+ * This race will not happen if step 2 already confirmed a clone. In that
+ * case later steps perform skb_clone() with skb->_nfct already confirmed (in
+ * hash table). This works fine.
+ *
+ * But such confirmation won't happen when eb/ip/nftables rules dropped the
+ * packets before they reached the nf_confirm step in postrouting.
+ *
+ * Work around this problem by explicit confirmation of the entry at
+ * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed
+ * entry.
+ *
+ */
+static unsigned int br_nf_local_in(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nf_conntrack *nfct = skb_nfct(skb);
+ const struct nf_ct_hook *ct_hook;
+ struct nf_conn *ct;
+ int ret;
+
+ if (!nfct || skb->pkt_type == PACKET_HOST)
+ return NF_ACCEPT;
+
+ ct = container_of(nfct, struct nf_conn, ct_general);
+ if (likely(nf_ct_is_confirmed(ct)))
+ return NF_ACCEPT;
+
+ WARN_ON_ONCE(skb_shared(skb));
+ WARN_ON_ONCE(refcount_read(&nfct->use) != 1);
+
+ /* We can't call nf_confirm here, it would create a dependency
+ * on nf_conntrack module.
+ */
+ ct_hook = rcu_dereference(nf_ct_hook);
+ if (!ct_hook) {
+ skb->_nfct = 0ul;
+ nf_conntrack_put(nfct);
+ return NF_ACCEPT;
+ }
+
+ nf_bridge_pull_encap_header(skb);
+ ret = ct_hook->confirm(skb);
+ switch (ret & NF_VERDICT_MASK) {
+ case NF_STOLEN:
+ return NF_STOLEN;
+ default:
+ nf_bridge_push_encap_header(skb);
+ break;
+ }
+
+ ct = container_of(nfct, struct nf_conn, ct_general);
+ WARN_ON_ONCE(!nf_ct_is_confirmed(ct));
+
+ return ret;
+}
+#endif
/* PF_BRIDGE/FORWARD *************************************************/
static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -964,6 +1052,14 @@ static const struct nf_hook_ops br_nf_ops[] = {
.hooknum = NF_BR_PRE_ROUTING,
.priority = NF_BR_PRI_BRNF,
},
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ {
+ .hook = br_nf_local_in,
+ .pf = NFPROTO_BRIDGE,
+ .hooknum = NF_BR_LOCAL_IN,
+ .priority = NF_BR_PRI_LAST,
+ },
+#endif
{
.hook = br_nf_forward,
.pf = NFPROTO_BRIDGE,
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index ee84e783e1df..7b41ee8740cb 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -595,21 +595,40 @@ br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
}
static int br_switchdev_mdb_queue_one(struct list_head *mdb_list,
+ struct net_device *dev,
+ unsigned long action,
enum switchdev_obj_id id,
const struct net_bridge_mdb_entry *mp,
struct net_device *orig_dev)
{
- struct switchdev_obj_port_mdb *mdb;
+ struct switchdev_obj_port_mdb mdb = {
+ .obj = {
+ .id = id,
+ .orig_dev = orig_dev,
+ },
+ };
+ struct switchdev_obj_port_mdb *pmdb;
- mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC);
- if (!mdb)
- return -ENOMEM;
+ br_switchdev_mdb_populate(&mdb, mp);
+
+ if (action == SWITCHDEV_PORT_OBJ_ADD &&
+ switchdev_port_obj_act_is_deferred(dev, action, &mdb.obj)) {
+ /* This event is already in the deferred queue of
+ * events, so this replay must be elided, lest the
+ * driver receives duplicate events for it. This can
+ * only happen when replaying additions, since
+ * modifications are always immediately visible in
+ * br->mdb_list, whereas actual event delivery may be
+ * delayed.
+ */
+ return 0;
+ }
- mdb->obj.id = id;
- mdb->obj.orig_dev = orig_dev;
- br_switchdev_mdb_populate(mdb, mp);
- list_add_tail(&mdb->obj.list, mdb_list);
+ pmdb = kmemdup(&mdb, sizeof(mdb), GFP_ATOMIC);
+ if (!pmdb)
+ return -ENOMEM;
+ list_add_tail(&pmdb->obj.list, mdb_list);
return 0;
}
@@ -677,51 +696,50 @@ br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev,
if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
return 0;
- /* We cannot walk over br->mdb_list protected just by the rtnl_mutex,
- * because the write-side protection is br->multicast_lock. But we
- * need to emulate the [ blocking ] calling context of a regular
- * switchdev event, so since both br->multicast_lock and RCU read side
- * critical sections are atomic, we have no choice but to pick the RCU
- * read side lock, queue up all our events, leave the critical section
- * and notify switchdev from blocking context.
+ if (adding)
+ action = SWITCHDEV_PORT_OBJ_ADD;
+ else
+ action = SWITCHDEV_PORT_OBJ_DEL;
+
+ /* br_switchdev_mdb_queue_one() will take care to not queue a
+ * replay of an event that is already pending in the switchdev
+ * deferred queue. In order to safely determine that, there
+ * must be no new deferred MDB notifications enqueued for the
+ * duration of the MDB scan. Therefore, grab the write-side
+ * lock to avoid racing with any concurrent IGMP/MLD snooping.
*/
- rcu_read_lock();
+ spin_lock_bh(&br->multicast_lock);
- hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
+ hlist_for_each_entry(mp, &br->mdb_list, mdb_node) {
struct net_bridge_port_group __rcu * const *pp;
const struct net_bridge_port_group *p;
if (mp->host_joined) {
- err = br_switchdev_mdb_queue_one(&mdb_list,
+ err = br_switchdev_mdb_queue_one(&mdb_list, dev, action,
SWITCHDEV_OBJ_ID_HOST_MDB,
mp, br_dev);
if (err) {
- rcu_read_unlock();
+ spin_unlock_bh(&br->multicast_lock);
goto out_free_mdb;
}
}
- for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
+ for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
if (p->key.port->dev != dev)
continue;
- err = br_switchdev_mdb_queue_one(&mdb_list,
+ err = br_switchdev_mdb_queue_one(&mdb_list, dev, action,
SWITCHDEV_OBJ_ID_PORT_MDB,
mp, dev);
if (err) {
- rcu_read_unlock();
+ spin_unlock_bh(&br->multicast_lock);
goto out_free_mdb;
}
}
}
- rcu_read_unlock();
-
- if (adding)
- action = SWITCHDEV_PORT_OBJ_ADD;
- else
- action = SWITCHDEV_PORT_OBJ_DEL;
+ spin_unlock_bh(&br->multicast_lock);
list_for_each_entry(obj, &mdb_list, list) {
err = br_switchdev_mdb_replay_one(nb, dev,
@@ -786,6 +804,16 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p,
br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
br_switchdev_vlan_replay(br_dev, ctx, false, blocking_nb, NULL);
+
+ /* Make sure that the device leaving this bridge has seen all
+ * relevant events before it is disassociated. In the normal
+ * case, when the device is directly attached to the bridge,
+ * this is covered by del_nbp(). If the association was indirect
+ * however, e.g. via a team or bond, and the device is leaving
+ * that intermediate device, then the bridge port remains in
+ * place.
+ */
+ switchdev_deferred_process();
}
/* Let the bridge know that this port is offloaded, so that it can assign a
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index abb090f94ed2..6f877e31709b 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -291,6 +291,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
return nf_conntrack_in(skb, &bridge_state);
}
+static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ if (skb->pkt_type == PACKET_HOST)
+ return NF_ACCEPT;
+
+ /* nf_conntrack_confirm() cannot handle concurrent clones,
+ * this happens for broad/multicast frames with e.g. macvlan on top
+ * of the bridge device.
+ */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
+ return NF_ACCEPT;
+
+ /* let inet prerouting call conntrack again */
+ skb->_nfct = 0;
+ nf_ct_put(ct);
+
+ return NF_ACCEPT;
+}
+
static void nf_ct_bridge_frag_save(struct sk_buff *skb,
struct nf_bridge_frag_data *data)
{
@@ -386,6 +410,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
.priority = NF_IP_PRI_CONNTRACK,
},
{
+ .hook = nf_ct_bridge_in,
+ .pf = NFPROTO_BRIDGE,
+ .hooknum = NF_BR_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
+ },
+ {
.hook = nf_ct_bridge_post,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_POST_ROUTING,
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index a0ca5414b333..bd608ffa0627 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -2034,6 +2034,9 @@ static int prepare_sparse_read_data(struct ceph_connection *con)
if (!con_secure(con))
con->in_data_crc = -1;
+ ceph_msg_data_cursor_init(&con->v2.in_cursor, msg,
+ msg->sparse_read_total);
+
reset_in_kvecs(con);
con->v2.in_state = IN_S_PREPARE_SPARSE_DATA_CONT;
con->v2.data_len_remain = data_len(msg);
diff --git a/net/core/dev.c b/net/core/dev.c
index 73a021973007..76e6438f4858 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -9074,28 +9074,6 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b)
}
EXPORT_SYMBOL(netdev_port_same_parent_id);
-static void netdev_dpll_pin_assign(struct net_device *dev, struct dpll_pin *dpll_pin)
-{
-#if IS_ENABLED(CONFIG_DPLL)
- rtnl_lock();
- dev->dpll_pin = dpll_pin;
- rtnl_unlock();
-#endif
-}
-
-void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin)
-{
- WARN_ON(!dpll_pin);
- netdev_dpll_pin_assign(dev, dpll_pin);
-}
-EXPORT_SYMBOL(netdev_dpll_pin_set);
-
-void netdev_dpll_pin_clear(struct net_device *dev)
-{
- netdev_dpll_pin_assign(dev, NULL);
-}
-EXPORT_SYMBOL(netdev_dpll_pin_clear);
-
/**
* dev_change_proto_down - set carrier according to proto_down.
*
diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c
index ffe5244e5597..278294aca66a 100644
--- a/net/core/page_pool_user.c
+++ b/net/core/page_pool_user.c
@@ -94,11 +94,12 @@ netdev_nl_page_pool_get_dump(struct sk_buff *skb, struct netlink_callback *cb,
state->pp_id = pool->user.id;
err = fill(skb, pool, info);
if (err)
- break;
+ goto out;
}
state->pp_id = 0;
}
+out:
mutex_unlock(&page_pools_lock);
rtnl_unlock();
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9c4f427f3a50..bd50e9fe3234 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1057,7 +1057,7 @@ static size_t rtnl_dpll_pin_size(const struct net_device *dev)
{
size_t size = nla_total_size(0); /* nest IFLA_DPLL_PIN */
- size += dpll_msg_pin_handle_size(netdev_dpll_pin(dev));
+ size += dpll_netdev_pin_handle_size(dev);
return size;
}
@@ -1792,7 +1792,7 @@ static int rtnl_fill_dpll_pin(struct sk_buff *skb,
if (!dpll_pin_nest)
return -EMSGSIZE;
- ret = dpll_msg_add_pin_handle(skb, netdev_dpll_pin(dev));
+ ret = dpll_netdev_add_pin_handle(skb, dev);
if (ret < 0)
goto nest_cancel;
@@ -5169,10 +5169,9 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
struct net_device *dev;
- struct nlattr *br_spec, *attr = NULL;
+ struct nlattr *br_spec, *attr, *br_flags_attr = NULL;
int rem, err = -EOPNOTSUPP;
u16 flags = 0;
- bool have_flags = false;
if (nlmsg_len(nlh) < sizeof(*ifm))
return -EINVAL;
@@ -5190,11 +5189,11 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
if (br_spec) {
nla_for_each_nested(attr, br_spec, rem) {
- if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) {
+ if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !br_flags_attr) {
if (nla_len(attr) < sizeof(flags))
return -EINVAL;
- have_flags = true;
+ br_flags_attr = attr;
flags = nla_get_u16(attr);
}
@@ -5238,8 +5237,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}
- if (have_flags)
- memcpy(nla_data(attr), &flags, sizeof(flags));
+ if (br_flags_attr)
+ memcpy(nla_data(br_flags_attr), &flags, sizeof(flags));
out:
return err;
}
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 93ecfceac1bc..4d75ef9d24bf 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1226,8 +1226,11 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
rcu_read_lock();
psock = sk_psock(sk);
- if (psock)
- psock->saved_data_ready(sk);
+ if (psock) {
+ read_lock_bh(&sk->sk_callback_lock);
+ sk_psock_data_ready(sk, psock);
+ read_unlock_bh(&sk->sk_callback_lock);
+ }
rcu_read_unlock();
}
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 0a7f46c37f0c..5e78798456fd 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1188,6 +1188,17 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
*/
WRITE_ONCE(sk->sk_txrehash, (u8)val);
return 0;
+ case SO_PEEK_OFF:
+ {
+ int (*set_peek_off)(struct sock *sk, int val);
+
+ set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
+ if (set_peek_off)
+ ret = set_peek_off(sk, val);
+ else
+ ret = -EOPNOTSUPP;
+ return ret;
+ }
}
sockopt_lock_sock(sk);
@@ -1430,18 +1441,6 @@ set_sndbuf:
sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
break;
- case SO_PEEK_OFF:
- {
- int (*set_peek_off)(struct sock *sk, int val);
-
- set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
- if (set_peek_off)
- ret = set_peek_off(sk, val);
- else
- ret = -EOPNOTSUPP;
- break;
- }
-
case SO_NOFCS:
sock_valbool_flag(sk, SOCK_NOFCS, valbool);
break;
diff --git a/net/devlink/core.c b/net/devlink/core.c
index 6a58342752b4..7f0b093208d7 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -529,14 +529,20 @@ static int __init devlink_init(void)
{
int err;
- err = genl_register_family(&devlink_nl_family);
- if (err)
- goto out;
err = register_pernet_subsys(&devlink_pernet_ops);
if (err)
goto out;
+ err = genl_register_family(&devlink_nl_family);
+ if (err)
+ goto out_unreg_pernet_subsys;
err = register_netdevice_notifier(&devlink_port_netdevice_nb);
+ if (!err)
+ return 0;
+
+ genl_unregister_family(&devlink_nl_family);
+out_unreg_pernet_subsys:
+ unregister_pernet_subsys(&devlink_pernet_ops);
out:
WARN_ON(err);
return err;
diff --git a/net/devlink/port.c b/net/devlink/port.c
index 78592912f657..4b2d46ccfe48 100644
--- a/net/devlink/port.c
+++ b/net/devlink/port.c
@@ -583,7 +583,7 @@ devlink_nl_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
xa_for_each_start(&devlink->ports, port_index, devlink_port, state->idx) {
err = devlink_nl_port_fill(msg, devlink_port,
- DEVLINK_CMD_NEW,
+ DEVLINK_CMD_PORT_NEW,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, flags,
cb->extack);
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 80cdc6f6b34c..5d68cb181695 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -83,7 +83,7 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb)
return false;
/* Get next tlv */
- total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tag->tlv.HSR_TLV_length;
+ total_length += hsr_sup_tag->tlv.HSR_TLV_length;
if (!pskb_may_pull(skb, total_length))
return false;
skb_pull(skb, total_length);
@@ -435,7 +435,7 @@ static void hsr_forward_do(struct hsr_frame_info *frame)
continue;
/* Don't send frame over port where it has been sent before.
- * Also fro SAN, this shouldn't be done.
+ * Also for SAN, this shouldn't be done.
*/
if (!frame->is_from_san &&
hsr_register_frame_out(port, frame->node_src,
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 9456f5bb35e5..0d0d725b46ad 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1125,7 +1125,8 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
if (neigh) {
if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) {
read_lock_bh(&neigh->lock);
- memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len);
+ memcpy(r->arp_ha.sa_data, neigh->ha,
+ min(dev->addr_len, sizeof(r->arp_ha.sa_data_min)));
r->arp_flags = arp_state_to_flags(neigh);
read_unlock_bh(&neigh->lock);
r->arp_ha.sa_family = dev->type;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ca0ff15dc8fa..bc74f131fe4d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1825,6 +1825,21 @@ done:
return err;
}
+/* Combine dev_addr_genid and dev_base_seq to detect changes.
+ */
+static u32 inet_base_seq(const struct net *net)
+{
+ u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
+ net->dev_base_seq;
+
+ /* Must not return 0 (see nl_dump_check_consistent()).
+ * Chose a value far away from 0.
+ */
+ if (!res)
+ res = 0x80000000;
+ return res;
+}
+
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = cb->nlh;
@@ -1876,8 +1891,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
idx = 0;
head = &tgt_net->dev_index_head[h];
rcu_read_lock();
- cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
- tgt_net->dev_base_seq;
+ cb->seq = inet_base_seq(tgt_net);
hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
@@ -2278,8 +2292,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
idx = 0;
head = &net->dev_index_head[h];
rcu_read_lock();
- cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
- net->dev_base_seq;
+ cb->seq = inet_base_seq(net);
hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 93e9193df544..308ff34002ea 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -1130,10 +1130,33 @@ ok:
return 0;
error:
+ if (sk_hashed(sk)) {
+ spinlock_t *lock = inet_ehash_lockp(hinfo, sk->sk_hash);
+
+ sock_prot_inuse_add(net, sk->sk_prot, -1);
+
+ spin_lock(lock);
+ sk_nulls_del_node_init_rcu(sk);
+ spin_unlock(lock);
+
+ sk->sk_hash = 0;
+ inet_sk(sk)->inet_sport = 0;
+ inet_sk(sk)->inet_num = 0;
+
+ if (tw)
+ inet_twsk_bind_unhash(tw, hinfo);
+ }
+
spin_unlock(&head2->lock);
if (tb_created)
inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
- spin_unlock_bh(&head->lock);
+ spin_unlock(&head->lock);
+
+ if (tw)
+ inet_twsk_deschedule_put(tw);
+
+ local_bh_enable();
+
return -ENOMEM;
}
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index a4513ffb66cb..1b6981de3f29 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -554,6 +554,20 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
return 0;
}
+static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
+{
+ /* we must cap headroom to some upperlimit, else pskb_expand_head
+ * will overflow header offsets in skb_headers_offset_update().
+ */
+ static const unsigned int max_allowed = 512;
+
+ if (headroom > max_allowed)
+ headroom = max_allowed;
+
+ if (headroom > READ_ONCE(dev->needed_headroom))
+ WRITE_ONCE(dev->needed_headroom, headroom);
+}
+
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
u8 proto, int tunnel_hlen)
{
@@ -632,13 +646,13 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
- if (headroom > READ_ONCE(dev->needed_headroom))
- WRITE_ONCE(dev->needed_headroom, headroom);
-
- if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
+ if (skb_cow_head(skb, headroom)) {
ip_rt_put(rt);
goto tx_dropped;
}
+
+ ip_tunnel_adj_headroom(dev, headroom);
+
iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
df, !net_eq(tunnel->net, dev_net(dev)));
return;
@@ -818,16 +832,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
- if (max_headroom > READ_ONCE(dev->needed_headroom))
- WRITE_ONCE(dev->needed_headroom, max_headroom);
- if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
+ if (skb_cow_head(skb, max_headroom)) {
ip_rt_put(rt);
DEV_STATS_INC(dev, tx_dropped);
kfree_skb(skb);
return;
}
+ ip_tunnel_adj_headroom(dev, max_headroom);
+
iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
df, !net_eq(tunnel->net, dev_net(dev)));
return;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f631b0a21af4..e474b201900f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1589,12 +1589,7 @@ int udp_init_sock(struct sock *sk)
void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
{
- if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) {
- bool slow = lock_sock_fast(sk);
-
- sk_peek_offset_bwd(sk, len);
- unlock_sock_fast(sk, slow);
- }
+ sk_peek_offset_bwd(sk, len);
if (!skb_unref(skb))
return;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 733ace18806c..055230b669cf 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -708,6 +708,22 @@ errout:
return err;
}
+/* Combine dev_addr_genid and dev_base_seq to detect changes.
+ */
+static u32 inet6_base_seq(const struct net *net)
+{
+ u32 res = atomic_read(&net->ipv6.dev_addr_genid) +
+ net->dev_base_seq;
+
+ /* Must not return 0 (see nl_dump_check_consistent()).
+ * Chose a value far away from 0.
+ */
+ if (!res)
+ res = 0x80000000;
+ return res;
+}
+
+
static int inet6_netconf_dump_devconf(struct sk_buff *skb,
struct netlink_callback *cb)
{
@@ -741,8 +757,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb,
idx = 0;
head = &net->dev_index_head[h];
rcu_read_lock();
- cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^
- net->dev_base_seq;
+ cb->seq = inet6_base_seq(net);
hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
@@ -5362,7 +5377,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
}
rcu_read_lock();
- cb->seq = atomic_read(&tgt_net->ipv6.dev_addr_genid) ^ tgt_net->dev_base_seq;
+ cb->seq = inet6_base_seq(tgt_net);
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
head = &tgt_net->dev_index_head[h];
@@ -5494,9 +5509,10 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
}
addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
- if (!addr)
- return -EINVAL;
-
+ if (!addr) {
+ err = -EINVAL;
+ goto errout;
+ }
ifm = nlmsg_data(nlh);
if (ifm->ifa_index)
dev = dev_get_by_index(tgt_net, ifm->ifa_index);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 4952ae792450..02e9ffb63af1 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -177,6 +177,8 @@ static bool ip6_parse_tlv(bool hopbyhop,
case IPV6_TLV_IOAM:
if (!ipv6_hop_ioam(skb, off))
return false;
+
+ nh = skb_network_header(skb);
break;
case IPV6_TLV_JUMBO:
if (!ipv6_hop_jumbo(skb, off))
@@ -943,6 +945,14 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
if (!skb_valid_dst(skb))
ip6_route_input(skb);
+ /* About to mangle packet header */
+ if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len))
+ goto drop;
+
+ /* Trace pointer may have changed */
+ trace = (struct ioam6_trace_hdr *)(skb_network_header(skb)
+ + optoff + sizeof(*hdr));
+
ioam6_fill_trace_data(skb, ns, trace, true);
break;
default:
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ea1dec8448fc..ef815ba583a8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5332,19 +5332,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
err_nh = NULL;
list_for_each_entry(nh, &rt6_nh_list, next) {
err = __ip6_ins_rt(nh->fib6_info, info, extack);
- fib6_info_release(nh->fib6_info);
-
- if (!err) {
- /* save reference to last route successfully inserted */
- rt_last = nh->fib6_info;
-
- /* save reference to first route for notification */
- if (!rt_notif)
- rt_notif = nh->fib6_info;
- }
- /* nh->fib6_info is used or freed at this point, reset to NULL*/
- nh->fib6_info = NULL;
if (err) {
if (replace && nhn)
NL_SET_ERR_MSG_MOD(extack,
@@ -5352,6 +5340,12 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
err_nh = nh;
goto add_errout;
}
+ /* save reference to last route successfully inserted */
+ rt_last = nh->fib6_info;
+
+ /* save reference to first route for notification */
+ if (!rt_notif)
+ rt_notif = nh->fib6_info;
/* Because each route is added like a single route we remove
* these flags after the first nexthop: if there is a collision,
@@ -5412,8 +5406,7 @@ add_errout:
cleanup:
list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
- if (nh->fib6_info)
- fib6_info_release(nh->fib6_info);
+ fib6_info_release(nh->fib6_info);
list_del(&nh->next);
kfree(nh);
}
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 29346a6eec9f..35508abd76f4 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -512,22 +512,24 @@ int __init seg6_init(void)
{
int err;
- err = genl_register_family(&seg6_genl_family);
+ err = register_pernet_subsys(&ip6_segments_ops);
if (err)
goto out;
- err = register_pernet_subsys(&ip6_segments_ops);
+ err = genl_register_family(&seg6_genl_family);
if (err)
- goto out_unregister_genl;
+ goto out_unregister_pernet;
#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
err = seg6_iptunnel_init();
if (err)
- goto out_unregister_pernet;
+ goto out_unregister_genl;
err = seg6_local_init();
- if (err)
- goto out_unregister_pernet;
+ if (err) {
+ seg6_iptunnel_exit();
+ goto out_unregister_genl;
+ }
#endif
#ifdef CONFIG_IPV6_SEG6_HMAC
@@ -548,11 +550,11 @@ out_unregister_iptun:
#endif
#endif
#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
-out_unregister_pernet:
- unregister_pernet_subsys(&ip6_segments_ops);
-#endif
out_unregister_genl:
genl_unregister_family(&seg6_genl_family);
+#endif
+out_unregister_pernet:
+ unregister_pernet_subsys(&ip6_segments_ops);
goto out;
}
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 6334f64f04d5..b0b3e9c5af44 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -156,7 +156,7 @@ static char iucv_error_pathid[16] = "INVALID PATHID";
static LIST_HEAD(iucv_handler_list);
/*
- * iucv_path_table: an array of iucv_path structures.
+ * iucv_path_table: array of pointers to iucv_path structures.
*/
static struct iucv_path **iucv_path_table;
static unsigned long iucv_max_pathid;
@@ -544,7 +544,7 @@ static int iucv_enable(void)
cpus_read_lock();
rc = -ENOMEM;
- alloc_size = iucv_max_pathid * sizeof(struct iucv_path);
+ alloc_size = iucv_max_pathid * sizeof(*iucv_path_table);
iucv_path_table = kzalloc(alloc_size, GFP_KERNEL);
if (!iucv_path_table)
goto out;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index dd3153966173..7bf14cf9ffaa 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -627,7 +627,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
back_from_confirm:
lock_sock(sk);
- ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0;
+ ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0);
err = ip6_append_data(sk, ip_generic_getfrag, msg,
ulen, transhdrlen, &ipc6,
&fl6, (struct rt6_info *)dst,
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index d5ea5f5bcf3a..9d33fd2377c8 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -119,7 +119,8 @@ void rate_control_rate_update(struct ieee80211_local *local,
rcu_read_unlock();
}
- drv_sta_rc_update(local, sta->sdata, &sta->sta, changed);
+ if (sta->uploaded)
+ drv_sta_rc_update(local, sta->sdata, &sta->sta, changed);
}
int ieee80211_rate_control_register(const struct rate_control_ops *ops)
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 7a47a58aa54b..ceee44ea09d9 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -663,7 +663,7 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
spin_unlock_irqrestore(&mns->keys_lock, flags);
if (!tagbits) {
- kfree(key);
+ mctp_key_unref(key);
return ERR_PTR(-EBUSY);
}
@@ -888,7 +888,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex);
if (!dev) {
rcu_read_unlock();
- return rc;
+ goto out_free;
}
rt->dev = __mctp_dev_get(dev);
rcu_read_unlock();
@@ -903,7 +903,8 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
rt->mtu = 0;
} else {
- return -EINVAL;
+ rc = -EINVAL;
+ goto out_free;
}
spin_lock_irqsave(&rt->dev->addrs_lock, flags);
@@ -966,12 +967,17 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
rc = mctp_do_fragment_route(rt, skb, mtu, tag);
}
+ /* route output functions consume the skb, even on error */
+ skb = NULL;
+
out_release:
if (!ext_rt)
mctp_route_release(rt);
mctp_dev_put(tmp_rt.dev);
+out_free:
+ kfree_skb(skb);
return rc;
}
diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c
index a536586742f2..7017dd60659d 100644
--- a/net/mptcp/diag.c
+++ b/net/mptcp/diag.c
@@ -13,17 +13,22 @@
#include <uapi/linux/mptcp.h>
#include "protocol.h"
-static int subflow_get_info(const struct sock *sk, struct sk_buff *skb)
+static int subflow_get_info(struct sock *sk, struct sk_buff *skb)
{
struct mptcp_subflow_context *sf;
struct nlattr *start;
u32 flags = 0;
+ bool slow;
int err;
+ if (inet_sk_state_load(sk) == TCP_LISTEN)
+ return 0;
+
start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP);
if (!start)
return -EMSGSIZE;
+ slow = lock_sock_fast(sk);
rcu_read_lock();
sf = rcu_dereference(inet_csk(sk)->icsk_ulp_data);
if (!sf) {
@@ -63,17 +68,19 @@ static int subflow_get_info(const struct sock *sk, struct sk_buff *skb)
sf->map_data_len) ||
nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) ||
nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) ||
- nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, sf->local_id)) {
+ nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, subflow_get_local_id(sf))) {
err = -EMSGSIZE;
goto nla_failure;
}
rcu_read_unlock();
+ unlock_sock_fast(sk, slow);
nla_nest_end(skb, start);
return 0;
nla_failure:
rcu_read_unlock();
+ unlock_sock_fast(sk, slow);
nla_nest_cancel(skb, start);
return err;
}
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index e3e96a49f922..63fc0758c22d 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -981,10 +981,10 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (mp_opt->deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
-set_fully_established:
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
+set_fully_established:
mptcp_data_lock((struct sock *)msk);
__mptcp_subflow_fully_established(msk, subflow, mp_opt);
mptcp_data_unlock((struct sock *)msk);
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 287a60381eae..58d17d9604e7 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -396,19 +396,6 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
}
}
-static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr,
- const struct mptcp_addr_info *addr)
-{
- int i;
-
- for (i = 0; i < nr; i++) {
- if (addrs[i].id == addr->id)
- return true;
- }
-
- return false;
-}
-
/* Fill all the remote addresses into the array addrs[],
* and return the array size.
*/
@@ -440,18 +427,34 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk,
msk->pm.subflows++;
addrs[i++] = remote;
} else {
+ DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
+
+ /* Forbid creation of new subflows matching existing
+ * ones, possibly already created by incoming ADD_ADDR
+ */
+ bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
+ mptcp_for_each_subflow(msk, subflow)
+ if (READ_ONCE(subflow->local_id) == local->id)
+ __set_bit(subflow->remote_id, unavail_id);
+
mptcp_for_each_subflow(msk, subflow) {
ssk = mptcp_subflow_tcp_sock(subflow);
remote_address((struct sock_common *)ssk, &addrs[i]);
- addrs[i].id = subflow->remote_id;
+ addrs[i].id = READ_ONCE(subflow->remote_id);
if (deny_id0 && !addrs[i].id)
continue;
+ if (test_bit(addrs[i].id, unavail_id))
+ continue;
+
if (!mptcp_pm_addr_families_match(sk, local, &addrs[i]))
continue;
- if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
- msk->pm.subflows < subflows_max) {
+ if (msk->pm.subflows < subflows_max) {
+ /* forbid creating multiple address towards
+ * this id
+ */
+ __set_bit(addrs[i].id, unavail_id);
msk->pm.subflows++;
i++;
}
@@ -799,18 +802,18 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
mptcp_for_each_subflow_safe(msk, subflow, tmp) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ u8 remote_id = READ_ONCE(subflow->remote_id);
int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
- u8 id = subflow->local_id;
+ u8 id = subflow_get_local_id(subflow);
- if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id)
+ if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id)
continue;
if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id))
continue;
pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u",
rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow",
- i, rm_id, subflow->local_id, subflow->remote_id,
- msk->mpc_endpoint_id);
+ i, rm_id, id, remote_id, msk->mpc_endpoint_id);
spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, how);
@@ -901,7 +904,8 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
}
static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
- struct mptcp_pm_addr_entry *entry)
+ struct mptcp_pm_addr_entry *entry,
+ bool needs_id)
{
struct mptcp_pm_addr_entry *cur, *del_entry = NULL;
unsigned int addr_max;
@@ -949,7 +953,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
}
}
- if (!entry->addr.id) {
+ if (!entry->addr.id && needs_id) {
find_next:
entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
MPTCP_PM_MAX_ADDR_ID + 1,
@@ -960,7 +964,7 @@ find_next:
}
}
- if (!entry->addr.id)
+ if (!entry->addr.id && needs_id)
goto out;
__set_bit(entry->addr.id, pernet->id_bitmap);
@@ -1092,7 +1096,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc
entry->ifindex = 0;
entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT;
entry->lsk = NULL;
- ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
+ ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true);
if (ret < 0)
kfree(entry);
@@ -1285,6 +1289,18 @@ next:
return 0;
}
+static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr,
+ struct genl_info *info)
+{
+ struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1];
+
+ if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr,
+ mptcp_pm_address_nl_policy, info->extack) &&
+ tb[MPTCP_PM_ADDR_ATTR_ID])
+ return true;
+ return false;
+}
+
int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
@@ -1326,7 +1342,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
goto out_free;
}
}
- ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
+ ret = mptcp_pm_nl_append_new_local_addr(pernet, entry,
+ !mptcp_pm_has_addr_attr_id(attr, info));
if (ret < 0) {
GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret);
goto out_free;
@@ -1980,7 +1997,7 @@ static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk)
if (WARN_ON_ONCE(!sf))
return -EINVAL;
- if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id))
+ if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf)))
return -EMSGSIZE;
if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id))
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 4f3901d5b8ef..bc97cc30f013 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -26,7 +26,8 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk)
}
static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
- struct mptcp_pm_addr_entry *entry)
+ struct mptcp_pm_addr_entry *entry,
+ bool needs_id)
{
DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
struct mptcp_pm_addr_entry *match = NULL;
@@ -41,7 +42,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
spin_lock_bh(&msk->pm.lock);
list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) {
addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true);
- if (addr_match && entry->addr.id == 0)
+ if (addr_match && entry->addr.id == 0 && needs_id)
entry->addr.id = e->addr.id;
id_match = (e->addr.id == entry->addr.id);
if (addr_match && id_match) {
@@ -64,7 +65,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
}
*e = *entry;
- if (!e->addr.id)
+ if (!e->addr.id && needs_id)
e->addr.id = find_next_zero_bit(id_bitmap,
MPTCP_PM_MAX_ADDR_ID + 1,
1);
@@ -153,7 +154,7 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
if (new_entry.addr.port == msk_sport)
new_entry.addr.port = 0;
- return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry);
+ return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true);
}
int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
@@ -198,7 +199,7 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
goto announce_err;
}
- err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val);
+ err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val, false);
if (err < 0) {
GENL_SET_ERR_MSG(info, "did not match address and id");
goto announce_err;
@@ -233,7 +234,7 @@ static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk,
lock_sock(sk);
mptcp_for_each_subflow(msk, subflow) {
- if (subflow->local_id == 0) {
+ if (READ_ONCE(subflow->local_id) == 0) {
has_id_0 = true;
break;
}
@@ -378,7 +379,7 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
}
local.addr = addr_l;
- err = mptcp_userspace_pm_append_new_local_addr(msk, &local);
+ err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false);
if (err < 0) {
GENL_SET_ERR_MSG(info, "did not match address and id");
goto create_err;
@@ -494,6 +495,16 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info
goto destroy_err;
}
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (addr_l.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) {
+ ipv6_addr_set_v4mapped(addr_l.addr.s_addr, &addr_l.addr6);
+ addr_l.family = AF_INET6;
+ }
+ if (addr_r.family == AF_INET && ipv6_addr_v4mapped(&addr_l.addr6)) {
+ ipv6_addr_set_v4mapped(addr_r.addr.s_addr, &addr_r.addr6);
+ addr_r.family = AF_INET6;
+ }
+#endif
if (addr_l.family != addr_r.family) {
GENL_SET_ERR_MSG(info, "address families do not match");
err = -EINVAL;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 8ef2927ebca2..7833a49f6214 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -85,7 +85,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
subflow->subflow_id = msk->subflow_id++;
/* This is the first subflow, always with id 0 */
- subflow->local_id_valid = 1;
+ WRITE_ONCE(subflow->local_id, 0);
mptcp_sock_graft(msk->first, sk->sk_socket);
iput(SOCK_INODE(ssock));
@@ -1260,6 +1260,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
mpext = mptcp_get_ext(skb);
if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) {
TCP_SKB_CB(skb)->eor = 1;
+ tcp_mark_push(tcp_sk(ssk), skb);
goto alloc_skb;
}
@@ -3177,8 +3178,50 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
+
+static void mptcp_copy_ip6_options(struct sock *newsk, const struct sock *sk)
+{
+ const struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
+ struct ipv6_pinfo *newnp;
+
+ newnp = inet6_sk(newsk);
+
+ rcu_read_lock();
+ opt = rcu_dereference(np->opt);
+ if (opt) {
+ opt = ipv6_dup_options(newsk, opt);
+ if (!opt)
+ net_warn_ratelimited("%s: Failed to copy ip6 options\n", __func__);
+ }
+ RCU_INIT_POINTER(newnp->opt, opt);
+ rcu_read_unlock();
+}
#endif
+static void mptcp_copy_ip_options(struct sock *newsk, const struct sock *sk)
+{
+ struct ip_options_rcu *inet_opt, *newopt = NULL;
+ const struct inet_sock *inet = inet_sk(sk);
+ struct inet_sock *newinet;
+
+ newinet = inet_sk(newsk);
+
+ rcu_read_lock();
+ inet_opt = rcu_dereference(inet->inet_opt);
+ if (inet_opt) {
+ newopt = sock_kmalloc(newsk, sizeof(*inet_opt) +
+ inet_opt->opt.optlen, GFP_ATOMIC);
+ if (newopt)
+ memcpy(newopt, inet_opt, sizeof(*inet_opt) +
+ inet_opt->opt.optlen);
+ else
+ net_warn_ratelimited("%s: Failed to copy ip options\n", __func__);
+ }
+ RCU_INIT_POINTER(newinet->inet_opt, newopt);
+ rcu_read_unlock();
+}
+
struct sock *mptcp_sk_clone_init(const struct sock *sk,
const struct mptcp_options_received *mp_opt,
struct sock *ssk,
@@ -3199,6 +3242,13 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
__mptcp_init_sock(nsk);
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (nsk->sk_family == AF_INET6)
+ mptcp_copy_ip6_options(nsk, sk);
+ else
+#endif
+ mptcp_copy_ip_options(nsk, sk);
+
msk = mptcp_sk(nsk);
msk->local_key = subflow_req->local_key;
msk->token = subflow_req->token;
@@ -3210,7 +3260,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
msk->write_seq = subflow_req->idsn + 1;
msk->snd_nxt = msk->write_seq;
msk->snd_una = msk->write_seq;
- msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
+ msk->wnd_end = msk->snd_nxt + tcp_sk(ssk)->snd_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
mptcp_init_sched(msk, mptcp_sk(sk)->sched);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index ed50f2015dc3..07f6242afc1a 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -491,10 +491,9 @@ struct mptcp_subflow_context {
remote_key_valid : 1, /* received the peer key from */
disposable : 1, /* ctx can be free at ulp release time */
stale : 1, /* unable to snd/rcv data, do not use for xmit */
- local_id_valid : 1, /* local_id is correctly initialized */
valid_csum_seen : 1, /* at least one csum validated */
is_mptfo : 1, /* subflow is doing TFO */
- __unused : 9;
+ __unused : 10;
bool data_avail;
bool scheduled;
u32 remote_nonce;
@@ -505,7 +504,7 @@ struct mptcp_subflow_context {
u8 hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */
u64 iasn; /* initial ack sequence number, MPC subflows only */
};
- u8 local_id;
+ s16 local_id; /* if negative not initialized yet */
u8 remote_id;
u8 reset_seen:1;
u8 reset_transient:1;
@@ -556,6 +555,7 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
{
memset(&subflow->reset, 0, sizeof(subflow->reset));
subflow->request_mptcp = 1;
+ WRITE_ONCE(subflow->local_id, -1);
}
static inline u64
@@ -790,6 +790,16 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
}
+static inline void mptcp_write_space(struct sock *sk)
+{
+ if (sk_stream_is_writeable(sk)) {
+ /* pairs with memory barrier in mptcp_poll */
+ smp_mb();
+ if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
+ sk_stream_write_space(sk);
+ }
+}
+
static inline void __mptcp_sync_sndbuf(struct sock *sk)
{
struct mptcp_subflow_context *subflow;
@@ -808,6 +818,7 @@ static inline void __mptcp_sync_sndbuf(struct sock *sk)
/* the msk max wmem limit is <nr_subflows> * tcp wmem[2] */
WRITE_ONCE(sk->sk_sndbuf, new_sndbuf);
+ mptcp_write_space(sk);
}
/* The called held both the msk socket and the subflow socket locks,
@@ -838,16 +849,6 @@ static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
local_bh_enable();
}
-static inline void mptcp_write_space(struct sock *sk)
-{
- if (sk_stream_is_writeable(sk)) {
- /* pairs with memory barrier in mptcp_poll */
- smp_mb();
- if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
- sk_stream_write_space(sk);
- }
-}
-
void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags);
#define MPTCP_TOKEN_MAX_RETRIES 4
@@ -1022,6 +1023,15 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
+static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow)
+{
+ int local_id = READ_ONCE(subflow->local_id);
+
+ if (local_id < 0)
+ return 0;
+ return local_id;
+}
+
void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_work(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index c34ecadee120..71ba86246ff8 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -535,7 +535,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->backup = mp_opt.backup;
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
- subflow->remote_id = mp_opt.join_id;
+ WRITE_ONCE(subflow->remote_id, mp_opt.join_id);
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
subflow, subflow->thmac, subflow->remote_nonce,
subflow->backup);
@@ -577,8 +577,8 @@ do_reset:
static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id)
{
- subflow->local_id = local_id;
- subflow->local_id_valid = 1;
+ WARN_ON_ONCE(local_id < 0 || local_id > 255);
+ WRITE_ONCE(subflow->local_id, local_id);
}
static int subflow_chk_local_id(struct sock *sk)
@@ -587,7 +587,7 @@ static int subflow_chk_local_id(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
int err;
- if (likely(subflow->local_id_valid))
+ if (likely(subflow->local_id >= 0))
return 0;
err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
@@ -1567,7 +1567,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
remote_token, local_id, remote_id);
subflow->remote_token = remote_token;
- subflow->remote_id = remote_id;
+ WRITE_ONCE(subflow->remote_id, remote_id);
subflow->request_join = 1;
subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
subflow->subflow_id = msk->subflow_id++;
@@ -1731,6 +1731,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
pr_debug("subflow=%p", ctx);
ctx->tcp_sock = sk;
+ WRITE_ONCE(ctx->local_id, -1);
return ctx;
}
@@ -1966,14 +1967,14 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->idsn = subflow_req->idsn;
/* this is the first subflow, id is always 0 */
- new_ctx->local_id_valid = 1;
+ subflow_set_local_id(new_ctx, 0);
} else if (subflow_req->mp_join) {
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->mp_join = 1;
new_ctx->fully_established = 1;
new_ctx->remote_key_valid = 1;
new_ctx->backup = subflow_req->backup;
- new_ctx->remote_id = subflow_req->remote_id;
+ WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id);
new_ctx->token = subflow_req->token;
new_ctx->thmac = subflow_req->thmac;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 2e5f3864d353..5b876fa7f9af 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2756,6 +2756,7 @@ static const struct nf_ct_hook nf_conntrack_hook = {
.get_tuple_skb = nf_conntrack_get_tuple_skb,
.attach = nf_conntrack_attach,
.set_closing = nf_conntrack_set_closing,
+ .confirm = __nf_conntrack_confirm,
};
void nf_conntrack_init_end(void)
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index e697a824b001..540d97715bd2 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -533,6 +533,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
/* Get fields bitmap */
if (nf_h323_error_boundary(bs, 0, f->sz))
return H323_ERROR_BOUND;
+ if (f->sz > 32)
+ return H323_ERROR_RANGE;
bmp = get_bitmap(bs, f->sz);
if (base)
*(unsigned int *)base = bmp;
@@ -589,6 +591,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
bmp2_len = get_bits(bs, 7) + 1;
if (nf_h323_error_boundary(bs, 0, bmp2_len))
return H323_ERROR_BOUND;
+ if (bmp2_len > 32)
+ return H323_ERROR_RANGE;
bmp2 = get_bitmap(bs, bmp2_len);
bmp |= bmp2 >> f->sz;
if (base)
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 920a5a29ae1d..a0571339239c 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -87,12 +87,22 @@ static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
return 0;
}
+static struct dst_entry *nft_route_dst_fetch(struct nf_flow_route *route,
+ enum flow_offload_tuple_dir dir)
+{
+ struct dst_entry *dst = route->tuple[dir].dst;
+
+ route->tuple[dir].dst = NULL;
+
+ return dst;
+}
+
static int flow_offload_fill_route(struct flow_offload *flow,
- const struct nf_flow_route *route,
+ struct nf_flow_route *route,
enum flow_offload_tuple_dir dir)
{
struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
- struct dst_entry *dst = route->tuple[dir].dst;
+ struct dst_entry *dst = nft_route_dst_fetch(route, dir);
int i, j = 0;
switch (flow_tuple->l3proto) {
@@ -122,6 +132,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
ETH_ALEN);
flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
+ dst_release(dst);
break;
case FLOW_OFFLOAD_XMIT_XFRM:
case FLOW_OFFLOAD_XMIT_NEIGH:
@@ -146,7 +157,7 @@ static void nft_flow_dst_release(struct flow_offload *flow,
}
void flow_offload_route_init(struct flow_offload *flow,
- const struct nf_flow_route *route)
+ struct nf_flow_route *route)
{
flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index f8e3f70c35bd..1683dc196b59 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -684,15 +684,16 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
return err;
}
-static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
- struct nft_flowtable *flowtable)
+static struct nft_trans *
+nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_flowtable *flowtable)
{
struct nft_trans *trans;
trans = nft_trans_alloc(ctx, msg_type,
sizeof(struct nft_trans_flowtable));
if (trans == NULL)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
if (msg_type == NFT_MSG_NEWFLOWTABLE)
nft_activate_next(ctx->net, flowtable);
@@ -701,22 +702,22 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
nft_trans_flowtable(trans) = flowtable;
nft_trans_commit_list_add_tail(ctx->net, trans);
- return 0;
+ return trans;
}
static int nft_delflowtable(struct nft_ctx *ctx,
struct nft_flowtable *flowtable)
{
- int err;
+ struct nft_trans *trans;
- err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable);
- if (err < 0)
- return err;
+ trans = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
nft_deactivate_next(ctx->net, flowtable);
nft_use_dec(&ctx->table->use);
- return err;
+ return 0;
}
static void __nft_reg_track_clobber(struct nft_regs_track *track, u8 dreg)
@@ -1251,6 +1252,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
return 0;
err_register_hooks:
+ ctx->table->flags |= NFT_TABLE_F_DORMANT;
nft_trans_destroy(trans);
return ret;
}
@@ -2080,7 +2082,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
struct nft_hook *hook;
int err;
- hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT);
+ hook = kzalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT);
if (!hook) {
err = -ENOMEM;
goto err_hook_alloc;
@@ -2503,19 +2505,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
RCU_INIT_POINTER(chain->blob_gen_0, blob);
RCU_INIT_POINTER(chain->blob_gen_1, blob);
- err = nf_tables_register_hook(net, table, chain);
- if (err < 0)
- goto err_destroy_chain;
-
if (!nft_use_inc(&table->use)) {
err = -EMFILE;
- goto err_use;
+ goto err_destroy_chain;
}
trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
- goto err_unregister_hook;
+ goto err_trans;
}
nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET;
@@ -2523,17 +2521,22 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
nft_trans_chain_policy(trans) = policy;
err = nft_chain_add(table, chain);
- if (err < 0) {
- nft_trans_destroy(trans);
- goto err_unregister_hook;
- }
+ if (err < 0)
+ goto err_chain_add;
+
+ /* This must be LAST to ensure no packets are walking over this chain. */
+ err = nf_tables_register_hook(net, table, chain);
+ if (err < 0)
+ goto err_register_hook;
return 0;
-err_unregister_hook:
+err_register_hook:
+ nft_chain_del(chain);
+err_chain_add:
+ nft_trans_destroy(trans);
+err_trans:
nft_use_dec_restore(&table->use);
-err_use:
- nf_tables_unregister_hook(net, table, chain);
err_destroy_chain:
nf_tables_chain_destroy(ctx);
@@ -4998,6 +5001,12 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) ==
(NFT_SET_EVAL | NFT_SET_OBJECT))
return -EOPNOTSUPP;
+ if ((flags & (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT | NFT_SET_EVAL)) ==
+ (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT))
+ return -EOPNOTSUPP;
+ if ((flags & (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) ==
+ (NFT_SET_CONSTANT | NFT_SET_TIMEOUT))
+ return -EOPNOTSUPP;
}
desc.dtype = 0;
@@ -5421,6 +5430,7 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) {
list_del_rcu(&set->list);
+ set->dead = 1;
if (event)
nf_tables_set_notify(ctx, set, NFT_MSG_DELSET,
GFP_KERNEL);
@@ -8455,9 +8465,9 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
u8 family = info->nfmsg->nfgen_family;
const struct nf_flowtable_type *type;
struct nft_flowtable *flowtable;
- struct nft_hook *hook, *next;
struct net *net = info->net;
struct nft_table *table;
+ struct nft_trans *trans;
struct nft_ctx ctx;
int err;
@@ -8537,34 +8547,34 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
err = nft_flowtable_parse_hook(&ctx, nla, &flowtable_hook, flowtable,
extack, true);
if (err < 0)
- goto err4;
+ goto err_flowtable_parse_hooks;
list_splice(&flowtable_hook.list, &flowtable->hook_list);
flowtable->data.priority = flowtable_hook.priority;
flowtable->hooknum = flowtable_hook.num;
+ trans = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ goto err_flowtable_trans;
+ }
+
+ /* This must be LAST to ensure no packets are walking over this flowtable. */
err = nft_register_flowtable_net_hooks(ctx.net, table,
&flowtable->hook_list,
flowtable);
- if (err < 0) {
- nft_hooks_destroy(&flowtable->hook_list);
- goto err4;
- }
-
- err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
if (err < 0)
- goto err5;
+ goto err_flowtable_hooks;
list_add_tail_rcu(&flowtable->list, &table->flowtables);
return 0;
-err5:
- list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
- nft_unregister_flowtable_hook(net, flowtable, hook);
- list_del_rcu(&hook->list);
- kfree_rcu(hook, rcu);
- }
-err4:
+
+err_flowtable_hooks:
+ nft_trans_destroy(trans);
+err_flowtable_trans:
+ nft_hooks_destroy(&flowtable->hook_list);
+err_flowtable_parse_hooks:
flowtable->data.type->free(&flowtable->data);
err3:
module_put(type->owner);
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 1f9474fefe84..d3d11dede545 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -359,10 +359,20 @@ static int nft_target_validate(const struct nft_ctx *ctx,
if (ctx->family != NFPROTO_IPV4 &&
ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET &&
ctx->family != NFPROTO_BRIDGE &&
ctx->family != NFPROTO_ARP)
return -EOPNOTSUPP;
+ ret = nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING));
+ if (ret)
+ return ret;
+
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
@@ -610,10 +620,20 @@ static int nft_match_validate(const struct nft_ctx *ctx,
if (ctx->family != NFPROTO_IPV4 &&
ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET &&
ctx->family != NFPROTO_BRIDGE &&
ctx->family != NFPROTO_ARP)
return -EOPNOTSUPP;
+ ret = nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING));
+ if (ret)
+ return ret;
+
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index bfd3e5a14dab..255640013ab8 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -1256,14 +1256,13 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx,
switch (priv->l3num) {
case NFPROTO_IPV4:
case NFPROTO_IPV6:
- if (priv->l3num != ctx->family)
- return -EINVAL;
+ if (priv->l3num == ctx->family || ctx->family == NFPROTO_INET)
+ break;
- fallthrough;
- case NFPROTO_INET:
- break;
+ return -EINVAL;
+ case NFPROTO_INET: /* tuple.src.l3num supports NFPROTO_IPV4/6 only */
default:
- return -EOPNOTSUPP;
+ return -EAFNOSUPPORT;
}
priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 9c962347cf85..ff315351269f 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -167,7 +167,7 @@ static inline u32 netlink_group_mask(u32 group)
static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
gfp_t gfp_mask)
{
- unsigned int len = skb_end_offset(skb);
+ unsigned int len = skb->len;
struct sk_buff *new;
new = alloc_skb(len, gfp_mask);
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 0eed00184adf..104a80b75477 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -453,16 +453,16 @@ static int nr_create(struct net *net, struct socket *sock, int protocol,
nr_init_timers(sk);
nr->t1 =
- msecs_to_jiffies(sysctl_netrom_transport_timeout);
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_timeout));
nr->t2 =
- msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay);
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_acknowledge_delay));
nr->n2 =
- msecs_to_jiffies(sysctl_netrom_transport_maximum_tries);
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries));
nr->t4 =
- msecs_to_jiffies(sysctl_netrom_transport_busy_delay);
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay));
nr->idle =
- msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout);
- nr->window = sysctl_netrom_transport_requested_window_size;
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_no_activity_timeout));
+ nr->window = READ_ONCE(sysctl_netrom_transport_requested_window_size);
nr->bpqext = 1;
nr->state = NR_STATE_0;
@@ -954,7 +954,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
* G8PZT's Xrouter which is sending packets with command type 7
* as an extension of the protocol.
*/
- if (sysctl_netrom_reset_circuit &&
+ if (READ_ONCE(sysctl_netrom_reset_circuit) &&
(frametype != NR_RESET || flags != 0))
nr_transmit_reset(skb, 1);
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 3aaac4a22b38..2c34389c3ce6 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -81,7 +81,7 @@ static int nr_header(struct sk_buff *skb, struct net_device *dev,
buff[6] |= AX25_SSSID_SPARE;
buff += AX25_ADDR_LEN;
- *buff++ = sysctl_netrom_network_ttl_initialiser;
+ *buff++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
*buff++ = NR_PROTO_IP;
*buff++ = NR_PROTO_IP;
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 2f084b6f69d7..97944db6b5ac 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -97,7 +97,7 @@ static int nr_state1_machine(struct sock *sk, struct sk_buff *skb,
break;
case NR_RESET:
- if (sysctl_netrom_reset_circuit)
+ if (READ_ONCE(sysctl_netrom_reset_circuit))
nr_disconnect(sk, ECONNRESET);
break;
@@ -128,7 +128,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb,
break;
case NR_RESET:
- if (sysctl_netrom_reset_circuit)
+ if (READ_ONCE(sysctl_netrom_reset_circuit))
nr_disconnect(sk, ECONNRESET);
break;
@@ -262,7 +262,7 @@ static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype
break;
case NR_RESET:
- if (sysctl_netrom_reset_circuit)
+ if (READ_ONCE(sysctl_netrom_reset_circuit))
nr_disconnect(sk, ECONNRESET);
break;
diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c
index 44929657f5b7..5e531394a724 100644
--- a/net/netrom/nr_out.c
+++ b/net/netrom/nr_out.c
@@ -204,7 +204,7 @@ void nr_transmit_buffer(struct sock *sk, struct sk_buff *skb)
dptr[6] |= AX25_SSSID_SPARE;
dptr += AX25_ADDR_LEN;
- *dptr++ = sysctl_netrom_network_ttl_initialiser;
+ *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
if (!nr_route_frame(skb, NULL)) {
kfree_skb(skb);
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index baea3cbd76ca..70480869ad1c 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -153,7 +153,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
nr_neigh->digipeat = NULL;
nr_neigh->ax25 = NULL;
nr_neigh->dev = dev;
- nr_neigh->quality = sysctl_netrom_default_path_quality;
+ nr_neigh->quality = READ_ONCE(sysctl_netrom_default_path_quality);
nr_neigh->locked = 0;
nr_neigh->count = 0;
nr_neigh->number = nr_neigh_no++;
@@ -728,7 +728,7 @@ void nr_link_failed(ax25_cb *ax25, int reason)
nr_neigh->ax25 = NULL;
ax25_cb_put(ax25);
- if (++nr_neigh->failed < sysctl_netrom_link_fails_count) {
+ if (++nr_neigh->failed < READ_ONCE(sysctl_netrom_link_fails_count)) {
nr_neigh_put(nr_neigh);
return;
}
@@ -766,7 +766,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25)
if (ax25 != NULL) {
ret = nr_add_node(nr_src, "", &ax25->dest_addr, ax25->digipeat,
ax25->ax25_dev->dev, 0,
- sysctl_netrom_obsolescence_count_initialiser);
+ READ_ONCE(sysctl_netrom_obsolescence_count_initialiser));
if (ret)
return ret;
}
@@ -780,7 +780,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25)
return ret;
}
- if (!sysctl_netrom_routing_control && ax25 != NULL)
+ if (!READ_ONCE(sysctl_netrom_routing_control) && ax25 != NULL)
return 0;
/* Its Time-To-Live has expired */
diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c
index e2d2af924cff..c3bbd5880850 100644
--- a/net/netrom/nr_subr.c
+++ b/net/netrom/nr_subr.c
@@ -182,7 +182,8 @@ void nr_write_internal(struct sock *sk, int frametype)
*dptr++ = nr->my_id;
*dptr++ = frametype;
*dptr++ = nr->window;
- if (nr->bpqext) *dptr++ = sysctl_netrom_network_ttl_initialiser;
+ if (nr->bpqext)
+ *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
break;
case NR_DISCREQ:
@@ -236,7 +237,7 @@ void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags)
dptr[6] |= AX25_SSSID_SPARE;
dptr += AX25_ADDR_LEN;
- *dptr++ = sysctl_netrom_network_ttl_initialiser;
+ *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
if (mine) {
*dptr++ = 0;
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 3aa50dc7535b..976fe250b509 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -34,10 +34,10 @@ static int pn_ioctl(struct sock *sk, int cmd, int *karg)
switch (cmd) {
case SIOCINQ:
- lock_sock(sk);
+ spin_lock_bh(&sk->sk_receive_queue.lock);
skb = skb_peek(&sk->sk_receive_queue);
*karg = skb ? skb->len : 0;
- release_sock(sk);
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
return 0;
case SIOCPNADDRESOURCE:
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index faba31f2eff2..3dd5f52bc1b5 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -917,6 +917,37 @@ static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len)
return 0;
}
+static unsigned int pep_first_packet_length(struct sock *sk)
+{
+ struct pep_sock *pn = pep_sk(sk);
+ struct sk_buff_head *q;
+ struct sk_buff *skb;
+ unsigned int len = 0;
+ bool found = false;
+
+ if (sock_flag(sk, SOCK_URGINLINE)) {
+ q = &pn->ctrlreq_queue;
+ spin_lock_bh(&q->lock);
+ skb = skb_peek(q);
+ if (skb) {
+ len = skb->len;
+ found = true;
+ }
+ spin_unlock_bh(&q->lock);
+ }
+
+ if (likely(!found)) {
+ q = &sk->sk_receive_queue;
+ spin_lock_bh(&q->lock);
+ skb = skb_peek(q);
+ if (skb)
+ len = skb->len;
+ spin_unlock_bh(&q->lock);
+ }
+
+ return len;
+}
+
static int pep_ioctl(struct sock *sk, int cmd, int *karg)
{
struct pep_sock *pn = pep_sk(sk);
@@ -929,15 +960,7 @@ static int pep_ioctl(struct sock *sk, int cmd, int *karg)
break;
}
- lock_sock(sk);
- if (sock_flag(sk, SOCK_URGINLINE) &&
- !skb_queue_empty(&pn->ctrlreq_queue))
- *karg = skb_peek(&pn->ctrlreq_queue)->len;
- else if (!skb_queue_empty(&sk->sk_receive_queue))
- *karg = skb_peek(&sk->sk_receive_queue)->len;
- else
- *karg = 0;
- release_sock(sk);
+ *karg = pep_first_packet_length(sk);
ret = 0;
break;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index fba82d36593a..a4e3c5de998b 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -301,6 +301,9 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
kfree(sg);
}
ret = PTR_ERR(trans_private);
+ /* Trigger connection so that its ready for the next retry */
+ if (ret == -ENODEV)
+ rds_conn_connect_if_down(cp->cp_conn);
goto out;
}
diff --git a/net/rds/send.c b/net/rds/send.c
index 5e57a1581dc6..2899def23865 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1313,12 +1313,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
/* Parse any control messages the user may have included. */
ret = rds_cmsg_send(rs, rm, msg, &allocated_mr, &vct);
- if (ret) {
- /* Trigger connection so that its ready for the next retry */
- if (ret == -EAGAIN)
- rds_conn_connect_if_down(conn);
+ if (ret)
goto out;
- }
if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) {
printk_ratelimited(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n",
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 0a1a9e40f237..6faa7d00da09 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -232,18 +232,14 @@ release_idr:
return err;
}
-static bool is_mirred_nested(void)
-{
- return unlikely(__this_cpu_read(mirred_nest_level) > 1);
-}
-
-static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
+static int
+tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb)
{
int err;
if (!want_ingress)
err = tcf_dev_queue_xmit(skb, dev_queue_xmit);
- else if (is_mirred_nested())
+ else if (!at_ingress)
err = netif_rx(skb);
else
err = netif_receive_skb(skb);
@@ -270,8 +266,7 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) {
net_notice_ratelimited("tc mirred to Houston: device %s is down\n",
dev->name);
- err = -ENODEV;
- goto out;
+ goto err_cant_do;
}
/* we could easily avoid the clone only if called by ingress and clsact;
@@ -283,10 +278,8 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
tcf_mirred_can_reinsert(retval);
if (!dont_clone) {
skb_to_send = skb_clone(skb, GFP_ATOMIC);
- if (!skb_to_send) {
- err = -ENOMEM;
- goto out;
- }
+ if (!skb_to_send)
+ goto err_cant_do;
}
want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
@@ -319,19 +312,20 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress);
- err = tcf_mirred_forward(want_ingress, skb_to_send);
+ err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send);
} else {
- err = tcf_mirred_forward(want_ingress, skb_to_send);
+ err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send);
}
-
- if (err) {
-out:
+ if (err)
tcf_action_inc_overlimit_qstats(&m->common);
- if (is_redirect)
- retval = TC_ACT_SHOT;
- }
return retval;
+
+err_cant_do:
+ if (is_redirect)
+ retval = TC_ACT_SHOT;
+ tcf_action_inc_overlimit_qstats(&m->common);
+ return retval;
}
static int tcf_blockcast_redir(struct sk_buff *skb, struct tcf_mirred *m,
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index efb9d2811b73..6ee7064c82fc 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -2460,8 +2460,11 @@ unbind_filter:
}
errout_idr:
- if (!fold)
+ if (!fold) {
+ spin_lock(&tp->lock);
idr_remove(&head->handle_idr, fnew->handle);
+ spin_unlock(&tp->lock);
+ }
__fl_put(fnew);
errout_tb:
kfree(tb);
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 5b045284849e..c9189a970eec 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -19,6 +19,35 @@
#include <linux/rtnetlink.h>
#include <net/switchdev.h>
+static bool switchdev_obj_eq(const struct switchdev_obj *a,
+ const struct switchdev_obj *b)
+{
+ const struct switchdev_obj_port_vlan *va, *vb;
+ const struct switchdev_obj_port_mdb *ma, *mb;
+
+ if (a->id != b->id || a->orig_dev != b->orig_dev)
+ return false;
+
+ switch (a->id) {
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ va = SWITCHDEV_OBJ_PORT_VLAN(a);
+ vb = SWITCHDEV_OBJ_PORT_VLAN(b);
+ return va->flags == vb->flags &&
+ va->vid == vb->vid &&
+ va->changed == vb->changed;
+ case SWITCHDEV_OBJ_ID_PORT_MDB:
+ case SWITCHDEV_OBJ_ID_HOST_MDB:
+ ma = SWITCHDEV_OBJ_PORT_MDB(a);
+ mb = SWITCHDEV_OBJ_PORT_MDB(b);
+ return ma->vid == mb->vid &&
+ ether_addr_equal(ma->addr, mb->addr);
+ default:
+ break;
+ }
+
+ BUG();
+}
+
static LIST_HEAD(deferred);
static DEFINE_SPINLOCK(deferred_lock);
@@ -307,6 +336,50 @@ int switchdev_port_obj_del(struct net_device *dev,
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
+/**
+ * switchdev_port_obj_act_is_deferred - Is object action pending?
+ *
+ * @dev: port device
+ * @nt: type of action; add or delete
+ * @obj: object to test
+ *
+ * Returns true if a deferred item is pending, which is
+ * equivalent to the action @nt on an object @obj.
+ *
+ * rtnl_lock must be held.
+ */
+bool switchdev_port_obj_act_is_deferred(struct net_device *dev,
+ enum switchdev_notifier_type nt,
+ const struct switchdev_obj *obj)
+{
+ struct switchdev_deferred_item *dfitem;
+ bool found = false;
+
+ ASSERT_RTNL();
+
+ spin_lock_bh(&deferred_lock);
+
+ list_for_each_entry(dfitem, &deferred, list) {
+ if (dfitem->dev != dev)
+ continue;
+
+ if ((dfitem->func == switchdev_port_obj_add_deferred &&
+ nt == SWITCHDEV_PORT_OBJ_ADD) ||
+ (dfitem->func == switchdev_port_obj_del_deferred &&
+ nt == SWITCHDEV_PORT_OBJ_DEL)) {
+ if (switchdev_obj_eq((const void *)dfitem->data, obj)) {
+ found = true;
+ break;
+ }
+ }
+ }
+
+ spin_unlock_bh(&deferred_lock);
+
+ return found;
+}
+EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred);
+
static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 1c2c6800949d..b4674f03d71a 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -1003,7 +1003,7 @@ static u16 tls_user_config(struct tls_context *ctx, bool tx)
return 0;
}
-static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
+static int tls_get_info(struct sock *sk, struct sk_buff *skb)
{
u16 version, cipher_type;
struct tls_context *ctx;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 9fbc70200cd0..211f57164cb6 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -52,6 +52,7 @@ struct tls_decrypt_arg {
struct_group(inargs,
bool zc;
bool async;
+ bool async_done;
u8 tail;
);
@@ -274,22 +275,30 @@ static int tls_do_decryption(struct sock *sk,
DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->decrypt_pending) < 1);
atomic_inc(&ctx->decrypt_pending);
} else {
+ DECLARE_CRYPTO_WAIT(wait);
+
aead_request_set_callback(aead_req,
CRYPTO_TFM_REQ_MAY_BACKLOG,
- crypto_req_done, &ctx->async_wait);
+ crypto_req_done, &wait);
+ ret = crypto_aead_decrypt(aead_req);
+ if (ret == -EINPROGRESS || ret == -EBUSY)
+ ret = crypto_wait_req(ret, &wait);
+ return ret;
}
ret = crypto_aead_decrypt(aead_req);
+ if (ret == -EINPROGRESS)
+ return 0;
+
if (ret == -EBUSY) {
ret = tls_decrypt_async_wait(ctx);
- ret = ret ?: -EINPROGRESS;
+ darg->async_done = true;
+ /* all completions have run, we're not doing async anymore */
+ darg->async = false;
+ return ret;
}
- if (ret == -EINPROGRESS) {
- if (darg->async)
- return 0;
- ret = crypto_wait_req(ret, &ctx->async_wait);
- }
+ atomic_dec(&ctx->decrypt_pending);
darg->async = false;
return ret;
@@ -1588,8 +1597,11 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
/* Prepare and submit AEAD request */
err = tls_do_decryption(sk, sgin, sgout, dctx->iv,
data_len + prot->tail_size, aead_req, darg);
- if (err)
+ if (err) {
+ if (darg->async_done)
+ goto exit_free_skb;
goto exit_free_pages;
+ }
darg->skb = clear_skb ?: tls_strp_msg(ctx);
clear_skb = NULL;
@@ -1601,6 +1613,9 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
return err;
}
+ if (unlikely(darg->async_done))
+ return 0;
+
if (prot->tail_size)
darg->tail = dctx->tail;
@@ -1772,7 +1787,8 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
u8 *control,
size_t skip,
size_t len,
- bool is_peek)
+ bool is_peek,
+ bool *more)
{
struct sk_buff *skb = skb_peek(&ctx->rx_list);
struct tls_msg *tlm;
@@ -1785,7 +1801,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
err = tls_record_content_type(msg, tlm, control);
if (err <= 0)
- goto out;
+ goto more;
if (skip < rxm->full_len)
break;
@@ -1803,12 +1819,12 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
err = tls_record_content_type(msg, tlm, control);
if (err <= 0)
- goto out;
+ goto more;
err = skb_copy_datagram_msg(skb, rxm->offset + skip,
msg, chunk);
if (err < 0)
- goto out;
+ goto more;
len = len - chunk;
copied = copied + chunk;
@@ -1844,6 +1860,10 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
out:
return copied ? : err;
+more:
+ if (more)
+ *more = true;
+ goto out;
}
static bool
@@ -1943,10 +1963,12 @@ int tls_sw_recvmsg(struct sock *sk,
struct strp_msg *rxm;
struct tls_msg *tlm;
ssize_t copied = 0;
+ ssize_t peeked = 0;
bool async = false;
int target, err;
bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
bool is_peek = flags & MSG_PEEK;
+ bool rx_more = false;
bool released = true;
bool bpf_strp_enabled;
bool zc_capable;
@@ -1966,12 +1988,12 @@ int tls_sw_recvmsg(struct sock *sk,
goto end;
/* Process pending decrypted records. It must be non-zero-copy */
- err = process_rx_list(ctx, msg, &control, 0, len, is_peek);
+ err = process_rx_list(ctx, msg, &control, 0, len, is_peek, &rx_more);
if (err < 0)
goto end;
copied = err;
- if (len <= copied)
+ if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA) || rx_more)
goto end;
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
@@ -2064,6 +2086,8 @@ put_on_rx_list:
decrypted += chunk;
len -= chunk;
__skb_queue_tail(&ctx->rx_list, skb);
+ if (unlikely(control != TLS_RECORD_TYPE_DATA))
+ break;
continue;
}
@@ -2087,8 +2111,10 @@ put_on_rx_list:
if (err < 0)
goto put_on_rx_list_err;
- if (is_peek)
+ if (is_peek) {
+ peeked += chunk;
goto put_on_rx_list;
+ }
if (partially_consumed) {
rxm->offset += chunk;
@@ -2127,11 +2153,11 @@ recv_end:
/* Drain records from the rx_list & copy if required */
if (is_peek || is_kvec)
- err = process_rx_list(ctx, msg, &control, copied,
- decrypted, is_peek);
+ err = process_rx_list(ctx, msg, &control, copied + peeked,
+ decrypted - peeked, is_peek, NULL);
else
err = process_rx_list(ctx, msg, &control, 0,
- async_copy_bytes, is_peek);
+ async_copy_bytes, is_peek, NULL);
}
copied += decrypted;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 30b178ebba60..0748e7ea5210 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -782,19 +782,6 @@ static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
int);
-static int unix_set_peek_off(struct sock *sk, int val)
-{
- struct unix_sock *u = unix_sk(sk);
-
- if (mutex_lock_interruptible(&u->iolock))
- return -EINTR;
-
- WRITE_ONCE(sk->sk_peek_off, val);
- mutex_unlock(&u->iolock);
-
- return 0;
-}
-
#ifdef CONFIG_PROC_FS
static int unix_count_nr_fds(struct sock *sk)
{
@@ -862,7 +849,7 @@ static const struct proto_ops unix_stream_ops = {
.read_skb = unix_stream_read_skb,
.mmap = sock_no_mmap,
.splice_read = unix_stream_splice_read,
- .set_peek_off = unix_set_peek_off,
+ .set_peek_off = sk_set_peek_off,
.show_fdinfo = unix_show_fdinfo,
};
@@ -886,7 +873,7 @@ static const struct proto_ops unix_dgram_ops = {
.read_skb = unix_read_skb,
.recvmsg = unix_dgram_recvmsg,
.mmap = sock_no_mmap,
- .set_peek_off = unix_set_peek_off,
+ .set_peek_off = sk_set_peek_off,
.show_fdinfo = unix_show_fdinfo,
};
@@ -909,7 +896,7 @@ static const struct proto_ops unix_seqpacket_ops = {
.sendmsg = unix_seqpacket_sendmsg,
.recvmsg = unix_seqpacket_recvmsg,
.mmap = sock_no_mmap,
- .set_peek_off = unix_set_peek_off,
+ .set_peek_off = sk_set_peek_off,
.show_fdinfo = unix_show_fdinfo,
};
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 2ff7ddbaa782..2a81880dac7b 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -284,9 +284,17 @@ void unix_gc(void)
* which are creating the cycle(s).
*/
skb_queue_head_init(&hitlist);
- list_for_each_entry(u, &gc_candidates, link)
+ list_for_each_entry(u, &gc_candidates, link) {
scan_children(&u->sk, inc_inflight, &hitlist);
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (u->oob_skb) {
+ kfree_skb(u->oob_skb);
+ u->oob_skb = NULL;
+ }
+#endif
+ }
+
/* not_cycle_list contains those sockets which do not make up a
* cycle. Restore these to the inflight list.
*/
@@ -314,18 +322,6 @@ void unix_gc(void)
/* Here we are. Hitlist is filled. Die. */
__skb_queue_purge(&hitlist);
-#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
- while (!list_empty(&gc_candidates)) {
- u = list_entry(gc_candidates.next, struct unix_sock, link);
- if (u->oob_skb) {
- struct sk_buff *skb = u->oob_skb;
-
- u->oob_skb = NULL;
- kfree_skb(skb);
- }
- }
-#endif
-
spin_lock(&unix_gc_lock);
/* There could be io_uring registered files, just push them back to
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b09700400d09..bd54a928bab4 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4197,6 +4197,8 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
if (ntype != NL80211_IFTYPE_MESH_POINT)
return -EINVAL;
+ if (otype != NL80211_IFTYPE_MESH_POINT)
+ return -EINVAL;
if (netif_running(dev))
return -EBUSY;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 1eadfac03cc4..b78c0e095e22 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -722,7 +722,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
memcpy(vaddr, buffer, len);
kunmap_local(vaddr);
- skb_add_rx_frag(skb, nr_frags, page, 0, len, 0);
+ skb_add_rx_frag(skb, nr_frags, page, 0, len, PAGE_SIZE);
+ refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc);
}
if (first_frag && desc->options & XDP_TX_METADATA) {
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 3784534c9185..653e51ae3964 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -407,7 +407,7 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
struct net_device *dev = x->xso.dev;
- if (!x->type_offload || x->encap)
+ if (!x->type_offload)
return false;
if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET ||
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 662c83beb345..e5722c95b8bb 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -704,9 +704,13 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
{
struct net *net = dev_net(skb_dst(skb)->dev);
struct xfrm_state *x = skb_dst(skb)->xfrm;
+ int family;
int err;
- switch (x->outer_mode.family) {
+ family = (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) ? x->outer_mode.family
+ : skb_dst(skb)->ops->family;
+
+ switch (family) {
case AF_INET:
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 1b7e75159727..da6ecc6b3e15 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2694,7 +2694,9 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m)
mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
- family = xfrm[i]->props.family;
+ if (xfrm[i]->xso.type != XFRM_DEV_OFFLOAD_PACKET)
+ family = xfrm[i]->props.family;
+
oif = fl->flowi_oif ? : fl->flowi_l3mdev;
dst = xfrm_dst_lookup(xfrm[i], tos, oif,
&saddr, &daddr, family, mark);
@@ -3416,7 +3418,7 @@ decode_session4(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reve
}
fl4->flowi4_proto = flkeys->basic.ip_proto;
- fl4->flowi4_tos = flkeys->ip.tos;
+ fl4->flowi4_tos = flkeys->ip.tos & ~INET_ECN_MASK;
}
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index f037be190bae..912c1189ba41 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2017,6 +2017,9 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb)
if (xp->xfrm_nr == 0)
return 0;
+ if (xp->xfrm_nr > XFRM_MAX_DEPTH)
+ return -ENOBUFS;
+
for (i = 0; i < xp->xfrm_nr; i++) {
struct xfrm_user_tmpl *up = &vec[i];
struct xfrm_tmpl *kp = &xp->xfrm_vec[i];