summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/Makefile2
-rw-r--r--net/batman-adv/bat_iv_ogm.c17
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c123
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h11
-rw-r--r--net/batman-adv/distributed-arp-table.c64
-rw-r--r--net/batman-adv/log.h5
-rw-r--r--net/batman-adv/main.c3
-rw-r--r--net/batman-adv/main.h18
-rw-r--r--net/batman-adv/multicast.c12
-rw-r--r--net/batman-adv/routing.c25
-rw-r--r--net/batman-adv/send.c76
-rw-r--r--net/batman-adv/send.h4
-rw-r--r--net/batman-adv/soft-interface.c238
-rw-r--r--net/batman-adv/tp_meter.c7
-rw-r--r--net/batman-adv/translation-table.c42
-rw-r--r--net/batman-adv/types.h6
-rw-r--r--net/bpf/Makefile1
-rw-r--r--net/bpf/test_run.c172
-rw-r--r--net/bridge/br_fdb.c5
-rw-r--r--net/bridge/br_if.c1
-rw-r--r--net/can/af_can.c122
-rw-r--r--net/can/af_can.h4
-rw-r--r--net/can/bcm.c13
-rw-r--r--net/can/gw.c4
-rw-r--r--net/can/proc.c144
-rw-r--r--net/can/raw.c92
-rw-r--r--net/ceph/messenger.c6
-rw-r--r--net/core/datagram.c8
-rw-r--r--net/core/dev.c41
-rw-r--r--net/core/devlink.c836
-rw-r--r--net/core/filter.c5
-rw-r--r--net/core/flow.c29
-rw-r--r--net/core/flow_dissector.c2
-rw-r--r--net/core/neighbour.c3
-rw-r--r--net/core/netprio_cgroup.c1
-rw-r--r--net/core/rtnetlink.c31
-rw-r--r--net/core/secure_seq.c31
-rw-r--r--net/core/sock.c33
-rw-r--r--net/core/sock_reuseport.c4
-rw-r--r--net/core/sysctl_net_core.c6
-rw-r--r--net/core/utils.c2
-rw-r--r--net/dsa/Kconfig4
-rw-r--r--net/dsa/Makefile1
-rw-r--r--net/dsa/dsa.c30
-rw-r--r--net/dsa/dsa2.c50
-rw-r--r--net/dsa/dsa_priv.h8
-rw-r--r--net/dsa/slave.c1
-rw-r--r--net/dsa/switch.c15
-rw-r--r--net/dsa/tag_brcm.c27
-rw-r--r--net/dsa/tag_dsa.c27
-rw-r--r--net/dsa/tag_edsa.c27
-rw-r--r--net/dsa/tag_mtk.c100
-rw-r--r--net/dsa/tag_qca.c27
-rw-r--r--net/dsa/tag_trailer.c26
-rw-r--r--net/ipv4/af_inet.c8
-rw-r--r--net/ipv4/devinet.c64
-rw-r--r--net/ipv4/ip_input.c5
-rw-r--r--net/ipv4/ipconfig.c3
-rw-r--r--net/ipv4/ipmr.c12
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c20
-rw-r--r--net/ipv4/ping.c5
-rw-r--r--net/ipv4/protocol.c2
-rw-r--r--net/ipv4/route.c13
-rw-r--r--net/ipv4/sysctl_net_ipv4.c67
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/tcp_input.c50
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/tcp_minisocks.c4
-rw-r--r--net/ipv4/tcp_output.c20
-rw-r--r--net/ipv4/tcp_recovery.c3
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/addrconf.c53
-rw-r--r--net/ipv6/af_inet6.c8
-rw-r--r--net/ipv6/ip6_input.c6
-rw-r--r--net/ipv6/ip6mr.c9
-rw-r--r--net/ipv6/mcast.c49
-rw-r--r--net/ipv6/protocol.c2
-rw-r--r--net/ipv6/seg6_iptunnel.c41
-rw-r--r--net/ipv6/tcp_ipv6.c5
-rw-r--r--net/ipv6/udp.c3
-rw-r--r--net/kcm/kcmsock.c6
-rw-r--r--net/l2tp/l2tp_core.c160
-rw-r--r--net/l2tp/l2tp_core.h9
-rw-r--r--net/l2tp/l2tp_debugfs.c10
-rw-r--r--net/l2tp/l2tp_eth.c65
-rw-r--r--net/l2tp/l2tp_ip.c22
-rw-r--r--net/l2tp/l2tp_ip6.c23
-rw-r--r--net/l2tp/l2tp_netlink.c52
-rw-r--r--net/l2tp/l2tp_ppp.c94
-rw-r--r--net/mac80211/iface.c3
-rw-r--r--net/mac802154/ieee802154_i.h1
-rw-r--r--net/mpls/af_mpls.c257
-rw-r--r--net/mpls/internal.h61
-rw-r--r--net/mpls/mpls_iptunnel.c13
-rw-r--r--net/netfilter/nf_conntrack_ecache.c2
-rw-r--r--net/netfilter/nf_conntrack_extend.c13
-rw-r--r--net/netfilter/nf_conntrack_netlink.c1
-rw-r--r--net/netfilter/nf_nat_core.c2
-rw-r--r--net/netfilter/nfnetlink_cthelper.c287
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c2
-rw-r--r--net/netfilter/nfnetlink_queue.c9
-rw-r--r--net/netlink/af_netlink.c8
-rw-r--r--net/netlink/af_netlink.h8
-rw-r--r--net/netlink/diag.c25
-rw-r--r--net/openvswitch/conntrack.c4
-rw-r--r--net/openvswitch/flow.c10
-rw-r--r--net/packet/af_packet.c8
-rw-r--r--net/qrtr/Kconfig2
-rw-r--r--net/qrtr/smd.c42
-rw-r--r--net/rds/connection.c10
-rw-r--r--net/rds/threads.c2
-rw-r--r--net/rxrpc/ar-internal.h19
-rw-r--r--net/rxrpc/call_accept.c6
-rw-r--r--net/rxrpc/call_event.c2
-rw-r--r--net/rxrpc/call_object.c4
-rw-r--r--net/rxrpc/conn_client.c1
-rw-r--r--net/rxrpc/conn_event.c17
-rw-r--r--net/rxrpc/input.c17
-rw-r--r--net/rxrpc/insecure.c10
-rw-r--r--net/rxrpc/peer_event.c2
-rw-r--r--net/rxrpc/recvmsg.c8
-rw-r--r--net/rxrpc/rxkad.c184
-rw-r--r--net/rxrpc/sendmsg.c17
-rw-r--r--net/sched/cls_flow.c5
-rw-r--r--net/sched/sch_choke.c57
-rw-r--r--net/sched/sch_sfq.c5
-rw-r--r--net/sctp/associola.c13
-rw-r--r--net/sctp/chunk.c14
-rw-r--r--net/sctp/input.c4
-rw-r--r--net/sctp/output.c69
-rw-r--r--net/sctp/outqueue.c13
-rw-r--r--net/sctp/proc.c4
-rw-r--r--net/sctp/sm_make_chunk.c9
-rw-r--r--net/sctp/sm_statefuns.c6
-rw-r--r--net/sctp/socket.c86
-rw-r--r--net/sctp/stream.c43
-rw-r--r--net/sctp/transport.c19
-rw-r--r--net/smc/smc_ib.h1
-rw-r--r--net/socket.c46
-rw-r--r--net/sunrpc/svcsock.c1
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c1
-rw-r--r--net/tipc/name_table.c2
-rw-r--r--net/tipc/socket.c28
-rw-r--r--net/tipc/subscr.c17
-rw-r--r--net/tipc/subscr.h3
-rw-r--r--net/unix/af_unix.c2
-rw-r--r--net/vmw_vsock/vmci_transport.c22
-rw-r--r--net/wireless/sysfs.c10
-rw-r--r--net/xfrm/xfrm_user.c9
149 files changed, 3625 insertions, 1411 deletions
diff --git a/net/Makefile b/net/Makefile
index 9b681550e3a3..9086ffbb5085 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -12,7 +12,7 @@ obj-$(CONFIG_NET) += $(tmp-y)
# LLC has to be linked before the files in net/802/
obj-$(CONFIG_LLC) += llc/
-obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/
+obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/
obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_INET) += ipv4/
obj-$(CONFIG_XFRM) += xfrm/
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 71343d0fec94..495ba7cdcb04 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -679,15 +679,11 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
{
struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
struct batadv_forw_packet *forw_packet_aggr;
+ struct sk_buff *skb;
unsigned char *skb_buff;
unsigned int skb_size;
atomic_t *queue_left = own_packet ? NULL : &bat_priv->batman_queue_left;
- forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing,
- queue_left, bat_priv);
- if (!forw_packet_aggr)
- return;
-
if (atomic_read(&bat_priv->aggregated_ogms) &&
packet_len < BATADV_MAX_AGGREGATION_BYTES)
skb_size = BATADV_MAX_AGGREGATION_BYTES;
@@ -696,9 +692,14 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
skb_size += ETH_HLEN;
- forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size);
- if (!forw_packet_aggr->skb) {
- batadv_forw_packet_free(forw_packet_aggr, true);
+ skb = netdev_alloc_skb_ip_align(NULL, skb_size);
+ if (!skb)
+ return;
+
+ forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing,
+ queue_left, bat_priv, skb);
+ if (!forw_packet_aggr) {
+ kfree_skb(skb);
return;
}
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index ba8420d8a992..d07e89ec8467 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -395,7 +395,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
ether_addr_copy(ethhdr->h_source, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): CLAIM %pM on vid %d\n", mac,
- BATADV_PRINT_VID(vid));
+ batadv_print_vid(vid));
break;
case BATADV_CLAIM_TYPE_UNCLAIM:
/* unclaim frame
@@ -404,7 +404,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
ether_addr_copy(hw_src, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): UNCLAIM %pM on vid %d\n", mac,
- BATADV_PRINT_VID(vid));
+ batadv_print_vid(vid));
break;
case BATADV_CLAIM_TYPE_ANNOUNCE:
/* announcement frame
@@ -413,7 +413,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
ether_addr_copy(hw_src, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): ANNOUNCE of %pM on vid %d\n",
- ethhdr->h_source, BATADV_PRINT_VID(vid));
+ ethhdr->h_source, batadv_print_vid(vid));
break;
case BATADV_CLAIM_TYPE_REQUEST:
/* request frame
@@ -425,14 +425,14 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): REQUEST of %pM to %pM on vid %d\n",
ethhdr->h_source, ethhdr->h_dest,
- BATADV_PRINT_VID(vid));
+ batadv_print_vid(vid));
break;
case BATADV_CLAIM_TYPE_LOOPDETECT:
ether_addr_copy(ethhdr->h_source, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): LOOPDETECT of %pM to %pM on vid %d\n",
ethhdr->h_source, ethhdr->h_dest,
- BATADV_PRINT_VID(vid));
+ batadv_print_vid(vid));
break;
}
@@ -475,9 +475,9 @@ static void batadv_bla_loopdetect_report(struct work_struct *work)
batadv_info(bat_priv->soft_iface,
"Possible loop on VLAN %d detected which can't be handled by BLA - please check your network setup!\n",
- BATADV_PRINT_VID(backbone_gw->vid));
+ batadv_print_vid(backbone_gw->vid));
snprintf(vid_str, sizeof(vid_str), "%d",
- BATADV_PRINT_VID(backbone_gw->vid));
+ batadv_print_vid(backbone_gw->vid));
vid_str[sizeof(vid_str) - 1] = 0;
batadv_throw_uevent(bat_priv, BATADV_UEV_BLA, BATADV_UEV_LOOPDETECT,
@@ -510,7 +510,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_get_backbone_gw(): not found (%pM, %d), creating new entry\n",
- orig, BATADV_PRINT_VID(vid));
+ orig, batadv_print_vid(vid));
entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry)
@@ -719,7 +719,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_add_claim(): adding new entry %pM, vid %d to hash ...\n",
- mac, BATADV_PRINT_VID(vid));
+ mac, batadv_print_vid(vid));
kref_get(&claim->refcount);
hash_added = batadv_hash_add(bat_priv->bla.claim_hash,
@@ -739,8 +739,8 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
goto claim_free_ref;
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_add_claim(): changing ownership for %pM, vid %d\n",
- mac, BATADV_PRINT_VID(vid));
+ "bla_add_claim(): changing ownership for %pM, vid %d to gw %pM\n",
+ mac, batadv_print_vid(vid), backbone_gw->orig);
remove_crc = true;
}
@@ -809,7 +809,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
return;
batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_del_claim(): %pM, vid %d\n",
- mac, BATADV_PRINT_VID(vid));
+ mac, batadv_print_vid(vid));
batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim,
batadv_choose_claim, claim);
@@ -849,7 +849,7 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n",
- BATADV_PRINT_VID(vid), backbone_gw->orig, crc);
+ batadv_print_vid(vid), backbone_gw->orig, crc);
spin_lock_bh(&backbone_gw->crc_lock);
backbone_crc = backbone_gw->crc;
@@ -859,7 +859,7 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr,
batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv,
"handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n",
backbone_gw->orig,
- BATADV_PRINT_VID(backbone_gw->vid),
+ batadv_print_vid(backbone_gw->vid),
backbone_crc, crc);
batadv_bla_send_request(backbone_gw);
@@ -904,7 +904,7 @@ static bool batadv_handle_request(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"handle_request(): REQUEST vid %d (sent by %pM)...\n",
- BATADV_PRINT_VID(vid), ethhdr->h_source);
+ batadv_print_vid(vid), ethhdr->h_source);
batadv_bla_answer_request(bat_priv, primary_if, vid);
return true;
@@ -941,7 +941,7 @@ static bool batadv_handle_unclaim(struct batadv_priv *bat_priv,
/* this must be an UNCLAIM frame */
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"handle_unclaim(): UNCLAIM %pM on vid %d (sent by %pM)...\n",
- claim_addr, BATADV_PRINT_VID(vid), backbone_gw->orig);
+ claim_addr, batadv_print_vid(vid), backbone_gw->orig);
batadv_bla_del_claim(bat_priv, claim_addr, vid);
batadv_backbone_gw_put(backbone_gw);
@@ -1161,7 +1161,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv,
if (ret == 1)
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_process_claim(): received a claim frame from another group. From: %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n",
- ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src,
+ ethhdr->h_source, batadv_print_vid(vid), hw_src,
hw_dst);
if (ret < 2)
@@ -1197,7 +1197,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_process_claim(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n",
- ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, hw_dst);
+ ethhdr->h_source, batadv_print_vid(vid), hw_src, hw_dst);
return true;
}
@@ -1295,10 +1295,13 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv,
goto skip;
batadv_dbg(BATADV_DBG_BLA, bat_priv,
- "bla_purge_claims(): %pM, vid %d, time out\n",
- claim->addr, claim->vid);
+ "bla_purge_claims(): timed out.\n");
purge_now:
+ batadv_dbg(BATADV_DBG_BLA, bat_priv,
+ "bla_purge_claims(): %pM, vid %d\n",
+ claim->addr, claim->vid);
+
batadv_handle_unclaim(bat_priv, primary_if,
backbone_gw->orig,
claim->addr, claim->vid);
@@ -1846,6 +1849,13 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
/* possible optimization: race for a claim */
/* No claim exists yet, claim it for us!
*/
+
+ batadv_dbg(BATADV_DBG_BLA, bat_priv,
+ "bla_rx(): Unclaimed MAC %pM found. Claim it. Local: %s\n",
+ ethhdr->h_source,
+ batadv_is_my_client(bat_priv,
+ ethhdr->h_source, vid) ?
+ "yes" : "no");
batadv_handle_claim(bat_priv, primary_if,
primary_if->net_dev->dev_addr,
ethhdr->h_source, vid);
@@ -1963,10 +1973,22 @@ bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
/* if yes, the client has roamed and we have
* to unclaim it.
*/
- batadv_handle_unclaim(bat_priv, primary_if,
- primary_if->net_dev->dev_addr,
- ethhdr->h_source, vid);
- goto allow;
+ if (batadv_has_timed_out(claim->lasttime, 100)) {
+ /* only unclaim if the last claim entry is
+ * older than 100 ms to make sure we really
+ * have a roaming client here.
+ */
+ batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_tx(): Roaming client %pM detected. Unclaim it.\n",
+ ethhdr->h_source);
+ batadv_handle_unclaim(bat_priv, primary_if,
+ primary_if->net_dev->dev_addr,
+ ethhdr->h_source, vid);
+ goto allow;
+ } else {
+ batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_tx(): Race for claim %pM detected. Drop packet.\n",
+ ethhdr->h_source);
+ goto handled;
+ }
}
/* check if it is a multicast/broadcast frame */
@@ -2042,7 +2064,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
backbone_crc = backbone_gw->crc;
spin_unlock_bh(&backbone_gw->crc_lock);
seq_printf(seq, " * %pM on %5d by %pM [%c] (%#.4x)\n",
- claim->addr, BATADV_PRINT_VID(claim->vid),
+ claim->addr, batadv_print_vid(claim->vid),
backbone_gw->orig,
(is_own ? 'x' : ' '),
backbone_crc);
@@ -2274,7 +2296,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq, " * %pM on %5d %4i.%03is (%#.4x)\n",
backbone_gw->orig,
- BATADV_PRINT_VID(backbone_gw->vid), secs,
+ batadv_print_vid(backbone_gw->vid), secs,
msecs, backbone_crc);
}
rcu_read_unlock();
@@ -2449,3 +2471,52 @@ out:
return ret;
}
+
+#ifdef CONFIG_BATMAN_ADV_DAT
+/**
+ * batadv_bla_check_claim - check if address is claimed
+ *
+ * @bat_priv: the bat priv with all the soft interface information
+ * @addr: mac address of which the claim status is checked
+ * @vid: the VLAN ID
+ *
+ * addr is checked if this address is claimed by the local device itself.
+ *
+ * Return: true if bla is disabled or the mac is claimed by the device,
+ * false if the device addr is already claimed by another gateway
+ */
+bool batadv_bla_check_claim(struct batadv_priv *bat_priv,
+ u8 *addr, unsigned short vid)
+{
+ struct batadv_bla_claim search_claim;
+ struct batadv_bla_claim *claim = NULL;
+ struct batadv_hard_iface *primary_if = NULL;
+ bool ret = true;
+
+ if (!atomic_read(&bat_priv->bridge_loop_avoidance))
+ return ret;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if)
+ return ret;
+
+ /* First look if the mac address is claimed */
+ ether_addr_copy(search_claim.addr, addr);
+ search_claim.vid = vid;
+
+ claim = batadv_claim_hash_find(bat_priv, &search_claim);
+
+ /* If there is a claim and we are not owner of the claim,
+ * return false.
+ */
+ if (claim) {
+ if (!batadv_compare_eth(claim->backbone_gw->orig,
+ primary_if->net_dev->dev_addr))
+ ret = false;
+ batadv_claim_put(claim);
+ }
+
+ batadv_hardif_put(primary_if);
+ return ret;
+}
+#endif
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index e157986bd01c..234775748b8e 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -69,6 +69,10 @@ void batadv_bla_status_update(struct net_device *net_dev);
int batadv_bla_init(struct batadv_priv *bat_priv);
void batadv_bla_free(struct batadv_priv *bat_priv);
int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb);
+#ifdef CONFIG_BATMAN_ADV_DAT
+bool batadv_bla_check_claim(struct batadv_priv *bat_priv, u8 *addr,
+ unsigned short vid);
+#endif
#define BATADV_BLA_CRC_INIT 0
#else /* ifdef CONFIG_BATMAN_ADV_BLA */
@@ -145,6 +149,13 @@ static inline int batadv_bla_backbone_dump(struct sk_buff *msg,
return -EOPNOTSUPP;
}
+static inline
+bool batadv_bla_check_claim(struct batadv_priv *bat_priv, u8 *addr,
+ unsigned short vid)
+{
+ return true;
+}
+
#endif /* ifdef CONFIG_BATMAN_ADV_BLA */
#endif /* ifndef _NET_BATMAN_ADV_BLA_H_ */
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 1bfd1dbc2feb..013e970eff39 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -43,6 +43,7 @@
#include <linux/workqueue.h>
#include <net/arp.h>
+#include "bridge_loop_avoidance.h"
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
@@ -330,7 +331,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
batadv_dbg(BATADV_DBG_DAT, bat_priv,
"Entry updated: %pI4 %pM (vid: %d)\n",
&dat_entry->ip, dat_entry->mac_addr,
- BATADV_PRINT_VID(vid));
+ batadv_print_vid(vid));
goto out;
}
@@ -356,7 +357,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
}
batadv_dbg(BATADV_DBG_DAT, bat_priv, "New entry added: %pI4 %pM (vid: %d)\n",
- &dat_entry->ip, dat_entry->mac_addr, BATADV_PRINT_VID(vid));
+ &dat_entry->ip, dat_entry->mac_addr, batadv_print_vid(vid));
out:
if (dat_entry)
@@ -835,7 +836,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq, " * %15pI4 %14pM %4i %6i:%02i\n",
&dat_entry->ip, dat_entry->mac_addr,
- BATADV_PRINT_VID(dat_entry->vid),
+ batadv_print_vid(dat_entry->vid),
last_seen_mins, last_seen_secs);
}
rcu_read_unlock();
@@ -1002,6 +1003,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
bool ret = false;
struct batadv_dat_entry *dat_entry = NULL;
struct sk_buff *skb_new;
+ struct net_device *soft_iface = bat_priv->soft_iface;
int hdr_size = 0;
unsigned short vid;
@@ -1040,16 +1042,30 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
goto out;
}
+ /* If BLA is enabled, only send ARP replies if we have claimed
+ * the destination for the ARP request or if no one else of
+ * the backbone gws belonging to our backbone has claimed the
+ * destination.
+ */
+ if (!batadv_bla_check_claim(bat_priv,
+ dat_entry->mac_addr, vid)) {
+ batadv_dbg(BATADV_DBG_DAT, bat_priv,
+ "Device %pM claimed by another backbone gw. Don't send ARP reply!",
+ dat_entry->mac_addr);
+ ret = true;
+ goto out;
+ }
+
skb_new = batadv_dat_arp_create_reply(bat_priv, ip_dst, ip_src,
dat_entry->mac_addr,
hw_src, vid);
if (!skb_new)
goto out;
- skb_new->protocol = eth_type_trans(skb_new,
- bat_priv->soft_iface);
- bat_priv->stats.rx_packets++;
- bat_priv->stats.rx_bytes += skb->len + ETH_HLEN + hdr_size;
+ skb_new->protocol = eth_type_trans(skb_new, soft_iface);
+
+ soft_iface->stats.rx_packets++;
+ soft_iface->stats.rx_bytes += skb->len + ETH_HLEN + hdr_size;
netif_rx(skb_new);
batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n");
@@ -1188,6 +1204,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
struct sk_buff *skb, int hdr_size)
{
+ struct batadv_dat_entry *dat_entry = NULL;
u16 type;
__be32 ip_src, ip_dst;
u8 *hw_src, *hw_dst;
@@ -1210,12 +1227,41 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
hw_dst = batadv_arp_hw_dst(skb, hdr_size);
ip_dst = batadv_arp_ip_dst(skb, hdr_size);
+ /* If ip_dst is already in cache and has the right mac address,
+ * drop this frame if this ARP reply is destined for us because it's
+ * most probably an ARP reply generated by another node of the DHT.
+ * We have most probably received already a reply earlier. Delivering
+ * this frame would lead to doubled receive of an ARP reply.
+ */
+ dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_src, vid);
+ if (dat_entry && batadv_compare_eth(hw_src, dat_entry->mac_addr)) {
+ batadv_dbg(BATADV_DBG_DAT, bat_priv, "Doubled ARP reply removed: ARP MSG = [src: %pM-%pI4 dst: %pM-%pI4]; dat_entry: %pM-%pI4\n",
+ hw_src, &ip_src, hw_dst, &ip_dst,
+ dat_entry->mac_addr, &dat_entry->ip);
+ dropped = true;
+ goto out;
+ }
+
/* Update our internal cache with both the IP addresses the node got
* within the ARP reply
*/
batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid);
batadv_dat_entry_add(bat_priv, ip_dst, hw_dst, vid);
+ /* If BLA is enabled, only forward ARP replies if we have claimed the
+ * source of the ARP reply or if no one else of the same backbone has
+ * already claimed that client. This prevents that different gateways
+ * to the same backbone all forward the ARP reply leading to multiple
+ * replies in the backbone.
+ */
+ if (!batadv_bla_check_claim(bat_priv, hw_src, vid)) {
+ batadv_dbg(BATADV_DBG_DAT, bat_priv,
+ "Device %pM claimed by another backbone gw. Drop ARP reply.\n",
+ hw_src);
+ dropped = true;
+ goto out;
+ }
+
/* if this REPLY is directed to a client of mine, let's deliver the
* packet to the interface
*/
@@ -1228,6 +1274,8 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
out:
if (dropped)
kfree_skb(skb);
+ if (dat_entry)
+ batadv_dat_entry_put(dat_entry);
/* if dropped == false -> deliver to the interface */
return dropped;
}
@@ -1256,7 +1304,7 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
/* If this packet is an ARP_REQUEST and the node already has the
* information that it is going to ask, then the packet can be dropped
*/
- if (forw_packet->num_packets)
+ if (batadv_forw_packet_is_rebroadcast(forw_packet))
goto out;
vid = batadv_dat_get_vid(forw_packet->skb, &hdr_size);
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
index 7a2b9f4da078..65ce97efa6b5 100644
--- a/net/batman-adv/log.h
+++ b/net/batman-adv/log.h
@@ -73,9 +73,10 @@ __printf(2, 3);
/* possibly ratelimited debug output */
#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \
do { \
- if (atomic_read(&(bat_priv)->log_level) & (type) && \
+ struct batadv_priv *__batpriv = (bat_priv); \
+ if (atomic_read(&__batpriv->log_level) & (type) && \
(!(ratelimited) || net_ratelimit())) \
- batadv_debug_log(bat_priv, fmt, ## arg); \
+ batadv_debug_log(__batpriv, fmt, ## arg); \
} \
while (0)
#else /* !CONFIG_BATMAN_ADV_DEBUG */
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 5000c540614d..fb381fb26a66 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -516,6 +516,9 @@ static void batadv_recv_handler_init(void)
BUILD_BUG_ON(sizeof(struct batadv_tvlv_tt_change) != 12);
BUILD_BUG_ON(sizeof(struct batadv_tvlv_roam_adv) != 8);
+ i = FIELD_SIZEOF(struct sk_buff, cb);
+ BUILD_BUG_ON(sizeof(struct batadv_skb_cb) > i);
+
/* broadcast packet */
batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 57a8103dbce7..810f7d026f54 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2017.0"
+#define BATADV_SOURCE_VERSION "2017.1"
#endif
/* B.A.T.M.A.N. parameters */
@@ -193,6 +193,7 @@ enum batadv_uev_type {
#include <linux/percpu.h>
#include <linux/types.h>
+#include "packet.h"
#include "types.h"
struct net_device;
@@ -200,8 +201,19 @@ struct packet_type;
struct seq_file;
struct sk_buff;
-#define BATADV_PRINT_VID(vid) (((vid) & BATADV_VLAN_HAS_TAG) ? \
- (int)((vid) & VLAN_VID_MASK) : -1)
+/**
+ * batadv_print_vid - return printable version of vid information
+ * @vid: the VLAN identifier
+ *
+ * Return: -1 when no VLAN is used, VLAN id otherwise
+ */
+static inline int batadv_print_vid(unsigned short vid)
+{
+ if (vid & BATADV_VLAN_HAS_TAG)
+ return (int)(vid & VLAN_VID_MASK);
+ else
+ return -1;
+}
extern struct list_head batadv_hardif_list;
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 952ba81a565b..d327670641ac 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -494,9 +494,8 @@ static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv)
if (!bridged)
goto update;
-#if !IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)
- pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n");
-#endif
+ if (!IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING))
+ pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n");
querier4.exists = br_multicast_has_querier_anywhere(dev, ETH_P_IP);
querier4.shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IP);
@@ -671,7 +670,6 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv,
return 0;
}
-#if IS_ENABLED(CONFIG_IPV6)
/**
* batadv_mcast_is_report_ipv6 - check for MLD reports
* @skb: the ethernet frame destined for the mesh
@@ -736,7 +734,6 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv,
return 0;
}
-#endif
/**
* batadv_mcast_forw_mode_check - check for optimized forwarding potential
@@ -765,11 +762,12 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv,
case ETH_P_IP:
return batadv_mcast_forw_mode_check_ipv4(bat_priv, skb,
is_unsnoopable);
-#if IS_ENABLED(CONFIG_IPV6)
case ETH_P_IPV6:
+ if (!IS_ENABLED(CONFIG_IPV6))
+ return -EINVAL;
+
return batadv_mcast_forw_mode_check_ipv6(bat_priv, skb,
is_unsnoopable);
-#endif
default:
return -EINVAL;
}
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 7fd740b6e36d..e1ebe14ee2a6 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -941,15 +941,17 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
struct batadv_unicast_packet *unicast_packet;
struct batadv_unicast_4addr_packet *unicast_4addr_packet;
- u8 *orig_addr;
- struct batadv_orig_node *orig_node = NULL;
+ u8 *orig_addr, *orig_addr_gw;
+ struct batadv_orig_node *orig_node = NULL, *orig_node_gw = NULL;
int check, hdr_size = sizeof(*unicast_packet);
enum batadv_subtype subtype;
- bool is4addr;
+ struct ethhdr *ethhdr;
int ret = NET_RX_DROP;
+ bool is4addr, is_gw;
unicast_packet = (struct batadv_unicast_packet *)skb->data;
unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
+ ethhdr = eth_hdr(skb);
is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR;
/* the caller function should have already pulled 2 bytes */
@@ -972,6 +974,23 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
/* packet for me */
if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) {
+ /* If this is a unicast packet from another backgone gw,
+ * drop it.
+ */
+ orig_addr_gw = ethhdr->h_source;
+ orig_node_gw = batadv_orig_hash_find(bat_priv, orig_addr_gw);
+ if (orig_node_gw) {
+ is_gw = batadv_bla_is_backbone_gw(skb, orig_node_gw,
+ hdr_size);
+ batadv_orig_node_put(orig_node_gw);
+ if (is_gw) {
+ batadv_dbg(BATADV_DBG_BLA, bat_priv,
+ "recv_unicast_packet(): Dropped unicast pkt received from another backbone gw %pM.\n",
+ orig_addr_gw);
+ return NET_RX_DROP;
+ }
+ }
+
if (is4addr) {
subtype = unicast_4addr_packet->subtype;
batadv_dat_inc_counter(bat_priv, subtype);
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 1489ec27daff..403df596a73d 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -482,6 +482,7 @@ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet,
* @if_outgoing: The (optional) if_outgoing to be grabbed
* @queue_left: The (optional) queue counter to decrease
* @bat_priv: The bat_priv for the mesh of this forw_packet
+ * @skb: The raw packet this forwarding packet shall contain
*
* Allocates a forwarding packet and tries to get a reference to the
* (optional) if_incoming, if_outgoing and queue_left. If queue_left
@@ -493,7 +494,8 @@ struct batadv_forw_packet *
batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
struct batadv_hard_iface *if_outgoing,
atomic_t *queue_left,
- struct batadv_priv *bat_priv)
+ struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
{
struct batadv_forw_packet *forw_packet;
const char *qname;
@@ -525,7 +527,7 @@ batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
INIT_HLIST_NODE(&forw_packet->list);
INIT_HLIST_NODE(&forw_packet->cleanup_list);
- forw_packet->skb = NULL;
+ forw_packet->skb = skb;
forw_packet->queue_left = queue_left;
forw_packet->if_incoming = if_incoming;
forw_packet->if_outgoing = if_outgoing;
@@ -756,22 +758,23 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
if (!primary_if)
goto err;
+ newskb = skb_copy(skb, GFP_ATOMIC);
+ if (!newskb) {
+ batadv_hardif_put(primary_if);
+ goto err;
+ }
+
forw_packet = batadv_forw_packet_alloc(primary_if, NULL,
&bat_priv->bcast_queue_left,
- bat_priv);
+ bat_priv, newskb);
batadv_hardif_put(primary_if);
if (!forw_packet)
- goto err;
-
- newskb = skb_copy(skb, GFP_ATOMIC);
- if (!newskb)
goto err_packet_free;
/* as we have a copy now, it is safe to decrease the TTL */
bcast_packet = (struct batadv_bcast_packet *)newskb->data;
bcast_packet->ttl--;
- forw_packet->skb = newskb;
forw_packet->own = own_packet;
INIT_DELAYED_WORK(&forw_packet->delayed_work,
@@ -781,11 +784,60 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
return NETDEV_TX_OK;
err_packet_free:
- batadv_forw_packet_free(forw_packet, true);
+ kfree_skb(newskb);
err:
return NETDEV_TX_BUSY;
}
+/**
+ * batadv_forw_packet_bcasts_left - check if a retransmission is necessary
+ * @forw_packet: the forwarding packet to check
+ * @hard_iface: the interface to check on
+ *
+ * Checks whether a given packet has any (re)transmissions left on the provided
+ * interface.
+ *
+ * hard_iface may be NULL: In that case the number of transmissions this skb had
+ * so far is compared with the maximum amount of retransmissions independent of
+ * any interface instead.
+ *
+ * Return: True if (re)transmissions are left, false otherwise.
+ */
+static bool
+batadv_forw_packet_bcasts_left(struct batadv_forw_packet *forw_packet,
+ struct batadv_hard_iface *hard_iface)
+{
+ unsigned int max;
+
+ if (hard_iface)
+ max = hard_iface->num_bcasts;
+ else
+ max = BATADV_NUM_BCASTS_MAX;
+
+ return BATADV_SKB_CB(forw_packet->skb)->num_bcasts < max;
+}
+
+/**
+ * batadv_forw_packet_bcasts_inc - increment retransmission counter of a packet
+ * @forw_packet: the packet to increase the counter for
+ */
+static void
+batadv_forw_packet_bcasts_inc(struct batadv_forw_packet *forw_packet)
+{
+ BATADV_SKB_CB(forw_packet->skb)->num_bcasts++;
+}
+
+/**
+ * batadv_forw_packet_is_rebroadcast - check packet for previous transmissions
+ * @forw_packet: the packet to check
+ *
+ * Return: True if this packet was transmitted before, false otherwise.
+ */
+bool batadv_forw_packet_is_rebroadcast(struct batadv_forw_packet *forw_packet)
+{
+ return BATADV_SKB_CB(forw_packet->skb)->num_bcasts > 0;
+}
+
static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
{
struct batadv_hard_iface *hard_iface;
@@ -826,7 +878,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
if (hard_iface->soft_iface != soft_iface)
continue;
- if (forw_packet->num_packets >= hard_iface->num_bcasts)
+ if (!batadv_forw_packet_bcasts_left(forw_packet, hard_iface))
continue;
if (forw_packet->own) {
@@ -884,10 +936,10 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
}
rcu_read_unlock();
- forw_packet->num_packets++;
+ batadv_forw_packet_bcasts_inc(forw_packet);
/* if we still have some more bcasts to send */
- if (forw_packet->num_packets < BATADV_NUM_BCASTS_MAX) {
+ if (batadv_forw_packet_bcasts_left(forw_packet, NULL)) {
batadv_forw_packet_bcast_queue(bat_priv, forw_packet,
send_time);
return;
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index f21166d10323..a16b34f473ef 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -34,11 +34,13 @@ struct batadv_forw_packet *
batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
struct batadv_hard_iface *if_outgoing,
atomic_t *queue_left,
- struct batadv_priv *bat_priv);
+ struct batadv_priv *bat_priv,
+ struct sk_buff *skb);
bool batadv_forw_packet_steal(struct batadv_forw_packet *packet, spinlock_t *l);
void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv,
struct batadv_forw_packet *forw_packet,
unsigned long send_time);
+bool batadv_forw_packet_is_rebroadcast(struct batadv_forw_packet *forw_packet);
int batadv_send_skb_to_orig(struct sk_buff *skb,
struct batadv_orig_node *orig_node,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index d042c99af028..b25789abf7b9 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -64,28 +64,6 @@
#include "sysfs.h"
#include "translation-table.h"
-static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd);
-static void batadv_get_drvinfo(struct net_device *dev,
- struct ethtool_drvinfo *info);
-static u32 batadv_get_msglevel(struct net_device *dev);
-static void batadv_set_msglevel(struct net_device *dev, u32 value);
-static u32 batadv_get_link(struct net_device *dev);
-static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data);
-static void batadv_get_ethtool_stats(struct net_device *dev,
- struct ethtool_stats *stats, u64 *data);
-static int batadv_get_sset_count(struct net_device *dev, int stringset);
-
-static const struct ethtool_ops batadv_ethtool_ops = {
- .get_settings = batadv_get_settings,
- .get_drvinfo = batadv_get_drvinfo,
- .get_msglevel = batadv_get_msglevel,
- .set_msglevel = batadv_set_msglevel,
- .get_link = batadv_get_link,
- .get_strings = batadv_get_strings,
- .get_ethtool_stats = batadv_get_ethtool_stats,
- .get_sset_count = batadv_get_sset_count,
-};
-
int batadv_skb_head_push(struct sk_buff *skb, unsigned int len)
{
int result;
@@ -140,7 +118,7 @@ static u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx)
static struct net_device_stats *batadv_interface_stats(struct net_device *dev)
{
struct batadv_priv *bat_priv = netdev_priv(dev);
- struct net_device_stats *stats = &bat_priv->stats;
+ struct net_device_stats *stats = &dev->stats;
stats->tx_packets = batadv_sum_counter(bat_priv, BATADV_CNT_TX);
stats->tx_bytes = batadv_sum_counter(bat_priv, BATADV_CNT_TX_BYTES);
@@ -230,6 +208,9 @@ static int batadv_interface_tx(struct sk_buff *skb,
if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
goto dropped;
+ /* reset control block to avoid left overs from previous users */
+ memset(skb->cb, 0, sizeof(struct batadv_skb_cb));
+
netif_trans_update(soft_iface);
vid = batadv_get_vid(skb, 0);
ethhdr = eth_hdr(skb);
@@ -947,6 +928,98 @@ static const struct net_device_ops batadv_netdev_ops = {
.ndo_del_slave = batadv_softif_slave_del,
};
+static void batadv_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *info)
+{
+ strlcpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver));
+ strlcpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version));
+ strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
+ strlcpy(info->bus_info, "batman", sizeof(info->bus_info));
+}
+
+/* Inspired by drivers/net/ethernet/dlink/sundance.c:1702
+ * Declare each description string in struct.name[] to get fixed sized buffer
+ * and compile time checking for strings longer than ETH_GSTRING_LEN.
+ */
+static const struct {
+ const char name[ETH_GSTRING_LEN];
+} batadv_counters_strings[] = {
+ { "tx" },
+ { "tx_bytes" },
+ { "tx_dropped" },
+ { "rx" },
+ { "rx_bytes" },
+ { "forward" },
+ { "forward_bytes" },
+ { "mgmt_tx" },
+ { "mgmt_tx_bytes" },
+ { "mgmt_rx" },
+ { "mgmt_rx_bytes" },
+ { "frag_tx" },
+ { "frag_tx_bytes" },
+ { "frag_rx" },
+ { "frag_rx_bytes" },
+ { "frag_fwd" },
+ { "frag_fwd_bytes" },
+ { "tt_request_tx" },
+ { "tt_request_rx" },
+ { "tt_response_tx" },
+ { "tt_response_rx" },
+ { "tt_roam_adv_tx" },
+ { "tt_roam_adv_rx" },
+#ifdef CONFIG_BATMAN_ADV_DAT
+ { "dat_get_tx" },
+ { "dat_get_rx" },
+ { "dat_put_tx" },
+ { "dat_put_rx" },
+ { "dat_cached_reply_tx" },
+#endif
+#ifdef CONFIG_BATMAN_ADV_NC
+ { "nc_code" },
+ { "nc_code_bytes" },
+ { "nc_recode" },
+ { "nc_recode_bytes" },
+ { "nc_buffer" },
+ { "nc_decode" },
+ { "nc_decode_bytes" },
+ { "nc_decode_failed" },
+ { "nc_sniffed" },
+#endif
+};
+
+static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+{
+ if (stringset == ETH_SS_STATS)
+ memcpy(data, batadv_counters_strings,
+ sizeof(batadv_counters_strings));
+}
+
+static void batadv_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct batadv_priv *bat_priv = netdev_priv(dev);
+ int i;
+
+ for (i = 0; i < BATADV_CNT_NUM; i++)
+ data[i] = batadv_sum_counter(bat_priv, i);
+}
+
+static int batadv_get_sset_count(struct net_device *dev, int stringset)
+{
+ if (stringset == ETH_SS_STATS)
+ return BATADV_CNT_NUM;
+
+ return -EOPNOTSUPP;
+}
+
+static const struct ethtool_ops batadv_ethtool_ops = {
+ .get_drvinfo = batadv_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_strings = batadv_get_strings,
+ .get_ethtool_stats = batadv_get_ethtool_stats,
+ .get_sset_count = batadv_get_sset_count,
+};
+
/**
* batadv_softif_free - Deconstructor of batadv_soft_interface
* @dev: Device to cleanup and remove
@@ -971,8 +1044,6 @@ static void batadv_softif_free(struct net_device *dev)
*/
static void batadv_softif_init_early(struct net_device *dev)
{
- struct batadv_priv *priv = netdev_priv(dev);
-
ether_setup(dev);
dev->netdev_ops = &batadv_netdev_ops;
@@ -989,8 +1060,6 @@ static void batadv_softif_init_early(struct net_device *dev)
eth_hw_addr_random(dev);
dev->ethtool_ops = &batadv_ethtool_ops;
-
- memset(priv, 0, sizeof(*priv));
}
struct net_device *batadv_softif_create(struct net *net, const char *name)
@@ -1083,118 +1152,3 @@ struct rtnl_link_ops batadv_link_ops __read_mostly = {
.setup = batadv_softif_init_early,
.dellink = batadv_softif_destroy_netlink,
};
-
-/* ethtool */
-static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
- cmd->supported = 0;
- cmd->advertising = 0;
- ethtool_cmd_speed_set(cmd, SPEED_10);
- cmd->duplex = DUPLEX_FULL;
- cmd->port = PORT_TP;
- cmd->phy_address = 0;
- cmd->transceiver = XCVR_INTERNAL;
- cmd->autoneg = AUTONEG_DISABLE;
- cmd->maxtxpkt = 0;
- cmd->maxrxpkt = 0;
-
- return 0;
-}
-
-static void batadv_get_drvinfo(struct net_device *dev,
- struct ethtool_drvinfo *info)
-{
- strlcpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver));
- strlcpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version));
- strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
- strlcpy(info->bus_info, "batman", sizeof(info->bus_info));
-}
-
-static u32 batadv_get_msglevel(struct net_device *dev)
-{
- return -EOPNOTSUPP;
-}
-
-static void batadv_set_msglevel(struct net_device *dev, u32 value)
-{
-}
-
-static u32 batadv_get_link(struct net_device *dev)
-{
- return 1;
-}
-
-/* Inspired by drivers/net/ethernet/dlink/sundance.c:1702
- * Declare each description string in struct.name[] to get fixed sized buffer
- * and compile time checking for strings longer than ETH_GSTRING_LEN.
- */
-static const struct {
- const char name[ETH_GSTRING_LEN];
-} batadv_counters_strings[] = {
- { "tx" },
- { "tx_bytes" },
- { "tx_dropped" },
- { "rx" },
- { "rx_bytes" },
- { "forward" },
- { "forward_bytes" },
- { "mgmt_tx" },
- { "mgmt_tx_bytes" },
- { "mgmt_rx" },
- { "mgmt_rx_bytes" },
- { "frag_tx" },
- { "frag_tx_bytes" },
- { "frag_rx" },
- { "frag_rx_bytes" },
- { "frag_fwd" },
- { "frag_fwd_bytes" },
- { "tt_request_tx" },
- { "tt_request_rx" },
- { "tt_response_tx" },
- { "tt_response_rx" },
- { "tt_roam_adv_tx" },
- { "tt_roam_adv_rx" },
-#ifdef CONFIG_BATMAN_ADV_DAT
- { "dat_get_tx" },
- { "dat_get_rx" },
- { "dat_put_tx" },
- { "dat_put_rx" },
- { "dat_cached_reply_tx" },
-#endif
-#ifdef CONFIG_BATMAN_ADV_NC
- { "nc_code" },
- { "nc_code_bytes" },
- { "nc_recode" },
- { "nc_recode_bytes" },
- { "nc_buffer" },
- { "nc_decode" },
- { "nc_decode_bytes" },
- { "nc_decode_failed" },
- { "nc_sniffed" },
-#endif
-};
-
-static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data)
-{
- if (stringset == ETH_SS_STATS)
- memcpy(data, batadv_counters_strings,
- sizeof(batadv_counters_strings));
-}
-
-static void batadv_get_ethtool_stats(struct net_device *dev,
- struct ethtool_stats *stats, u64 *data)
-{
- struct batadv_priv *bat_priv = netdev_priv(dev);
- int i;
-
- for (i = 0; i < BATADV_CNT_NUM; i++)
- data[i] = batadv_sum_counter(bat_priv, i);
-}
-
-static int batadv_get_sset_count(struct net_device *dev, int stringset)
-{
- if (stringset == ETH_SS_STATS)
- return BATADV_CNT_NUM;
-
- return -EOPNOTSUPP;
-}
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index c94ebdecdc3d..556f9a865ddf 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -873,8 +873,8 @@ static int batadv_tp_send(void *arg)
/* something went wrong during the preparation/transmission */
if (unlikely(err && err != BATADV_TP_REASON_CANT_SEND)) {
batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
- "Meter: batadv_tp_send() cannot send packets (%d)\n",
- err);
+ "Meter: %s() cannot send packets (%d)\n",
+ __func__, err);
/* ensure nobody else tries to stop the thread now */
if (atomic_dec_and_test(&tp_vars->sending))
tp_vars->reason = err;
@@ -979,7 +979,8 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst,
if (!tp_vars) {
spin_unlock_bh(&bat_priv->tp_list_lock);
batadv_dbg(BATADV_DBG_TP_METER, bat_priv,
- "Meter: batadv_tp_start cannot allocate list elements\n");
+ "Meter: %s cannot allocate list elements\n",
+ __func__);
batadv_tp_batctl_error_notify(BATADV_TP_REASON_MEMORY_ERROR,
dst, bat_priv, session_cookie);
return;
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 6077a87d46f0..e75b4937b497 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -617,7 +617,7 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Deleting global tt entry %pM (vid: %d): %s\n",
tt_global->common.addr,
- BATADV_PRINT_VID(tt_global->common.vid), message);
+ batadv_print_vid(tt_global->common.vid), message);
batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt,
batadv_choose_tt, &tt_global->common);
@@ -671,7 +671,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
if (tt_local->common.flags & BATADV_TT_CLIENT_PENDING) {
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Re-adding pending client %pM (vid: %d)\n",
- addr, BATADV_PRINT_VID(vid));
+ addr, batadv_print_vid(vid));
/* whatever the reason why the PENDING flag was set,
* this is a client which was enqueued to be removed in
* this orig_interval. Since it popped up again, the
@@ -684,7 +684,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
if (tt_local->common.flags & BATADV_TT_CLIENT_ROAM) {
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Roaming client %pM (vid: %d) came back to its original location\n",
- addr, BATADV_PRINT_VID(vid));
+ addr, batadv_print_vid(vid));
/* the ROAM flag is set because this client roamed away
* and the node got a roaming_advertisement message. Now
* that the client popped up again at its original
@@ -716,7 +716,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
if (!vlan) {
net_ratelimited_function(batadv_info, soft_iface,
"adding TT local entry %pM to non-existent VLAN %d\n",
- addr, BATADV_PRINT_VID(vid));
+ addr, batadv_print_vid(vid));
kmem_cache_free(batadv_tl_cache, tt_local);
tt_local = NULL;
goto out;
@@ -724,7 +724,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n",
- addr, BATADV_PRINT_VID(vid),
+ addr, batadv_print_vid(vid),
(u8)atomic_read(&bat_priv->tt.vn));
ether_addr_copy(tt_local->common.addr, addr);
@@ -1097,7 +1097,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq,
" * %pM %4i [%c%c%c%c%c%c] %3u.%03u (%#.8x)\n",
tt_common_entry->addr,
- BATADV_PRINT_VID(tt_common_entry->vid),
+ batadv_print_vid(tt_common_entry->vid),
((tt_common_entry->flags &
BATADV_TT_CLIENT_ROAM) ? 'R' : '.'),
no_purge ? 'P' : '.',
@@ -1296,7 +1296,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Local tt entry (%pM, vid: %d) pending to be removed: %s\n",
tt_local_entry->common.addr,
- BATADV_PRINT_VID(tt_local_entry->common.vid), message);
+ batadv_print_vid(tt_local_entry->common.vid), message);
}
/**
@@ -1727,7 +1727,7 @@ add_orig_entry:
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Creating new global tt entry: %pM (vid: %d, via %pM)\n",
- common->addr, BATADV_PRINT_VID(common->vid),
+ common->addr, batadv_print_vid(common->vid),
orig_node->orig);
ret = true;
@@ -1835,7 +1835,7 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv,
if (!vlan) {
seq_printf(seq,
" * Cannot retrieve VLAN %d for originator %pM\n",
- BATADV_PRINT_VID(tt_common_entry->vid),
+ batadv_print_vid(tt_common_entry->vid),
best_entry->orig_node->orig);
goto print_list;
}
@@ -1844,7 +1844,7 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv,
seq_printf(seq,
" %c %pM %4i (%3u) via %pM (%3u) (%#.8x) [%c%c%c%c]\n",
'*', tt_global_entry->common.addr,
- BATADV_PRINT_VID(tt_global_entry->common.vid),
+ batadv_print_vid(tt_global_entry->common.vid),
best_entry->ttvn, best_entry->orig_node->orig,
last_ttvn, vlan->tt.crc,
((flags & BATADV_TT_CLIENT_ROAM) ? 'R' : '.'),
@@ -1867,7 +1867,7 @@ print_list:
if (!vlan) {
seq_printf(seq,
" + Cannot retrieve VLAN %d for originator %pM\n",
- BATADV_PRINT_VID(tt_common_entry->vid),
+ batadv_print_vid(tt_common_entry->vid),
orig_entry->orig_node->orig);
continue;
}
@@ -1876,7 +1876,7 @@ print_list:
seq_printf(seq,
" %c %pM %4d (%3u) via %pM (%3u) (%#.8x) [%c%c%c%c]\n",
'+', tt_global_entry->common.addr,
- BATADV_PRINT_VID(tt_global_entry->common.vid),
+ batadv_print_vid(tt_global_entry->common.vid),
orig_entry->ttvn, orig_entry->orig_node->orig,
last_ttvn, vlan->tt.crc,
((flags & BATADV_TT_CLIENT_ROAM) ? 'R' : '.'),
@@ -2213,7 +2213,7 @@ batadv_tt_global_del_orig_node(struct batadv_priv *bat_priv,
"Deleting %pM from global tt entry %pM (vid: %d): %s\n",
orig_node->orig,
tt_global_entry->common.addr,
- BATADV_PRINT_VID(vid), message);
+ batadv_print_vid(vid), message);
_batadv_tt_global_del_orig_entry(tt_global_entry,
orig_entry);
}
@@ -2253,12 +2253,13 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv,
/* its the last one, mark for roaming. */
tt_global_entry->common.flags |= BATADV_TT_CLIENT_ROAM;
tt_global_entry->roam_at = jiffies;
- } else
+ } else {
/* there is another entry, we can simply delete this
* one and can still use the other one.
*/
batadv_tt_global_del_orig_node(bat_priv, tt_global_entry,
orig_node, message);
+ }
}
/**
@@ -2314,10 +2315,11 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
/* local entry exists, case 2: client roamed to us. */
batadv_tt_global_del_orig_list(tt_global_entry);
batadv_tt_global_free(bat_priv, tt_global_entry, message);
- } else
+ } else {
/* no local entry exists, case 1: check for roaming */
batadv_tt_global_del_roaming(bat_priv, tt_global_entry,
orig_node, message);
+ }
out:
if (tt_global_entry)
@@ -2375,7 +2377,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Deleting global tt entry %pM (vid: %d): %s\n",
tt_global->common.addr,
- BATADV_PRINT_VID(vid), message);
+ batadv_print_vid(vid), message);
hlist_del_rcu(&tt_common_entry->hash_entry);
batadv_tt_global_entry_put(tt_global);
}
@@ -2435,7 +2437,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv)
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Deleting global tt entry %pM (vid: %d): %s\n",
tt_global->common.addr,
- BATADV_PRINT_VID(tt_global->common.vid),
+ batadv_print_vid(tt_global->common.vid),
msg);
hlist_del_rcu(&tt_common->hash_entry);
@@ -3650,7 +3652,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, u8 *client,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Sending ROAMING_ADV to %pM (client %pM, vid: %d)\n",
- orig_node->orig, client, BATADV_PRINT_VID(vid));
+ orig_node->orig, client, batadv_print_vid(vid));
batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_TX);
@@ -3773,7 +3775,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Deleting local tt entry (%pM, vid: %d): pending\n",
tt_common->addr,
- BATADV_PRINT_VID(tt_common->vid));
+ batadv_print_vid(tt_common->vid));
batadv_tt_local_size_dec(bat_priv, tt_common->vid);
hlist_del_rcu(&tt_common->hash_entry);
@@ -4017,7 +4019,7 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Added temporary global client (addr: %pM, vid: %d, orig: %pM)\n",
- addr, BATADV_PRINT_VID(vid), orig_node->orig);
+ addr, batadv_print_vid(vid), orig_node->orig);
ret = true;
out:
return ret;
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 246f21b4973b..ea43a6449247 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1000,7 +1000,6 @@ struct batadv_priv_bat_v {
* struct batadv_priv - per mesh interface data
* @mesh_state: current status of the mesh (inactive/active/deactivating)
* @soft_iface: net device which holds this struct as private data
- * @stats: structure holding the data for the ndo_get_stats() call
* @bat_counters: mesh internal traffic statistic counters (see batadv_counters)
* @aggregated_ogms: bool indicating whether OGM aggregation is enabled
* @bonding: bool indicating whether traffic bonding is enabled
@@ -1055,7 +1054,6 @@ struct batadv_priv_bat_v {
struct batadv_priv {
atomic_t mesh_state;
struct net_device *soft_iface;
- struct net_device_stats stats;
u64 __percpu *bat_counters; /* Per cpu counters */
atomic_t aggregated_ogms;
atomic_t bonding;
@@ -1377,9 +1375,11 @@ struct batadv_nc_packet {
* relevant to batman-adv in the skb->cb buffer in skbs.
* @decoded: Marks a skb as decoded, which is checked when searching for coding
* opportunities in network-coding.c
+ * @num_bcasts: Counter for broadcast packet retransmissions
*/
struct batadv_skb_cb {
bool decoded;
+ unsigned int num_bcasts;
};
/**
@@ -1392,7 +1392,7 @@ struct batadv_skb_cb {
* @skb: bcast packet's skb buffer
* @packet_len: size of aggregated OGM packet inside the skb buffer
* @direct_link_flags: direct link flags for aggregated OGM packets
- * @num_packets: counter for bcast packet retransmission
+ * @num_packets: counter for aggregated OGMv1 packets
* @delayed_work: work queue callback item for packet sending
* @if_incoming: pointer to incoming hard-iface or primary iface if
* locally generated packet
diff --git a/net/bpf/Makefile b/net/bpf/Makefile
new file mode 100644
index 000000000000..27b2992a0692
--- /dev/null
+++ b/net/bpf/Makefile
@@ -0,0 +1 @@
+obj-y := test_run.o
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
new file mode 100644
index 000000000000..8a6d0a37c30c
--- /dev/null
+++ b/net/bpf/test_run.c
@@ -0,0 +1,172 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/etherdevice.h>
+#include <linux/filter.h>
+#include <linux/sched/signal.h>
+
+static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx)
+{
+ u32 ret;
+
+ preempt_disable();
+ rcu_read_lock();
+ ret = BPF_PROG_RUN(prog, ctx);
+ rcu_read_unlock();
+ preempt_enable();
+
+ return ret;
+}
+
+static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
+{
+ u64 time_start, time_spent = 0;
+ u32 ret = 0, i;
+
+ if (!repeat)
+ repeat = 1;
+ time_start = ktime_get_ns();
+ for (i = 0; i < repeat; i++) {
+ ret = bpf_test_run_one(prog, ctx);
+ if (need_resched()) {
+ if (signal_pending(current))
+ break;
+ time_spent += ktime_get_ns() - time_start;
+ cond_resched();
+ time_start = ktime_get_ns();
+ }
+ }
+ time_spent += ktime_get_ns() - time_start;
+ do_div(time_spent, repeat);
+ *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
+
+ return ret;
+}
+
+static int bpf_test_finish(union bpf_attr __user *uattr, const void *data,
+ u32 size, u32 retval, u32 duration)
+{
+ void __user *data_out = u64_to_user_ptr(uattr->test.data_out);
+ int err = -EFAULT;
+
+ if (data_out && copy_to_user(data_out, data, size))
+ goto out;
+ if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size)))
+ goto out;
+ if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
+ goto out;
+ if (copy_to_user(&uattr->test.duration, &duration, sizeof(duration)))
+ goto out;
+ err = 0;
+out:
+ return err;
+}
+
+static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
+ u32 headroom, u32 tailroom)
+{
+ void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
+ void *data;
+
+ if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
+ return ERR_PTR(-EINVAL);
+
+ data = kzalloc(size + headroom + tailroom, GFP_USER);
+ if (!data)
+ return ERR_PTR(-ENOMEM);
+
+ if (copy_from_user(data + headroom, data_in, size)) {
+ kfree(data);
+ return ERR_PTR(-EFAULT);
+ }
+ return data;
+}
+
+int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ bool is_l2 = false, is_direct_pkt_access = false;
+ u32 size = kattr->test.data_size_in;
+ u32 repeat = kattr->test.repeat;
+ u32 retval, duration;
+ struct sk_buff *skb;
+ void *data;
+ int ret;
+
+ data = bpf_test_init(kattr, size, NET_SKB_PAD,
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ switch (prog->type) {
+ case BPF_PROG_TYPE_SCHED_CLS:
+ case BPF_PROG_TYPE_SCHED_ACT:
+ is_l2 = true;
+ /* fall through */
+ case BPF_PROG_TYPE_LWT_IN:
+ case BPF_PROG_TYPE_LWT_OUT:
+ case BPF_PROG_TYPE_LWT_XMIT:
+ is_direct_pkt_access = true;
+ break;
+ default:
+ break;
+ }
+
+ skb = build_skb(data, 0);
+ if (!skb) {
+ kfree(data);
+ return -ENOMEM;
+ }
+
+ skb_reserve(skb, NET_SKB_PAD);
+ __skb_put(skb, size);
+ skb->protocol = eth_type_trans(skb, current->nsproxy->net_ns->loopback_dev);
+ skb_reset_network_header(skb);
+
+ if (is_l2)
+ __skb_push(skb, ETH_HLEN);
+ if (is_direct_pkt_access)
+ bpf_compute_data_end(skb);
+ retval = bpf_test_run(prog, skb, repeat, &duration);
+ if (!is_l2)
+ __skb_push(skb, ETH_HLEN);
+ size = skb->len;
+ /* bpf program can never convert linear skb to non-linear */
+ if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
+ size = skb_headlen(skb);
+ ret = bpf_test_finish(uattr, skb->data, size, retval, duration);
+ kfree_skb(skb);
+ return ret;
+}
+
+int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ u32 size = kattr->test.data_size_in;
+ u32 repeat = kattr->test.repeat;
+ struct xdp_buff xdp = {};
+ u32 retval, duration;
+ void *data;
+ int ret;
+
+ data = bpf_test_init(kattr, size, XDP_PACKET_HEADROOM, 0);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ xdp.data_hard_start = data;
+ xdp.data = data + XDP_PACKET_HEADROOM;
+ xdp.data_end = xdp.data + size;
+
+ retval = bpf_test_run(prog, &xdp, repeat, &duration);
+ if (xdp.data != data + XDP_PACKET_HEADROOM)
+ size = xdp.data_end - xdp.data;
+ ret = bpf_test_finish(uattr, xdp.data, size, retval, duration);
+ kfree(data);
+ return ret;
+}
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 6e08b7199dd7..5a40a87c4f4f 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -594,6 +594,9 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
fdb->updated = now;
if (unlikely(added_by_user))
fdb->added_by_user = 1;
+ /* Take over HW learned entry */
+ if (unlikely(fdb->added_by_external_learn))
+ fdb->added_by_external_learn = 0;
if (unlikely(fdb_modified))
fdb_notify(br, fdb, RTM_NEWNEIGH);
}
@@ -854,6 +857,8 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
br_fdb_update(br, p, addr, vid, true);
rcu_read_unlock();
local_bh_enable();
+ } else if (ndm->ndm_flags & NTF_EXT_LEARNED) {
+ err = br_fdb_external_learn_add(br, p, addr, vid);
} else {
spin_lock_bh(&br->hash_lock);
err = fdb_add_entry(br, p, addr, ndm->ndm_state,
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 8ac1770aa222..6eb52d422dd9 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -22,6 +22,7 @@
#include <linux/rtnetlink.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
+#include <net/dsa.h>
#include <net/sock.h>
#include <linux/if_vlan.h>
#include <net/switchdev.h>
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 5488e4a6ccd0..abf7d854a94d 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -75,9 +75,7 @@ static int stats_timer __read_mostly = 1;
module_param(stats_timer, int, S_IRUGO);
MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)");
-/* receive filters subscribed for 'all' CAN devices */
-struct dev_rcv_lists can_rx_alldev_list;
-static DEFINE_SPINLOCK(can_rcvlists_lock);
+static int can_net_id;
static struct kmem_cache *rcv_cache __read_mostly;
@@ -145,9 +143,6 @@ static int can_create(struct net *net, struct socket *sock, int protocol,
if (protocol < 0 || protocol >= CAN_NPROTO)
return -EINVAL;
- if (!net_eq(net, &init_net))
- return -EAFNOSUPPORT;
-
cp = can_get_proto(protocol);
#ifdef CONFIG_MODULES
@@ -331,10 +326,11 @@ EXPORT_SYMBOL(can_send);
* af_can rx path
*/
-static struct dev_rcv_lists *find_dev_rcv_lists(struct net_device *dev)
+static struct dev_rcv_lists *find_dev_rcv_lists(struct net *net,
+ struct net_device *dev)
{
if (!dev)
- return &can_rx_alldev_list;
+ return net->can.can_rx_alldev_list;
else
return (struct dev_rcv_lists *)dev->ml_priv;
}
@@ -467,9 +463,9 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
* -ENOMEM on missing cache mem to create subscription entry
* -ENODEV unknown device
*/
-int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask,
- void (*func)(struct sk_buff *, void *), void *data,
- char *ident, struct sock *sk)
+int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id,
+ canid_t mask, void (*func)(struct sk_buff *, void *),
+ void *data, char *ident, struct sock *sk)
{
struct receiver *r;
struct hlist_head *rl;
@@ -481,13 +477,16 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask,
if (dev && dev->type != ARPHRD_CAN)
return -ENODEV;
+ if (dev && !net_eq(net, dev_net(dev)))
+ return -ENODEV;
+
r = kmem_cache_alloc(rcv_cache, GFP_KERNEL);
if (!r)
return -ENOMEM;
- spin_lock(&can_rcvlists_lock);
+ spin_lock(&net->can.can_rcvlists_lock);
- d = find_dev_rcv_lists(dev);
+ d = find_dev_rcv_lists(net, dev);
if (d) {
rl = find_rcv_list(&can_id, &mask, d);
@@ -510,7 +509,7 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask,
err = -ENODEV;
}
- spin_unlock(&can_rcvlists_lock);
+ spin_unlock(&net->can.can_rcvlists_lock);
return err;
}
@@ -540,8 +539,9 @@ static void can_rx_delete_receiver(struct rcu_head *rp)
* Description:
* Removes subscription entry depending on given (subscription) values.
*/
-void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
- void (*func)(struct sk_buff *, void *), void *data)
+void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id,
+ canid_t mask, void (*func)(struct sk_buff *, void *),
+ void *data)
{
struct receiver *r = NULL;
struct hlist_head *rl;
@@ -550,9 +550,12 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
if (dev && dev->type != ARPHRD_CAN)
return;
- spin_lock(&can_rcvlists_lock);
+ if (dev && !net_eq(net, dev_net(dev)))
+ return;
- d = find_dev_rcv_lists(dev);
+ spin_lock(&net->can.can_rcvlists_lock);
+
+ d = find_dev_rcv_lists(net, dev);
if (!d) {
pr_err("BUG: receive list not found for "
"dev %s, id %03X, mask %03X\n",
@@ -598,7 +601,7 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
}
out:
- spin_unlock(&can_rcvlists_lock);
+ spin_unlock(&net->can.can_rcvlists_lock);
/* schedule the receiver item for deletion */
if (r) {
@@ -696,10 +699,10 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
rcu_read_lock();
/* deliver the packet to sockets listening on all devices */
- matches = can_rcv_filter(&can_rx_alldev_list, skb);
+ matches = can_rcv_filter(dev_net(dev)->can.can_rx_alldev_list, skb);
/* find receive list for this device */
- d = find_dev_rcv_lists(dev);
+ d = find_dev_rcv_lists(dev_net(dev), dev);
if (d)
matches += can_rcv_filter(d, skb);
@@ -719,9 +722,6 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
{
struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
- if (unlikely(!net_eq(dev_net(dev), &init_net)))
- goto drop;
-
if (WARN_ONCE(dev->type != ARPHRD_CAN ||
skb->len != CAN_MTU ||
cfd->len > CAN_MAX_DLEN,
@@ -743,9 +743,6 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
{
struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
- if (unlikely(!net_eq(dev_net(dev), &init_net)))
- goto drop;
-
if (WARN_ONCE(dev->type != ARPHRD_CAN ||
skb->len != CANFD_MTU ||
cfd->len > CANFD_MAX_DLEN,
@@ -835,9 +832,6 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct dev_rcv_lists *d;
- if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
-
if (dev->type != ARPHRD_CAN)
return NOTIFY_DONE;
@@ -855,7 +849,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
break;
case NETDEV_UNREGISTER:
- spin_lock(&can_rcvlists_lock);
+ spin_lock(&dev_net(dev)->can.can_rcvlists_lock);
d = dev->ml_priv;
if (d) {
@@ -869,7 +863,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
pr_err("can: notifier: receive list not found for dev "
"%s\n", dev->name);
- spin_unlock(&can_rcvlists_lock);
+ spin_unlock(&dev_net(dev)->can.can_rcvlists_lock);
break;
}
@@ -877,6 +871,40 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
return NOTIFY_DONE;
}
+static int can_pernet_init(struct net *net)
+{
+ net->can.can_rcvlists_lock =
+ __SPIN_LOCK_UNLOCKED(net->can.can_rcvlists_lock);
+ net->can.can_rx_alldev_list =
+ kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL);
+
+ if (IS_ENABLED(CONFIG_PROC_FS))
+ can_init_proc(net);
+
+ return 0;
+}
+
+static void can_pernet_exit(struct net *net)
+{
+ struct net_device *dev;
+
+ if (IS_ENABLED(CONFIG_PROC_FS))
+ can_remove_proc(net);
+
+ /* remove created dev_rcv_lists from still registered CAN devices */
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
+ if (dev->type == ARPHRD_CAN && dev->ml_priv) {
+ struct dev_rcv_lists *d = dev->ml_priv;
+
+ BUG_ON(d->entries);
+ kfree(d);
+ dev->ml_priv = NULL;
+ }
+ }
+ rcu_read_unlock();
+}
+
/*
* af_can module init/exit functions
*/
@@ -902,6 +930,13 @@ static struct notifier_block can_netdev_notifier __read_mostly = {
.notifier_call = can_notifier,
};
+static struct pernet_operations can_pernet_ops __read_mostly = {
+ .init = can_pernet_init,
+ .exit = can_pernet_exit,
+ .id = &can_net_id,
+ .size = 0,
+};
+
static __init int can_init(void)
{
/* check for correct padding to be able to use the structs similarly */
@@ -912,8 +947,6 @@ static __init int can_init(void)
pr_info("can: controller area network core (" CAN_VERSION_STRING ")\n");
- memset(&can_rx_alldev_list, 0, sizeof(can_rx_alldev_list));
-
rcv_cache = kmem_cache_create("can_receiver", sizeof(struct receiver),
0, 0, NULL);
if (!rcv_cache)
@@ -925,9 +958,10 @@ static __init int can_init(void)
setup_timer(&can_stattimer, can_stat_update, 0);
mod_timer(&can_stattimer, round_jiffies(jiffies + HZ));
}
- can_init_proc();
}
+ register_pernet_subsys(&can_pernet_ops);
+
/* protocol register */
sock_register(&can_family_ops);
register_netdevice_notifier(&can_netdev_notifier);
@@ -939,13 +973,9 @@ static __init int can_init(void)
static __exit void can_exit(void)
{
- struct net_device *dev;
-
if (IS_ENABLED(CONFIG_PROC_FS)) {
if (stats_timer)
del_timer_sync(&can_stattimer);
-
- can_remove_proc();
}
/* protocol unregister */
@@ -954,19 +984,7 @@ static __exit void can_exit(void)
unregister_netdevice_notifier(&can_netdev_notifier);
sock_unregister(PF_CAN);
- /* remove created dev_rcv_lists from still registered CAN devices */
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, dev) {
- if (dev->type == ARPHRD_CAN && dev->ml_priv) {
-
- struct dev_rcv_lists *d = dev->ml_priv;
-
- BUG_ON(d->entries);
- kfree(d);
- dev->ml_priv = NULL;
- }
- }
- rcu_read_unlock();
+ unregister_pernet_subsys(&can_pernet_ops);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
diff --git a/net/can/af_can.h b/net/can/af_can.h
index b86f5129e838..f273c9d9b129 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -114,8 +114,8 @@ struct s_pstats {
extern struct dev_rcv_lists can_rx_alldev_list;
/* function prototypes for the CAN networklayer procfs (proc.c) */
-void can_init_proc(void);
-void can_remove_proc(void);
+void can_init_proc(struct net *net);
+void can_remove_proc(struct net *net);
void can_stat_update(unsigned long data);
/* structures and variables from af_can.c needed in proc.c for reading */
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 95d13b233c65..1976629a8463 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -764,8 +764,8 @@ static void bcm_remove_op(struct bcm_op *op)
static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op)
{
if (op->rx_reg_dev == dev) {
- can_rx_unregister(dev, op->can_id, REGMASK(op->can_id),
- bcm_rx_handler, op);
+ can_rx_unregister(&init_net, dev, op->can_id,
+ REGMASK(op->can_id), bcm_rx_handler, op);
/* mark as removed subscription */
op->rx_reg_dev = NULL;
@@ -808,7 +808,7 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
}
}
} else
- can_rx_unregister(NULL, op->can_id,
+ can_rx_unregister(&init_net, NULL, op->can_id,
REGMASK(op->can_id),
bcm_rx_handler, op);
@@ -1222,7 +1222,8 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
dev = dev_get_by_index(&init_net, ifindex);
if (dev) {
- err = can_rx_register(dev, op->can_id,
+ err = can_rx_register(&init_net, dev,
+ op->can_id,
REGMASK(op->can_id),
bcm_rx_handler, op,
"bcm", sk);
@@ -1232,7 +1233,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
}
} else
- err = can_rx_register(NULL, op->can_id,
+ err = can_rx_register(&init_net, NULL, op->can_id,
REGMASK(op->can_id),
bcm_rx_handler, op, "bcm", sk);
if (err) {
@@ -1528,7 +1529,7 @@ static int bcm_release(struct socket *sock)
}
}
} else
- can_rx_unregister(NULL, op->can_id,
+ can_rx_unregister(&init_net, NULL, op->can_id,
REGMASK(op->can_id),
bcm_rx_handler, op);
diff --git a/net/can/gw.c b/net/can/gw.c
index 7056a1a2bb70..3c117a33e15f 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -440,14 +440,14 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
static inline int cgw_register_filter(struct cgw_job *gwj)
{
- return can_rx_register(gwj->src.dev, gwj->ccgw.filter.can_id,
+ return can_rx_register(&init_net, gwj->src.dev, gwj->ccgw.filter.can_id,
gwj->ccgw.filter.can_mask, can_can_gw_rcv,
gwj, "gw", NULL);
}
static inline void cgw_unregister_filter(struct cgw_job *gwj)
{
- can_rx_unregister(gwj->src.dev, gwj->ccgw.filter.can_id,
+ can_rx_unregister(&init_net, gwj->src.dev, gwj->ccgw.filter.can_id,
gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj);
}
diff --git a/net/can/proc.c b/net/can/proc.c
index 85ef7bb0f176..9a8d54d57b22 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -62,17 +62,6 @@
#define CAN_PROC_RCVLIST_EFF "rcvlist_eff"
#define CAN_PROC_RCVLIST_ERR "rcvlist_err"
-static struct proc_dir_entry *can_dir;
-static struct proc_dir_entry *pde_version;
-static struct proc_dir_entry *pde_stats;
-static struct proc_dir_entry *pde_reset_stats;
-static struct proc_dir_entry *pde_rcvlist_all;
-static struct proc_dir_entry *pde_rcvlist_fil;
-static struct proc_dir_entry *pde_rcvlist_inv;
-static struct proc_dir_entry *pde_rcvlist_sff;
-static struct proc_dir_entry *pde_rcvlist_eff;
-static struct proc_dir_entry *pde_rcvlist_err;
-
static int user_reset;
static const char rx_list_name[][8] = {
@@ -351,20 +340,21 @@ static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx,
static int can_rcvlist_proc_show(struct seq_file *m, void *v)
{
/* double cast to prevent GCC warning */
- int idx = (int)(long)m->private;
+ int idx = (int)(long)PDE_DATA(m->file->f_inode);
struct net_device *dev;
struct dev_rcv_lists *d;
+ struct net *net = m->private;
seq_printf(m, "\nreceive list '%s':\n", rx_list_name[idx]);
rcu_read_lock();
/* receive list for 'all' CAN devices (dev == NULL) */
- d = &can_rx_alldev_list;
+ d = net->can.can_rx_alldev_list;
can_rcvlist_proc_show_one(m, idx, NULL, d);
/* receive list for registered CAN devices */
- for_each_netdev_rcu(&init_net, dev) {
+ for_each_netdev_rcu(net, dev) {
if (dev->type == ARPHRD_CAN && dev->ml_priv)
can_rcvlist_proc_show_one(m, idx, dev, dev->ml_priv);
}
@@ -377,7 +367,7 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v)
static int can_rcvlist_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, can_rcvlist_proc_show, PDE_DATA(inode));
+ return single_open_net(inode, file, can_rcvlist_proc_show);
}
static const struct file_operations can_rcvlist_proc_fops = {
@@ -417,6 +407,7 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
{
struct net_device *dev;
struct dev_rcv_lists *d;
+ struct net *net = m->private;
/* RX_SFF */
seq_puts(m, "\nreceive list 'rx_sff':\n");
@@ -424,11 +415,11 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
rcu_read_lock();
/* sff receive list for 'all' CAN devices (dev == NULL) */
- d = &can_rx_alldev_list;
+ d = net->can.can_rx_alldev_list;
can_rcvlist_proc_show_array(m, NULL, d->rx_sff, ARRAY_SIZE(d->rx_sff));
/* sff receive list for registered CAN devices */
- for_each_netdev_rcu(&init_net, dev) {
+ for_each_netdev_rcu(net, dev) {
if (dev->type == ARPHRD_CAN && dev->ml_priv) {
d = dev->ml_priv;
can_rcvlist_proc_show_array(m, dev, d->rx_sff,
@@ -444,7 +435,7 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
static int can_rcvlist_sff_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, can_rcvlist_sff_proc_show, NULL);
+ return single_open_net(inode, file, can_rcvlist_sff_proc_show);
}
static const struct file_operations can_rcvlist_sff_proc_fops = {
@@ -460,6 +451,7 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
{
struct net_device *dev;
struct dev_rcv_lists *d;
+ struct net *net = m->private;
/* RX_EFF */
seq_puts(m, "\nreceive list 'rx_eff':\n");
@@ -467,11 +459,11 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
rcu_read_lock();
/* eff receive list for 'all' CAN devices (dev == NULL) */
- d = &can_rx_alldev_list;
+ d = net->can.can_rx_alldev_list;
can_rcvlist_proc_show_array(m, NULL, d->rx_eff, ARRAY_SIZE(d->rx_eff));
/* eff receive list for registered CAN devices */
- for_each_netdev_rcu(&init_net, dev) {
+ for_each_netdev_rcu(net, dev) {
if (dev->type == ARPHRD_CAN && dev->ml_priv) {
d = dev->ml_priv;
can_rcvlist_proc_show_array(m, dev, d->rx_eff,
@@ -487,7 +479,7 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
static int can_rcvlist_eff_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, can_rcvlist_eff_proc_show, NULL);
+ return single_open_net(inode, file, can_rcvlist_eff_proc_show);
}
static const struct file_operations can_rcvlist_eff_proc_fops = {
@@ -499,81 +491,85 @@ static const struct file_operations can_rcvlist_eff_proc_fops = {
};
/*
- * proc utility functions
- */
-
-static void can_remove_proc_readentry(const char *name)
-{
- if (can_dir)
- remove_proc_entry(name, can_dir);
-}
-
-/*
* can_init_proc - create main CAN proc directory and procfs entries
*/
-void can_init_proc(void)
+void can_init_proc(struct net *net)
{
/* create /proc/net/can directory */
- can_dir = proc_mkdir("can", init_net.proc_net);
+ net->can.proc_dir = proc_net_mkdir(net, "can", net->proc_net);
- if (!can_dir) {
- pr_info("can: failed to create /proc/net/can.\n");
+ if (!net->can.proc_dir) {
+ printk(KERN_INFO "can: failed to create /proc/net/can . "
+ "CONFIG_PROC_FS missing?\n");
return;
}
/* own procfs entries from the AF_CAN core */
- pde_version = proc_create(CAN_PROC_VERSION, 0644, can_dir,
- &can_version_proc_fops);
- pde_stats = proc_create(CAN_PROC_STATS, 0644, can_dir,
- &can_stats_proc_fops);
- pde_reset_stats = proc_create(CAN_PROC_RESET_STATS, 0644, can_dir,
- &can_reset_stats_proc_fops);
- pde_rcvlist_err = proc_create_data(CAN_PROC_RCVLIST_ERR, 0644, can_dir,
- &can_rcvlist_proc_fops, (void *)RX_ERR);
- pde_rcvlist_all = proc_create_data(CAN_PROC_RCVLIST_ALL, 0644, can_dir,
- &can_rcvlist_proc_fops, (void *)RX_ALL);
- pde_rcvlist_fil = proc_create_data(CAN_PROC_RCVLIST_FIL, 0644, can_dir,
- &can_rcvlist_proc_fops, (void *)RX_FIL);
- pde_rcvlist_inv = proc_create_data(CAN_PROC_RCVLIST_INV, 0644, can_dir,
- &can_rcvlist_proc_fops, (void *)RX_INV);
- pde_rcvlist_eff = proc_create(CAN_PROC_RCVLIST_EFF, 0644, can_dir,
- &can_rcvlist_eff_proc_fops);
- pde_rcvlist_sff = proc_create(CAN_PROC_RCVLIST_SFF, 0644, can_dir,
- &can_rcvlist_sff_proc_fops);
+ net->can.pde_version = proc_create(CAN_PROC_VERSION, 0644,
+ net->can.proc_dir,
+ &can_version_proc_fops);
+ net->can.pde_stats = proc_create(CAN_PROC_STATS, 0644,
+ net->can.proc_dir,
+ &can_stats_proc_fops);
+ net->can.pde_reset_stats = proc_create(CAN_PROC_RESET_STATS, 0644,
+ net->can.proc_dir,
+ &can_reset_stats_proc_fops);
+ net->can.pde_rcvlist_err = proc_create_data(CAN_PROC_RCVLIST_ERR, 0644,
+ net->can.proc_dir,
+ &can_rcvlist_proc_fops,
+ (void *)RX_ERR);
+ net->can.pde_rcvlist_all = proc_create_data(CAN_PROC_RCVLIST_ALL, 0644,
+ net->can.proc_dir,
+ &can_rcvlist_proc_fops,
+ (void *)RX_ALL);
+ net->can.pde_rcvlist_fil = proc_create_data(CAN_PROC_RCVLIST_FIL, 0644,
+ net->can.proc_dir,
+ &can_rcvlist_proc_fops,
+ (void *)RX_FIL);
+ net->can.pde_rcvlist_inv = proc_create_data(CAN_PROC_RCVLIST_INV, 0644,
+ net->can.proc_dir,
+ &can_rcvlist_proc_fops,
+ (void *)RX_INV);
+ net->can.pde_rcvlist_eff = proc_create(CAN_PROC_RCVLIST_EFF, 0644,
+ net->can.proc_dir,
+ &can_rcvlist_eff_proc_fops);
+ net->can.pde_rcvlist_sff = proc_create(CAN_PROC_RCVLIST_SFF, 0644,
+ net->can.proc_dir,
+ &can_rcvlist_sff_proc_fops);
}
/*
* can_remove_proc - remove procfs entries and main CAN proc directory
*/
-void can_remove_proc(void)
+void can_remove_proc(struct net *net)
{
- if (pde_version)
- can_remove_proc_readentry(CAN_PROC_VERSION);
+ if (net->can.pde_version)
+ remove_proc_entry(CAN_PROC_VERSION, net->can.proc_dir);
- if (pde_stats)
- can_remove_proc_readentry(CAN_PROC_STATS);
+ if (net->can.pde_stats)
+ remove_proc_entry(CAN_PROC_STATS, net->can.proc_dir);
- if (pde_reset_stats)
- can_remove_proc_readentry(CAN_PROC_RESET_STATS);
+ if (net->can.pde_reset_stats)
+ remove_proc_entry(CAN_PROC_RESET_STATS, net->can.proc_dir);
- if (pde_rcvlist_err)
- can_remove_proc_readentry(CAN_PROC_RCVLIST_ERR);
+ if (net->can.pde_rcvlist_err)
+ remove_proc_entry(CAN_PROC_RCVLIST_ERR, net->can.proc_dir);
- if (pde_rcvlist_all)
- can_remove_proc_readentry(CAN_PROC_RCVLIST_ALL);
+ if (net->can.pde_rcvlist_all)
+ remove_proc_entry(CAN_PROC_RCVLIST_ALL, net->can.proc_dir);
- if (pde_rcvlist_fil)
- can_remove_proc_readentry(CAN_PROC_RCVLIST_FIL);
+ if (net->can.pde_rcvlist_fil)
+ remove_proc_entry(CAN_PROC_RCVLIST_FIL, net->can.proc_dir);
- if (pde_rcvlist_inv)
- can_remove_proc_readentry(CAN_PROC_RCVLIST_INV);
+ if (net->can.pde_rcvlist_inv)
+ remove_proc_entry(CAN_PROC_RCVLIST_INV, net->can.proc_dir);
- if (pde_rcvlist_eff)
- can_remove_proc_readentry(CAN_PROC_RCVLIST_EFF);
+ if (net->can.pde_rcvlist_eff)
+ remove_proc_entry(CAN_PROC_RCVLIST_EFF, net->can.proc_dir);
- if (pde_rcvlist_sff)
- can_remove_proc_readentry(CAN_PROC_RCVLIST_SFF);
+ if (net->can.pde_rcvlist_sff)
+ remove_proc_entry(CAN_PROC_RCVLIST_SFF, net->can.proc_dir);
- if (can_dir)
- remove_proc_entry("can", init_net.proc_net);
+ if (net->can.proc_dir)
+ remove_proc_entry("can", net->proc_net);
}
diff --git a/net/can/raw.c b/net/can/raw.c
index 6dc546a06673..864c80dbdb72 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -181,20 +181,21 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
kfree_skb(skb);
}
-static int raw_enable_filters(struct net_device *dev, struct sock *sk,
- struct can_filter *filter, int count)
+static int raw_enable_filters(struct net *net, struct net_device *dev,
+ struct sock *sk, struct can_filter *filter,
+ int count)
{
int err = 0;
int i;
for (i = 0; i < count; i++) {
- err = can_rx_register(dev, filter[i].can_id,
+ err = can_rx_register(net, dev, filter[i].can_id,
filter[i].can_mask,
raw_rcv, sk, "raw", sk);
if (err) {
/* clean up successfully registered filters */
while (--i >= 0)
- can_rx_unregister(dev, filter[i].can_id,
+ can_rx_unregister(net, dev, filter[i].can_id,
filter[i].can_mask,
raw_rcv, sk);
break;
@@ -204,57 +205,62 @@ static int raw_enable_filters(struct net_device *dev, struct sock *sk,
return err;
}
-static int raw_enable_errfilter(struct net_device *dev, struct sock *sk,
- can_err_mask_t err_mask)
+static int raw_enable_errfilter(struct net *net, struct net_device *dev,
+ struct sock *sk, can_err_mask_t err_mask)
{
int err = 0;
if (err_mask)
- err = can_rx_register(dev, 0, err_mask | CAN_ERR_FLAG,
+ err = can_rx_register(net, dev, 0, err_mask | CAN_ERR_FLAG,
raw_rcv, sk, "raw", sk);
return err;
}
-static void raw_disable_filters(struct net_device *dev, struct sock *sk,
- struct can_filter *filter, int count)
+static void raw_disable_filters(struct net *net, struct net_device *dev,
+ struct sock *sk, struct can_filter *filter,
+ int count)
{
int i;
for (i = 0; i < count; i++)
- can_rx_unregister(dev, filter[i].can_id, filter[i].can_mask,
- raw_rcv, sk);
+ can_rx_unregister(net, dev, filter[i].can_id,
+ filter[i].can_mask, raw_rcv, sk);
}
-static inline void raw_disable_errfilter(struct net_device *dev,
+static inline void raw_disable_errfilter(struct net *net,
+ struct net_device *dev,
struct sock *sk,
can_err_mask_t err_mask)
{
if (err_mask)
- can_rx_unregister(dev, 0, err_mask | CAN_ERR_FLAG,
+ can_rx_unregister(net, dev, 0, err_mask | CAN_ERR_FLAG,
raw_rcv, sk);
}
-static inline void raw_disable_allfilters(struct net_device *dev,
+static inline void raw_disable_allfilters(struct net *net,
+ struct net_device *dev,
struct sock *sk)
{
struct raw_sock *ro = raw_sk(sk);
- raw_disable_filters(dev, sk, ro->filter, ro->count);
- raw_disable_errfilter(dev, sk, ro->err_mask);
+ raw_disable_filters(net, dev, sk, ro->filter, ro->count);
+ raw_disable_errfilter(net, dev, sk, ro->err_mask);
}
-static int raw_enable_allfilters(struct net_device *dev, struct sock *sk)
+static int raw_enable_allfilters(struct net *net, struct net_device *dev,
+ struct sock *sk)
{
struct raw_sock *ro = raw_sk(sk);
int err;
- err = raw_enable_filters(dev, sk, ro->filter, ro->count);
+ err = raw_enable_filters(net, dev, sk, ro->filter, ro->count);
if (!err) {
- err = raw_enable_errfilter(dev, sk, ro->err_mask);
+ err = raw_enable_errfilter(net, dev, sk, ro->err_mask);
if (err)
- raw_disable_filters(dev, sk, ro->filter, ro->count);
+ raw_disable_filters(net, dev, sk, ro->filter,
+ ro->count);
}
return err;
@@ -267,7 +273,7 @@ static int raw_notifier(struct notifier_block *nb,
struct raw_sock *ro = container_of(nb, struct raw_sock, notifier);
struct sock *sk = &ro->sk;
- if (!net_eq(dev_net(dev), &init_net))
+ if (!net_eq(dev_net(dev), sock_net(sk)))
return NOTIFY_DONE;
if (dev->type != ARPHRD_CAN)
@@ -282,7 +288,7 @@ static int raw_notifier(struct notifier_block *nb,
lock_sock(sk);
/* remove current filters & unregister */
if (ro->bound)
- raw_disable_allfilters(dev, sk);
+ raw_disable_allfilters(dev_net(dev), dev, sk);
if (ro->count > 1)
kfree(ro->filter);
@@ -358,13 +364,13 @@ static int raw_release(struct socket *sock)
if (ro->ifindex) {
struct net_device *dev;
- dev = dev_get_by_index(&init_net, ro->ifindex);
+ dev = dev_get_by_index(sock_net(sk), ro->ifindex);
if (dev) {
- raw_disable_allfilters(dev, sk);
+ raw_disable_allfilters(dev_net(dev), dev, sk);
dev_put(dev);
}
} else
- raw_disable_allfilters(NULL, sk);
+ raw_disable_allfilters(sock_net(sk), NULL, sk);
}
if (ro->count > 1)
@@ -404,7 +410,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
if (addr->can_ifindex) {
struct net_device *dev;
- dev = dev_get_by_index(&init_net, addr->can_ifindex);
+ dev = dev_get_by_index(sock_net(sk), addr->can_ifindex);
if (!dev) {
err = -ENODEV;
goto out;
@@ -420,13 +426,13 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
ifindex = dev->ifindex;
/* filters set by default/setsockopt */
- err = raw_enable_allfilters(dev, sk);
+ err = raw_enable_allfilters(sock_net(sk), dev, sk);
dev_put(dev);
} else {
ifindex = 0;
/* filters set by default/setsockopt */
- err = raw_enable_allfilters(NULL, sk);
+ err = raw_enable_allfilters(sock_net(sk), NULL, sk);
}
if (!err) {
@@ -435,13 +441,15 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
if (ro->ifindex) {
struct net_device *dev;
- dev = dev_get_by_index(&init_net, ro->ifindex);
+ dev = dev_get_by_index(sock_net(sk),
+ ro->ifindex);
if (dev) {
- raw_disable_allfilters(dev, sk);
+ raw_disable_allfilters(dev_net(dev),
+ dev, sk);
dev_put(dev);
}
} else
- raw_disable_allfilters(NULL, sk);
+ raw_disable_allfilters(sock_net(sk), NULL, sk);
}
ro->ifindex = ifindex;
ro->bound = 1;
@@ -517,15 +525,16 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
lock_sock(sk);
if (ro->bound && ro->ifindex)
- dev = dev_get_by_index(&init_net, ro->ifindex);
+ dev = dev_get_by_index(sock_net(sk), ro->ifindex);
if (ro->bound) {
/* (try to) register the new filters */
if (count == 1)
- err = raw_enable_filters(dev, sk, &sfilter, 1);
+ err = raw_enable_filters(sock_net(sk), dev, sk,
+ &sfilter, 1);
else
- err = raw_enable_filters(dev, sk, filter,
- count);
+ err = raw_enable_filters(sock_net(sk), dev, sk,
+ filter, count);
if (err) {
if (count > 1)
kfree(filter);
@@ -533,7 +542,8 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
}
/* remove old filter registrations */
- raw_disable_filters(dev, sk, ro->filter, ro->count);
+ raw_disable_filters(sock_net(sk), dev, sk, ro->filter,
+ ro->count);
}
/* remove old filter space */
@@ -569,18 +579,20 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
lock_sock(sk);
if (ro->bound && ro->ifindex)
- dev = dev_get_by_index(&init_net, ro->ifindex);
+ dev = dev_get_by_index(sock_net(sk), ro->ifindex);
/* remove current error mask */
if (ro->bound) {
/* (try to) register the new err_mask */
- err = raw_enable_errfilter(dev, sk, err_mask);
+ err = raw_enable_errfilter(sock_net(sk), dev, sk,
+ err_mask);
if (err)
goto out_err;
/* remove old err_mask registration */
- raw_disable_errfilter(dev, sk, ro->err_mask);
+ raw_disable_errfilter(sock_net(sk), dev, sk,
+ ro->err_mask);
}
/* link new err_mask to the socket */
@@ -741,7 +753,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
return -EINVAL;
}
- dev = dev_get_by_index(&init_net, ifindex);
+ dev = dev_get_by_index(sock_net(sk), ifindex);
if (!dev)
return -ENXIO;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 38dcf1eb427d..f76bb3332613 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -7,6 +7,7 @@
#include <linux/kthread.h>
#include <linux/net.h>
#include <linux/nsproxy.h>
+#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/string.h>
@@ -469,11 +470,16 @@ static int ceph_tcp_connect(struct ceph_connection *con)
{
struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
struct socket *sock;
+ unsigned int noio_flag;
int ret;
BUG_ON(con->sock);
+
+ /* sock_create_kern() allocates with GFP_KERNEL */
+ noio_flag = memalloc_noio_save();
ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family,
SOCK_STREAM, IPPROTO_TCP, &sock);
+ memalloc_noio_restore(noio_flag);
if (ret)
return ret;
sock->sk->sk_allocation = GFP_NOFS;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index ea633342ab0d..4608aa245410 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -256,8 +256,12 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
}
spin_unlock_irqrestore(&queue->lock, cpu_flags);
- } while (sk_can_busy_loop(sk) &&
- sk_busy_loop(sk, flags & MSG_DONTWAIT));
+
+ if (!sk_can_busy_loop(sk))
+ break;
+
+ sk_busy_loop(sk, flags & MSG_DONTWAIT);
+ } while (!skb_queue_empty(&sk->sk_receive_queue));
error = -EAGAIN;
diff --git a/net/core/dev.c b/net/core/dev.c
index 7869ae3837ca..ef9fe60ee294 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5060,27 +5060,28 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
do_softirq();
}
-bool sk_busy_loop(struct sock *sk, int nonblock)
+void napi_busy_loop(unsigned int napi_id,
+ bool (*loop_end)(void *, unsigned long),
+ void *loop_end_arg)
{
- unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
+ unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
int (*napi_poll)(struct napi_struct *napi, int budget);
void *have_poll_lock = NULL;
struct napi_struct *napi;
- int rc;
restart:
- rc = false;
napi_poll = NULL;
rcu_read_lock();
- napi = napi_by_id(sk->sk_napi_id);
+ napi = napi_by_id(napi_id);
if (!napi)
goto out;
preempt_disable();
for (;;) {
- rc = 0;
+ int work = 0;
+
local_bh_disable();
if (!napi_poll) {
unsigned long val = READ_ONCE(napi->state);
@@ -5098,16 +5099,15 @@ restart:
have_poll_lock = netpoll_poll_lock(napi);
napi_poll = napi->poll;
}
- rc = napi_poll(napi, BUSY_POLL_BUDGET);
- trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
+ work = napi_poll(napi, BUSY_POLL_BUDGET);
+ trace_napi_poll(napi, work, BUSY_POLL_BUDGET);
count:
- if (rc > 0)
- __NET_ADD_STATS(sock_net(sk),
- LINUX_MIB_BUSYPOLLRXPACKETS, rc);
+ if (work > 0)
+ __NET_ADD_STATS(dev_net(napi->dev),
+ LINUX_MIB_BUSYPOLLRXPACKETS, work);
local_bh_enable();
- if (nonblock || !skb_queue_empty(&sk->sk_receive_queue) ||
- busy_loop_timeout(end_time))
+ if (!loop_end || loop_end(loop_end_arg, start_time))
break;
if (unlikely(need_resched())) {
@@ -5116,9 +5116,8 @@ count:
preempt_enable();
rcu_read_unlock();
cond_resched();
- rc = !skb_queue_empty(&sk->sk_receive_queue);
- if (rc || busy_loop_timeout(end_time))
- return rc;
+ if (loop_end(loop_end_arg, start_time))
+ return;
goto restart;
}
cpu_relax();
@@ -5126,12 +5125,10 @@ count:
if (napi_poll)
busy_poll_stop(napi, have_poll_lock);
preempt_enable();
- rc = !skb_queue_empty(&sk->sk_receive_queue);
out:
rcu_read_unlock();
- return rc;
}
-EXPORT_SYMBOL(sk_busy_loop);
+EXPORT_SYMBOL(napi_busy_loop);
#endif /* CONFIG_NET_RX_BUSY_POLL */
@@ -5143,10 +5140,10 @@ static void napi_hash_add(struct napi_struct *napi)
spin_lock(&napi_hash_lock);
- /* 0..NR_CPUS+1 range is reserved for sender_cpu use */
+ /* 0..NR_CPUS range is reserved for sender_cpu use */
do {
- if (unlikely(++napi_gen_id < NR_CPUS + 1))
- napi_gen_id = NR_CPUS + 1;
+ if (unlikely(++napi_gen_id < MIN_NAPI_ID))
+ napi_gen_id = MIN_NAPI_ID;
} while (napi_by_id(napi_gen_id));
napi->napi_id = napi_gen_id;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index e9c1e6acfb6d..24b766003a61 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1493,8 +1493,686 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
if (err)
return err;
}
+ return 0;
+}
+
+int devlink_dpipe_match_put(struct sk_buff *skb,
+ struct devlink_dpipe_match *match)
+{
+ struct devlink_dpipe_header *header = match->header;
+ struct devlink_dpipe_field *field = &header->fields[match->field_id];
+ struct nlattr *match_attr;
+
+ match_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_MATCH);
+ if (!match_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_MATCH_TYPE, match->type) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, match->header_index) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, match_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, match_attr);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_match_put);
+
+static int devlink_dpipe_matches_put(struct devlink_dpipe_table *table,
+ struct sk_buff *skb)
+{
+ struct nlattr *matches_attr;
+
+ matches_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_MATCHES);
+ if (!matches_attr)
+ return -EMSGSIZE;
+
+ if (table->table_ops->matches_dump(table->priv, skb))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, matches_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, matches_attr);
+ return -EMSGSIZE;
+}
+
+int devlink_dpipe_action_put(struct sk_buff *skb,
+ struct devlink_dpipe_action *action)
+{
+ struct devlink_dpipe_header *header = action->header;
+ struct devlink_dpipe_field *field = &header->fields[action->field_id];
+ struct nlattr *action_attr;
+
+ action_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ACTION);
+ if (!action_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_ACTION_TYPE, action->type) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, action->header_index) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, action_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, action_attr);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_action_put);
+
+static int devlink_dpipe_actions_put(struct devlink_dpipe_table *table,
+ struct sk_buff *skb)
+{
+ struct nlattr *actions_attr;
+
+ actions_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_ACTIONS);
+ if (!actions_attr)
+ return -EMSGSIZE;
+
+ if (table->table_ops->actions_dump(table->priv, skb))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, actions_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, actions_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_dpipe_table_put(struct sk_buff *skb,
+ struct devlink_dpipe_table *table)
+{
+ struct nlattr *table_attr;
+
+ table_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) ||
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table->size,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED,
+ table->counters_enabled))
+ goto nla_put_failure;
+
+ if (devlink_dpipe_matches_put(table, skb))
+ goto nla_put_failure;
+
+ if (devlink_dpipe_actions_put(table, skb))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, table_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, table_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_dpipe_send_and_alloc_skb(struct sk_buff **pskb,
+ struct genl_info *info)
+{
+ int err;
+
+ if (*pskb) {
+ err = genlmsg_reply(*pskb, info);
+ if (err)
+ return err;
+ }
+ *pskb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!*pskb)
+ return -ENOMEM;
+ return 0;
+}
+
+static int devlink_dpipe_tables_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags,
+ struct list_head *dpipe_tables,
+ const char *table_name)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_dpipe_table *table;
+ struct nlattr *tables_attr;
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ bool incomplete;
+ void *hdr;
+ int i;
+ int err;
+
+ table = list_first_entry(dpipe_tables,
+ struct devlink_dpipe_table, list);
+start_again:
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(skb, devlink))
+ goto nla_put_failure;
+ tables_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLES);
+ if (!tables_attr)
+ goto nla_put_failure;
+
+ i = 0;
+ incomplete = false;
+ list_for_each_entry_from(table, dpipe_tables, list) {
+ if (!table_name) {
+ err = devlink_dpipe_table_put(skb, table);
+ if (err) {
+ if (!i)
+ goto err_table_put;
+ incomplete = true;
+ break;
+ }
+ } else {
+ if (!strcmp(table->name, table_name)) {
+ err = devlink_dpipe_table_put(skb, table);
+ if (err)
+ break;
+ }
+ }
+ i++;
+ }
+
+ nla_nest_end(skb, tables_attr);
+ genlmsg_end(skb, hdr);
+ if (incomplete)
+ goto start_again;
+
+send_done:
+ nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ goto err_skb_send_alloc;
+ goto send_done;
+ }
+
+ return genlmsg_reply(skb, info);
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_table_put:
+err_skb_send_alloc:
+ genlmsg_cancel(skb, hdr);
+ nlmsg_free(skb);
+ return err;
+}
+
+static int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const char *table_name = NULL;
+
+ if (info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME])
+ table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+
+ return devlink_dpipe_tables_fill(info, DEVLINK_CMD_DPIPE_TABLE_GET, 0,
+ &devlink->dpipe_table_list,
+ table_name);
+}
+
+static int devlink_dpipe_value_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *value)
+{
+ if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE,
+ value->value_size, value->value))
+ return -EMSGSIZE;
+ if (value->mask)
+ if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE_MASK,
+ value->value_size, value->mask))
+ return -EMSGSIZE;
+ if (value->mapping_valid)
+ if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_VALUE_MAPPING,
+ value->mapping_value))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_dpipe_action_value_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *value)
+{
+ if (!value->action)
+ return -EINVAL;
+ if (devlink_dpipe_action_put(skb, value->action))
+ return -EMSGSIZE;
+ if (devlink_dpipe_value_put(skb, value))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_dpipe_action_values_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *values,
+ unsigned int values_count)
+{
+ struct nlattr *action_attr;
+ int i;
+ int err;
+
+ for (i = 0; i < values_count; i++) {
+ action_attr = nla_nest_start(skb,
+ DEVLINK_ATTR_DPIPE_ACTION_VALUE);
+ if (!action_attr)
+ return -EMSGSIZE;
+ err = devlink_dpipe_action_value_put(skb, &values[i]);
+ if (err)
+ goto err_action_value_put;
+ nla_nest_end(skb, action_attr);
+ }
+ return 0;
+
+err_action_value_put:
+ nla_nest_cancel(skb, action_attr);
+ return err;
+}
+
+static int devlink_dpipe_match_value_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *value)
+{
+ if (!value->match)
+ return -EINVAL;
+ if (devlink_dpipe_match_put(skb, value->match))
+ return -EMSGSIZE;
+ if (devlink_dpipe_value_put(skb, value))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int devlink_dpipe_match_values_put(struct sk_buff *skb,
+ struct devlink_dpipe_value *values,
+ unsigned int values_count)
+{
+ struct nlattr *match_attr;
+ int i;
+ int err;
+
+ for (i = 0; i < values_count; i++) {
+ match_attr = nla_nest_start(skb,
+ DEVLINK_ATTR_DPIPE_MATCH_VALUE);
+ if (!match_attr)
+ return -EMSGSIZE;
+ err = devlink_dpipe_match_value_put(skb, &values[i]);
+ if (err)
+ goto err_match_value_put;
+ nla_nest_end(skb, match_attr);
+ }
+ return 0;
+
+err_match_value_put:
+ nla_nest_cancel(skb, match_attr);
+ return err;
+}
+
+static int devlink_dpipe_entry_put(struct sk_buff *skb,
+ struct devlink_dpipe_entry *entry)
+{
+ struct nlattr *entry_attr, *matches_attr, *actions_attr;
+ int err;
+
+ entry_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ENTRY);
+ if (!entry_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_INDEX, entry->index,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (entry->counter_valid)
+ if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_COUNTER,
+ entry->counter, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ matches_attr = nla_nest_start(skb,
+ DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES);
+ if (!matches_attr)
+ goto nla_put_failure;
+
+ err = devlink_dpipe_match_values_put(skb, entry->match_values,
+ entry->match_values_count);
+ if (err) {
+ nla_nest_cancel(skb, matches_attr);
+ goto err_match_values_put;
+ }
+ nla_nest_end(skb, matches_attr);
+
+ actions_attr = nla_nest_start(skb,
+ DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES);
+ if (!actions_attr)
+ goto nla_put_failure;
+
+ err = devlink_dpipe_action_values_put(skb, entry->action_values,
+ entry->action_values_count);
+ if (err) {
+ nla_nest_cancel(skb, actions_attr);
+ goto err_action_values_put;
+ }
+ nla_nest_end(skb, actions_attr);
+ nla_nest_end(skb, entry_attr);
return 0;
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_match_values_put:
+err_action_values_put:
+ nla_nest_cancel(skb, entry_attr);
+ return err;
+}
+
+static struct devlink_dpipe_table *
+devlink_dpipe_table_find(struct list_head *dpipe_tables,
+ const char *table_name)
+{
+ struct devlink_dpipe_table *table;
+
+ list_for_each_entry_rcu(table, dpipe_tables, list) {
+ if (!strcmp(table->name, table_name))
+ return table;
+ }
+ return NULL;
+}
+
+int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ struct devlink *devlink;
+ int err;
+
+ err = devlink_dpipe_send_and_alloc_skb(&dump_ctx->skb,
+ dump_ctx->info);
+ if (err)
+ return err;
+
+ dump_ctx->hdr = genlmsg_put(dump_ctx->skb,
+ dump_ctx->info->snd_portid,
+ dump_ctx->info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI,
+ dump_ctx->cmd);
+ if (!dump_ctx->hdr)
+ goto nla_put_failure;
+
+ devlink = dump_ctx->info->user_ptr[0];
+ if (devlink_nl_put_handle(dump_ctx->skb, devlink))
+ goto nla_put_failure;
+ dump_ctx->nest = nla_nest_start(dump_ctx->skb,
+ DEVLINK_ATTR_DPIPE_ENTRIES);
+ if (!dump_ctx->nest)
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(dump_ctx->skb, dump_ctx->hdr);
+ nlmsg_free(dump_ctx->skb);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_prepare);
+
+int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx,
+ struct devlink_dpipe_entry *entry)
+{
+ return devlink_dpipe_entry_put(dump_ctx->skb, entry);
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_append);
+
+int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ nla_nest_end(dump_ctx->skb, dump_ctx->nest);
+ genlmsg_end(dump_ctx->skb, dump_ctx->hdr);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_close);
+
+static int devlink_dpipe_entries_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags,
+ struct devlink_dpipe_table *table)
+{
+ struct devlink_dpipe_dump_ctx dump_ctx;
+ struct nlmsghdr *nlh;
+ int err;
+
+ dump_ctx.skb = NULL;
+ dump_ctx.cmd = cmd;
+ dump_ctx.info = info;
+
+ err = table->table_ops->entries_dump(table->priv,
+ table->counters_enabled,
+ &dump_ctx);
+ if (err)
+ goto err_entries_dump;
+
+send_done:
+ nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info);
+ if (err)
+ goto err_skb_send_alloc;
+ goto send_done;
+ }
+ return genlmsg_reply(dump_ctx.skb, info);
+
+err_entries_dump:
+err_skb_send_alloc:
+ genlmsg_cancel(dump_ctx.skb, dump_ctx.hdr);
+ nlmsg_free(dump_ctx.skb);
+ return err;
+}
+
+static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_dpipe_table *table;
+ const char *table_name;
+
+ if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME])
+ return -EINVAL;
+
+ table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name);
+ if (!table)
+ return -EINVAL;
+
+ if (!table->table_ops->entries_dump)
+ return -EINVAL;
+
+ return devlink_dpipe_entries_fill(info, DEVLINK_CMD_DPIPE_ENTRIES_GET,
+ 0, table);
+}
+
+static int devlink_dpipe_fields_put(struct sk_buff *skb,
+ const struct devlink_dpipe_header *header)
+{
+ struct devlink_dpipe_field *field;
+ struct nlattr *field_attr;
+ int i;
+
+ for (i = 0; i < header->fields_count; i++) {
+ field = &header->fields[i];
+ field_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_FIELD);
+ if (!field_attr)
+ return -EMSGSIZE;
+ if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_FIELD_NAME, field->name) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH, field->bitwidth) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE, field->mapping_type))
+ goto nla_put_failure;
+ nla_nest_end(skb, field_attr);
+ }
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, field_attr);
+ return -EMSGSIZE;
+}
+
+static int devlink_dpipe_header_put(struct sk_buff *skb,
+ struct devlink_dpipe_header *header)
+{
+ struct nlattr *fields_attr, *header_attr;
+ int err;
+
+ header_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER);
+ if (!header)
+ return -EMSGSIZE;
+
+ if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_HEADER_NAME, header->name) ||
+ nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global))
+ goto nla_put_failure;
+
+ fields_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER_FIELDS);
+ if (!fields_attr)
+ goto nla_put_failure;
+
+ err = devlink_dpipe_fields_put(skb, header);
+ if (err) {
+ nla_nest_cancel(skb, fields_attr);
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, fields_attr);
+ nla_nest_end(skb, header_attr);
+ return 0;
+
+nla_put_failure:
+ err = -EMSGSIZE;
+ nla_nest_cancel(skb, header_attr);
+ return err;
+}
+
+static int devlink_dpipe_headers_fill(struct genl_info *info,
+ enum devlink_command cmd, int flags,
+ struct devlink_dpipe_headers *
+ dpipe_headers)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct nlattr *headers_attr;
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ void *hdr;
+ int i, j;
+ int err;
+
+ i = 0;
+start_again:
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ return err;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, NLM_F_MULTI, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(skb, devlink))
+ goto nla_put_failure;
+ headers_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADERS);
+ if (!headers_attr)
+ goto nla_put_failure;
+
+ j = 0;
+ for (; i < dpipe_headers->headers_count; i++) {
+ err = devlink_dpipe_header_put(skb, dpipe_headers->headers[i]);
+ if (err) {
+ if (!j)
+ goto err_table_put;
+ break;
+ }
+ j++;
+ }
+ nla_nest_end(skb, headers_attr);
+ genlmsg_end(skb, hdr);
+ if (i != dpipe_headers->headers_count)
+ goto start_again;
+
+send_done:
+ nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = devlink_dpipe_send_and_alloc_skb(&skb, info);
+ if (err)
+ goto err_skb_send_alloc;
+ goto send_done;
+ }
+ return genlmsg_reply(skb, info);
+
+nla_put_failure:
+ err = -EMSGSIZE;
+err_table_put:
+err_skb_send_alloc:
+ genlmsg_cancel(skb, hdr);
+ nlmsg_free(skb);
+ return err;
+}
+
+static int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+
+ if (!devlink->dpipe_headers)
+ return -EOPNOTSUPP;
+ return devlink_dpipe_headers_fill(info, DEVLINK_CMD_DPIPE_HEADERS_GET,
+ 0, devlink->dpipe_headers);
+}
+
+static int devlink_dpipe_table_counters_set(struct devlink *devlink,
+ const char *table_name,
+ bool enable)
+{
+ struct devlink_dpipe_table *table;
+
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name);
+ if (!table)
+ return -EINVAL;
+
+ if (table->counter_control_extern)
+ return -EOPNOTSUPP;
+
+ if (!(table->counters_enabled ^ enable))
+ return 0;
+
+ table->counters_enabled = enable;
+ if (table->table_ops->counters_set_update)
+ table->table_ops->counters_set_update(table->priv, enable);
+ return 0;
+}
+
+static int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const char *table_name;
+ bool counters_enable;
+
+ if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME] ||
+ !info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED])
+ return -EINVAL;
+
+ table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]);
+ counters_enable = !!nla_get_u8(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED]);
+
+ return devlink_dpipe_table_counters_set(devlink, table_name,
+ counters_enable);
}
static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
@@ -1512,6 +2190,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
[DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 },
[DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 },
};
static const struct genl_ops devlink_nl_ops[] = {
@@ -1644,6 +2324,34 @@ static const struct genl_ops devlink_nl_ops[] = {
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
},
+ {
+ .cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
+ .doit = devlink_nl_cmd_dpipe_table_get,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET,
+ .doit = devlink_nl_cmd_dpipe_entries_get,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET,
+ .doit = devlink_nl_cmd_dpipe_headers_get,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
+ {
+ .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET,
+ .doit = devlink_nl_cmd_dpipe_table_counters_set,
+ .policy = devlink_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+ },
};
static struct genl_family devlink_nl_family __ro_after_init = {
@@ -1680,6 +2388,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
devlink_net_set(devlink, &init_net);
INIT_LIST_HEAD(&devlink->port_list);
INIT_LIST_HEAD(&devlink->sb_list);
+ INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
return devlink;
}
EXPORT_SYMBOL_GPL(devlink_alloc);
@@ -1880,6 +2589,133 @@ void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index)
}
EXPORT_SYMBOL_GPL(devlink_sb_unregister);
+/**
+ * devlink_dpipe_headers_register - register dpipe headers
+ *
+ * @devlink: devlink
+ * @dpipe_headers: dpipe header array
+ *
+ * Register the headers supported by hardware.
+ */
+int devlink_dpipe_headers_register(struct devlink *devlink,
+ struct devlink_dpipe_headers *dpipe_headers)
+{
+ mutex_lock(&devlink_mutex);
+ devlink->dpipe_headers = dpipe_headers;
+ mutex_unlock(&devlink_mutex);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register);
+
+/**
+ * devlink_dpipe_headers_unregister - unregister dpipe headers
+ *
+ * @devlink: devlink
+ *
+ * Unregister the headers supported by hardware.
+ */
+void devlink_dpipe_headers_unregister(struct devlink *devlink)
+{
+ mutex_lock(&devlink_mutex);
+ devlink->dpipe_headers = NULL;
+ mutex_unlock(&devlink_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_headers_unregister);
+
+/**
+ * devlink_dpipe_table_counter_enabled - check if counter allocation
+ * required
+ * @devlink: devlink
+ * @table_name: tables name
+ *
+ * Used by driver to check if counter allocation is required.
+ * After counter allocation is turned on the table entries
+ * are updated to include counter statistics.
+ *
+ * After that point on the driver must respect the counter
+ * state so that each entry added to the table is added
+ * with a counter.
+ */
+bool devlink_dpipe_table_counter_enabled(struct devlink *devlink,
+ const char *table_name)
+{
+ struct devlink_dpipe_table *table;
+ bool enabled;
+
+ rcu_read_lock();
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name);
+ enabled = false;
+ if (table)
+ enabled = table->counters_enabled;
+ rcu_read_unlock();
+ return enabled;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled);
+
+/**
+ * devlink_dpipe_table_register - register dpipe table
+ *
+ * @devlink: devlink
+ * @table_name: table name
+ * @table_ops: table ops
+ * @priv: priv
+ * @size: size
+ * @counter_control_extern: external control for counters
+ */
+int devlink_dpipe_table_register(struct devlink *devlink,
+ const char *table_name,
+ struct devlink_dpipe_table_ops *table_ops,
+ void *priv, u64 size,
+ bool counter_control_extern)
+{
+ struct devlink_dpipe_table *table;
+
+ if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name))
+ return -EEXIST;
+
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ table->name = table_name;
+ table->table_ops = table_ops;
+ table->priv = priv;
+ table->size = size;
+ table->counter_control_extern = counter_control_extern;
+
+ mutex_lock(&devlink_mutex);
+ list_add_tail_rcu(&table->list, &devlink->dpipe_table_list);
+ mutex_unlock(&devlink_mutex);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_table_register);
+
+/**
+ * devlink_dpipe_table_unregister - unregister dpipe table
+ *
+ * @devlink: devlink
+ * @table_name: table name
+ */
+void devlink_dpipe_table_unregister(struct devlink *devlink,
+ const char *table_name)
+{
+ struct devlink_dpipe_table *table;
+
+ mutex_lock(&devlink_mutex);
+ table = devlink_dpipe_table_find(&devlink->dpipe_table_list,
+ table_name);
+ if (!table)
+ goto unlock;
+ list_del_rcu(&table->list);
+ mutex_unlock(&devlink_mutex);
+ kfree_rcu(table, rcu);
+ return;
+unlock:
+ mutex_unlock(&devlink_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister);
+
static int __init devlink_module_init(void)
{
return genl_register_family(&devlink_nl_family);
diff --git a/net/core/filter.c b/net/core/filter.c
index dfb9f61a2fd5..15e9a81ffebe 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3309,24 +3309,28 @@ static const struct bpf_verifier_ops tc_cls_act_ops = {
.is_valid_access = tc_cls_act_is_valid_access,
.convert_ctx_access = tc_cls_act_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
+ .test_run = bpf_prog_test_run_skb,
};
static const struct bpf_verifier_ops xdp_ops = {
.get_func_proto = xdp_func_proto,
.is_valid_access = xdp_is_valid_access,
.convert_ctx_access = xdp_convert_ctx_access,
+ .test_run = bpf_prog_test_run_xdp,
};
static const struct bpf_verifier_ops cg_skb_ops = {
.get_func_proto = cg_skb_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
+ .test_run = bpf_prog_test_run_skb,
};
static const struct bpf_verifier_ops lwt_inout_ops = {
.get_func_proto = lwt_inout_func_proto,
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
+ .test_run = bpf_prog_test_run_skb,
};
static const struct bpf_verifier_ops lwt_xmit_ops = {
@@ -3334,6 +3338,7 @@ static const struct bpf_verifier_ops lwt_xmit_ops = {
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
+ .test_run = bpf_prog_test_run_skb,
};
static const struct bpf_verifier_ops cg_sock_ops = {
diff --git a/net/core/flow.c b/net/core/flow.c
index f765c11d8df5..f7f5d1932a27 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -47,7 +47,7 @@ struct flow_flush_info {
static struct kmem_cache *flow_cachep __read_mostly;
-#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift)
+#define flow_cache_hash_size(cache) (1U << (cache)->hash_shift)
#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
static void flow_cache_new_hashrnd(unsigned long arg)
@@ -99,7 +99,8 @@ static void flow_cache_gc_task(struct work_struct *work)
}
static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
- int deleted, struct list_head *gc_list,
+ unsigned int deleted,
+ struct list_head *gc_list,
struct netns_xfrm *xfrm)
{
if (deleted) {
@@ -114,17 +115,18 @@ static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
static void __flow_cache_shrink(struct flow_cache *fc,
struct flow_cache_percpu *fcp,
- int shrink_to)
+ unsigned int shrink_to)
{
struct flow_cache_entry *fle;
struct hlist_node *tmp;
LIST_HEAD(gc_list);
- int i, deleted = 0;
+ unsigned int deleted = 0;
struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
flow_cache_global);
+ unsigned int i;
for (i = 0; i < flow_cache_hash_size(fc); i++) {
- int saved = 0;
+ unsigned int saved = 0;
hlist_for_each_entry_safe(fle, tmp,
&fcp->hash_table[i], u.hlist) {
@@ -145,7 +147,7 @@ static void __flow_cache_shrink(struct flow_cache *fc,
static void flow_cache_shrink(struct flow_cache *fc,
struct flow_cache_percpu *fcp)
{
- int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
+ unsigned int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
__flow_cache_shrink(fc, fcp, shrink_to);
}
@@ -161,7 +163,7 @@ static void flow_new_hash_rnd(struct flow_cache *fc,
static u32 flow_hash_code(struct flow_cache *fc,
struct flow_cache_percpu *fcp,
const struct flowi *key,
- size_t keysize)
+ unsigned int keysize)
{
const u32 *k = (const u32 *) key;
const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32);
@@ -174,7 +176,7 @@ static u32 flow_hash_code(struct flow_cache *fc,
* important assumptions that we can here, such as alignment.
*/
static int flow_key_compare(const struct flowi *key1, const struct flowi *key2,
- size_t keysize)
+ unsigned int keysize)
{
const flow_compare_t *k1, *k1_lim, *k2;
@@ -199,7 +201,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
struct flow_cache_percpu *fcp;
struct flow_cache_entry *fle, *tfle;
struct flow_cache_object *flo;
- size_t keysize;
+ unsigned int keysize;
unsigned int hash;
local_bh_disable();
@@ -295,9 +297,10 @@ static void flow_cache_flush_tasklet(unsigned long data)
struct flow_cache_entry *fle;
struct hlist_node *tmp;
LIST_HEAD(gc_list);
- int i, deleted = 0;
+ unsigned int deleted = 0;
struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
flow_cache_global);
+ unsigned int i;
fcp = this_cpu_ptr(fc->percpu);
for (i = 0; i < flow_cache_hash_size(fc); i++) {
@@ -327,7 +330,7 @@ static void flow_cache_flush_tasklet(unsigned long data)
static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu)
{
struct flow_cache_percpu *fcp;
- int i;
+ unsigned int i;
fcp = per_cpu_ptr(fc->percpu, cpu);
for (i = 0; i < flow_cache_hash_size(fc); i++)
@@ -402,12 +405,12 @@ void flow_cache_flush_deferred(struct net *net)
static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
{
struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
- size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
+ unsigned int sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
if (!fcp->hash_table) {
fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
if (!fcp->hash_table) {
- pr_err("NET: failed to allocate flow cache sz %zu\n", sz);
+ pr_err("NET: failed to allocate flow cache sz %u\n", sz);
return -ENOMEM;
}
fcp->hash_rnd_recalc = 1;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 5f3ae922fcd1..c9cf425303f8 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -159,7 +159,7 @@ __skb_flow_dissect_arp(const struct sk_buff *skb,
unsigned char ar_tip[4];
} *arp_eth, _arp_eth;
const struct arphdr *arp;
- struct arphdr *_arp;
+ struct arphdr _arp;
if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ARP))
return FLOW_DISSECT_RET_OUT_GOOD;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 7069f5e4a361..8ae87c591c8e 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -861,7 +861,8 @@ static void neigh_probe(struct neighbour *neigh)
if (skb)
skb = skb_clone(skb, GFP_ATOMIC);
write_unlock(&neigh->lock);
- neigh->ops->solicit(neigh, skb);
+ if (neigh->ops->solicit)
+ neigh->ops->solicit(neigh, skb);
atomic_inc(&neigh->probes);
kfree_skb(skb);
}
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 0f9275ee5595..1c4810919a0a 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -11,6 +11,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/module.h>
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9c3947a43eff..58419da7961b 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -4116,22 +4116,25 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
switch (event) {
- case NETDEV_UP:
- case NETDEV_DOWN:
- case NETDEV_PRE_UP:
- case NETDEV_POST_INIT:
- case NETDEV_REGISTER:
- case NETDEV_CHANGE:
- case NETDEV_PRE_TYPE_CHANGE:
- case NETDEV_GOING_DOWN:
- case NETDEV_UNREGISTER:
- case NETDEV_UNREGISTER_FINAL:
- case NETDEV_RELEASE:
- case NETDEV_JOIN:
- case NETDEV_BONDING_INFO:
+ case NETDEV_REBOOT:
+ case NETDEV_CHANGEMTU:
+ case NETDEV_CHANGEADDR:
+ case NETDEV_CHANGENAME:
+ case NETDEV_FEAT_CHANGE:
+ case NETDEV_BONDING_FAILOVER:
+ case NETDEV_POST_TYPE_CHANGE:
+ case NETDEV_NOTIFY_PEERS:
+ case NETDEV_CHANGEUPPER:
+ case NETDEV_RESEND_IGMP:
+ case NETDEV_PRECHANGEMTU:
+ case NETDEV_CHANGEINFODATA:
+ case NETDEV_PRECHANGEUPPER:
+ case NETDEV_CHANGELOWERSTATE:
+ case NETDEV_UDP_TUNNEL_PUSH_INFO:
+ case NETDEV_CHANGE_TX_QUEUE_LEN:
+ rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
break;
default:
- rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
break;
}
return NOTIFY_DONE;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index fb87e78a2cc7..6bd2f8fb0476 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -20,9 +20,11 @@
#include <net/tcp.h>
static siphash_key_t net_secret __read_mostly;
+static siphash_key_t ts_secret __read_mostly;
static __always_inline void net_secret_init(void)
{
+ net_get_random_once(&ts_secret, sizeof(ts_secret));
net_get_random_once(&net_secret, sizeof(net_secret));
}
#endif
@@ -45,6 +47,23 @@ static u32 seq_scale(u32 seq)
#endif
#if IS_ENABLED(CONFIG_IPV6)
+static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr)
+{
+ const struct {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ } __aligned(SIPHASH_ALIGNMENT) combined = {
+ .saddr = *(struct in6_addr *)saddr,
+ .daddr = *(struct in6_addr *)daddr,
+ };
+
+ if (sysctl_tcp_timestamps != 1)
+ return 0;
+
+ return siphash(&combined, offsetofend(typeof(combined), daddr),
+ &ts_secret);
+}
+
u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr,
__be16 sport, __be16 dport, u32 *tsoff)
{
@@ -63,7 +82,7 @@ u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr,
net_secret_init();
hash = siphash(&combined, offsetofend(typeof(combined), dport),
&net_secret);
- *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0;
+ *tsoff = secure_tcpv6_ts_off(saddr, daddr);
return seq_scale(hash);
}
EXPORT_SYMBOL(secure_tcpv6_seq_and_tsoff);
@@ -88,6 +107,14 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#endif
#ifdef CONFIG_INET
+static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr)
+{
+ if (sysctl_tcp_timestamps != 1)
+ return 0;
+
+ return siphash_2u32((__force u32)saddr, (__force u32)daddr,
+ &ts_secret);
+}
/* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
* but fortunately, `sport' cannot be 0 in any circumstances. If this changes,
@@ -102,7 +129,7 @@ u32 secure_tcp_seq_and_tsoff(__be32 saddr, __be32 daddr,
hash = siphash_3u32((__force u32)saddr, (__force u32)daddr,
(__force u32)sport << 16 | (__force u32)dport,
&net_secret);
- *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0;
+ *tsoff = secure_tcp_ts_off(saddr, daddr);
return seq_scale(hash);
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 1b9030ee6f4b..a06bb7a2a689 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1083,6 +1083,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
union {
int val;
+ u64 val64;
struct linger ling;
struct timeval tm;
} v;
@@ -1328,6 +1329,25 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
goto lenout;
}
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ case SO_INCOMING_NAPI_ID:
+ v.val = READ_ONCE(sk->sk_napi_id);
+
+ /* aggregate non-NAPI IDs down to 0 */
+ if (v.val < MIN_NAPI_ID)
+ v.val = 0;
+
+ break;
+#endif
+
+ case SO_COOKIE:
+ lv = sizeof(u64);
+ if (len < lv)
+ return -EINVAL;
+ v.val64 = sock_gen_cookie(sk);
+ break;
+
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
@@ -2601,7 +2621,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
- sk->sk_stamp = ktime_set(-1L, 0);
+ sk->sk_stamp = SK_DEFAULT_STAMP;
#ifdef CONFIG_NET_RX_BUSY_POLL
sk->sk_napi_id = 0;
@@ -3237,3 +3257,14 @@ static int __init proto_init(void)
subsys_initcall(proto_init);
#endif /* PROC_FS */
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+bool sk_busy_loop_end(void *p, unsigned long start_time)
+{
+ struct sock *sk = p;
+
+ return !skb_queue_empty(&sk->sk_receive_queue) ||
+ sk_busy_loop_timeout(sk, start_time);
+}
+EXPORT_SYMBOL(sk_busy_loop_end);
+#endif /* CONFIG_NET_RX_BUSY_POLL */
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 9a1a352fd1eb..eed1ebf7f29d 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -13,9 +13,9 @@
static DEFINE_SPINLOCK(reuseport_lock);
-static struct sock_reuseport *__reuseport_alloc(u16 max_socks)
+static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
{
- size_t size = sizeof(struct sock_reuseport) +
+ unsigned int size = sizeof(struct sock_reuseport) +
sizeof(struct sock *) * max_socks;
struct sock_reuseport *reuse = kzalloc(size, GFP_ATOMIC);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 4ead336e14ea..7f9cc400eca0 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -408,14 +408,16 @@ static struct ctl_table net_core_table[] = {
.data = &sysctl_net_busy_poll,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
},
{
.procname = "busy_read",
.data = &sysctl_net_busy_read,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
},
#endif
#ifdef CONFIG_NET_SCHED
diff --git a/net/core/utils.c b/net/core/utils.c
index 6592d7bbed39..d758880c09a7 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -51,7 +51,7 @@ EXPORT_SYMBOL(net_ratelimit);
__be32 in_aton(const char *str)
{
- unsigned long l;
+ unsigned int l;
unsigned int val;
int i;
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 9649238eef40..aa21f49f1215 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -6,7 +6,7 @@ config HAVE_NET_DSA
config NET_DSA
tristate "Distributed Switch Architecture"
- depends on HAVE_NET_DSA
+ depends on HAVE_NET_DSA && MAY_USE_DEVLINK
select NET_SWITCHDEV
select PHYLIB
---help---
@@ -31,4 +31,6 @@ config NET_DSA_TAG_TRAILER
config NET_DSA_TAG_QCA
bool
+config NET_DSA_TAG_MTK
+ bool
endif
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 31d343796251..9b1d478f3713 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -8,3 +8,4 @@ dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
+dsa_core-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index b6d4f6a23f06..1fb9cf7aaaf4 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -14,15 +14,17 @@
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
-#include <net/dsa.h>
#include <linux/of.h>
#include <linux/of_mdio.h>
#include <linux/of_platform.h>
#include <linux/of_net.h>
#include <linux/of_gpio.h>
+#include <linux/netdevice.h>
#include <linux/sysfs.h>
#include <linux/phy_fixed.h>
#include <linux/gpio/consumer.h>
+#include <linux/etherdevice.h>
+#include <net/dsa.h>
#include "dsa_priv.h"
static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb,
@@ -53,6 +55,9 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
#ifdef CONFIG_NET_DSA_TAG_QCA
[DSA_TAG_PROTO_QCA] = &qca_netdev_ops,
#endif
+#ifdef CONFIG_NET_DSA_TAG_MTK
+ [DSA_TAG_PROTO_MTK] = &mtk_netdev_ops,
+#endif
[DSA_TAG_PROTO_NONE] = &none_ops,
};
@@ -896,13 +901,34 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
+ struct sk_buff *nskb = NULL;
if (unlikely(dst == NULL)) {
kfree_skb(skb);
return 0;
}
- return dst->rcv(skb, dev, pt, orig_dev);
+ skb = skb_unshare(skb, GFP_ATOMIC);
+ if (!skb)
+ return 0;
+
+ nskb = dst->rcv(skb, dev, pt, orig_dev);
+ if (!nskb) {
+ kfree_skb(skb);
+ return 0;
+ }
+
+ skb = nskb;
+ skb_push(skb, ETH_HLEN);
+ skb->pkt_type = PACKET_HOST;
+ skb->protocol = eth_type_trans(skb, skb->dev);
+
+ skb->dev->stats.rx_packets++;
+ skb->dev->stats.rx_bytes += skb->len;
+
+ netif_receive_skb(skb);
+
+ return 0;
}
static struct packet_type dsa_pack_type __read_mostly = {
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 737be6470c7f..033b3bfb63dc 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -13,16 +13,20 @@
#include <linux/device.h>
#include <linux/err.h>
#include <linux/list.h>
+#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/rtnetlink.h>
-#include <net/dsa.h>
#include <linux/of.h>
#include <linux/of_net.h>
+#include <net/dsa.h>
#include "dsa_priv.h"
static LIST_HEAD(dsa_switch_trees);
static DEFINE_MUTEX(dsa2_mutex);
+static const struct devlink_ops dsa_devlink_ops = {
+};
+
static struct dsa_switch_tree *dsa_get_dst(u32 tree)
{
struct dsa_switch_tree *dst;
@@ -222,12 +226,18 @@ static int dsa_dsa_port_apply(struct dsa_port *port, u32 index,
return err;
}
- return 0;
+ memset(&ds->ports[index].devlink_port, 0,
+ sizeof(ds->ports[index].devlink_port));
+
+ return devlink_port_register(ds->devlink,
+ &ds->ports[index].devlink_port,
+ index);
}
static void dsa_dsa_port_unapply(struct dsa_port *port, u32 index,
struct dsa_switch *ds)
{
+ devlink_port_unregister(&ds->ports[index].devlink_port);
dsa_cpu_dsa_destroy(port);
}
@@ -245,12 +255,17 @@ static int dsa_cpu_port_apply(struct dsa_port *port, u32 index,
ds->cpu_port_mask |= BIT(index);
- return 0;
+ memset(&ds->ports[index].devlink_port, 0,
+ sizeof(ds->ports[index].devlink_port));
+ err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port,
+ index);
+ return err;
}
static void dsa_cpu_port_unapply(struct dsa_port *port, u32 index,
struct dsa_switch *ds)
{
+ devlink_port_unregister(&ds->ports[index].devlink_port);
dsa_cpu_dsa_destroy(port);
ds->cpu_port_mask &= ~BIT(index);
@@ -275,12 +290,23 @@ static int dsa_user_port_apply(struct dsa_port *port, u32 index,
return err;
}
+ memset(&ds->ports[index].devlink_port, 0,
+ sizeof(ds->ports[index].devlink_port));
+ err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port,
+ index);
+ if (err)
+ return err;
+
+ devlink_port_type_eth_set(&ds->ports[index].devlink_port,
+ ds->ports[index].netdev);
+
return 0;
}
static void dsa_user_port_unapply(struct dsa_port *port, u32 index,
struct dsa_switch *ds)
{
+ devlink_port_unregister(&ds->ports[index].devlink_port);
if (ds->ports[index].netdev) {
dsa_slave_destroy(ds->ports[index].netdev);
ds->ports[index].netdev = NULL;
@@ -301,6 +327,17 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
*/
ds->phys_mii_mask = ds->enabled_port_mask;
+ /* Add the switch to devlink before calling setup, so that setup can
+ * add dpipe tables
+ */
+ ds->devlink = devlink_alloc(&dsa_devlink_ops, 0);
+ if (!ds->devlink)
+ return -ENOMEM;
+
+ err = devlink_register(ds->devlink, ds->dev);
+ if (err)
+ return err;
+
err = ds->ops->setup(ds);
if (err < 0)
return err;
@@ -381,6 +418,13 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
mdiobus_unregister(ds->slave_mii_bus);
dsa_switch_unregister_notifier(ds);
+
+ if (ds->devlink) {
+ devlink_unregister(ds->devlink);
+ devlink_free(ds->devlink);
+ ds->devlink = NULL;
+ }
+
}
static int dsa_dst_apply(struct dsa_switch_tree *dst)
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 0706a511244e..107138a55bd8 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -17,8 +17,9 @@
struct dsa_device_ops {
struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
- int (*rcv)(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev);
+ struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev);
};
struct dsa_slave_priv {
@@ -85,4 +86,7 @@ extern const struct dsa_device_ops brcm_netdev_ops;
/* tag_qca.c */
extern const struct dsa_device_ops qca_netdev_ops;
+/* tag_mtk.c */
+extern const struct dsa_device_ops mtk_netdev_ops;
+
#endif
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 78128acfbf63..7693182df81e 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -17,6 +17,7 @@
#include <linux/of_mdio.h>
#include <linux/mdio.h>
#include <linux/list.h>
+#include <net/dsa.h>
#include <net/rtnetlink.h>
#include <net/switchdev.h>
#include <net/pkt_cls.h>
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 6456dacf9ae9..ca6e26e514f0 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -1,7 +1,8 @@
/*
* Handling of a single switch chip, part of a switch fabric
*
- * Copyright (c) 2017 Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ * Copyright (c) 2017 Savoir-faire Linux Inc.
+ * Vivien Didelot <vivien.didelot@savoirfairelinux.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,9 +20,9 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds,
if (ds->index == info->sw_index && ds->ops->port_bridge_join)
return ds->ops->port_bridge_join(ds, info->port, info->br);
- if (ds->index != info->sw_index)
- dev_dbg(ds->dev, "crosschip DSA port %d.%d bridged to %s\n",
- info->sw_index, info->port, netdev_name(info->br));
+ if (ds->index != info->sw_index && ds->ops->crosschip_bridge_join)
+ return ds->ops->crosschip_bridge_join(ds, info->sw_index,
+ info->port, info->br);
return 0;
}
@@ -32,9 +33,9 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
if (ds->index == info->sw_index && ds->ops->port_bridge_leave)
ds->ops->port_bridge_leave(ds, info->port, info->br);
- if (ds->index != info->sw_index)
- dev_dbg(ds->dev, "crosschip DSA port %d.%d unbridged from %s\n",
- info->sw_index, info->port, netdev_name(info->br));
+ if (ds->index != info->sw_index && ds->ops->crosschip_bridge_leave)
+ ds->ops->crosschip_bridge_leave(ds, info->sw_index, info->port,
+ info->br);
return 0;
}
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 5d925b6b2bb1..2a9b52c5af86 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -12,6 +12,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <net/dsa.h>
#include "dsa_priv.h"
/* This tag length is 4 bytes, older ones were 6 bytes, we do not
@@ -91,23 +92,17 @@ out_free:
return NULL;
}
-static int brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_switch *ds;
int source_port;
u8 *brcm_tag;
- if (unlikely(dst == NULL))
- goto out_drop;
-
ds = dst->cpu_switch;
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (skb == NULL)
- goto out;
-
if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN)))
goto out_drop;
@@ -139,22 +134,12 @@ static int brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
skb->data - ETH_HLEN - BRCM_TAG_LEN,
2 * ETH_ALEN);
- skb_push(skb, ETH_HLEN);
- skb->pkt_type = PACKET_HOST;
skb->dev = ds->ports[source_port].netdev;
- skb->protocol = eth_type_trans(skb, skb->dev);
-
- skb->dev->stats.rx_packets++;
- skb->dev->stats.rx_bytes += skb->len;
- netif_receive_skb(skb);
-
- return 0;
+ return skb;
out_drop:
- kfree_skb(skb);
-out:
- return 0;
+ return NULL;
}
const struct dsa_device_ops brcm_netdev_ops = {
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 72579ceea381..1c6633f0de01 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -11,6 +11,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <net/dsa.h>
#include "dsa_priv.h"
#define DSA_HLEN 4
@@ -67,8 +68,9 @@ out_free:
return NULL;
}
-static int dsa_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_switch *ds;
@@ -76,13 +78,6 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev,
int source_device;
int source_port;
- if (unlikely(dst == NULL))
- goto out_drop;
-
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (skb == NULL)
- goto out;
-
if (unlikely(!pskb_may_pull(skb, DSA_HLEN)))
goto out_drop;
@@ -164,21 +159,11 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev,
}
skb->dev = ds->ports[source_port].netdev;
- skb_push(skb, ETH_HLEN);
- skb->pkt_type = PACKET_HOST;
- skb->protocol = eth_type_trans(skb, skb->dev);
-
- skb->dev->stats.rx_packets++;
- skb->dev->stats.rx_bytes += skb->len;
- netif_receive_skb(skb);
-
- return 0;
+ return skb;
out_drop:
- kfree_skb(skb);
-out:
- return 0;
+ return NULL;
}
const struct dsa_device_ops dsa_netdev_ops = {
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 648c051817a1..d9c668aa5e54 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -11,6 +11,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <net/dsa.h>
#include "dsa_priv.h"
#define DSA_HLEN 4
@@ -80,8 +81,9 @@ out_free:
return NULL;
}
-static int edsa_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_switch *ds;
@@ -89,13 +91,6 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev,
int source_device;
int source_port;
- if (unlikely(dst == NULL))
- goto out_drop;
-
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (skb == NULL)
- goto out;
-
if (unlikely(!pskb_may_pull(skb, EDSA_HLEN)))
goto out_drop;
@@ -183,21 +178,11 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev,
}
skb->dev = ds->ports[source_port].netdev;
- skb_push(skb, ETH_HLEN);
- skb->pkt_type = PACKET_HOST;
- skb->protocol = eth_type_trans(skb, skb->dev);
-
- skb->dev->stats.rx_packets++;
- skb->dev->stats.rx_bytes += skb->len;
- netif_receive_skb(skb);
-
- return 0;
+ return skb;
out_drop:
- kfree_skb(skb);
-out:
- return 0;
+ return NULL;
}
const struct dsa_device_ops edsa_netdev_ops = {
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
new file mode 100644
index 000000000000..837cdddb53f0
--- /dev/null
+++ b/net/dsa/tag_mtk.c
@@ -0,0 +1,100 @@
+/*
+ * Mediatek DSA Tag support
+ * Copyright (C) 2017 Landen Chao <landen.chao@mediatek.com>
+ * Sean Wang <sean.wang@mediatek.com>
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/etherdevice.h>
+#include <net/dsa.h>
+#include "dsa_priv.h"
+
+#define MTK_HDR_LEN 4
+#define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0)
+#define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0)
+
+static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ u8 *mtk_tag;
+
+ if (skb_cow_head(skb, MTK_HDR_LEN) < 0)
+ goto out_free;
+
+ skb_push(skb, MTK_HDR_LEN);
+
+ memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN);
+
+ /* Build the tag after the MAC Source Address */
+ mtk_tag = skb->data + 2 * ETH_ALEN;
+ mtk_tag[0] = 0;
+ mtk_tag[1] = (1 << p->dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
+ mtk_tag[2] = 0;
+ mtk_tag[3] = 0;
+
+ return skb;
+
+out_free:
+ kfree_skb(skb);
+ return NULL;
+}
+
+static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev)
+{
+ struct dsa_switch_tree *dst = dev->dsa_ptr;
+ struct dsa_switch *ds;
+ int port;
+ __be16 *phdr, hdr;
+
+ if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN)))
+ goto out_drop;
+
+ /* The MTK header is added by the switch between src addr
+ * and ethertype at this point, skb->data points to 2 bytes
+ * after src addr so header should be 2 bytes right before.
+ */
+ phdr = (__be16 *)(skb->data - 2);
+ hdr = ntohs(*phdr);
+
+ /* Remove MTK tag and recalculate checksum. */
+ skb_pull_rcsum(skb, MTK_HDR_LEN);
+
+ memmove(skb->data - ETH_HLEN,
+ skb->data - ETH_HLEN - MTK_HDR_LEN,
+ 2 * ETH_ALEN);
+
+ /* This protocol doesn't support cascading multiple
+ * switches so it's safe to assume the switch is first
+ * in the tree.
+ */
+ ds = dst->ds[0];
+ if (!ds)
+ goto out_drop;
+
+ /* Get source port information */
+ port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK);
+ if (!ds->ports[port].netdev)
+ goto out_drop;
+
+ skb->dev = ds->ports[port].netdev;
+
+ return skb;
+
+out_drop:
+ return NULL;
+}
+
+const struct dsa_device_ops mtk_netdev_ops = {
+ .xmit = mtk_tag_xmit,
+ .rcv = mtk_tag_rcv,
+};
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 30240f343aea..3ba3f59f7a34 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -12,6 +12,7 @@
*/
#include <linux/etherdevice.h>
+#include <net/dsa.h>
#include "dsa_priv.h"
#define QCA_HDR_LEN 2
@@ -65,8 +66,9 @@ out_free:
return NULL;
}
-static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_switch *ds;
@@ -74,13 +76,6 @@ static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
int port;
__be16 *phdr, hdr;
- if (unlikely(!dst))
- goto out_drop;
-
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (!skb)
- goto out;
-
if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN)))
goto out_drop;
@@ -114,22 +109,12 @@ static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
goto out_drop;
/* Update skb & forward the frame accordingly */
- skb_push(skb, ETH_HLEN);
- skb->pkt_type = PACKET_HOST;
skb->dev = ds->ports[port].netdev;
- skb->protocol = eth_type_trans(skb, skb->dev);
-
- skb->dev->stats.rx_packets++;
- skb->dev->stats.rx_bytes += skb->len;
- netif_receive_skb(skb);
-
- return 0;
+ return skb;
out_drop:
- kfree_skb(skb);
-out:
- return 0;
+ return NULL;
}
const struct dsa_device_ops qca_netdev_ops = {
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index 26f977176978..aafc2fc74c30 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -11,6 +11,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <net/dsa.h>
#include "dsa_priv.h"
static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -57,22 +58,17 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
return nskb;
}
-static int trailer_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_switch *ds;
u8 *trailer;
int source_port;
- if (unlikely(dst == NULL))
- goto out_drop;
ds = dst->cpu_switch;
- skb = skb_unshare(skb, GFP_ATOMIC);
- if (skb == NULL)
- goto out;
-
if (skb_linearize(skb))
goto out_drop;
@@ -88,21 +84,11 @@ static int trailer_rcv(struct sk_buff *skb, struct net_device *dev,
pskb_trim_rcsum(skb, skb->len - 4);
skb->dev = ds->ports[source_port].netdev;
- skb_push(skb, ETH_HLEN);
- skb->pkt_type = PACKET_HOST;
- skb->protocol = eth_type_trans(skb, skb->dev);
-
- skb->dev->stats.rx_packets++;
- skb->dev->stats.rx_bytes += skb->len;
- netif_receive_skb(skb);
-
- return 0;
+ return skb;
out_drop:
- kfree_skb(skb);
-out:
- return 0;
+ return NULL;
}
const struct dsa_device_ops trailer_netdev_ops = {
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6b1fc6e4278e..d1a11707a126 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1599,8 +1599,9 @@ static const struct net_protocol igmp_protocol = {
};
#endif
-static const struct net_protocol tcp_protocol = {
+static struct net_protocol tcp_protocol = {
.early_demux = tcp_v4_early_demux,
+ .early_demux_handler = tcp_v4_early_demux,
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
.no_policy = 1,
@@ -1608,8 +1609,9 @@ static const struct net_protocol tcp_protocol = {
.icmp_strict_tag_validation = 1,
};
-static const struct net_protocol udp_protocol = {
+static struct net_protocol udp_protocol = {
.early_demux = udp_v4_early_demux,
+ .early_demux_handler = udp_v4_early_demux,
.handler = udp_rcv,
.err_handler = udp_err,
.no_policy = 1,
@@ -1720,6 +1722,8 @@ static __net_init int inet_init_net(struct net *net)
net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
net->ipv4.sysctl_ip_dynaddr = 0;
net->ipv4.sysctl_ip_early_demux = 1;
+ net->ipv4.sysctl_udp_early_demux = 1;
+ net->ipv4.sysctl_tcp_early_demux = 1;
#ifdef CONFIG_SYSCTL
net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
#endif
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 927f1d4b8c80..6d3602ec640c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1802,6 +1802,9 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
goto nla_put_failure;
+ if (!devconf)
+ goto out;
+
if ((all || type == NETCONFA_FORWARDING) &&
nla_put_s32(skb, NETCONFA_FORWARDING,
IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
@@ -1823,6 +1826,7 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
goto nla_put_failure;
+out:
nlmsg_end(skb, nlh);
return 0;
@@ -1831,8 +1835,8 @@ nla_put_failure:
return -EMSGSIZE;
}
-void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
- struct ipv4_devconf *devconf)
+void inet_netconf_notify_devconf(struct net *net, int event, int type,
+ int ifindex, struct ipv4_devconf *devconf)
{
struct sk_buff *skb;
int err = -ENOBUFS;
@@ -1842,7 +1846,7 @@ void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
goto errout;
err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
- RTM_NEWNETCONF, 0, type);
+ event, 0, type);
if (err < 0) {
/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
WARN_ON(err == -EMSGSIZE);
@@ -2021,10 +2025,12 @@ static void inet_forward_change(struct net *net)
IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
- inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv4.devconf_all);
- inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
NETCONFA_IFINDEX_DEFAULT,
net->ipv4.devconf_dflt);
@@ -2037,7 +2043,8 @@ static void inet_forward_change(struct net *net)
in_dev = __in_dev_get_rtnl(dev);
if (in_dev) {
IN_DEV_CONF_SET(in_dev, FORWARDING, on);
- inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
dev->ifindex, &in_dev->cnf);
}
}
@@ -2082,19 +2089,22 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write,
if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
new_value != old_value) {
ifindex = devinet_conf_ifindex(net, cnf);
- inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_RP_FILTER,
ifindex, cnf);
}
if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
new_value != old_value) {
ifindex = devinet_conf_ifindex(net, cnf);
- inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_PROXY_NEIGH,
ifindex, cnf);
}
if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
new_value != old_value) {
ifindex = devinet_conf_ifindex(net, cnf);
- inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
ifindex, cnf);
}
}
@@ -2129,7 +2139,7 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
container_of(cnf, struct in_device, cnf);
if (*valp)
dev_disable_lro(idev->dev);
- inet_netconf_notify_devconf(net,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_FORWARDING,
idev->dev->ifindex,
cnf);
@@ -2137,7 +2147,8 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
rtnl_unlock();
rt_cache_flush(net);
} else
- inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
NETCONFA_IFINDEX_DEFAULT,
net->ipv4.devconf_dflt);
}
@@ -2259,7 +2270,8 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
p->sysctl = t;
- inet_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p);
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
+ ifindex, p);
return 0;
free:
@@ -2268,16 +2280,18 @@ out:
return -ENOBUFS;
}
-static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
+static void __devinet_sysctl_unregister(struct net *net,
+ struct ipv4_devconf *cnf, int ifindex)
{
struct devinet_sysctl_table *t = cnf->sysctl;
- if (!t)
- return;
+ if (t) {
+ cnf->sysctl = NULL;
+ unregister_net_sysctl_table(t->sysctl_header);
+ kfree(t);
+ }
- cnf->sysctl = NULL;
- unregister_net_sysctl_table(t->sysctl_header);
- kfree(t);
+ inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
}
static int devinet_sysctl_register(struct in_device *idev)
@@ -2299,7 +2313,9 @@ static int devinet_sysctl_register(struct in_device *idev)
static void devinet_sysctl_unregister(struct in_device *idev)
{
- __devinet_sysctl_unregister(&idev->cnf);
+ struct net *net = dev_net(idev->dev);
+
+ __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
neigh_sysctl_unregister(idev->arp_parms);
}
@@ -2374,9 +2390,9 @@ static __net_init int devinet_init_net(struct net *net)
#ifdef CONFIG_SYSCTL
err_reg_ctl:
- __devinet_sysctl_unregister(dflt);
+ __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
err_reg_dflt:
- __devinet_sysctl_unregister(all);
+ __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
err_reg_all:
if (tbl != ctl_forward_entry)
kfree(tbl);
@@ -2398,8 +2414,10 @@ static __net_exit void devinet_exit_net(struct net *net)
tbl = net->ipv4.forw_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->ipv4.forw_hdr);
- __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
- __devinet_sysctl_unregister(net->ipv4.devconf_all);
+ __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
+ NETCONFA_IFINDEX_DEFAULT);
+ __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
+ NETCONFA_IFINDEX_ALL);
kfree(tbl);
#endif
kfree(net->ipv4.devconf_dflt);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d6feabb03516..fa2dc8f692c6 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -313,6 +313,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
struct net_device *dev = skb->dev;
+ void (*edemux)(struct sk_buff *skb);
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
@@ -329,8 +330,8 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
int protocol = iph->protocol;
ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot && ipprot->early_demux) {
- ipprot->early_demux(skb);
+ if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
+ edemux(skb);
/* must reload iph, skb->head might have changed */
iph = ip_hdr(skb);
}
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index fd9f34bbd740..c3b12b1c7162 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -57,6 +57,7 @@
#include <linux/export.h>
#include <net/net_namespace.h>
#include <net/arp.h>
+#include <net/dsa.h>
#include <net/ip.h>
#include <net/ipconfig.h>
#include <net/route.h>
@@ -306,7 +307,7 @@ static void __init ic_close_devs(void)
while ((d = next)) {
next = d->next;
dev = d->dev;
- if ((!ic_dev || dev != ic_dev->dev) && !netdev_uses_dsa(dev)) {
+ if (d != ic_dev && !netdev_uses_dsa(dev)) {
pr_debug("IP-Config: Downing %s\n", dev->name);
dev_change_flags(dev, d->flags);
}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c0317c940bcd..5bca64fc71b7 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -631,7 +631,7 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
in_dev = __in_dev_get_rtnl(dev);
if (in_dev) {
IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
- inet_netconf_notify_devconf(dev_net(dev),
+ inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
dev->ifindex, &in_dev->cnf);
ip_rt_multicast_event(in_dev);
@@ -820,8 +820,8 @@ static int vif_add(struct net *net, struct mr_table *mrt,
return -EADDRNOTAVAIL;
}
IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
- inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, dev->ifindex,
- &in_dev->cnf);
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING,
+ dev->ifindex, &in_dev->cnf);
ip_rt_multicast_event(in_dev);
/* Fill in the VIF structures */
@@ -1282,7 +1282,8 @@ static void mrtsock_destruct(struct sock *sk)
ipmr_for_each_table(mrt, net) {
if (sk == rtnl_dereference(mrt->mroute_sk)) {
IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
- inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_MC_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv4.devconf_all);
RCU_INIT_POINTER(mrt->mroute_sk, NULL);
@@ -1344,7 +1345,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
if (ret == 0) {
rcu_assign_pointer(mrt->mroute_sk, sk);
IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
- inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+ inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_MC_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv4.devconf_all);
}
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index ef49989c93b1..da04b9c33ef3 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1249,16 +1249,6 @@ static const struct nf_conntrack_expect_policy snmp_exp_policy = {
.timeout = 180,
};
-static struct nf_conntrack_helper snmp_helper __read_mostly = {
- .me = THIS_MODULE,
- .help = help,
- .expect_policy = &snmp_exp_policy,
- .name = "snmp",
- .tuple.src.l3num = AF_INET,
- .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT),
- .tuple.dst.protonum = IPPROTO_UDP,
-};
-
static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
.me = THIS_MODULE,
.help = help,
@@ -1277,22 +1267,16 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
static int __init nf_nat_snmp_basic_init(void)
{
- int ret = 0;
-
BUG_ON(nf_nat_snmp_hook != NULL);
RCU_INIT_POINTER(nf_nat_snmp_hook, help);
- ret = nf_conntrack_helper_register(&snmp_trap_helper);
- if (ret < 0) {
- nf_conntrack_helper_unregister(&snmp_helper);
- return ret;
- }
- return ret;
+ return nf_conntrack_helper_register(&snmp_trap_helper);
}
static void __exit nf_nat_snmp_basic_fini(void)
{
RCU_INIT_POINTER(nf_nat_snmp_hook, NULL);
+ synchronize_rcu();
nf_conntrack_helper_unregister(&snmp_trap_helper);
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 2af6244b83e2..ccfbce13a633 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -156,17 +156,18 @@ int ping_hash(struct sock *sk)
void ping_unhash(struct sock *sk)
{
struct inet_sock *isk = inet_sk(sk);
+
pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
+ write_lock_bh(&ping_table.lock);
if (sk_hashed(sk)) {
- write_lock_bh(&ping_table.lock);
hlist_nulls_del(&sk->sk_nulls_node);
sk_nulls_node_init(&sk->sk_nulls_node);
sock_put(sk);
isk->inet_num = 0;
isk->inet_sport = 0;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
- write_unlock_bh(&ping_table.lock);
}
+ write_unlock_bh(&ping_table.lock);
}
EXPORT_SYMBOL_GPL(ping_unhash);
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 4b7c0ec65251..32a691b7ce2c 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -28,7 +28,7 @@
#include <linux/spinlock.h>
#include <net/protocol.h>
-const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
+struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly;
EXPORT_SYMBOL(inet_offloads);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5dda1ef81c7e..5e1e60546fce 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2667,10 +2667,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
skb_reset_mac_header(skb);
skb_reset_network_header(skb);
- /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
- ip_hdr(skb)->protocol = IPPROTO_ICMP;
- skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
-
src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
@@ -2680,6 +2676,15 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
else
uid = (iif ? INVALID_UID : current_uid());
+ /* Bugfix: need to give ip_route_input enough of an IP header to
+ * not gag.
+ */
+ ip_hdr(skb)->protocol = IPPROTO_UDP;
+ ip_hdr(skb)->saddr = src;
+ ip_hdr(skb)->daddr = dst;
+
+ skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
+
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = dst;
fl4.saddr = src;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 711c3e2e17b1..6fb25693c00b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -24,6 +24,7 @@
#include <net/cipso_ipv4.h>
#include <net/inet_frag.h>
#include <net/ping.h>
+#include <net/protocol.h>
static int zero;
static int one = 1;
@@ -294,6 +295,58 @@ bad_key:
return ret;
}
+static void proc_configure_early_demux(int enabled, int protocol)
+{
+ struct net_protocol *ipprot;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_protocol *ip6prot;
+#endif
+
+ ipprot = rcu_dereference(inet_protos[protocol]);
+ if (ipprot)
+ ipprot->early_demux = enabled ? ipprot->early_demux_handler :
+ NULL;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ ip6prot = rcu_dereference(inet6_protos[protocol]);
+ if (ip6prot)
+ ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
+ NULL;
+#endif
+}
+
+static int proc_tcp_early_demux(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret = 0;
+
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+ if (write && !ret) {
+ int enabled = init_net.ipv4.sysctl_tcp_early_demux;
+
+ proc_configure_early_demux(enabled, IPPROTO_TCP);
+ }
+
+ return ret;
+}
+
+static int proc_udp_early_demux(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret = 0;
+
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+ if (write && !ret) {
+ int enabled = init_net.ipv4.sysctl_udp_early_demux;
+
+ proc_configure_early_demux(enabled, IPPROTO_UDP);
+ }
+
+ return ret;
+}
+
static struct ctl_table ipv4_table[] = {
{
.procname = "tcp_timestamps",
@@ -750,6 +803,20 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "udp_early_demux",
+ .data = &init_net.ipv4.sysctl_udp_early_demux,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_udp_early_demux
+ },
+ {
+ .procname = "tcp_early_demux",
+ .data = &init_net.ipv4.sysctl_tcp_early_demux,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_tcp_early_demux
+ },
+ {
.procname = "ip_default_ttl",
.data = &init_net.ipv4.sysctl_ip_default_ttl,
.maxlen = sizeof(int),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1665948dff8c..94f0b5b50e0d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2393,7 +2393,7 @@ static int tcp_repair_options_est(struct tcp_sock *tp,
u16 snd_wscale = opt.opt_val & 0xFFFF;
u16 rcv_wscale = opt.opt_val >> 16;
- if (snd_wscale > 14 || rcv_wscale > 14)
+ if (snd_wscale > TCP_MAX_WSCALE || rcv_wscale > TCP_MAX_WSCALE)
return -EFBIG;
tp->rx_opt.snd_wscale = snd_wscale;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a75c48f62e27..31f2765ef851 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -126,7 +126,8 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
#define REXMIT_LOST 1 /* retransmit packets marked lost */
#define REXMIT_NEW 2 /* FRTO-style transmit of unsent/new packets */
-static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb)
+static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
+ unsigned int len)
{
static bool __once __read_mostly;
@@ -137,8 +138,9 @@ static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb)
rcu_read_lock();
dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
- pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
- dev ? dev->name : "Unknown driver");
+ if (!dev || len >= dev->mtu)
+ pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
+ dev ? dev->name : "Unknown driver");
rcu_read_unlock();
}
}
@@ -161,8 +163,10 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
if (len >= icsk->icsk_ack.rcv_mss) {
icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
tcp_sk(sk)->advmss);
- if (unlikely(icsk->icsk_ack.rcv_mss != len))
- tcp_gro_dev_warn(sk, skb);
+ /* Account for possibly-removed options */
+ if (unlikely(len > icsk->icsk_ack.rcv_mss +
+ MAX_TCP_OPTION_SPACE))
+ tcp_gro_dev_warn(sk, skb, len);
} else {
/* Otherwise, we make more careful check taking into account,
* that SACKs block is variable.
@@ -874,22 +878,11 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
const int ts)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (metric > tp->reordering) {
- int mib_idx;
+ int mib_idx;
+ if (metric > tp->reordering) {
tp->reordering = min(sysctl_tcp_max_reordering, metric);
- /* This exciting event is worth to be remembered. 8) */
- if (ts)
- mib_idx = LINUX_MIB_TCPTSREORDER;
- else if (tcp_is_reno(tp))
- mib_idx = LINUX_MIB_TCPRENOREORDER;
- else if (tcp_is_fack(tp))
- mib_idx = LINUX_MIB_TCPFACKREORDER;
- else
- mib_idx = LINUX_MIB_TCPSACKREORDER;
-
- NET_INC_STATS(sock_net(sk), mib_idx);
#if FASTRETRANS_DEBUG > 1
pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
@@ -902,6 +895,18 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
}
tp->rack.reord = 1;
+
+ /* This exciting event is worth to be remembered. 8) */
+ if (ts)
+ mib_idx = LINUX_MIB_TCPTSREORDER;
+ else if (tcp_is_reno(tp))
+ mib_idx = LINUX_MIB_TCPRENOREORDER;
+ else if (tcp_is_fack(tp))
+ mib_idx = LINUX_MIB_TCPFACKREORDER;
+ else
+ mib_idx = LINUX_MIB_TCPSACKREORDER;
+
+ NET_INC_STATS(sock_net(sk), mib_idx);
}
/* This must be called before lost_out is incremented */
@@ -3759,11 +3764,12 @@ void tcp_parse_options(const struct sk_buff *skb,
!estab && sysctl_tcp_window_scaling) {
__u8 snd_wscale = *(__u8 *)ptr;
opt_rx->wscale_ok = 1;
- if (snd_wscale > 14) {
- net_info_ratelimited("%s: Illegal window scaling value %d >14 received\n",
+ if (snd_wscale > TCP_MAX_WSCALE) {
+ net_info_ratelimited("%s: Illegal window scaling value %d > %u received\n",
__func__,
- snd_wscale);
- snd_wscale = 14;
+ snd_wscale,
+ TCP_MAX_WSCALE);
+ snd_wscale = TCP_MAX_WSCALE;
}
opt_rx->snd_wscale = snd_wscale;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7482b5d11861..20cbd2f07f28 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1409,8 +1409,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
if (!nsk)
goto discard;
if (nsk != sk) {
- sock_rps_save_rxhash(nsk, skb);
- sk_mark_napi_id(nsk, skb);
if (tcp_child_process(sk, nsk, skb)) {
rsk = nsk;
goto reset;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 1e217948be62..8f6373b0cd77 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -26,6 +26,7 @@
#include <net/tcp.h>
#include <net/inet_common.h>
#include <net/xfrm.h>
+#include <net/busy_poll.h>
int sysctl_tcp_abort_on_overflow __read_mostly;
@@ -799,6 +800,9 @@ int tcp_child_process(struct sock *parent, struct sock *child,
int ret = 0;
int state = child->sk_state;
+ /* record NAPI ID of child */
+ sk_mark_napi_id(child, skb);
+
tcp_segs_in(tcp_sk(child), skb);
if (!sock_owned_by_user(child)) {
ret = tcp_rcv_state_process(child, skb);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 22548b5f05cb..0e807a83c1bc 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -212,12 +212,12 @@ void tcp_select_initial_window(int __space, __u32 mss,
/* If no clamp set the clamp to the max possible scaled window */
if (*window_clamp == 0)
- (*window_clamp) = (65535 << 14);
+ (*window_clamp) = (U16_MAX << TCP_MAX_WSCALE);
space = min(*window_clamp, space);
/* Quantize space offering to a multiple of mss if possible. */
if (space > mss)
- space = (space / mss) * mss;
+ space = rounddown(space, mss);
/* NOTE: offering an initial window larger than 32767
* will break some buggy TCP stacks. If the admin tells us
@@ -234,13 +234,11 @@ void tcp_select_initial_window(int __space, __u32 mss,
(*rcv_wscale) = 0;
if (wscale_ok) {
- /* Set window scaling on max possible window
- * See RFC1323 for an explanation of the limit to 14
- */
+ /* Set window scaling on max possible window */
space = max_t(u32, space, sysctl_tcp_rmem[2]);
space = max_t(u32, space, sysctl_rmem_max);
space = min_t(u32, space, *window_clamp);
- while (space > 65535 && (*rcv_wscale) < 14) {
+ while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
space >>= 1;
(*rcv_wscale)++;
}
@@ -253,7 +251,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
}
/* Set the clamp no higher than max representable value */
- (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
+ (*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
}
EXPORT_SYMBOL(tcp_select_initial_window);
@@ -2561,7 +2559,6 @@ u32 __tcp_select_window(struct sock *sk)
/* Don't do rounding if we are using window scaling, since the
* scaled window will not line up with the MSS boundary anyway.
*/
- window = tp->rcv_wnd;
if (tp->rx_opt.rcv_wscale) {
window = free_space;
@@ -2569,10 +2566,9 @@ u32 __tcp_select_window(struct sock *sk)
* Import case: prevent zero window announcement if
* 1<<rcv_wscale > mss.
*/
- if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
- window = (((window >> tp->rx_opt.rcv_wscale) + 1)
- << tp->rx_opt.rcv_wscale);
+ window = ALIGN(window, (1 << tp->rx_opt.rcv_wscale));
} else {
+ window = tp->rcv_wnd;
/* Get the largest window that is a nice multiple of mss.
* Window clamp already applied above.
* If our current window offering is within 1 mss of the
@@ -2582,7 +2578,7 @@ u32 __tcp_select_window(struct sock *sk)
* is too small.
*/
if (window <= free_space - mss || window > free_space)
- window = (free_space / mss) * mss;
+ window = rounddown(free_space, mss);
else if (mss == full_space &&
free_space > window + (full_space >> 1))
window = free_space;
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 4ecb38ae8504..d8acbd9f477a 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -12,7 +12,8 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
/* Account for retransmits that are lost again */
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
tp->retrans_out -= tcp_skb_pcount(skb);
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT,
+ tcp_skb_pcount(skb));
}
}
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index e2afe677a9d9..48c452959d2c 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -307,6 +307,7 @@ config IPV6_SEG6_LWTUNNEL
bool "IPv6: Segment Routing Header encapsulation support"
depends on IPV6
select LWTUNNEL
+ select DST_CACHE
---help---
Support for encapsulation of packets within an outer IPv6
header and a Segment Routing Header using the lightweight
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index dff5beb26a01..67ec87ea5fb6 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -549,6 +549,9 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
goto nla_put_failure;
+ if (!devconf)
+ goto out;
+
if ((all || type == NETCONFA_FORWARDING) &&
nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0)
goto nla_put_failure;
@@ -567,6 +570,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
devconf->ignore_routes_with_linkdown) < 0)
goto nla_put_failure;
+out:
nlmsg_end(skb, nlh);
return 0;
@@ -575,8 +579,8 @@ nla_put_failure:
return -EMSGSIZE;
}
-void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
- struct ipv6_devconf *devconf)
+void inet6_netconf_notify_devconf(struct net *net, int event, int type,
+ int ifindex, struct ipv6_devconf *devconf)
{
struct sk_buff *skb;
int err = -ENOBUFS;
@@ -586,7 +590,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex,
goto errout;
err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
- RTM_NEWNETCONF, 0, type);
+ event, 0, type);
if (err < 0) {
/* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */
WARN_ON(err == -EMSGSIZE);
@@ -769,7 +773,8 @@ static void dev_forward_change(struct inet6_dev *idev)
else
addrconf_leave_anycast(ifa);
}
- inet6_netconf_notify_devconf(dev_net(dev), NETCONFA_FORWARDING,
+ inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
dev->ifindex, &idev->cnf);
}
@@ -804,7 +809,8 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
if (p == &net->ipv6.devconf_dflt->forwarding) {
if ((!newf) ^ (!old))
- inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt);
rtnl_unlock();
@@ -816,13 +822,15 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
net->ipv6.devconf_dflt->forwarding = newf;
if ((!newf) ^ (!old_dflt))
- inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt);
addrconf_forward_change(net, newf);
if ((!newf) ^ (!old))
- inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
} else if ((!newf) ^ (!old))
@@ -847,6 +855,7 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf)
idev->cnf.ignore_routes_with_linkdown = newf;
if (changed)
inet6_netconf_notify_devconf(dev_net(dev),
+ RTM_NEWNETCONF,
NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
dev->ifindex,
&idev->cnf);
@@ -869,6 +878,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf)
if (p == &net->ipv6.devconf_dflt->ignore_routes_with_linkdown) {
if ((!newf) ^ (!old))
inet6_netconf_notify_devconf(net,
+ RTM_NEWNETCONF,
NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt);
@@ -881,6 +891,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf)
addrconf_linkdown_change(net, newf);
if ((!newf) ^ (!old))
inet6_netconf_notify_devconf(net,
+ RTM_NEWNETCONF,
NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
@@ -5675,17 +5686,20 @@ int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write,
return restart_syscall();
if (valp == &net->ipv6.devconf_dflt->proxy_ndp)
- inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_PROXY_NEIGH,
NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt);
else if (valp == &net->ipv6.devconf_all->proxy_ndp)
- inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_PROXY_NEIGH,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
else {
struct inet6_dev *idev = ctl->extra1;
- inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_PROXY_NEIGH,
idev->dev->ifindex,
&idev->cnf);
}
@@ -6348,7 +6362,8 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
ifindex = NETCONFA_IFINDEX_DEFAULT;
else
ifindex = idev->dev->ifindex;
- inet6_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p);
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
+ ifindex, p);
return 0;
free:
@@ -6357,7 +6372,8 @@ out:
return -ENOBUFS;
}
-static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
+static void __addrconf_sysctl_unregister(struct net *net,
+ struct ipv6_devconf *p, int ifindex)
{
struct ctl_table *table;
@@ -6368,6 +6384,8 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
unregister_net_sysctl_table(p->sysctl_header);
p->sysctl_header = NULL;
kfree(table);
+
+ inet6_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
}
static int addrconf_sysctl_register(struct inet6_dev *idev)
@@ -6391,7 +6409,8 @@ static int addrconf_sysctl_register(struct inet6_dev *idev)
static void addrconf_sysctl_unregister(struct inet6_dev *idev)
{
- __addrconf_sysctl_unregister(&idev->cnf);
+ __addrconf_sysctl_unregister(dev_net(idev->dev), &idev->cnf,
+ idev->dev->ifindex);
neigh_sysctl_unregister(idev->nd_parms);
}
@@ -6434,7 +6453,7 @@ static int __net_init addrconf_init_net(struct net *net)
#ifdef CONFIG_SYSCTL
err_reg_dflt:
- __addrconf_sysctl_unregister(all);
+ __addrconf_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
err_reg_all:
kfree(dflt);
#endif
@@ -6447,8 +6466,10 @@ err_alloc_all:
static void __net_exit addrconf_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
- __addrconf_sysctl_unregister(net->ipv6.devconf_dflt);
- __addrconf_sysctl_unregister(net->ipv6.devconf_all);
+ __addrconf_sysctl_unregister(net, net->ipv6.devconf_dflt,
+ NETCONFA_IFINDEX_DEFAULT);
+ __addrconf_sysctl_unregister(net, net->ipv6.devconf_all,
+ NETCONFA_IFINDEX_ALL);
#endif
kfree(net->ipv6.devconf_dflt);
kfree(net->ipv6.devconf_all);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a9a9553ee63d..1635d218735e 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1005,6 +1005,10 @@ static int __init inet6_init(void)
if (err)
goto seg6_fail;
+ err = igmp6_late_init();
+ if (err)
+ goto igmp6_late_err;
+
#ifdef CONFIG_SYSCTL
err = ipv6_sysctl_register();
if (err)
@@ -1015,8 +1019,10 @@ out:
#ifdef CONFIG_SYSCTL
sysctl_fail:
- seg6_exit();
+ igmp6_late_cleanup();
#endif
+igmp6_late_err:
+ seg6_exit();
seg6_fail:
calipso_exit();
calipso_fail:
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index aacfb4bce153..b04539dd4629 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -49,6 +49,8 @@
int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ void (*edemux)(struct sk_buff *skb);
+
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
*/
@@ -60,8 +62,8 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
const struct inet6_protocol *ipprot;
ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
- if (ipprot && ipprot->early_demux)
- ipprot->early_demux(skb);
+ if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
+ edemux(skb);
}
if (!skb_valid_dst(skb))
ip6_route_input(skb);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 6ba6c900ebcf..fb4546e80c82 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -815,7 +815,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
in6_dev = __in6_dev_get(dev);
if (in6_dev) {
in6_dev->cnf.mc_forwarding--;
- inet6_netconf_notify_devconf(dev_net(dev),
+ inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
dev->ifindex, &in6_dev->cnf);
}
@@ -974,7 +974,7 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
in6_dev = __in6_dev_get(dev);
if (in6_dev) {
in6_dev->cnf.mc_forwarding++;
- inet6_netconf_notify_devconf(dev_net(dev),
+ inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
dev->ifindex, &in6_dev->cnf);
}
@@ -1599,7 +1599,8 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
write_unlock_bh(&mrt_lock);
if (!err)
- inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_MC_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
rtnl_unlock();
@@ -1620,7 +1621,7 @@ int ip6mr_sk_done(struct sock *sk)
mrt->mroute6_sk = NULL;
net->ipv6.devconf_all->mc_forwarding--;
write_unlock_bh(&mrt_lock);
- inet6_netconf_notify_devconf(net,
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_MC_FORWARDING,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 1bdc703cb966..07403fa164e1 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2463,7 +2463,6 @@ static void mld_ifc_event(struct inet6_dev *idev)
mld_ifc_start_timer(idev, 1);
}
-
static void igmp6_timer_handler(unsigned long data)
{
struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data;
@@ -2599,6 +2598,44 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev)
write_unlock_bh(&idev->lock);
}
+static void ipv6_mc_rejoin_groups(struct inet6_dev *idev)
+{
+ struct ifmcaddr6 *pmc;
+
+ ASSERT_RTNL();
+
+ if (mld_in_v1_mode(idev)) {
+ read_lock_bh(&idev->lock);
+ for (pmc = idev->mc_list; pmc; pmc = pmc->next)
+ igmp6_join_group(pmc);
+ read_unlock_bh(&idev->lock);
+ } else
+ mld_send_report(idev, NULL);
+}
+
+static int ipv6_mc_netdev_event(struct notifier_block *this,
+ unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct inet6_dev *idev = __in6_dev_get(dev);
+
+ switch (event) {
+ case NETDEV_RESEND_IGMP:
+ if (idev)
+ ipv6_mc_rejoin_groups(idev);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block igmp6_netdev_notifier = {
+ .notifier_call = ipv6_mc_netdev_event,
+};
+
#ifdef CONFIG_PROC_FS
struct igmp6_mc_iter_state {
struct seq_net_private p;
@@ -2970,7 +3007,17 @@ int __init igmp6_init(void)
return register_pernet_subsys(&igmp6_net_ops);
}
+int __init igmp6_late_init(void)
+{
+ return register_netdevice_notifier(&igmp6_netdev_notifier);
+}
+
void igmp6_cleanup(void)
{
unregister_pernet_subsys(&igmp6_net_ops);
}
+
+void igmp6_late_cleanup(void)
+{
+ unregister_netdevice_notifier(&igmp6_netdev_notifier);
+}
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index e3770abe688a..b5d54d4f995c 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -26,7 +26,7 @@
#include <net/protocol.h>
#if IS_ENABLED(CONFIG_IPV6)
-const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
+struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly;
EXPORT_SYMBOL(inet6_protos);
int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 85582257d3af..a644aaecdfd3 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -26,17 +26,13 @@
#include <linux/seg6_iptunnel.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
-#ifdef CONFIG_DST_CACHE
#include <net/dst_cache.h>
-#endif
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
struct seg6_lwt {
-#ifdef CONFIG_DST_CACHE
struct dst_cache cache;
-#endif
struct seg6_iptunnel_encap tuninfo[0];
};
@@ -105,7 +101,7 @@ static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
hdrlen = (osrh->hdrlen + 1) << 3;
tot_len = hdrlen + sizeof(*hdr);
- err = pskb_expand_head(skb, tot_len, 0, GFP_ATOMIC);
+ err = skb_cow_head(skb, tot_len);
if (unlikely(err))
return err;
@@ -156,7 +152,7 @@ static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
hdrlen = (osrh->hdrlen + 1) << 3;
- err = pskb_expand_head(skb, hdrlen, 0, GFP_ATOMIC);
+ err = skb_cow_head(skb, hdrlen);
if (unlikely(err))
return err;
@@ -237,6 +233,9 @@ static int seg6_do_srh(struct sk_buff *skb)
static int seg6_input(struct sk_buff *skb)
{
+ struct dst_entry *orig_dst = skb_dst(skb);
+ struct dst_entry *dst = NULL;
+ struct seg6_lwt *slwt;
int err;
err = seg6_do_srh(skb);
@@ -245,8 +244,26 @@ static int seg6_input(struct sk_buff *skb)
return err;
}
+ slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
+
+ preempt_disable();
+ dst = dst_cache_get(&slwt->cache);
+ preempt_enable();
+
skb_dst_drop(skb);
- ip6_route_input(skb);
+
+ if (!dst) {
+ ip6_route_input(skb);
+ dst = skb_dst(skb);
+ if (!dst->error) {
+ preempt_disable();
+ dst_cache_set_ip6(&slwt->cache, dst,
+ &ipv6_hdr(skb)->saddr);
+ preempt_enable();
+ }
+ } else {
+ skb_dst_set(skb, dst);
+ }
return dst_input(skb);
}
@@ -264,11 +281,9 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
-#ifdef CONFIG_DST_CACHE
preempt_disable();
dst = dst_cache_get(&slwt->cache);
preempt_enable();
-#endif
if (unlikely(!dst)) {
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -287,11 +302,9 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
goto drop;
}
-#ifdef CONFIG_DST_CACHE
preempt_disable();
dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
preempt_enable();
-#endif
}
skb_dst_drop(skb);
@@ -355,13 +368,11 @@ static int seg6_build_state(struct nlattr *nla,
slwt = seg6_lwt_lwtunnel(newts);
-#ifdef CONFIG_DST_CACHE
err = dst_cache_init(&slwt->cache, GFP_KERNEL);
if (err) {
kfree(newts);
return err;
}
-#endif
memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
@@ -375,12 +386,10 @@ static int seg6_build_state(struct nlattr *nla,
return 0;
}
-#ifdef CONFIG_DST_CACHE
static void seg6_destroy_state(struct lwtunnel_state *lwt)
{
dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache);
}
-#endif
static int seg6_fill_encap_info(struct sk_buff *skb,
struct lwtunnel_state *lwtstate)
@@ -414,9 +423,7 @@ static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
static const struct lwtunnel_encap_ops seg6_iptun_ops = {
.build_state = seg6_build_state,
-#ifdef CONFIG_DST_CACHE
.destroy_state = seg6_destroy_state,
-#endif
.output = seg6_output,
.input = seg6_input,
.fill_encap = seg6_fill_encap_info,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0f08d718a002..8e42e8f54b70 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1293,8 +1293,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
goto discard;
if (nsk != sk) {
- sock_rps_save_rxhash(nsk, skb);
- sk_mark_napi_id(nsk, skb);
if (tcp_child_process(sk, nsk, skb))
goto reset;
if (opt_skb)
@@ -1925,8 +1923,9 @@ struct proto tcpv6_prot = {
.diag_destroy = tcp_abort,
};
-static const struct inet6_protocol tcpv6_protocol = {
+static struct inet6_protocol tcpv6_protocol = {
.early_demux = tcp_v6_early_demux,
+ .early_demux_handler = tcp_v6_early_demux,
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b793ed1d2a36..fd4b1c98a472 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1436,8 +1436,9 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
}
#endif
-static const struct inet6_protocol udpv6_protocol = {
+static struct inet6_protocol udpv6_protocol = {
.early_demux = udp_v6_early_demux,
+ .early_demux_handler = udp_v6_early_demux,
.handler = udpv6_rcv,
.err_handler = udpv6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 309062f3debe..31762f76cdb5 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1687,7 +1687,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
struct kcm_attach info;
if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
- err = -EFAULT;
+ return -EFAULT;
err = kcm_attach_ioctl(sock, &info);
@@ -1697,7 +1697,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
struct kcm_unattach info;
if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
- err = -EFAULT;
+ return -EFAULT;
err = kcm_unattach_ioctl(sock, &info);
@@ -1708,7 +1708,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
struct socket *newsock = NULL;
if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
- err = -EFAULT;
+ return -EFAULT;
err = kcm_clone(sock, &info, &newsock);
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 8adab6335ced..e37d9554da7b 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -278,7 +278,57 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn
}
EXPORT_SYMBOL_GPL(l2tp_session_find);
-struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
+/* Like l2tp_session_find() but takes a reference on the returned session.
+ * Optionally calls session->ref() too if do_ref is true.
+ */
+struct l2tp_session *l2tp_session_get(struct net *net,
+ struct l2tp_tunnel *tunnel,
+ u32 session_id, bool do_ref)
+{
+ struct hlist_head *session_list;
+ struct l2tp_session *session;
+
+ if (!tunnel) {
+ struct l2tp_net *pn = l2tp_pernet(net);
+
+ session_list = l2tp_session_id_hash_2(pn, session_id);
+
+ rcu_read_lock_bh();
+ hlist_for_each_entry_rcu(session, session_list, global_hlist) {
+ if (session->session_id == session_id) {
+ l2tp_session_inc_refcount(session);
+ if (do_ref && session->ref)
+ session->ref(session);
+ rcu_read_unlock_bh();
+
+ return session;
+ }
+ }
+ rcu_read_unlock_bh();
+
+ return NULL;
+ }
+
+ session_list = l2tp_session_id_hash(tunnel, session_id);
+ read_lock_bh(&tunnel->hlist_lock);
+ hlist_for_each_entry(session, session_list, hlist) {
+ if (session->session_id == session_id) {
+ l2tp_session_inc_refcount(session);
+ if (do_ref && session->ref)
+ session->ref(session);
+ read_unlock_bh(&tunnel->hlist_lock);
+
+ return session;
+ }
+ }
+ read_unlock_bh(&tunnel->hlist_lock);
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(l2tp_session_get);
+
+struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
+ bool do_ref)
{
int hash;
struct l2tp_session *session;
@@ -288,6 +338,9 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) {
if (++count > nth) {
+ l2tp_session_inc_refcount(session);
+ if (do_ref && session->ref)
+ session->ref(session);
read_unlock_bh(&tunnel->hlist_lock);
return session;
}
@@ -298,12 +351,13 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
return NULL;
}
-EXPORT_SYMBOL_GPL(l2tp_session_find_nth);
+EXPORT_SYMBOL_GPL(l2tp_session_get_nth);
/* Lookup a session by interface name.
* This is very inefficient but is only used by management interfaces.
*/
-struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
+struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname,
+ bool do_ref)
{
struct l2tp_net *pn = l2tp_pernet(net);
int hash;
@@ -313,7 +367,11 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) {
hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) {
if (!strcmp(session->ifname, ifname)) {
+ l2tp_session_inc_refcount(session);
+ if (do_ref && session->ref)
+ session->ref(session);
rcu_read_unlock_bh();
+
return session;
}
}
@@ -323,7 +381,49 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
return NULL;
}
-EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname);
+EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname);
+
+static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel,
+ struct l2tp_session *session)
+{
+ struct l2tp_session *session_walk;
+ struct hlist_head *g_head;
+ struct hlist_head *head;
+ struct l2tp_net *pn;
+
+ head = l2tp_session_id_hash(tunnel, session->session_id);
+
+ write_lock_bh(&tunnel->hlist_lock);
+ hlist_for_each_entry(session_walk, head, hlist)
+ if (session_walk->session_id == session->session_id)
+ goto exist;
+
+ if (tunnel->version == L2TP_HDR_VER_3) {
+ pn = l2tp_pernet(tunnel->l2tp_net);
+ g_head = l2tp_session_id_hash_2(l2tp_pernet(tunnel->l2tp_net),
+ session->session_id);
+
+ spin_lock_bh(&pn->l2tp_session_hlist_lock);
+ hlist_for_each_entry(session_walk, g_head, global_hlist)
+ if (session_walk->session_id == session->session_id)
+ goto exist_glob;
+
+ hlist_add_head_rcu(&session->global_hlist, g_head);
+ spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+ }
+
+ hlist_add_head(&session->hlist, head);
+ write_unlock_bh(&tunnel->hlist_lock);
+
+ return 0;
+
+exist_glob:
+ spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+exist:
+ write_unlock_bh(&tunnel->hlist_lock);
+
+ return -EEXIST;
+}
/* Lookup a tunnel by id
*/
@@ -633,6 +733,9 @@ discard:
* a data (not control) frame before coming here. Fields up to the
* session-id have already been parsed and ptr points to the data
* after the session-id.
+ *
+ * session->ref() must have been called prior to l2tp_recv_common().
+ * session->deref() will be called automatically after skb is processed.
*/
void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
unsigned char *ptr, unsigned char *optr, u16 hdrflags,
@@ -642,14 +745,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
int offset;
u32 ns, nr;
- /* The ref count is increased since we now hold a pointer to
- * the session. Take care to decrement the refcnt when exiting
- * this function from now on...
- */
- l2tp_session_inc_refcount(session);
- if (session->ref)
- (*session->ref)(session);
-
/* Parse and check optional cookie */
if (session->peer_cookie_len > 0) {
if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) {
@@ -802,8 +897,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
/* Try to dequeue as many skbs from reorder_q as we can. */
l2tp_recv_dequeue(session);
- l2tp_session_dec_refcount(session);
-
return;
discard:
@@ -812,8 +905,6 @@ discard:
if (session->deref)
(*session->deref)(session);
-
- l2tp_session_dec_refcount(session);
}
EXPORT_SYMBOL(l2tp_recv_common);
@@ -920,8 +1011,14 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
}
/* Find the session context */
- session = l2tp_session_find(tunnel->l2tp_net, tunnel, session_id);
+ session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id, true);
if (!session || !session->recv_skb) {
+ if (session) {
+ if (session->deref)
+ session->deref(session);
+ l2tp_session_dec_refcount(session);
+ }
+
/* Not found? Pass to userspace to deal with */
l2tp_info(tunnel, L2TP_MSG_DATA,
"%s: no session found (%u/%u). Passing up.\n",
@@ -930,6 +1027,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
}
l2tp_recv_common(session, skb, ptr, optr, hdrflags, length, payload_hook);
+ l2tp_session_dec_refcount(session);
return 0;
@@ -1738,6 +1836,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
{
struct l2tp_session *session;
+ int err;
session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL);
if (session != NULL) {
@@ -1793,6 +1892,13 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
l2tp_session_set_header_len(session, tunnel->version);
+ err = l2tp_session_add_to_tunnel(tunnel, session);
+ if (err) {
+ kfree(session);
+
+ return ERR_PTR(err);
+ }
+
/* Bump the reference count. The session context is deleted
* only when this drops to zero.
*/
@@ -1802,28 +1908,14 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
/* Ensure tunnel socket isn't deleted */
sock_hold(tunnel->sock);
- /* Add session to the tunnel's hash list */
- write_lock_bh(&tunnel->hlist_lock);
- hlist_add_head(&session->hlist,
- l2tp_session_id_hash(tunnel, session_id));
- write_unlock_bh(&tunnel->hlist_lock);
-
- /* And to the global session list if L2TPv3 */
- if (tunnel->version != L2TP_HDR_VER_2) {
- struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
-
- spin_lock_bh(&pn->l2tp_session_hlist_lock);
- hlist_add_head_rcu(&session->global_hlist,
- l2tp_session_id_hash_2(pn, session_id));
- spin_unlock_bh(&pn->l2tp_session_hlist_lock);
- }
-
/* Ignore management session in session count value */
if (session->session_id != 0)
atomic_inc(&l2tp_session_count);
+
+ return session;
}
- return session;
+ return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL_GPL(l2tp_session_create);
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index aebf281d09ee..8ce7818c7a9d 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -230,11 +230,16 @@ out:
return tunnel;
}
+struct l2tp_session *l2tp_session_get(struct net *net,
+ struct l2tp_tunnel *tunnel,
+ u32 session_id, bool do_ref);
struct l2tp_session *l2tp_session_find(struct net *net,
struct l2tp_tunnel *tunnel,
u32 session_id);
-struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth);
-struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname);
+struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
+ bool do_ref);
+struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname,
+ bool do_ref);
struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id);
struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth);
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 2d6760a2ae34..d100aed3d06f 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -53,7 +53,7 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd)
static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd)
{
- pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx);
+ pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true);
pd->session_idx++;
if (pd->session == NULL) {
@@ -238,10 +238,14 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v)
}
/* Show the tunnel or session context */
- if (pd->session == NULL)
+ if (!pd->session) {
l2tp_dfs_seq_tunnel_show(m, pd->tunnel);
- else
+ } else {
l2tp_dfs_seq_session_show(m, pd->session);
+ if (pd->session->deref)
+ pd->session->deref(pd->session);
+ l2tp_session_dec_refcount(pd->session);
+ }
out:
return 0;
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 8bf18a5f66e0..138566a63123 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -30,6 +30,9 @@
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
#include "l2tp_core.h"
@@ -204,6 +207,53 @@ static void l2tp_eth_show(struct seq_file *m, void *arg)
}
#endif
+static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
+ struct l2tp_session *session,
+ struct net_device *dev)
+{
+ unsigned int overhead = 0;
+ struct dst_entry *dst;
+ u32 l3_overhead = 0;
+
+ /* if the encap is UDP, account for UDP header size */
+ if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
+ overhead += sizeof(struct udphdr);
+ dev->needed_headroom += sizeof(struct udphdr);
+ }
+ if (session->mtu != 0) {
+ dev->mtu = session->mtu;
+ dev->needed_headroom += session->hdr_len;
+ return;
+ }
+ l3_overhead = kernel_sock_ip_overhead(tunnel->sock);
+ if (l3_overhead == 0) {
+ /* L3 Overhead couldn't be identified, this could be
+ * because tunnel->sock was NULL or the socket's
+ * address family was not IPv4 or IPv6,
+ * dev mtu stays at 1500.
+ */
+ return;
+ }
+ /* Adjust MTU, factor overhead - underlay L3, overlay L2 hdr
+ * UDP overhead, if any, was already factored in above.
+ */
+ overhead += session->hdr_len + ETH_HLEN + l3_overhead;
+
+ /* If PMTU discovery was enabled, use discovered MTU on L2TP device */
+ dst = sk_dst_get(tunnel->sock);
+ if (dst) {
+ /* dst_mtu will use PMTU if found, else fallback to intf MTU */
+ u32 pmtu = dst_mtu(dst);
+
+ if (pmtu != 0)
+ dev->mtu = pmtu;
+ dst_release(dst);
+ }
+ session->mtu = dev->mtu - overhead;
+ dev->mtu = session->mtu;
+ dev->needed_headroom += session->hdr_len;
+}
+
static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
{
struct net_device *dev;
@@ -221,12 +271,6 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
goto out;
}
- session = l2tp_session_find(net, tunnel, session_id);
- if (session) {
- rc = -EEXIST;
- goto out;
- }
-
if (cfg->ifname) {
dev = dev_get_by_name(net, cfg->ifname);
if (dev) {
@@ -240,8 +284,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
session = l2tp_session_create(sizeof(*spriv), tunnel, session_id,
peer_session_id, cfg);
- if (!session) {
- rc = -ENOMEM;
+ if (IS_ERR(session)) {
+ rc = PTR_ERR(session);
goto out;
}
@@ -253,12 +297,9 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
}
dev_net_set(dev, net);
- if (session->mtu == 0)
- session->mtu = dev->mtu - session->hdr_len;
- dev->mtu = session->mtu;
- dev->needed_headroom += session->hdr_len;
dev->min_mtu = 0;
dev->max_mtu = ETH_MAX_MTU;
+ l2tp_eth_adjust_mtu(tunnel, session, dev);
priv = netdev_priv(dev);
priv->dev = dev;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index d25038cfd64e..4d322c1b7233 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -143,19 +143,19 @@ static int l2tp_ip_recv(struct sk_buff *skb)
}
/* Ok, this is a data packet. Lookup the session. */
- session = l2tp_session_find(net, NULL, session_id);
- if (session == NULL)
+ session = l2tp_session_get(net, NULL, session_id, true);
+ if (!session)
goto discard;
tunnel = session->tunnel;
- if (tunnel == NULL)
- goto discard;
+ if (!tunnel)
+ goto discard_sess;
/* Trace packet contents, if enabled */
if (tunnel->debug & L2TP_MSG_DATA) {
length = min(32u, skb->len);
if (!pskb_may_pull(skb, length))
- goto discard;
+ goto discard_sess;
/* Point to L2TP header */
optr = ptr = skb->data;
@@ -165,6 +165,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
}
l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook);
+ l2tp_session_dec_refcount(session);
return 0;
@@ -178,9 +179,10 @@ pass_up:
tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel != NULL)
+ if (tunnel) {
sk = tunnel->sock;
- else {
+ sock_hold(sk);
+ } else {
struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
read_lock_bh(&l2tp_ip_lock);
@@ -202,6 +204,12 @@ pass_up:
return sk_receive_skb(sk, skb, 1);
+discard_sess:
+ if (session->deref)
+ session->deref(session);
+ l2tp_session_dec_refcount(session);
+ goto discard;
+
discard_put:
sock_put(sk);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index a4abcbc4c09a..88b397c30d86 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -156,19 +156,19 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
}
/* Ok, this is a data packet. Lookup the session. */
- session = l2tp_session_find(net, NULL, session_id);
- if (session == NULL)
+ session = l2tp_session_get(net, NULL, session_id, true);
+ if (!session)
goto discard;
tunnel = session->tunnel;
- if (tunnel == NULL)
- goto discard;
+ if (!tunnel)
+ goto discard_sess;
/* Trace packet contents, if enabled */
if (tunnel->debug & L2TP_MSG_DATA) {
length = min(32u, skb->len);
if (!pskb_may_pull(skb, length))
- goto discard;
+ goto discard_sess;
/* Point to L2TP header */
optr = ptr = skb->data;
@@ -179,6 +179,8 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
l2tp_recv_common(session, skb, ptr, optr, 0, skb->len,
tunnel->recv_payload_hook);
+ l2tp_session_dec_refcount(session);
+
return 0;
pass_up:
@@ -191,9 +193,10 @@ pass_up:
tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel != NULL)
+ if (tunnel) {
sk = tunnel->sock;
- else {
+ sock_hold(sk);
+ } else {
struct ipv6hdr *iph = ipv6_hdr(skb);
read_lock_bh(&l2tp_ip6_lock);
@@ -215,6 +218,12 @@ pass_up:
return sk_receive_skb(sk, skb, 1);
+discard_sess:
+ if (session->deref)
+ session->deref(session);
+ l2tp_session_dec_refcount(session);
+ goto discard;
+
discard_put:
sock_put(sk);
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 3620fba31786..7e3e669baac4 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -48,7 +48,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq,
/* Accessed under genl lock */
static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX];
-static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info)
+static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info,
+ bool do_ref)
{
u32 tunnel_id;
u32 session_id;
@@ -59,14 +60,15 @@ static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info)
if (info->attrs[L2TP_ATTR_IFNAME]) {
ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
- session = l2tp_session_find_by_ifname(net, ifname);
+ session = l2tp_session_get_by_ifname(net, ifname, do_ref);
} else if ((info->attrs[L2TP_ATTR_SESSION_ID]) &&
(info->attrs[L2TP_ATTR_CONN_ID])) {
tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
tunnel = l2tp_tunnel_find(net, tunnel_id);
if (tunnel)
- session = l2tp_session_find(net, tunnel, session_id);
+ session = l2tp_session_get(net, tunnel, session_id,
+ do_ref);
}
return session;
@@ -642,10 +644,12 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
session_id, peer_session_id, &cfg);
if (ret >= 0) {
- session = l2tp_session_find(net, tunnel, session_id);
- if (session)
+ session = l2tp_session_get(net, tunnel, session_id, false);
+ if (session) {
ret = l2tp_session_notify(&l2tp_nl_family, info, session,
L2TP_CMD_SESSION_CREATE);
+ l2tp_session_dec_refcount(session);
+ }
}
out:
@@ -658,7 +662,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
struct l2tp_session *session;
u16 pw_type;
- session = l2tp_nl_session_find(info);
+ session = l2tp_nl_session_get(info, true);
if (session == NULL) {
ret = -ENODEV;
goto out;
@@ -672,6 +676,10 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete)
ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session);
+ if (session->deref)
+ session->deref(session);
+ l2tp_session_dec_refcount(session);
+
out:
return ret;
}
@@ -681,7 +689,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
int ret = 0;
struct l2tp_session *session;
- session = l2tp_nl_session_find(info);
+ session = l2tp_nl_session_get(info, false);
if (session == NULL) {
ret = -ENODEV;
goto out;
@@ -716,6 +724,8 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
ret = l2tp_session_notify(&l2tp_nl_family, info,
session, L2TP_CMD_SESSION_MODIFY);
+ l2tp_session_dec_refcount(session);
+
out:
return ret;
}
@@ -811,29 +821,34 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *msg;
int ret;
- session = l2tp_nl_session_find(info);
+ session = l2tp_nl_session_get(info, false);
if (session == NULL) {
ret = -ENODEV;
- goto out;
+ goto err;
}
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg) {
ret = -ENOMEM;
- goto out;
+ goto err_ref;
}
ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq,
0, session, L2TP_CMD_SESSION_GET);
if (ret < 0)
- goto err_out;
+ goto err_ref_msg;
- return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid);
+ ret = genlmsg_unicast(genl_info_net(info), msg, info->snd_portid);
-err_out:
- nlmsg_free(msg);
+ l2tp_session_dec_refcount(session);
-out:
+ return ret;
+
+err_ref_msg:
+ nlmsg_free(msg);
+err_ref:
+ l2tp_session_dec_refcount(session);
+err:
return ret;
}
@@ -852,7 +867,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
goto out;
}
- session = l2tp_session_find_nth(tunnel, si);
+ session = l2tp_session_get_nth(tunnel, si, false);
if (session == NULL) {
ti++;
tunnel = NULL;
@@ -862,8 +877,11 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- session, L2TP_CMD_SESSION_GET) < 0)
+ session, L2TP_CMD_SESSION_GET) < 0) {
+ l2tp_session_dec_refcount(session);
break;
+ }
+ l2tp_session_dec_refcount(session);
si++;
}
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 36cc56fd0418..861b255a2d51 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -450,6 +450,10 @@ static void pppol2tp_session_close(struct l2tp_session *session)
static void pppol2tp_session_destruct(struct sock *sk)
{
struct l2tp_session *session = sk->sk_user_data;
+
+ skb_queue_purge(&sk->sk_receive_queue);
+ skb_queue_purge(&sk->sk_write_queue);
+
if (session) {
sk->sk_user_data = NULL;
BUG_ON(session->magic != L2TP_SESSION_MAGIC);
@@ -488,9 +492,6 @@ static int pppol2tp_release(struct socket *sock)
l2tp_session_queue_purge(session);
sock_put(sk);
}
- skb_queue_purge(&sk->sk_receive_queue);
- skb_queue_purge(&sk->sk_write_queue);
-
release_sock(sk);
/* This will delete the session context via
@@ -582,6 +583,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
int error = 0;
u32 tunnel_id, peer_tunnel_id;
u32 session_id, peer_session_id;
+ bool drop_refcnt = false;
int ver = 2;
int fd;
@@ -683,36 +685,36 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
if (tunnel->peer_tunnel_id == 0)
tunnel->peer_tunnel_id = peer_tunnel_id;
- /* Create session if it doesn't already exist. We handle the
- * case where a session was previously created by the netlink
- * interface by checking that the session doesn't already have
- * a socket and its tunnel socket are what we expect. If any
- * of those checks fail, return EEXIST to the caller.
- */
- session = l2tp_session_find(sock_net(sk), tunnel, session_id);
- if (session == NULL) {
- /* Default MTU must allow space for UDP/L2TP/PPP
- * headers.
+ session = l2tp_session_get(sock_net(sk), tunnel, session_id, false);
+ if (session) {
+ drop_refcnt = true;
+ ps = l2tp_session_priv(session);
+
+ /* Using a pre-existing session is fine as long as it hasn't
+ * been connected yet.
*/
- cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+ if (ps->sock) {
+ error = -EEXIST;
+ goto end;
+ }
- /* Allocate and initialize a new session context. */
- session = l2tp_session_create(sizeof(struct pppol2tp_session),
- tunnel, session_id,
- peer_session_id, &cfg);
- if (session == NULL) {
- error = -ENOMEM;
+ /* consistency checks */
+ if (ps->tunnel_sock != tunnel->sock) {
+ error = -EEXIST;
goto end;
}
} else {
- ps = l2tp_session_priv(session);
- error = -EEXIST;
- if (ps->sock != NULL)
- goto end;
+ /* Default MTU must allow space for UDP/L2TP/PPP headers */
+ cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+ cfg.mru = cfg.mtu;
- /* consistency checks */
- if (ps->tunnel_sock != tunnel->sock)
+ session = l2tp_session_create(sizeof(struct pppol2tp_session),
+ tunnel, session_id,
+ peer_session_id, &cfg);
+ if (IS_ERR(session)) {
+ error = PTR_ERR(session);
goto end;
+ }
}
/* Associate session with its PPPoL2TP socket */
@@ -777,6 +779,8 @@ out_no_ppp:
session->name);
end:
+ if (drop_refcnt)
+ l2tp_session_dec_refcount(session);
release_sock(sk);
return error;
@@ -804,12 +808,6 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i
if (tunnel->sock == NULL)
goto out;
- /* Check that this session doesn't already exist */
- error = -EEXIST;
- session = l2tp_session_find(net, tunnel, session_id);
- if (session != NULL)
- goto out;
-
/* Default MTU values. */
if (cfg->mtu == 0)
cfg->mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
@@ -817,12 +815,13 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i
cfg->mru = cfg->mtu;
/* Allocate and initialize a new session context. */
- error = -ENOMEM;
session = l2tp_session_create(sizeof(struct pppol2tp_session),
tunnel, session_id,
peer_session_id, cfg);
- if (session == NULL)
+ if (IS_ERR(session)) {
+ error = PTR_ERR(session);
goto out;
+ }
ps = l2tp_session_priv(session);
ps->tunnel_sock = tunnel->sock;
@@ -1140,11 +1139,18 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
if (stats.session_id != 0) {
/* resend to session ioctl handler */
struct l2tp_session *session =
- l2tp_session_find(sock_net(sk), tunnel, stats.session_id);
- if (session != NULL)
- err = pppol2tp_session_ioctl(session, cmd, arg);
- else
+ l2tp_session_get(sock_net(sk), tunnel,
+ stats.session_id, true);
+
+ if (session) {
+ err = pppol2tp_session_ioctl(session, cmd,
+ arg);
+ if (session->deref)
+ session->deref(session);
+ l2tp_session_dec_refcount(session);
+ } else {
err = -EBADR;
+ }
break;
}
#ifdef CONFIG_XFRM
@@ -1554,7 +1560,7 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd)
static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
{
- pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx);
+ pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true);
pd->session_idx++;
if (pd->session == NULL) {
@@ -1681,10 +1687,14 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v)
/* Show the tunnel or session context.
*/
- if (pd->session == NULL)
+ if (!pd->session) {
pppol2tp_seq_tunnel_show(m, pd->tunnel);
- else
+ } else {
pppol2tp_seq_session_show(m, pd->session);
+ if (pd->session->deref)
+ pd->session->deref(pd->session);
+ l2tp_session_dec_refcount(pd->session);
+ }
out:
return 0;
@@ -1843,4 +1853,4 @@ MODULE_DESCRIPTION("PPP over L2TP over UDP");
MODULE_LICENSE("GPL");
MODULE_VERSION(PPPOL2TP_DRV_VERSION);
MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OL2TP);
-MODULE_ALIAS_L2TP_PWTYPE(11);
+MODULE_ALIAS_L2TP_PWTYPE(7);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 40813dd3301c..5bb0c5012819 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -718,7 +718,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
ieee80211_recalc_ps(local);
if (sdata->vif.type == NL80211_IFTYPE_MONITOR ||
- sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
+ sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
+ local->ops->wake_tx_queue) {
/* XXX: for AP_VLAN, actually track AP queues */
netif_tx_start_all_queues(dev);
} else if (dev) {
diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h
index 56ccffa3f2bf..62141dcec2d6 100644
--- a/net/mac802154/ieee802154_i.h
+++ b/net/mac802154/ieee802154_i.h
@@ -19,6 +19,7 @@
#ifndef __IEEE802154_I_H
#define __IEEE802154_I_H
+#include <linux/interrupt.h>
#include <linux/mutex.h>
#include <linux/hrtimer.h>
#include <net/cfg802154.h>
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 82589b2abf3c..5928d22ba9c8 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -24,6 +24,9 @@
#include <net/nexthop.h>
#include "internal.h"
+/* max memory we will use for mpls_route */
+#define MAX_MPLS_ROUTE_MEM 4096
+
/* Maximum number of labels to look ahead at when selecting a path of
* a multipath route
*/
@@ -60,10 +63,7 @@ EXPORT_SYMBOL_GPL(mpls_output_possible);
static u8 *__mpls_nh_via(struct mpls_route *rt, struct mpls_nh *nh)
{
- u8 *nh0_via = PTR_ALIGN((u8 *)&rt->rt_nh[rt->rt_nhn], VIA_ALEN_ALIGN);
- int nh_index = nh - rt->rt_nh;
-
- return nh0_via + rt->rt_max_alen * nh_index;
+ return (u8 *)nh + rt->rt_via_offset;
}
static const u8 *mpls_nh_via(const struct mpls_route *rt,
@@ -189,21 +189,32 @@ static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb)
return hash;
}
+static struct mpls_nh *mpls_get_nexthop(struct mpls_route *rt, u8 index)
+{
+ return (struct mpls_nh *)((u8 *)rt->rt_nh + index * rt->rt_nh_size);
+}
+
+/* number of alive nexthops (rt->rt_nhn_alive) and the flags for
+ * a next hop (nh->nh_flags) are modified by netdev event handlers.
+ * Since those fields can change at any moment, use READ_ONCE to
+ * access both.
+ */
static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
struct sk_buff *skb)
{
- int alive = ACCESS_ONCE(rt->rt_nhn_alive);
u32 hash = 0;
int nh_index = 0;
int n = 0;
+ u8 alive;
/* No need to look further into packet if there's only
* one path
*/
if (rt->rt_nhn == 1)
- goto out;
+ return rt->rt_nh;
- if (alive <= 0)
+ alive = READ_ONCE(rt->rt_nhn_alive);
+ if (alive == 0)
return NULL;
hash = mpls_multipath_hash(rt, skb);
@@ -211,7 +222,9 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
if (alive == rt->rt_nhn)
goto out;
for_nexthops(rt) {
- if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
+ unsigned int nh_flags = READ_ONCE(nh->nh_flags);
+
+ if (nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
continue;
if (n == nh_index)
return nh;
@@ -219,7 +232,7 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
} endfor_nexthops(rt);
out:
- return &rt->rt_nh[nh_index];
+ return mpls_get_nexthop(rt, nh_index);
}
static bool mpls_egress(struct net *net, struct mpls_route *rt,
@@ -458,20 +471,27 @@ struct mpls_route_config {
int rc_mp_len;
};
-static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen)
+/* all nexthops within a route have the same size based on max
+ * number of labels and max via length for a hop
+ */
+static struct mpls_route *mpls_rt_alloc(u8 num_nh, u8 max_alen, u8 max_labels)
{
- u8 max_alen_aligned = ALIGN(max_alen, VIA_ALEN_ALIGN);
+ u8 nh_size = MPLS_NH_SIZE(max_labels, max_alen);
struct mpls_route *rt;
+ size_t size;
- rt = kzalloc(ALIGN(sizeof(*rt) + num_nh * sizeof(*rt->rt_nh),
- VIA_ALEN_ALIGN) +
- num_nh * max_alen_aligned,
- GFP_KERNEL);
- if (rt) {
- rt->rt_nhn = num_nh;
- rt->rt_nhn_alive = num_nh;
- rt->rt_max_alen = max_alen_aligned;
- }
+ size = sizeof(*rt) + num_nh * nh_size;
+ if (size > MAX_MPLS_ROUTE_MEM)
+ return ERR_PTR(-EINVAL);
+
+ rt = kzalloc(size, GFP_KERNEL);
+ if (!rt)
+ return ERR_PTR(-ENOMEM);
+
+ rt->rt_nhn = num_nh;
+ rt->rt_nhn_alive = num_nh;
+ rt->rt_nh_size = nh_size;
+ rt->rt_via_offset = MPLS_NH_VIA_OFF(max_labels);
return rt;
}
@@ -676,9 +696,6 @@ static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg,
return -ENOMEM;
err = -EINVAL;
- /* Ensure only a supported number of labels are present */
- if (cfg->rc_output_labels > MAX_NEW_LABELS)
- goto errout;
nh->nh_labels = cfg->rc_output_labels;
for (i = 0; i < nh->nh_labels; i++)
@@ -703,7 +720,7 @@ errout:
static int mpls_nh_build(struct net *net, struct mpls_route *rt,
struct mpls_nh *nh, int oif, struct nlattr *via,
- struct nlattr *newdst)
+ struct nlattr *newdst, u8 max_labels)
{
int err = -ENOMEM;
@@ -711,7 +728,7 @@ static int mpls_nh_build(struct net *net, struct mpls_route *rt,
goto errout;
if (newdst) {
- err = nla_get_labels(newdst, MAX_NEW_LABELS,
+ err = nla_get_labels(newdst, max_labels,
&nh->nh_labels, nh->nh_label);
if (err)
goto errout;
@@ -736,22 +753,20 @@ errout:
return err;
}
-static int mpls_count_nexthops(struct rtnexthop *rtnh, int len,
- u8 cfg_via_alen, u8 *max_via_alen)
+static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len,
+ u8 cfg_via_alen, u8 *max_via_alen,
+ u8 *max_labels)
{
- int nhs = 0;
int remaining = len;
-
- if (!rtnh) {
- *max_via_alen = cfg_via_alen;
- return 1;
- }
+ u8 nhs = 0;
*max_via_alen = 0;
+ *max_labels = 0;
while (rtnh_ok(rtnh, remaining)) {
struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
int attrlen;
+ u8 n_labels = 0;
attrlen = rtnh_attrlen(rtnh);
nla = nla_find(attrs, attrlen, RTA_VIA);
@@ -765,7 +780,20 @@ static int mpls_count_nexthops(struct rtnexthop *rtnh, int len,
via_alen);
}
+ nla = nla_find(attrs, attrlen, RTA_NEWDST);
+ if (nla &&
+ nla_get_labels(nla, MAX_NEW_LABELS, &n_labels, NULL) != 0)
+ return 0;
+
+ *max_labels = max_t(u8, *max_labels, n_labels);
+
+ /* number of nexthops is tracked by a u8.
+ * Check for overflow.
+ */
+ if (nhs == 255)
+ return 0;
nhs++;
+
rtnh = rtnh_next(rtnh, &remaining);
}
@@ -774,13 +802,13 @@ static int mpls_count_nexthops(struct rtnexthop *rtnh, int len,
}
static int mpls_nh_build_multi(struct mpls_route_config *cfg,
- struct mpls_route *rt)
+ struct mpls_route *rt, u8 max_labels)
{
struct rtnexthop *rtnh = cfg->rc_mp;
struct nlattr *nla_via, *nla_newdst;
int remaining = cfg->rc_mp_len;
- int nhs = 0;
int err = 0;
+ u8 nhs = 0;
change_nexthops(rt) {
int attrlen;
@@ -807,7 +835,8 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg,
}
err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh,
- rtnh->rtnh_ifindex, nla_via, nla_newdst);
+ rtnh->rtnh_ifindex, nla_via, nla_newdst,
+ max_labels);
if (err)
goto errout;
@@ -834,7 +863,8 @@ static int mpls_route_add(struct mpls_route_config *cfg)
int err = -EINVAL;
u8 max_via_alen;
unsigned index;
- int nhs;
+ u8 max_labels;
+ u8 nhs;
index = cfg->rc_label;
@@ -872,22 +902,32 @@ static int mpls_route_add(struct mpls_route_config *cfg)
goto errout;
err = -EINVAL;
- nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len,
- cfg->rc_via_alen, &max_via_alen);
+ if (cfg->rc_mp) {
+ nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len,
+ cfg->rc_via_alen, &max_via_alen,
+ &max_labels);
+ } else {
+ max_via_alen = cfg->rc_via_alen;
+ max_labels = cfg->rc_output_labels;
+ nhs = 1;
+ }
+
if (nhs == 0)
goto errout;
err = -ENOMEM;
- rt = mpls_rt_alloc(nhs, max_via_alen);
- if (!rt)
+ rt = mpls_rt_alloc(nhs, max_via_alen, max_labels);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
goto errout;
+ }
rt->rt_protocol = cfg->rc_protocol;
rt->rt_payload_type = cfg->rc_payload_type;
rt->rt_ttl_propagate = cfg->rc_ttl_propagate;
if (cfg->rc_mp)
- err = mpls_nh_build_multi(cfg, rt);
+ err = mpls_nh_build_multi(cfg, rt, max_labels);
else
err = mpls_nh_build_from_cfg(cfg, rt);
if (err)
@@ -1040,8 +1080,8 @@ static int mpls_netconf_msgsize_devconf(int type)
return size;
}
-static void mpls_netconf_notify_devconf(struct net *net, int type,
- struct mpls_dev *mdev)
+static void mpls_netconf_notify_devconf(struct net *net, int event,
+ int type, struct mpls_dev *mdev)
{
struct sk_buff *skb;
int err = -ENOBUFS;
@@ -1050,8 +1090,7 @@ static void mpls_netconf_notify_devconf(struct net *net, int type,
if (!skb)
goto errout;
- err = mpls_netconf_fill_devconf(skb, mdev, 0, 0, RTM_NEWNETCONF,
- 0, type);
+ err = mpls_netconf_fill_devconf(skb, mdev, 0, 0, event, 0, type);
if (err < 0) {
/* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */
WARN_ON(err == -EMSGSIZE);
@@ -1184,9 +1223,8 @@ static int mpls_conf_proc(struct ctl_table *ctl, int write,
if (i == offsetof(struct mpls_dev, input_enabled) &&
val != oval) {
- mpls_netconf_notify_devconf(net,
- NETCONFA_INPUT,
- mdev);
+ mpls_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_INPUT, mdev);
}
}
@@ -1227,10 +1265,11 @@ static int mpls_dev_sysctl_register(struct net_device *dev,
snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name);
- mdev->sysctl = register_net_sysctl(dev_net(dev), path, table);
+ mdev->sysctl = register_net_sysctl(net, path, table);
if (!mdev->sysctl)
goto free;
+ mpls_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, mdev);
return 0;
free:
@@ -1239,13 +1278,17 @@ out:
return -ENOBUFS;
}
-static void mpls_dev_sysctl_unregister(struct mpls_dev *mdev)
+static void mpls_dev_sysctl_unregister(struct net_device *dev,
+ struct mpls_dev *mdev)
{
+ struct net *net = dev_net(dev);
struct ctl_table *table;
table = mdev->sysctl->ctl_table_arg;
unregister_net_sysctl_table(mdev->sysctl);
kfree(table);
+
+ mpls_netconf_notify_devconf(net, RTM_DELNETCONF, 0, mdev);
}
static struct mpls_dev *mpls_add_dev(struct net_device *dev)
@@ -1271,11 +1314,12 @@ static struct mpls_dev *mpls_add_dev(struct net_device *dev)
u64_stats_init(&mpls_stats->syncp);
}
+ mdev->dev = dev;
+
err = mpls_dev_sysctl_register(dev, mdev);
if (err)
goto free;
- mdev->dev = dev;
rcu_assign_pointer(dev->mpls_ptr, mdev);
return mdev;
@@ -1298,8 +1342,7 @@ static void mpls_ifdown(struct net_device *dev, int event)
{
struct mpls_route __rcu **platform_label;
struct net *net = dev_net(dev);
- unsigned int nh_flags = RTNH_F_DEAD | RTNH_F_LINKDOWN;
- unsigned int alive;
+ u8 alive, deleted;
unsigned index;
platform_label = rtnl_dereference(net->mpls.platform_label);
@@ -1310,36 +1353,48 @@ static void mpls_ifdown(struct net_device *dev, int event)
continue;
alive = 0;
+ deleted = 0;
change_nexthops(rt) {
+ unsigned int nh_flags = nh->nh_flags;
+
if (rtnl_dereference(nh->nh_dev) != dev)
goto next;
switch (event) {
case NETDEV_DOWN:
case NETDEV_UNREGISTER:
- nh->nh_flags |= RTNH_F_DEAD;
+ nh_flags |= RTNH_F_DEAD;
/* fall through */
case NETDEV_CHANGE:
- nh->nh_flags |= RTNH_F_LINKDOWN;
+ nh_flags |= RTNH_F_LINKDOWN;
break;
}
if (event == NETDEV_UNREGISTER)
RCU_INIT_POINTER(nh->nh_dev, NULL);
+
+ if (nh->nh_flags != nh_flags)
+ WRITE_ONCE(nh->nh_flags, nh_flags);
next:
- if (!(nh->nh_flags & nh_flags))
+ if (!(nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)))
alive++;
+ if (!rtnl_dereference(nh->nh_dev))
+ deleted++;
} endfor_nexthops(rt);
WRITE_ONCE(rt->rt_nhn_alive, alive);
+
+ /* if there are no more nexthops, delete the route */
+ if (event == NETDEV_UNREGISTER && deleted == rt->rt_nhn)
+ mpls_route_update(net, index, NULL, NULL);
}
}
-static void mpls_ifup(struct net_device *dev, unsigned int nh_flags)
+static void mpls_ifup(struct net_device *dev, unsigned int flags)
{
struct mpls_route __rcu **platform_label;
struct net *net = dev_net(dev);
unsigned index;
- int alive;
+ u8 alive;
platform_label = rtnl_dereference(net->mpls.platform_label);
for (index = 0; index < net->mpls.platform_labels; index++) {
@@ -1350,20 +1405,22 @@ static void mpls_ifup(struct net_device *dev, unsigned int nh_flags)
alive = 0;
change_nexthops(rt) {
+ unsigned int nh_flags = nh->nh_flags;
struct net_device *nh_dev =
rtnl_dereference(nh->nh_dev);
- if (!(nh->nh_flags & nh_flags)) {
+ if (!(nh_flags & flags)) {
alive++;
continue;
}
if (nh_dev != dev)
continue;
alive++;
- nh->nh_flags &= ~nh_flags;
+ nh_flags &= ~flags;
+ WRITE_ONCE(nh->nh_flags, flags);
} endfor_nexthops(rt);
- ACCESS_ONCE(rt->rt_nhn_alive) = alive;
+ WRITE_ONCE(rt->rt_nhn_alive, alive);
}
}
@@ -1414,7 +1471,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
mpls_ifdown(dev, event);
mdev = mpls_dev_get(dev);
if (mdev) {
- mpls_dev_sysctl_unregister(mdev);
+ mpls_dev_sysctl_unregister(dev, mdev);
RCU_INIT_POINTER(dev->mpls_ptr, NULL);
call_rcu(&mdev->rcu, mpls_dev_destroy_rcu);
}
@@ -1424,7 +1481,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
if (mdev) {
int err;
- mpls_dev_sysctl_unregister(mdev);
+ mpls_dev_sysctl_unregister(dev, mdev);
err = mpls_dev_sysctl_register(dev, mdev);
if (err)
return notifier_from_errno(err);
@@ -1484,16 +1541,18 @@ int nla_put_labels(struct sk_buff *skb, int attrtype,
EXPORT_SYMBOL_GPL(nla_put_labels);
int nla_get_labels(const struct nlattr *nla,
- u32 max_labels, u8 *labels, u32 label[])
+ u8 max_labels, u8 *labels, u32 label[])
{
unsigned len = nla_len(nla);
- unsigned nla_labels;
struct mpls_shim_hdr *nla_label;
+ u8 nla_labels;
bool bos;
int i;
- /* len needs to be an even multiple of 4 (the label size) */
- if (len & 3)
+ /* len needs to be an even multiple of 4 (the label size). Number
+ * of labels is a u8 so check for overflow.
+ */
+ if (len & 3 || len / 4 > 255)
return -EINVAL;
/* Limit the number of new labels allowed */
@@ -1501,6 +1560,10 @@ int nla_get_labels(const struct nlattr *nla,
if (nla_labels > max_labels)
return -EINVAL;
+ /* when label == NULL, caller wants number of labels */
+ if (!label)
+ goto out;
+
nla_label = nla_data(nla);
bos = true;
for (i = nla_labels - 1; i >= 0; i--, bos = false) {
@@ -1524,6 +1587,7 @@ int nla_get_labels(const struct nlattr *nla,
label[i] = dec.label;
}
+out:
*labels = nla_labels;
return 0;
}
@@ -1585,7 +1649,6 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
err = -EINVAL;
rtm = nlmsg_data(nlh);
- memset(cfg, 0, sizeof(*cfg));
if (rtm->rtm_family != AF_MPLS)
goto errout;
@@ -1684,27 +1747,43 @@ errout:
static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
{
- struct mpls_route_config cfg;
+ struct mpls_route_config *cfg;
int err;
- err = rtm_to_route_config(skb, nlh, &cfg);
+ cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+ if (!cfg)
+ return -ENOMEM;
+
+ err = rtm_to_route_config(skb, nlh, cfg);
if (err < 0)
- return err;
+ goto out;
+
+ err = mpls_route_del(cfg);
+out:
+ kfree(cfg);
- return mpls_route_del(&cfg);
+ return err;
}
static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
{
- struct mpls_route_config cfg;
+ struct mpls_route_config *cfg;
int err;
- err = rtm_to_route_config(skb, nlh, &cfg);
+ cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+ if (!cfg)
+ return -ENOMEM;
+
+ err = rtm_to_route_config(skb, nlh, cfg);
if (err < 0)
- return err;
+ goto out;
+
+ err = mpls_route_add(cfg);
+out:
+ kfree(cfg);
- return mpls_route_add(&cfg);
+ return err;
}
static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
@@ -1761,21 +1840,23 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
} else {
struct rtnexthop *rtnh;
struct nlattr *mp;
- int dead = 0;
- int linkdown = 0;
+ u8 linkdown = 0;
+ u8 dead = 0;
mp = nla_nest_start(skb, RTA_MULTIPATH);
if (!mp)
goto nla_put_failure;
for_nexthops(rt) {
+ dev = rtnl_dereference(nh->nh_dev);
+ if (!dev)
+ continue;
+
rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
if (!rtnh)
goto nla_put_failure;
- dev = rtnl_dereference(nh->nh_dev);
- if (dev)
- rtnh->rtnh_ifindex = dev->ifindex;
+ rtnh->rtnh_ifindex = dev->ifindex;
if (nh->nh_flags & RTNH_F_LINKDOWN) {
rtnh->rtnh_flags |= RTNH_F_LINKDOWN;
linkdown++;
@@ -1867,6 +1948,8 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
size_t nhsize = 0;
for_nexthops(rt) {
+ if (!rtnl_dereference(nh->nh_dev))
+ continue;
nhsize += nla_total_size(sizeof(struct rtnexthop));
/* RTA_VIA */
if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC)
@@ -1929,8 +2012,8 @@ static int resize_platform_label_table(struct net *net, size_t limit)
/* In case the predefined labels need to be populated */
if (limit > MPLS_LABEL_IPV4NULL) {
struct net_device *lo = net->loopback_dev;
- rt0 = mpls_rt_alloc(1, lo->addr_len);
- if (!rt0)
+ rt0 = mpls_rt_alloc(1, lo->addr_len, 0);
+ if (IS_ERR(rt0))
goto nort0;
RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo);
rt0->rt_protocol = RTPROT_KERNEL;
@@ -1943,13 +2026,13 @@ static int resize_platform_label_table(struct net *net, size_t limit)
}
if (limit > MPLS_LABEL_IPV6NULL) {
struct net_device *lo = net->loopback_dev;
- rt2 = mpls_rt_alloc(1, lo->addr_len);
- if (!rt2)
+ rt2 = mpls_rt_alloc(1, lo->addr_len, 0);
+ if (IS_ERR(rt2))
goto nort2;
RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo);
rt2->rt_protocol = RTPROT_KERNEL;
rt2->rt_payload_type = MPT_IPV6;
- rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
+ rt2->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
rt2->rt_nh->nh_via_alen = lo->addr_len;
memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr,
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index 62928d8fabd1..4db6a5971322 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -2,6 +2,11 @@
#define MPLS_INTERNAL_H
#include <net/mpls.h>
+/* put a reasonable limit on the number of labels
+ * we will accept from userspace
+ */
+#define MAX_NEW_LABELS 30
+
struct mpls_entry_decoded {
u32 label;
u8 ttl;
@@ -64,7 +69,6 @@ struct mpls_dev {
struct sk_buff;
#define LABEL_NOT_SPECIFIED (1 << 20)
-#define MAX_NEW_LABELS 2
/* This maximum ha length copied from the definition of struct neighbour */
#define VIA_ALEN_ALIGN sizeof(unsigned long)
@@ -83,13 +87,31 @@ enum mpls_payload_type {
struct mpls_nh { /* next hop label forwarding entry */
struct net_device __rcu *nh_dev;
+
+ /* nh_flags is accessed under RCU in the packet path; it is
+ * modified handling netdev events with rtnl lock held
+ */
unsigned int nh_flags;
- u32 nh_label[MAX_NEW_LABELS];
u8 nh_labels;
u8 nh_via_alen;
u8 nh_via_table;
+ u8 nh_reserved1;
+
+ u32 nh_label[0];
};
+/* offset of via from beginning of mpls_nh */
+#define MPLS_NH_VIA_OFF(num_labels) \
+ ALIGN(sizeof(struct mpls_nh) + (num_labels) * sizeof(u32), \
+ VIA_ALEN_ALIGN)
+
+/* all nexthops within a route have the same size based on the
+ * max number of labels and max via length across all nexthops
+ */
+#define MPLS_NH_SIZE(num_labels, max_via_alen) \
+ (MPLS_NH_VIA_OFF((num_labels)) + \
+ ALIGN((max_via_alen), VIA_ALEN_ALIGN))
+
enum mpls_ttl_propagation {
MPLS_TTL_PROP_DEFAULT,
MPLS_TTL_PROP_ENABLED,
@@ -104,16 +126,16 @@ enum mpls_ttl_propagation {
* +----------------------+
* | mpls_nh 0 |
* +----------------------+
- * | ... |
- * +----------------------+
- * | mpls_nh n-1 |
- * +----------------------+
- * | alignment padding |
+ * | alignment padding | 4 bytes for odd number of labels
* +----------------------+
* | via[rt_max_alen] 0 |
* +----------------------+
+ * | alignment padding | via's aligned on sizeof(unsigned long)
+ * +----------------------+
* | ... |
* +----------------------+
+ * | mpls_nh n-1 |
+ * +----------------------+
* | via[rt_max_alen] n-1 |
* +----------------------+
*/
@@ -123,22 +145,29 @@ struct mpls_route { /* next hop label forwarding entry */
u8 rt_payload_type;
u8 rt_max_alen;
u8 rt_ttl_propagate;
- unsigned int rt_nhn;
- unsigned int rt_nhn_alive;
+ u8 rt_nhn;
+ /* rt_nhn_alive is accessed under RCU in the packet path; it
+ * is modified handling netdev events with rtnl lock held
+ */
+ u8 rt_nhn_alive;
+ u8 rt_nh_size;
+ u8 rt_via_offset;
+ u8 rt_reserved1;
struct mpls_nh rt_nh[0];
};
#define for_nexthops(rt) { \
- int nhsel; struct mpls_nh *nh; \
- for (nhsel = 0, nh = (rt)->rt_nh; \
+ int nhsel; struct mpls_nh *nh; u8 *__nh; \
+ for (nhsel = 0, nh = (rt)->rt_nh, __nh = (u8 *)((rt)->rt_nh); \
nhsel < (rt)->rt_nhn; \
- nh++, nhsel++)
+ __nh += rt->rt_nh_size, nh = (struct mpls_nh *)__nh, nhsel++)
#define change_nexthops(rt) { \
- int nhsel; struct mpls_nh *nh; \
- for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh); \
+ int nhsel; struct mpls_nh *nh; u8 *__nh; \
+ for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh), \
+ __nh = (u8 *)((rt)->rt_nh); \
nhsel < (rt)->rt_nhn; \
- nh++, nhsel++)
+ __nh += rt->rt_nh_size, nh = (struct mpls_nh *)__nh, nhsel++)
#define endfor_nexthops(rt) }
@@ -173,7 +202,7 @@ static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev)
int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels,
const u32 label[]);
-int nla_get_labels(const struct nlattr *nla, u32 max_labels, u8 *labels,
+int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels,
u32 label[]);
int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table,
u8 via[]);
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 22f71fce0bfb..fe00e98667cf 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -164,6 +164,7 @@ static int mpls_build_state(struct nlattr *nla,
struct mpls_iptunnel_encap *tun_encap_info;
struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1];
struct lwtunnel_state *newts;
+ u8 n_labels;
int ret;
ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla,
@@ -175,12 +176,18 @@ static int mpls_build_state(struct nlattr *nla,
return -EINVAL;
- newts = lwtunnel_state_alloc(sizeof(*tun_encap_info));
+ /* determine number of labels */
+ if (nla_get_labels(tb[MPLS_IPTUNNEL_DST],
+ MAX_NEW_LABELS, &n_labels, NULL))
+ return -EINVAL;
+
+ newts = lwtunnel_state_alloc(sizeof(*tun_encap_info) +
+ n_labels * sizeof(u32));
if (!newts)
return -ENOMEM;
tun_encap_info = mpls_lwtunnel_encap(newts);
- ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS,
+ ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], n_labels,
&tun_encap_info->labels, tun_encap_info->label);
if (ret)
goto errout;
@@ -257,7 +264,7 @@ static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
a_hdr->default_ttl != b_hdr->default_ttl)
return 1;
- for (l = 0; l < MAX_NEW_LABELS; l++)
+ for (l = 0; l < a_hdr->labels; l++)
if (a_hdr->label[l] != b_hdr->label[l])
return 1;
return 0;
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index da9df2d56e66..22fc32143e9c 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -290,6 +290,7 @@ void nf_conntrack_unregister_notifier(struct net *net,
BUG_ON(notify != new);
RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
mutex_unlock(&nf_ct_ecache_mutex);
+ /* synchronize_rcu() is called from ctnetlink_exit. */
}
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
@@ -326,6 +327,7 @@ void nf_ct_expect_unregister_notifier(struct net *net,
BUG_ON(notify != new);
RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL);
mutex_unlock(&nf_ct_ecache_mutex);
+ /* synchronize_rcu() is called from ctnetlink_exit. */
}
EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 02bcf00c2492..008299b7f78f 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -53,7 +53,11 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id,
rcu_read_lock();
t = rcu_dereference(nf_ct_ext_types[id]);
- BUG_ON(t == NULL);
+ if (!t) {
+ rcu_read_unlock();
+ return NULL;
+ }
+
off = ALIGN(sizeof(struct nf_ct_ext), t->align);
len = off + t->len + var_alloc_len;
alloc_size = t->alloc_size + var_alloc_len;
@@ -88,7 +92,10 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id,
rcu_read_lock();
t = rcu_dereference(nf_ct_ext_types[id]);
- BUG_ON(t == NULL);
+ if (!t) {
+ rcu_read_unlock();
+ return NULL;
+ }
newoff = ALIGN(old->len, t->align);
newlen = newoff + t->len + var_alloc_len;
@@ -175,6 +182,6 @@ void nf_ct_extend_unregister(struct nf_ct_ext_type *type)
RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL);
update_alloc_size(type);
mutex_unlock(&nf_ct_ext_type_mutex);
- rcu_barrier(); /* Wait for completion of call_rcu()'s */
+ synchronize_rcu();
}
EXPORT_SYMBOL_GPL(nf_ct_extend_unregister);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d49cc1e03c5b..ecdc324c7785 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -3442,6 +3442,7 @@ static void __exit ctnetlink_exit(void)
#ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT
RCU_INIT_POINTER(nfnl_ct_hook, NULL);
#endif
+ synchronize_rcu();
}
module_init(ctnetlink_init);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 94b14c5a8b17..82802e4a6640 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -903,6 +903,8 @@ static void __exit nf_nat_cleanup(void)
#ifdef CONFIG_XFRM
RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL);
#endif
+ synchronize_rcu();
+
for (i = 0; i < NFPROTO_NUMPROTO; i++)
kfree(nf_nat_l4protos[i]);
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index de8782345c86..d45558178da5 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -32,6 +32,13 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers");
+struct nfnl_cthelper {
+ struct list_head list;
+ struct nf_conntrack_helper helper;
+};
+
+static LIST_HEAD(nfnl_cthelper_list);
+
static int
nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
@@ -161,6 +168,7 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper,
int i, ret;
struct nf_conntrack_expect_policy *expect_policy;
struct nlattr *tb[NFCTH_POLICY_SET_MAX+1];
+ unsigned int class_max;
ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
nfnl_cthelper_expect_policy_set);
@@ -170,19 +178,18 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper,
if (!tb[NFCTH_POLICY_SET_NUM])
return -EINVAL;
- helper->expect_class_max =
- ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM]));
-
- if (helper->expect_class_max != 0 &&
- helper->expect_class_max > NF_CT_MAX_EXPECT_CLASSES)
+ class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM]));
+ if (class_max == 0)
+ return -EINVAL;
+ if (class_max > NF_CT_MAX_EXPECT_CLASSES)
return -EOVERFLOW;
expect_policy = kzalloc(sizeof(struct nf_conntrack_expect_policy) *
- helper->expect_class_max, GFP_KERNEL);
+ class_max, GFP_KERNEL);
if (expect_policy == NULL)
return -ENOMEM;
- for (i=0; i<helper->expect_class_max; i++) {
+ for (i = 0; i < class_max; i++) {
if (!tb[NFCTH_POLICY_SET+i])
goto err;
@@ -191,6 +198,8 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper,
if (ret < 0)
goto err;
}
+
+ helper->expect_class_max = class_max - 1;
helper->expect_policy = expect_policy;
return 0;
err:
@@ -203,18 +212,20 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_helper *helper;
+ struct nfnl_cthelper *nfcth;
int ret;
if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN])
return -EINVAL;
- helper = kzalloc(sizeof(struct nf_conntrack_helper), GFP_KERNEL);
- if (helper == NULL)
+ nfcth = kzalloc(sizeof(*nfcth), GFP_KERNEL);
+ if (nfcth == NULL)
return -ENOMEM;
+ helper = &nfcth->helper;
ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]);
if (ret < 0)
- goto err;
+ goto err1;
strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN);
helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
@@ -245,15 +256,101 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
ret = nf_conntrack_helper_register(helper);
if (ret < 0)
- goto err;
+ goto err2;
+ list_add_tail(&nfcth->list, &nfnl_cthelper_list);
return 0;
-err:
- kfree(helper);
+err2:
+ kfree(helper->expect_policy);
+err1:
+ kfree(nfcth);
return ret;
}
static int
+nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy,
+ struct nf_conntrack_expect_policy *new_policy,
+ const struct nlattr *attr)
+{
+ struct nlattr *tb[NFCTH_POLICY_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr,
+ nfnl_cthelper_expect_pol);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFCTH_POLICY_NAME] ||
+ !tb[NFCTH_POLICY_EXPECT_MAX] ||
+ !tb[NFCTH_POLICY_EXPECT_TIMEOUT])
+ return -EINVAL;
+
+ if (nla_strcmp(tb[NFCTH_POLICY_NAME], policy->name))
+ return -EBUSY;
+
+ new_policy->max_expected =
+ ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX]));
+ new_policy->timeout =
+ ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT]));
+
+ return 0;
+}
+
+static int nfnl_cthelper_update_policy_all(struct nlattr *tb[],
+ struct nf_conntrack_helper *helper)
+{
+ struct nf_conntrack_expect_policy new_policy[helper->expect_class_max + 1];
+ struct nf_conntrack_expect_policy *policy;
+ int i, err;
+
+ /* Check first that all policy attributes are well-formed, so we don't
+ * leave things in inconsistent state on errors.
+ */
+ for (i = 0; i < helper->expect_class_max + 1; i++) {
+
+ if (!tb[NFCTH_POLICY_SET + i])
+ return -EINVAL;
+
+ err = nfnl_cthelper_update_policy_one(&helper->expect_policy[i],
+ &new_policy[i],
+ tb[NFCTH_POLICY_SET + i]);
+ if (err < 0)
+ return err;
+ }
+ /* Now we can safely update them. */
+ for (i = 0; i < helper->expect_class_max + 1; i++) {
+ policy = (struct nf_conntrack_expect_policy *)
+ &helper->expect_policy[i];
+ policy->max_expected = new_policy->max_expected;
+ policy->timeout = new_policy->timeout;
+ }
+
+ return 0;
+}
+
+static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper,
+ const struct nlattr *attr)
+{
+ struct nlattr *tb[NFCTH_POLICY_SET_MAX + 1];
+ unsigned int class_max;
+ int err;
+
+ err = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
+ nfnl_cthelper_expect_policy_set);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFCTH_POLICY_SET_NUM])
+ return -EINVAL;
+
+ class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM]));
+ if (helper->expect_class_max + 1 != class_max)
+ return -EBUSY;
+
+ return nfnl_cthelper_update_policy_all(tb, helper);
+}
+
+static int
nfnl_cthelper_update(const struct nlattr * const tb[],
struct nf_conntrack_helper *helper)
{
@@ -263,8 +360,7 @@ nfnl_cthelper_update(const struct nlattr * const tb[],
return -EBUSY;
if (tb[NFCTH_POLICY]) {
- ret = nfnl_cthelper_parse_expect_policy(helper,
- tb[NFCTH_POLICY]);
+ ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]);
if (ret < 0)
return ret;
}
@@ -293,7 +389,8 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
const char *helper_name;
struct nf_conntrack_helper *cur, *helper = NULL;
struct nf_conntrack_tuple tuple;
- int ret = 0, i;
+ struct nfnl_cthelper *nlcth;
+ int ret = 0;
if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE])
return -EINVAL;
@@ -304,31 +401,22 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
if (ret < 0)
return ret;
- rcu_read_lock();
- for (i = 0; i < nf_ct_helper_hsize && !helper; i++) {
- hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) {
+ list_for_each_entry(nlcth, &nfnl_cthelper_list, list) {
+ cur = &nlcth->helper;
- /* skip non-userspace conntrack helpers. */
- if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
- continue;
+ if (strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+ continue;
- if (strncmp(cur->name, helper_name,
- NF_CT_HELPER_NAME_LEN) != 0)
- continue;
+ if ((tuple.src.l3num != cur->tuple.src.l3num ||
+ tuple.dst.protonum != cur->tuple.dst.protonum))
+ continue;
- if ((tuple.src.l3num != cur->tuple.src.l3num ||
- tuple.dst.protonum != cur->tuple.dst.protonum))
- continue;
+ if (nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
- if (nlh->nlmsg_flags & NLM_F_EXCL) {
- ret = -EEXIST;
- goto err;
- }
- helper = cur;
- break;
- }
+ helper = cur;
+ break;
}
- rcu_read_unlock();
if (helper == NULL)
ret = nfnl_cthelper_create(tb, &tuple);
@@ -336,9 +424,6 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
ret = nfnl_cthelper_update(tb, helper);
return ret;
-err:
- rcu_read_unlock();
- return ret;
}
static int
@@ -377,10 +462,10 @@ nfnl_cthelper_dump_policy(struct sk_buff *skb,
goto nla_put_failure;
if (nla_put_be32(skb, NFCTH_POLICY_SET_NUM,
- htonl(helper->expect_class_max)))
+ htonl(helper->expect_class_max + 1)))
goto nla_put_failure;
- for (i=0; i<helper->expect_class_max; i++) {
+ for (i = 0; i < helper->expect_class_max + 1; i++) {
nest_parms2 = nla_nest_start(skb,
(NFCTH_POLICY_SET+i) | NLA_F_NESTED);
if (nest_parms2 == NULL)
@@ -502,11 +587,12 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const tb[])
{
- int ret = -ENOENT, i;
+ int ret = -ENOENT;
struct nf_conntrack_helper *cur;
struct sk_buff *skb2;
char *helper_name = NULL;
struct nf_conntrack_tuple tuple;
+ struct nfnl_cthelper *nlcth;
bool tuple_set = false;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
@@ -527,45 +613,39 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
tuple_set = true;
}
- for (i = 0; i < nf_ct_helper_hsize; i++) {
- hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) {
+ list_for_each_entry(nlcth, &nfnl_cthelper_list, list) {
+ cur = &nlcth->helper;
+ if (helper_name &&
+ strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+ continue;
- /* skip non-userspace conntrack helpers. */
- if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
- continue;
+ if (tuple_set &&
+ (tuple.src.l3num != cur->tuple.src.l3num ||
+ tuple.dst.protonum != cur->tuple.dst.protonum))
+ continue;
- if (helper_name && strncmp(cur->name, helper_name,
- NF_CT_HELPER_NAME_LEN) != 0) {
- continue;
- }
- if (tuple_set &&
- (tuple.src.l3num != cur->tuple.src.l3num ||
- tuple.dst.protonum != cur->tuple.dst.protonum))
- continue;
-
- skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (skb2 == NULL) {
- ret = -ENOMEM;
- break;
- }
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (skb2 == NULL) {
+ ret = -ENOMEM;
+ break;
+ }
- ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq,
- NFNL_MSG_TYPE(nlh->nlmsg_type),
- NFNL_MSG_CTHELPER_NEW, cur);
- if (ret <= 0) {
- kfree_skb(skb2);
- break;
- }
+ ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(nlh->nlmsg_type),
+ NFNL_MSG_CTHELPER_NEW, cur);
+ if (ret <= 0) {
+ kfree_skb(skb2);
+ break;
+ }
- ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
- MSG_DONTWAIT);
- if (ret > 0)
- ret = 0;
+ ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
+ MSG_DONTWAIT);
+ if (ret > 0)
+ ret = 0;
- /* this avoids a loop in nfnetlink. */
- return ret == -EAGAIN ? -ENOBUFS : ret;
- }
+ /* this avoids a loop in nfnetlink. */
+ return ret == -EAGAIN ? -ENOBUFS : ret;
}
return ret;
}
@@ -576,10 +656,10 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
{
char *helper_name = NULL;
struct nf_conntrack_helper *cur;
- struct hlist_node *tmp;
struct nf_conntrack_tuple tuple;
bool tuple_set = false, found = false;
- int i, j = 0, ret;
+ struct nfnl_cthelper *nlcth, *n;
+ int j = 0, ret;
if (tb[NFCTH_NAME])
helper_name = nla_data(tb[NFCTH_NAME]);
@@ -592,28 +672,27 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
tuple_set = true;
}
- for (i = 0; i < nf_ct_helper_hsize; i++) {
- hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i],
- hnode) {
- /* skip non-userspace conntrack helpers. */
- if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
- continue;
+ list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) {
+ cur = &nlcth->helper;
+ j++;
- j++;
+ if (helper_name &&
+ strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+ continue;
- if (helper_name && strncmp(cur->name, helper_name,
- NF_CT_HELPER_NAME_LEN) != 0) {
- continue;
- }
- if (tuple_set &&
- (tuple.src.l3num != cur->tuple.src.l3num ||
- tuple.dst.protonum != cur->tuple.dst.protonum))
- continue;
+ if (tuple_set &&
+ (tuple.src.l3num != cur->tuple.src.l3num ||
+ tuple.dst.protonum != cur->tuple.dst.protonum))
+ continue;
- found = true;
- nf_conntrack_helper_unregister(cur);
- }
+ found = true;
+ nf_conntrack_helper_unregister(cur);
+ kfree(cur->expect_policy);
+
+ list_del(&nlcth->list);
+ kfree(nlcth);
}
+
/* Make sure we return success if we flush and there is no helpers */
return (found || j == 0) ? 0 : -ENOENT;
}
@@ -662,20 +741,16 @@ err_out:
static void __exit nfnl_cthelper_exit(void)
{
struct nf_conntrack_helper *cur;
- struct hlist_node *tmp;
- int i;
+ struct nfnl_cthelper *nlcth, *n;
nfnetlink_subsys_unregister(&nfnl_cthelper_subsys);
- for (i=0; i<nf_ct_helper_hsize; i++) {
- hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i],
- hnode) {
- /* skip non-userspace conntrack helpers. */
- if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
- continue;
+ list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) {
+ cur = &nlcth->helper;
- nf_conntrack_helper_unregister(cur);
- }
+ nf_conntrack_helper_unregister(cur);
+ kfree(cur->expect_policy);
+ kfree(nlcth);
}
}
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index baa75f3ab7e7..57c2cdf7b691 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -646,8 +646,8 @@ static void __exit cttimeout_exit(void)
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL);
RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL);
+ synchronize_rcu();
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
- rcu_barrier();
}
module_init(cttimeout_init);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 3ee0b8a000a4..933509ebf3d3 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -443,7 +443,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
skb = alloc_skb(size, GFP_ATOMIC);
if (!skb) {
skb_tx_error(entskb);
- return NULL;
+ goto nlmsg_failure;
}
nlh = nlmsg_put(skb, 0, 0,
@@ -452,7 +452,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
if (!nlh) {
skb_tx_error(entskb);
kfree_skb(skb);
- return NULL;
+ goto nlmsg_failure;
}
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = entry->state.pf;
@@ -598,12 +598,17 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
}
nlh->nlmsg_len = skb->len;
+ if (seclen)
+ security_release_secctx(secdata, seclen);
return skb;
nla_put_failure:
skb_tx_error(entskb);
kfree_skb(skb);
net_err_ratelimited("nf_queue: error creating packet message\n");
+nlmsg_failure:
+ if (seclen)
+ security_release_secctx(secdata, seclen);
return NULL;
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 596eaff66649..fc232441cf23 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -78,14 +78,6 @@ struct listeners {
/* state bits */
#define NETLINK_S_CONGESTED 0x0
-/* flags */
-#define NETLINK_F_KERNEL_SOCKET 0x1
-#define NETLINK_F_RECV_PKTINFO 0x2
-#define NETLINK_F_BROADCAST_SEND_ERROR 0x4
-#define NETLINK_F_RECV_NO_ENOBUFS 0x8
-#define NETLINK_F_LISTEN_ALL_NSID 0x10
-#define NETLINK_F_CAP_ACK 0x20
-
static inline int netlink_is_kernel(struct sock *sk)
{
return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET;
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 4fdb38318977..f792f8d7f982 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -6,6 +6,14 @@
#include <linux/workqueue.h>
#include <net/sock.h>
+/* flags */
+#define NETLINK_F_KERNEL_SOCKET 0x1
+#define NETLINK_F_RECV_PKTINFO 0x2
+#define NETLINK_F_BROADCAST_SEND_ERROR 0x4
+#define NETLINK_F_RECV_NO_ENOBUFS 0x8
+#define NETLINK_F_LISTEN_ALL_NSID 0x10
+#define NETLINK_F_CAP_ACK 0x20
+
#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long))
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index a5546249fb10..8faa20b4d457 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -19,6 +19,27 @@ static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb)
nlk->groups);
}
+static int sk_diag_put_flags(struct sock *sk, struct sk_buff *skb)
+{
+ struct netlink_sock *nlk = nlk_sk(sk);
+ u32 flags = 0;
+
+ if (nlk->cb_running)
+ flags |= NDIAG_FLAG_CB_RUNNING;
+ if (nlk->flags & NETLINK_F_RECV_PKTINFO)
+ flags |= NDIAG_FLAG_PKTINFO;
+ if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
+ flags |= NDIAG_FLAG_BROADCAST_ERROR;
+ if (nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)
+ flags |= NDIAG_FLAG_NO_ENOBUFS;
+ if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID)
+ flags |= NDIAG_FLAG_LISTEN_ALL_NSID;
+ if (nlk->flags & NETLINK_F_CAP_ACK)
+ flags |= NDIAG_FLAG_CAP_ACK;
+
+ return nla_put_u32(skb, NETLINK_DIAG_FLAGS, flags);
+}
+
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
struct netlink_diag_req *req,
u32 portid, u32 seq, u32 flags, int sk_ino)
@@ -52,6 +73,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO))
goto out_nlmsg_trim;
+ if ((req->ndiag_show & NDIAG_SHOW_FLAGS) &&
+ sk_diag_put_flags(sk, skb))
+ goto out_nlmsg_trim;
+
nlmsg_end(skb, nlh);
return 0;
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index e0a87776a010..7b2c2fce408a 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -643,8 +643,8 @@ static bool skb_nfct_cached(struct net *net,
*/
if (nf_ct_is_confirmed(ct))
nf_ct_delete(ct, 0, 0);
- else
- nf_conntrack_put(&ct->ct_general);
+
+ nf_conntrack_put(&ct->ct_general);
nf_ct_set(skb, NULL, 0);
return false;
}
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 9d4bb8eb63f2..3f76cb765e5b 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -527,7 +527,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
/* Link layer. */
clear_vlan(key);
- if (key->mac_proto == MAC_PROTO_NONE) {
+ if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
if (unlikely(eth_type_vlan(skb->protocol)))
return -EINVAL;
@@ -745,7 +745,13 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
{
- return key_extract(skb, key);
+ int res;
+
+ res = key_extract(skb, key);
+ if (!res)
+ key->mac_proto &= ~SW_FLOW_KEY_INVALID;
+
+ return res;
}
static int key_extract_mac_proto(struct sk_buff *skb)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index a0dbe7ca8f72..8489beff5c25 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3665,6 +3665,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
return -EBUSY;
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
+ if (val > INT_MAX)
+ return -EINVAL;
po->tp_reserve = val;
return 0;
}
@@ -4193,8 +4195,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
goto out;
if (po->tp_version >= TPACKET_V3 &&
- (int)(req->tp_block_size -
- BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0)
+ req->tp_block_size <=
+ BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv))
goto out;
if (unlikely(req->tp_frame_size < po->tp_hdrlen +
po->tp_reserve))
@@ -4205,6 +4207,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
if (unlikely(rb->frames_per_block == 0))
goto out;
+ if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr))
+ goto out;
if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
req->tp_frame_nr))
goto out;
diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig
index b83c6807a5ae..326fd97444f5 100644
--- a/net/qrtr/Kconfig
+++ b/net/qrtr/Kconfig
@@ -16,7 +16,7 @@ if QRTR
config QRTR_SMD
tristate "SMD IPC Router channels"
- depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n)
+ depends on RPMSG || (COMPILE_TEST && RPMSG=n)
---help---
Say Y here to support SMD based ipcrouter channels. SMD is the
most common transport for IPC Router.
diff --git a/net/qrtr/smd.c b/net/qrtr/smd.c
index 0d11132b3370..50615d5efac1 100644
--- a/net/qrtr/smd.c
+++ b/net/qrtr/smd.c
@@ -14,21 +14,21 @@
#include <linux/module.h>
#include <linux/skbuff.h>
-#include <linux/soc/qcom/smd.h>
+#include <linux/rpmsg.h>
#include "qrtr.h"
struct qrtr_smd_dev {
struct qrtr_endpoint ep;
- struct qcom_smd_channel *channel;
+ struct rpmsg_endpoint *channel;
struct device *dev;
};
/* from smd to qrtr */
-static int qcom_smd_qrtr_callback(struct qcom_smd_channel *channel,
- const void *data, size_t len)
+static int qcom_smd_qrtr_callback(struct rpmsg_device *rpdev,
+ void *data, int len, void *priv, u32 addr)
{
- struct qrtr_smd_dev *qdev = qcom_smd_get_drvdata(channel);
+ struct qrtr_smd_dev *qdev = dev_get_drvdata(&rpdev->dev);
int rc;
if (!qdev)
@@ -54,7 +54,7 @@ static int qcom_smd_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
if (rc)
goto out;
- rc = qcom_smd_send(qdev->channel, skb->data, skb->len);
+ rc = rpmsg_send(qdev->channel, skb->data, skb->len);
out:
if (rc)
@@ -64,57 +64,55 @@ out:
return rc;
}
-static int qcom_smd_qrtr_probe(struct qcom_smd_device *sdev)
+static int qcom_smd_qrtr_probe(struct rpmsg_device *rpdev)
{
struct qrtr_smd_dev *qdev;
int rc;
- qdev = devm_kzalloc(&sdev->dev, sizeof(*qdev), GFP_KERNEL);
+ qdev = devm_kzalloc(&rpdev->dev, sizeof(*qdev), GFP_KERNEL);
if (!qdev)
return -ENOMEM;
- qdev->channel = sdev->channel;
- qdev->dev = &sdev->dev;
+ qdev->channel = rpdev->ept;
+ qdev->dev = &rpdev->dev;
qdev->ep.xmit = qcom_smd_qrtr_send;
rc = qrtr_endpoint_register(&qdev->ep, QRTR_EP_NID_AUTO);
if (rc)
return rc;
- qcom_smd_set_drvdata(sdev->channel, qdev);
- dev_set_drvdata(&sdev->dev, qdev);
+ dev_set_drvdata(&rpdev->dev, qdev);
- dev_dbg(&sdev->dev, "Qualcomm SMD QRTR driver probed\n");
+ dev_dbg(&rpdev->dev, "Qualcomm SMD QRTR driver probed\n");
return 0;
}
-static void qcom_smd_qrtr_remove(struct qcom_smd_device *sdev)
+static void qcom_smd_qrtr_remove(struct rpmsg_device *rpdev)
{
- struct qrtr_smd_dev *qdev = dev_get_drvdata(&sdev->dev);
+ struct qrtr_smd_dev *qdev = dev_get_drvdata(&rpdev->dev);
qrtr_endpoint_unregister(&qdev->ep);
- dev_set_drvdata(&sdev->dev, NULL);
+ dev_set_drvdata(&rpdev->dev, NULL);
}
-static const struct qcom_smd_id qcom_smd_qrtr_smd_match[] = {
+static const struct rpmsg_device_id qcom_smd_qrtr_smd_match[] = {
{ "IPCRTR" },
{}
};
-static struct qcom_smd_driver qcom_smd_qrtr_driver = {
+static struct rpmsg_driver qcom_smd_qrtr_driver = {
.probe = qcom_smd_qrtr_probe,
.remove = qcom_smd_qrtr_remove,
.callback = qcom_smd_qrtr_callback,
- .smd_match_table = qcom_smd_qrtr_smd_match,
- .driver = {
+ .id_table = qcom_smd_qrtr_smd_match,
+ .drv = {
.name = "qcom_smd_qrtr",
- .owner = THIS_MODULE,
},
};
-module_qcom_smd_driver(qcom_smd_qrtr_driver);
+module_rpmsg_driver(qcom_smd_qrtr_driver);
MODULE_DESCRIPTION("Qualcomm IPC-Router SMD interface driver");
MODULE_LICENSE("GPL v2");
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 1fa75ab7b733..6a5ebdea7d2e 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -333,11 +333,19 @@ void rds_conn_shutdown(struct rds_conn_path *cp)
rds_conn_path_reset(cp);
if (!rds_conn_path_transition(cp, RDS_CONN_DISCONNECTING,
+ RDS_CONN_DOWN) &&
+ !rds_conn_path_transition(cp, RDS_CONN_ERROR,
RDS_CONN_DOWN)) {
/* This can happen - eg when we're in the middle of tearing
* down the connection, and someone unloads the rds module.
- * Quite reproduceable with loopback connections.
+ * Quite reproducible with loopback connections.
* Mostly harmless.
+ *
+ * Note that this also happens with rds-tcp because
+ * we could have triggered rds_conn_path_drop in irq
+ * mode from rds_tcp_state change on the receipt of
+ * a FIN, thus we need to recheck for RDS_CONN_ERROR
+ * here.
*/
rds_conn_path_error(cp, "%s: failed to transition "
"to state DOWN, current state "
diff --git a/net/rds/threads.c b/net/rds/threads.c
index e36e333a0aa0..3e447d056d09 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -156,7 +156,7 @@ void rds_connect_worker(struct work_struct *work)
struct rds_connection *conn = cp->cp_conn;
int ret;
- if (cp->cp_index > 1 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr)
+ if (cp->cp_index > 0 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr)
return;
clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 26a7b1db1361..7486926e60a8 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -740,6 +740,25 @@ static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call,
}
/*
+ * Abort a call due to a protocol error.
+ */
+static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call,
+ struct sk_buff *skb,
+ const char *eproto_why,
+ const char *why,
+ u32 abort_code)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why);
+ return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO);
+}
+
+#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \
+ __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \
+ (abort_why), (abort_code))
+
+/*
* conn_client.c
*/
extern unsigned int rxrpc_max_client_connections;
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 0ed181f53f32..1752fcf8e8f1 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -413,11 +413,11 @@ found_service:
case RXRPC_CONN_REMOTELY_ABORTED:
rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
- conn->remote_abort, ECONNABORTED);
+ conn->remote_abort, -ECONNABORTED);
break;
case RXRPC_CONN_LOCALLY_ABORTED:
rxrpc_abort_call("CON", call, sp->hdr.seq,
- conn->local_abort, ECONNABORTED);
+ conn->local_abort, -ECONNABORTED);
break;
default:
BUG();
@@ -600,7 +600,7 @@ int rxrpc_reject_call(struct rxrpc_sock *rx)
write_lock_bh(&call->state_lock);
switch (call->state) {
case RXRPC_CALL_SERVER_ACCEPTING:
- __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, ECONNABORTED);
+ __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, -ECONNABORTED);
abort = true;
/* fall through */
case RXRPC_CALL_COMPLETE:
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 97a17ada4431..7a77844aab16 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -386,7 +386,7 @@ recheck_state:
now = ktime_get_real();
if (ktime_before(call->expire_at, now)) {
- rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME);
+ rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME);
set_bit(RXRPC_CALL_EV_ABORT, &call->events);
goto recheck_state;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index d79cd36987a9..47f7f4205653 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -486,7 +486,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
call = list_entry(rx->to_be_accepted.next,
struct rxrpc_call, accept_link);
list_del(&call->accept_link);
- rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, ECONNRESET);
+ rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, -ECONNRESET);
rxrpc_put_call(call, rxrpc_call_put);
}
@@ -494,7 +494,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
call = list_entry(rx->sock_calls.next,
struct rxrpc_call, sock_link);
rxrpc_get_call(call, rxrpc_call_got);
- rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET);
+ rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, -ECONNRESET);
rxrpc_send_abort_packet(call);
rxrpc_release_call(rx, call);
rxrpc_put_call(call, rxrpc_call_put);
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index c3be03e8d098..e8dea0d49e7f 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -550,6 +550,7 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
call->cid = conn->proto.cid | channel;
call->call_id = call_id;
+ trace_rxrpc_connect_call(call);
_net("CONNECT call %08x:%08x as call %d on conn %d",
call->cid, call->call_id, call->debug_id, conn->debug_id);
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index b099b64366f3..46babcf82ce8 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -168,7 +168,7 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn,
* generate a connection-level abort
*/
static int rxrpc_abort_connection(struct rxrpc_connection *conn,
- u32 error, u32 abort_code)
+ int error, u32 abort_code)
{
struct rxrpc_wire_header whdr;
struct msghdr msg;
@@ -281,14 +281,17 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
case RXRPC_PACKET_TYPE_ABORT:
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
- &wtmp, sizeof(wtmp)) < 0)
+ &wtmp, sizeof(wtmp)) < 0) {
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
+ tracepoint_string("bad_abort"));
return -EPROTO;
+ }
abort_code = ntohl(wtmp);
_proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code);
conn->state = RXRPC_CONN_REMOTELY_ABORTED;
rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED,
- abort_code, ECONNABORTED);
+ abort_code, -ECONNABORTED);
return -ECONNABORTED;
case RXRPC_PACKET_TYPE_CHALLENGE:
@@ -327,7 +330,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
return 0;
default:
- _leave(" = -EPROTO [%u]", sp->hdr.type);
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
+ tracepoint_string("bad_conn_pkt"));
return -EPROTO;
}
}
@@ -370,7 +374,7 @@ static void rxrpc_secure_connection(struct rxrpc_connection *conn)
abort:
_debug("abort %d, %d", ret, abort_code);
- rxrpc_abort_connection(conn, -ret, abort_code);
+ rxrpc_abort_connection(conn, ret, abort_code);
_leave(" [aborted]");
}
@@ -419,9 +423,8 @@ requeue_and_leave:
goto out;
protocol_error:
- if (rxrpc_abort_connection(conn, -ret, abort_code) < 0)
+ if (rxrpc_abort_connection(conn, ret, abort_code) < 0)
goto requeue_and_leave;
rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
- _leave(" [EPROTO]");
goto out;
}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 18b2ad8be8e2..45dba732a3b4 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -30,7 +30,7 @@
static void rxrpc_proto_abort(const char *why,
struct rxrpc_call *call, rxrpc_seq_t seq)
{
- if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, EBADMSG)) {
+ if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG)) {
set_bit(RXRPC_CALL_EV_ABORT, &call->events);
rxrpc_queue_call(call);
}
@@ -665,6 +665,8 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
rwind = RXRPC_RXTX_BUFF_SIZE - 1;
if (rwind > call->tx_winsize)
wake = true;
+ trace_rxrpc_rx_rwind_change(call, sp->hdr.serial,
+ ntohl(ackinfo->rwind), wake);
call->tx_winsize = rwind;
}
@@ -877,7 +879,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb)
}
/*
- * Process an ABORT packet.
+ * Process an ABORT packet directed at a call.
*/
static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb)
{
@@ -892,10 +894,12 @@ static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb)
&wtmp, sizeof(wtmp)) >= 0)
abort_code = ntohl(wtmp);
+ trace_rxrpc_rx_abort(call, sp->hdr.serial, abort_code);
+
_proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code);
if (rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
- abort_code, ECONNABORTED))
+ abort_code, -ECONNABORTED))
rxrpc_notify_socket(call);
}
@@ -958,7 +962,7 @@ static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn,
case RXRPC_CALL_COMPLETE:
break;
default:
- if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, ESHUTDOWN)) {
+ if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN)) {
set_bit(RXRPC_CALL_EV_ABORT, &call->events);
rxrpc_queue_call(call);
}
@@ -1017,8 +1021,11 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
struct rxrpc_wire_header whdr;
/* dig out the RxRPC connection details */
- if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0)
+ if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) {
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
+ tracepoint_string("bad_hdr"));
return -EBADMSG;
+ }
memset(sp, 0, sizeof(*sp));
sp->hdr.epoch = ntohl(whdr.epoch);
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index 7d4375e557e6..af276f173b10 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -46,7 +46,10 @@ static int none_respond_to_challenge(struct rxrpc_connection *conn,
struct sk_buff *skb,
u32 *_abort_code)
{
- *_abort_code = RX_PROTOCOL_ERROR;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
+ tracepoint_string("chall_none"));
return -EPROTO;
}
@@ -54,7 +57,10 @@ static int none_verify_response(struct rxrpc_connection *conn,
struct sk_buff *skb,
u32 *_abort_code)
{
- *_abort_code = RX_PROTOCOL_ERROR;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
+ tracepoint_string("resp_none"));
return -EPROTO;
}
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index bf13b8470c9a..1ed9c0c2e94f 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -296,7 +296,7 @@ void rxrpc_peer_error_distributor(struct work_struct *work)
hlist_del_init(&call->error_link);
rxrpc_see_call(call);
- if (rxrpc_set_call_completion(call, compl, 0, error))
+ if (rxrpc_set_call_completion(call, compl, 0, -error))
rxrpc_notify_socket(call);
}
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 3e2f1a8e9c5b..f9caf3b77509 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -83,11 +83,11 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp);
break;
case RXRPC_CALL_NETWORK_ERROR:
- tmp = call->error;
+ tmp = -call->error;
ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &tmp);
break;
case RXRPC_CALL_LOCAL_ERROR:
- tmp = call->error;
+ tmp = -call->error;
ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp);
break;
default:
@@ -682,14 +682,16 @@ out:
return ret;
short_data:
+ trace_rxrpc_rx_eproto(call, 0, tracepoint_string("short_data"));
ret = -EBADMSG;
goto out;
excess_data:
+ trace_rxrpc_rx_eproto(call, 0, tracepoint_string("excess_data"));
ret = -EMSGSIZE;
goto out;
call_complete:
*_abort = call->abort_code;
- ret = -call->error;
+ ret = call->error;
if (call->completion == RXRPC_CALL_SUCCEEDED) {
ret = 1;
if (size > 0)
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 4374e7b9c7bf..1bb9b2ccc267 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -148,15 +148,13 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
u32 data_size,
void *sechdr)
{
- struct rxrpc_skb_priv *sp;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxkad_level1_hdr hdr;
struct rxrpc_crypt iv;
struct scatterlist sg;
u16 check;
- sp = rxrpc_skb(skb);
-
_enter("");
check = sp->hdr.seq ^ call->call_id;
@@ -323,6 +321,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
struct rxrpc_crypt iv;
struct scatterlist sg[16];
struct sk_buff *trailer;
+ bool aborted;
u32 data_size, buf;
u16 check;
int nsg;
@@ -330,7 +329,8 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
_enter("");
if (len < 8) {
- rxrpc_abort_call("V1H", call, seq, RXKADSEALEDINCON, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_hdr", "V1H",
+ RXKADSEALEDINCON);
goto protocol_error;
}
@@ -355,7 +355,8 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
/* Extract the decrypted packet length */
if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) {
- rxrpc_abort_call("XV1", call, seq, RXKADDATALEN, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_len", "XV1",
+ RXKADDATALEN);
goto protocol_error;
}
offset += sizeof(sechdr);
@@ -368,12 +369,14 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
check ^= seq ^ call->call_id;
check &= 0xffff;
if (check != 0) {
- rxrpc_abort_call("V1C", call, seq, RXKADSEALEDINCON, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_check", "V1C",
+ RXKADSEALEDINCON);
goto protocol_error;
}
if (data_size > len) {
- rxrpc_abort_call("V1L", call, seq, RXKADDATALEN, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_datalen", "V1L",
+ RXKADDATALEN);
goto protocol_error;
}
@@ -381,8 +384,8 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
return 0;
protocol_error:
- rxrpc_send_abort_packet(call);
- _leave(" = -EPROTO");
+ if (aborted)
+ rxrpc_send_abort_packet(call);
return -EPROTO;
nomem:
@@ -403,6 +406,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
struct rxrpc_crypt iv;
struct scatterlist _sg[4], *sg;
struct sk_buff *trailer;
+ bool aborted;
u32 data_size, buf;
u16 check;
int nsg;
@@ -410,7 +414,8 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
_enter(",{%d}", skb->len);
if (len < 8) {
- rxrpc_abort_call("V2H", call, seq, RXKADSEALEDINCON, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_hdr", "V2H",
+ RXKADSEALEDINCON);
goto protocol_error;
}
@@ -445,7 +450,8 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
/* Extract the decrypted packet length */
if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) {
- rxrpc_abort_call("XV2", call, seq, RXKADDATALEN, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_len", "XV2",
+ RXKADDATALEN);
goto protocol_error;
}
offset += sizeof(sechdr);
@@ -458,12 +464,14 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
check ^= seq ^ call->call_id;
check &= 0xffff;
if (check != 0) {
- rxrpc_abort_call("V2C", call, seq, RXKADSEALEDINCON, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_check", "V2C",
+ RXKADSEALEDINCON);
goto protocol_error;
}
if (data_size > len) {
- rxrpc_abort_call("V2L", call, seq, RXKADDATALEN, EPROTO);
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_datalen", "V2L",
+ RXKADDATALEN);
goto protocol_error;
}
@@ -471,8 +479,8 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
return 0;
protocol_error:
- rxrpc_send_abort_packet(call);
- _leave(" = -EPROTO");
+ if (aborted)
+ rxrpc_send_abort_packet(call);
return -EPROTO;
nomem:
@@ -491,6 +499,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_crypt iv;
struct scatterlist sg;
+ bool aborted;
u16 cksum;
u32 x, y;
@@ -522,10 +531,9 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
cksum = 1; /* zero checksums are not permitted */
if (cksum != expected_cksum) {
- rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO);
- rxrpc_send_abort_packet(call);
- _leave(" = -EPROTO [csum failed]");
- return -EPROTO;
+ aborted = rxrpc_abort_eproto(call, skb, "rxkad_csum", "VCK",
+ RXKADSEALEDINCON);
+ goto protocol_error;
}
switch (call->conn->params.security_level) {
@@ -538,6 +546,11 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
default:
return -ENOANO;
}
+
+protocol_error:
+ if (aborted)
+ rxrpc_send_abort_packet(call);
+ return -EPROTO;
}
/*
@@ -754,22 +767,23 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
struct rxkad_response resp
__attribute__((aligned(8))); /* must be aligned for crypto */
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ const char *eproto;
u32 version, nonce, min_level, abort_code;
int ret;
_enter("{%d,%x}", conn->debug_id, key_serial(conn->params.key));
- if (!conn->params.key) {
- _leave(" = -EPROTO [no key]");
- return -EPROTO;
- }
+ eproto = tracepoint_string("chall_no_key");
+ abort_code = RX_PROTOCOL_ERROR;
+ if (!conn->params.key)
+ goto protocol_error;
+ abort_code = RXKADEXPIRED;
ret = key_validate(conn->params.key);
- if (ret < 0) {
- *_abort_code = RXKADEXPIRED;
- return ret;
- }
+ if (ret < 0)
+ goto other_error;
+ eproto = tracepoint_string("chall_short");
abort_code = RXKADPACKETSHORT;
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
&challenge, sizeof(challenge)) < 0)
@@ -782,13 +796,15 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
_proto("Rx CHALLENGE %%%u { v=%u n=%u ml=%u }",
sp->hdr.serial, version, nonce, min_level);
+ eproto = tracepoint_string("chall_ver");
abort_code = RXKADINCONSISTENCY;
if (version != RXKAD_VERSION)
goto protocol_error;
abort_code = RXKADLEVELFAIL;
+ ret = -EACCES;
if (conn->params.security_level < min_level)
- goto protocol_error;
+ goto other_error;
token = conn->params.key->payload.data[0];
@@ -815,28 +831,34 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
return rxkad_send_response(conn, &sp->hdr, &resp, token->kad);
protocol_error:
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
+ ret = -EPROTO;
+other_error:
*_abort_code = abort_code;
- _leave(" = -EPROTO [%d]", abort_code);
- return -EPROTO;
+ return ret;
}
/*
* decrypt the kerberos IV ticket in the response
*/
static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
+ struct sk_buff *skb,
void *ticket, size_t ticket_len,
struct rxrpc_crypt *_session_key,
time_t *_expiry,
u32 *_abort_code)
{
struct skcipher_request *req;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt iv, key;
struct scatterlist sg[1];
struct in_addr addr;
unsigned int life;
+ const char *eproto;
time_t issue, now;
bool little_endian;
int ret;
+ u32 abort_code;
u8 *p, *q, *name, *end;
_enter("{%d},{%x}", conn->debug_id, key_serial(conn->server_key));
@@ -847,11 +869,11 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
if (ret < 0) {
switch (ret) {
case -EKEYEXPIRED:
- *_abort_code = RXKADEXPIRED;
- goto error;
+ abort_code = RXKADEXPIRED;
+ goto other_error;
default:
- *_abort_code = RXKADNOAUTH;
- goto error;
+ abort_code = RXKADNOAUTH;
+ goto other_error;
}
}
@@ -860,13 +882,11 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
memcpy(&iv, &conn->server_key->payload.data[2], sizeof(iv));
+ ret = -ENOMEM;
req = skcipher_request_alloc(conn->server_key->payload.data[0],
GFP_NOFS);
- if (!req) {
- *_abort_code = RXKADNOAUTH;
- ret = -ENOMEM;
- goto error;
- }
+ if (!req)
+ goto temporary_error;
sg_init_one(&sg[0], ticket, ticket_len);
skcipher_request_set_callback(req, 0, NULL, NULL);
@@ -877,11 +897,12 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
p = ticket;
end = p + ticket_len;
-#define Z(size) \
+#define Z(field) \
({ \
u8 *__str = p; \
+ eproto = tracepoint_string("rxkad_bad_"#field); \
q = memchr(p, 0, end - p); \
- if (!q || q - p > (size)) \
+ if (!q || q - p > (field##_SZ)) \
goto bad_ticket; \
for (; p < q; p++) \
if (!isprint(*p)) \
@@ -896,17 +917,18 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
p++;
/* extract the authentication name */
- name = Z(ANAME_SZ);
+ name = Z(ANAME);
_debug("KIV ANAME: %s", name);
/* extract the principal's instance */
- name = Z(INST_SZ);
+ name = Z(INST);
_debug("KIV INST : %s", name);
/* extract the principal's authentication domain */
- name = Z(REALM_SZ);
+ name = Z(REALM);
_debug("KIV REALM: %s", name);
+ eproto = tracepoint_string("rxkad_bad_len");
if (end - p < 4 + 8 + 4 + 2)
goto bad_ticket;
@@ -941,36 +963,37 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
/* check the ticket is in date */
if (issue > now) {
- *_abort_code = RXKADNOAUTH;
+ abort_code = RXKADNOAUTH;
ret = -EKEYREJECTED;
- goto error;
+ goto other_error;
}
if (issue < now - life) {
- *_abort_code = RXKADEXPIRED;
+ abort_code = RXKADEXPIRED;
ret = -EKEYEXPIRED;
- goto error;
+ goto other_error;
}
*_expiry = issue + life;
/* get the service name */
- name = Z(SNAME_SZ);
+ name = Z(SNAME);
_debug("KIV SNAME: %s", name);
/* get the service instance name */
- name = Z(INST_SZ);
+ name = Z(INST);
_debug("KIV SINST: %s", name);
-
- ret = 0;
-error:
- _leave(" = %d", ret);
- return ret;
+ return 0;
bad_ticket:
- *_abort_code = RXKADBADTICKET;
- ret = -EBADMSG;
- goto error;
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
+ abort_code = RXKADBADTICKET;
+ ret = -EPROTO;
+other_error:
+ *_abort_code = abort_code;
+ return ret;
+temporary_error:
+ return ret;
}
/*
@@ -1020,6 +1043,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
__attribute__((aligned(8))); /* must be aligned for crypto */
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt session_key;
+ const char *eproto;
time_t expiry;
void *ticket;
u32 abort_code, version, kvno, ticket_len, level;
@@ -1028,6 +1052,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
_enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key));
+ eproto = tracepoint_string("rxkad_rsp_short");
abort_code = RXKADPACKETSHORT;
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
&response, sizeof(response)) < 0)
@@ -1041,40 +1066,43 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
_proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }",
sp->hdr.serial, version, kvno, ticket_len);
+ eproto = tracepoint_string("rxkad_rsp_ver");
abort_code = RXKADINCONSISTENCY;
if (version != RXKAD_VERSION)
goto protocol_error;
+ eproto = tracepoint_string("rxkad_rsp_tktlen");
abort_code = RXKADTICKETLEN;
if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN)
goto protocol_error;
+ eproto = tracepoint_string("rxkad_rsp_unkkey");
abort_code = RXKADUNKNOWNKEY;
if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5)
goto protocol_error;
/* extract the kerberos ticket and decrypt and decode it */
+ ret = -ENOMEM;
ticket = kmalloc(ticket_len, GFP_NOFS);
if (!ticket)
- return -ENOMEM;
+ goto temporary_error;
+ eproto = tracepoint_string("rxkad_tkt_short");
abort_code = RXKADPACKETSHORT;
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
ticket, ticket_len) < 0)
goto protocol_error_free;
- ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key,
- &expiry, &abort_code);
- if (ret < 0) {
- *_abort_code = abort_code;
- kfree(ticket);
- return ret;
- }
+ ret = rxkad_decrypt_ticket(conn, skb, ticket, ticket_len, &session_key,
+ &expiry, _abort_code);
+ if (ret < 0)
+ goto temporary_error_free;
/* use the session key from inside the ticket to decrypt the
* response */
rxkad_decrypt_response(conn, &response, &session_key);
+ eproto = tracepoint_string("rxkad_rsp_param");
abort_code = RXKADSEALEDINCON;
if (ntohl(response.encrypted.epoch) != conn->proto.epoch)
goto protocol_error_free;
@@ -1085,6 +1113,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
csum = response.encrypted.checksum;
response.encrypted.checksum = 0;
rxkad_calc_response_checksum(&response);
+ eproto = tracepoint_string("rxkad_rsp_csum");
if (response.encrypted.checksum != csum)
goto protocol_error_free;
@@ -1093,11 +1122,15 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
struct rxrpc_call *call;
u32 call_id = ntohl(response.encrypted.call_id[i]);
+ eproto = tracepoint_string("rxkad_rsp_callid");
if (call_id > INT_MAX)
goto protocol_error_unlock;
+ eproto = tracepoint_string("rxkad_rsp_callctr");
if (call_id < conn->channels[i].call_counter)
goto protocol_error_unlock;
+
+ eproto = tracepoint_string("rxkad_rsp_callst");
if (call_id > conn->channels[i].call_counter) {
call = rcu_dereference_protected(
conn->channels[i].call,
@@ -1109,10 +1142,12 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
}
spin_unlock(&conn->channel_lock);
+ eproto = tracepoint_string("rxkad_rsp_seq");
abort_code = RXKADOUTOFSEQUENCE;
if (ntohl(response.encrypted.inc_nonce) != conn->security_nonce + 1)
goto protocol_error_free;
+ eproto = tracepoint_string("rxkad_rsp_level");
abort_code = RXKADLEVELFAIL;
level = ntohl(response.encrypted.level);
if (level > RXRPC_SECURITY_ENCRYPT)
@@ -1123,10 +1158,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
* this the connection security can be handled in exactly the same way
* as for a client connection */
ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno);
- if (ret < 0) {
- kfree(ticket);
- return ret;
- }
+ if (ret < 0)
+ goto temporary_error_free;
kfree(ticket);
_leave(" = 0");
@@ -1137,9 +1170,18 @@ protocol_error_unlock:
protocol_error_free:
kfree(ticket);
protocol_error:
+ trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
*_abort_code = abort_code;
- _leave(" = -EPROTO [%d]", abort_code);
return -EPROTO;
+
+temporary_error_free:
+ kfree(ticket);
+temporary_error:
+ /* Ignore the response packet if we got a temporary error such as
+ * ENOMEM. We just want to send the challenge again. Note that we
+ * also come out this way if the ticket decryption fails.
+ */
+ return ret;
}
/*
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 97ab214ca411..96ffa5d5733b 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -556,7 +556,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
ret = -ESHUTDOWN;
} else if (cmd == RXRPC_CMD_SEND_ABORT) {
ret = 0;
- if (rxrpc_abort_call("CMD", call, 0, abort_code, ECONNABORTED))
+ if (rxrpc_abort_call("CMD", call, 0, abort_code, -ECONNABORTED))
ret = rxrpc_send_abort_packet(call);
} else if (cmd != RXRPC_CMD_SEND_DATA) {
ret = -EINVAL;
@@ -623,7 +623,8 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
read_unlock_bh(&call->state_lock);
break;
default:
- /* Request phase complete for this client call */
+ /* Request phase complete for this client call */
+ trace_rxrpc_rx_eproto(call, 0, tracepoint_string("late_send"));
ret = -EPROTO;
break;
}
@@ -642,20 +643,24 @@ EXPORT_SYMBOL(rxrpc_kernel_send_data);
* @error: Local error value
* @why: 3-char string indicating why.
*
- * Allow a kernel service to abort a call, if it's still in an abortable state.
+ * Allow a kernel service to abort a call, if it's still in an abortable state
+ * and return true if the call was aborted, false if it was already complete.
*/
-void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
+bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
u32 abort_code, int error, const char *why)
{
+ bool aborted;
+
_enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why);
mutex_lock(&call->user_mutex);
- if (rxrpc_abort_call(why, call, 0, abort_code, error))
+ aborted = rxrpc_abort_call(why, call, 0, abort_code, error);
+ if (aborted)
rxrpc_send_abort_packet(call);
mutex_unlock(&call->user_mutex);
- _leave("");
+ return aborted;
}
EXPORT_SYMBOL(rxrpc_kernel_abort_call);
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 3d6b9286c203..ca193af8634a 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -508,9 +508,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
get_random_bytes(&fnew->hashrnd, 4);
}
- fnew->perturb_timer.function = flow_perturbation;
- fnew->perturb_timer.data = (unsigned long)fnew;
- init_timer_deferrable(&fnew->perturb_timer);
+ setup_deferrable_timer(&fnew->perturb_timer, flow_perturbation,
+ (unsigned long)fnew);
tcf_exts_change(tp, &fnew->exts, &e);
tcf_em_tree_change(tp, &fnew->ematches, &t);
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 3b86a97bc67c..593183a5b5b5 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -58,7 +58,6 @@ struct choke_sched_data {
/* Variables */
struct red_vars vars;
- struct tcf_proto __rcu *filter_list;
struct {
u32 prob_drop; /* Early probability drops */
u32 prob_mark; /* Early probability marks */
@@ -152,11 +151,6 @@ static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
choke_skb_cb(skb)->classid = classid;
}
-static u16 choke_get_classid(const struct sk_buff *skb)
-{
- return choke_skb_cb(skb)->classid;
-}
-
/*
* Compare flow of two packets
* Returns true only if source and destination address and port match.
@@ -188,40 +182,6 @@ static bool choke_match_flow(struct sk_buff *skb1,
}
/*
- * Classify flow using either:
- * 1. pre-existing classification result in skb
- * 2. fast internal classification
- * 3. use TC filter based classification
- */
-static bool choke_classify(struct sk_buff *skb,
- struct Qdisc *sch, int *qerr)
-
-{
- struct choke_sched_data *q = qdisc_priv(sch);
- struct tcf_result res;
- struct tcf_proto *fl;
- int result;
-
- fl = rcu_dereference_bh(q->filter_list);
- result = tc_classify(skb, fl, &res, false);
- if (result >= 0) {
-#ifdef CONFIG_NET_CLS_ACT
- switch (result) {
- case TC_ACT_STOLEN:
- case TC_ACT_QUEUED:
- *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
- case TC_ACT_SHOT:
- return false;
- }
-#endif
- choke_set_classid(skb, TC_H_MIN(res.classid));
- return true;
- }
-
- return false;
-}
-
-/*
* Select a packet at random from queue
* HACK: since queue can have holes from previous deletion; retry several
* times to find a random skb but then just give up and return the head
@@ -257,25 +217,15 @@ static bool choke_match_random(const struct choke_sched_data *q,
return false;
oskb = choke_peek_random(q, pidx);
- if (rcu_access_pointer(q->filter_list))
- return choke_get_classid(nskb) == choke_get_classid(oskb);
-
return choke_match_flow(oskb, nskb);
}
static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
- int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
struct choke_sched_data *q = qdisc_priv(sch);
const struct red_parms *p = &q->parms;
- if (rcu_access_pointer(q->filter_list)) {
- /* If using external classifiers, get result and record it. */
- if (!choke_classify(skb, sch, &ret))
- goto other_drop; /* Packet was eaten by filter */
- }
-
choke_skb_cb(skb)->keys_valid = 0;
/* Compute average queue usage (see RED) */
q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
@@ -339,12 +289,6 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
congestion_drop:
qdisc_drop(skb, sch, to_free);
return NET_XMIT_CN;
-
-other_drop:
- if (ret & __NET_XMIT_BYPASS)
- qdisc_qstats_drop(sch);
- __qdisc_drop(skb, to_free);
- return ret;
}
static struct sk_buff *choke_dequeue(struct Qdisc *sch)
@@ -538,7 +482,6 @@ static void choke_destroy(struct Qdisc *sch)
{
struct choke_sched_data *q = qdisc_priv(sch);
- tcf_destroy_chain(&q->filter_list);
choke_free(q->tab);
}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 42e8c8615e65..b00e02c139de 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -714,9 +714,8 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
struct sfq_sched_data *q = qdisc_priv(sch);
int i;
- q->perturb_timer.function = sfq_perturbation;
- q->perturb_timer.data = (unsigned long)sch;
- init_timer_deferrable(&q->perturb_timer);
+ setup_deferrable_timer(&q->perturb_timer, sfq_perturbation,
+ (unsigned long)sch);
for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
q->dep[i].next = i + SFQ_MAX_FLOWS;
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 0439a1a68367..a9708da28eb5 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -246,6 +246,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
if (!sctp_ulpq_init(&asoc->ulpq, asoc))
goto fail_init;
+ if (sctp_stream_new(asoc, gfp))
+ goto fail_init;
+
/* Assume that peer would support both address types unless we are
* told otherwise.
*/
@@ -264,7 +267,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
/* AUTH related initializations */
INIT_LIST_HEAD(&asoc->endpoint_shared_keys);
if (sctp_auth_asoc_copy_shkeys(ep, asoc, gfp))
- goto fail_init;
+ goto stream_free;
asoc->active_key_id = ep->active_key_id;
asoc->prsctp_enable = ep->prsctp_enable;
@@ -287,6 +290,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
return asoc;
+stream_free:
+ sctp_stream_free(asoc->stream);
fail_init:
sock_put(asoc->base.sk);
sctp_endpoint_put(asoc->ep);
@@ -1407,7 +1412,7 @@ sctp_assoc_choose_alter_transport(struct sctp_association *asoc,
/* Update the association's pmtu and frag_point by going through all the
* transports. This routine is called when a transport's PMTU has changed.
*/
-void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc)
+void sctp_assoc_sync_pmtu(struct sctp_association *asoc)
{
struct sctp_transport *t;
__u32 pmtu = 0;
@@ -1419,8 +1424,8 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc)
list_for_each_entry(t, &asoc->peer.transport_addr_list,
transports) {
if (t->pmtu_pending && t->dst) {
- sctp_transport_update_pmtu(sk, t,
- SCTP_TRUNC4(dst_mtu(t->dst)));
+ sctp_transport_update_pmtu(
+ t, SCTP_TRUNC4(dst_mtu(t->dst)));
t->pmtu_pending = 0;
}
if (!pmtu || (t->pathmtu < pmtu))
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index e3621cb4827f..697721a7a3f1 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -306,14 +306,24 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
time_after(jiffies, chunk->msg->expires_at)) {
- if (chunk->sent_count)
+ struct sctp_stream_out *streamout =
+ &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream];
+
+ if (chunk->sent_count) {
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
- else
+ streamout->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
+ } else {
chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
+ streamout->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
+ }
return 1;
} else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
chunk->sent_count > chunk->sinfo.sinfo_timetolive) {
+ struct sctp_stream_out *streamout =
+ &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream];
+
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
+ streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
return 1;
} else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) &&
chunk->msg->expires_at &&
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 2a28ab20487f..0e06a278d2a9 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -401,10 +401,10 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
if (t->param_flags & SPP_PMTUD_ENABLE) {
/* Update transports view of the MTU */
- sctp_transport_update_pmtu(sk, t, pmtu);
+ sctp_transport_update_pmtu(t, pmtu);
/* Update association pmtu. */
- sctp_assoc_sync_pmtu(sk, asoc);
+ sctp_assoc_sync_pmtu(asoc);
}
/* Retransmit with the new pmtu setting.
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 1224421036b3..1409a875ad8e 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -86,43 +86,53 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
{
struct sctp_transport *tp = packet->transport;
struct sctp_association *asoc = tp->asoc;
+ struct sock *sk;
pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag);
-
packet->vtag = vtag;
- if (asoc && tp->dst) {
- struct sock *sk = asoc->base.sk;
-
- rcu_read_lock();
- if (__sk_dst_get(sk) != tp->dst) {
- dst_hold(tp->dst);
- sk_setup_caps(sk, tp->dst);
- }
-
- if (sk_can_gso(sk)) {
- struct net_device *dev = tp->dst->dev;
+ /* do the following jobs only once for a flush schedule */
+ if (!sctp_packet_empty(packet))
+ return;
- packet->max_size = dev->gso_max_size;
- } else {
- packet->max_size = asoc->pathmtu;
- }
- rcu_read_unlock();
+ /* set packet max_size with pathmtu */
+ packet->max_size = tp->pathmtu;
+ if (!asoc)
+ return;
- } else {
- packet->max_size = tp->pathmtu;
+ /* update dst or transport pathmtu if in need */
+ sk = asoc->base.sk;
+ if (!sctp_transport_dst_check(tp)) {
+ sctp_transport_route(tp, NULL, sctp_sk(sk));
+ if (asoc->param_flags & SPP_PMTUD_ENABLE)
+ sctp_assoc_sync_pmtu(asoc);
+ } else if (!sctp_transport_pmtu_check(tp)) {
+ if (asoc->param_flags & SPP_PMTUD_ENABLE)
+ sctp_assoc_sync_pmtu(asoc);
}
- if (ecn_capable && sctp_packet_empty(packet)) {
- struct sctp_chunk *chunk;
+ /* If there a is a prepend chunk stick it on the list before
+ * any other chunks get appended.
+ */
+ if (ecn_capable) {
+ struct sctp_chunk *chunk = sctp_get_ecne_prepend(asoc);
- /* If there a is a prepend chunk stick it on the list before
- * any other chunks get appended.
- */
- chunk = sctp_get_ecne_prepend(asoc);
if (chunk)
sctp_packet_append_chunk(packet, chunk);
}
+
+ if (!tp->dst)
+ return;
+
+ /* set packet max_size with gso_max_size if gso is enabled*/
+ rcu_read_lock();
+ if (__sk_dst_get(sk) != tp->dst) {
+ dst_hold(tp->dst);
+ sk_setup_caps(sk, tp->dst);
+ }
+ packet->max_size = sk_can_gso(sk) ? tp->dst->dev->gso_max_size
+ : asoc->pathmtu;
+ rcu_read_unlock();
}
/* Initialize the packet structure. */
@@ -582,12 +592,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
sh->vtag = htonl(packet->vtag);
sh->checksum = 0;
- /* update dst if in need */
- if (!sctp_transport_dst_check(tp)) {
- sctp_transport_route(tp, NULL, sctp_sk(sk));
- if (asoc && asoc->param_flags & SPP_PMTUD_ENABLE)
- sctp_assoc_sync_pmtu(sk, asoc);
- }
+ /* drop packet if no dst */
dst = dst_clone(tp->dst);
if (!dst) {
IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
@@ -704,7 +709,7 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
*/
if ((sctp_sk(asoc->base.sk)->nodelay || inflight == 0) &&
- !chunk->msg->force_delay)
+ !asoc->force_delay)
/* Nothing unacked */
return SCTP_XMIT_OK;
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 025ccff67072..fe4c3d462f6e 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -353,6 +353,8 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
struct sctp_chunk *chk, *temp;
list_for_each_entry_safe(chk, temp, queue, transmitted_list) {
+ struct sctp_stream_out *streamout;
+
if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
continue;
@@ -361,8 +363,10 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
sctp_insert_list(&asoc->outqueue.abandoned,
&chk->transmitted_list);
+ streamout = &asoc->stream->out[chk->sinfo.sinfo_stream];
asoc->sent_cnt_removable--;
asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
+ streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
if (!chk->tsn_gap_acked) {
if (chk->transport)
@@ -396,6 +400,12 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
q->out_qlen -= chk->skb->len;
asoc->sent_cnt_removable--;
asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+ if (chk->sinfo.sinfo_stream < asoc->stream->outcnt) {
+ struct sctp_stream_out *streamout =
+ &asoc->stream->out[chk->sinfo.sinfo_stream];
+
+ streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+ }
msg_len -= SCTP_DATA_SNDSIZE(chk) +
sizeof(struct sk_buff) +
@@ -1026,8 +1036,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
/* RFC 2960 6.5 Every DATA chunk MUST carry a valid
* stream identifier.
*/
- if (chunk->sinfo.sinfo_stream >=
- asoc->c.sinit_num_ostreams) {
+ if (chunk->sinfo.sinfo_stream >= asoc->stream->outcnt) {
/* Mark as failed send. */
sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 206377fe91ec..a0b29d43627f 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -361,8 +361,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
sctp_seq_dump_remote_addrs(seq, assoc);
seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d "
"%8d %8d %8d %8d",
- assoc->hbinterval, assoc->c.sinit_max_instreams,
- assoc->c.sinit_num_ostreams, assoc->max_retrans,
+ assoc->hbinterval, assoc->stream->incnt,
+ assoc->stream->outcnt, assoc->max_retrans,
assoc->init_retries, assoc->shutdown_retries,
assoc->rtx_data_chunks,
atomic_read(&sk->sk_wmem_alloc),
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 969a30c7bb54..118faff6a332 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2460,15 +2460,10 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
* association.
*/
if (!asoc->temp) {
- int error;
-
- asoc->stream = sctp_stream_new(asoc->c.sinit_max_instreams,
- asoc->c.sinit_num_ostreams, gfp);
- if (!asoc->stream)
+ if (sctp_stream_init(asoc, gfp))
goto clean_up;
- error = sctp_assoc_set_id(asoc, gfp);
- if (error)
+ if (sctp_assoc_set_id(asoc, gfp))
goto clean_up;
}
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index ab1374fa5ab0..4f5e6cfc7f60 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3955,7 +3955,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net,
/* Silently discard the chunk if stream-id is not valid */
sctp_walk_fwdtsn(skip, chunk) {
- if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams)
+ if (ntohs(skip->stream) >= asoc->stream->incnt)
goto discard_noforce;
}
@@ -4026,7 +4026,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast(
/* Silently discard the chunk if stream-id is not valid */
sctp_walk_fwdtsn(skip, chunk) {
- if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams)
+ if (ntohs(skip->stream) >= asoc->stream->incnt)
goto gen_shutdown;
}
@@ -6362,7 +6362,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
* and discard the DATA chunk.
*/
sid = ntohs(data_hdr->stream);
- if (sid >= asoc->c.sinit_max_instreams) {
+ if (sid >= asoc->stream->incnt) {
/* Mark tsn as received even though we drop it */
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn));
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 72cc3ecf6516..8e56df8d175d 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1907,7 +1907,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
}
if (asoc->pmtu_pending)
- sctp_assoc_pending_pmtu(sk, asoc);
+ sctp_assoc_pending_pmtu(asoc);
/* If fragmentation is disabled and the message length exceeds the
* association fragmentation point, return EMSGSIZE. The I-D
@@ -1920,7 +1920,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
}
/* Check for invalid stream. */
- if (sinfo->sinfo_stream >= asoc->c.sinit_num_ostreams) {
+ if (sinfo->sinfo_stream >= asoc->stream->outcnt) {
err = -EINVAL;
goto out_free;
}
@@ -1965,7 +1965,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
err = PTR_ERR(datamsg);
goto out_free;
}
- datamsg->force_delay = !!(msg->msg_flags & MSG_MORE);
+ asoc->force_delay = !!(msg->msg_flags & MSG_MORE);
/* Now send the (possibly) fragmented message. */
list_for_each_entry(chunk, &datamsg->chunks, frag_list) {
@@ -2435,7 +2435,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
if ((params->spp_flags & SPP_PMTUD_DISABLE) && params->spp_pathmtu) {
if (trans) {
trans->pathmtu = params->spp_pathmtu;
- sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc);
+ sctp_assoc_sync_pmtu(asoc);
} else if (asoc) {
asoc->pathmtu = params->spp_pathmtu;
} else {
@@ -2451,7 +2451,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
(trans->param_flags & ~SPP_PMTUD) | pmtud_change;
if (update) {
sctp_transport_pmtu(trans, sctp_opt2sk(sp));
- sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc);
+ sctp_assoc_sync_pmtu(asoc);
}
} else if (asoc) {
asoc->param_flags =
@@ -4497,8 +4497,8 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
info->sctpi_rwnd = asoc->a_rwnd;
info->sctpi_unackdata = asoc->unack_data;
info->sctpi_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map);
- info->sctpi_instrms = asoc->c.sinit_max_instreams;
- info->sctpi_outstrms = asoc->c.sinit_num_ostreams;
+ info->sctpi_instrms = asoc->stream->incnt;
+ info->sctpi_outstrms = asoc->stream->outcnt;
list_for_each(pos, &asoc->base.inqueue.in_chunk_list)
info->sctpi_inqueue++;
list_for_each(pos, &asoc->outqueue.out_chunk_list)
@@ -4727,8 +4727,8 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len,
status.sstat_unackdata = asoc->unack_data;
status.sstat_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map);
- status.sstat_instrms = asoc->c.sinit_max_instreams;
- status.sstat_outstrms = asoc->c.sinit_num_ostreams;
+ status.sstat_instrms = asoc->stream->incnt;
+ status.sstat_outstrms = asoc->stream->outcnt;
status.sstat_fragmentation_point = asoc->frag_point;
status.sstat_primary.spinfo_assoc_id = sctp_assoc2id(transport->asoc);
memcpy(&status.sstat_primary.spinfo_address, &transport->ipaddr,
@@ -6576,6 +6576,61 @@ out:
return retval;
}
+static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_stream_out *streamout;
+ struct sctp_association *asoc;
+ struct sctp_prstatus params;
+ int retval = -EINVAL;
+ int policy;
+
+ if (len < sizeof(params))
+ goto out;
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ policy = params.sprstat_policy;
+ if (policy & ~SCTP_PR_SCTP_MASK)
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
+ if (!asoc || params.sprstat_sid >= asoc->stream->outcnt)
+ goto out;
+
+ streamout = &asoc->stream->out[params.sprstat_sid];
+ if (policy == SCTP_PR_SCTP_NONE) {
+ params.sprstat_abandoned_unsent = 0;
+ params.sprstat_abandoned_sent = 0;
+ for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
+ params.sprstat_abandoned_unsent +=
+ streamout->abandoned_unsent[policy];
+ params.sprstat_abandoned_sent +=
+ streamout->abandoned_sent[policy];
+ }
+ } else {
+ params.sprstat_abandoned_unsent =
+ streamout->abandoned_unsent[__SCTP_PR_INDEX(policy)];
+ params.sprstat_abandoned_sent =
+ streamout->abandoned_sent[__SCTP_PR_INDEX(policy)];
+ }
+
+ if (put_user(len, optlen) || copy_to_user(optval, &params, len)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
static int sctp_getsockopt_reconfig_supported(struct sock *sk, int len,
char __user *optval,
int __user *optlen)
@@ -6825,6 +6880,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
retval = sctp_getsockopt_pr_assocstatus(sk, len, optval,
optlen);
break;
+ case SCTP_PR_STREAM_STATUS:
+ retval = sctp_getsockopt_pr_streamstatus(sk, len, optval,
+ optlen);
+ break;
case SCTP_RECONFIG_SUPPORTED:
retval = sctp_getsockopt_reconfig_supported(sk, len, optval,
optlen);
@@ -7518,9 +7577,12 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
if (sk->sk_shutdown & RCV_SHUTDOWN)
break;
- if (sk_can_busy_loop(sk) &&
- sk_busy_loop(sk, noblock))
- continue;
+ if (sk_can_busy_loop(sk)) {
+ sk_busy_loop(sk, noblock);
+
+ if (!skb_queue_empty(&sk->sk_receive_queue))
+ continue;
+ }
/* User doesn't want to wait. */
error = -EAGAIN;
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 961d0a1e99d1..eff6008a32ba 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -35,33 +35,60 @@
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
-struct sctp_stream *sctp_stream_new(__u16 incnt, __u16 outcnt, gfp_t gfp)
+int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp)
{
struct sctp_stream *stream;
int i;
stream = kzalloc(sizeof(*stream), gfp);
if (!stream)
- return NULL;
+ return -ENOMEM;
- stream->outcnt = outcnt;
+ stream->outcnt = asoc->c.sinit_num_ostreams;
stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp);
if (!stream->out) {
kfree(stream);
- return NULL;
+ return -ENOMEM;
}
for (i = 0; i < stream->outcnt; i++)
stream->out[i].state = SCTP_STREAM_OPEN;
- stream->incnt = incnt;
+ asoc->stream = stream;
+
+ return 0;
+}
+
+int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp)
+{
+ struct sctp_stream *stream = asoc->stream;
+ int i;
+
+ /* Initial stream->out size may be very big, so free it and alloc
+ * a new one with new outcnt to save memory.
+ */
+ kfree(stream->out);
+ stream->outcnt = asoc->c.sinit_num_ostreams;
+ stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp);
+ if (!stream->out)
+ goto nomem;
+
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].state = SCTP_STREAM_OPEN;
+
+ stream->incnt = asoc->c.sinit_max_instreams;
stream->in = kcalloc(stream->incnt, sizeof(*stream->in), gfp);
if (!stream->in) {
kfree(stream->out);
- kfree(stream);
- return NULL;
+ goto nomem;
}
- return stream;
+ return 0;
+
+nomem:
+ asoc->stream = NULL;
+ kfree(stream);
+
+ return -ENOMEM;
}
void sctp_stream_free(struct sctp_stream *stream)
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 3379668af368..721eeebfcd8a 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -251,14 +251,13 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
}
-void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 pmtu)
+void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
{
- struct dst_entry *dst;
+ struct dst_entry *dst = sctp_transport_dst_check(t);
if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n",
- __func__, pmtu,
- SCTP_DEFAULT_MINSEGMENT);
+ __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
/* Use default minimum segment size and disable
* pmtu discovery on this transport.
*/
@@ -267,17 +266,13 @@ void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 p
t->pathmtu = pmtu;
}
- dst = sctp_transport_dst_check(t);
- if (!dst)
- t->af_specific->get_dst(t, &t->saddr, &t->fl, sk);
-
if (dst) {
- dst->ops->update_pmtu(dst, sk, NULL, pmtu);
-
+ dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu);
dst = sctp_transport_dst_check(t);
- if (!dst)
- t->af_specific->get_dst(t, &t->saddr, &t->fl, sk);
}
+
+ if (!dst)
+ t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk);
}
/* Caches the dst entry and source address for a transport's destination
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index a95f74bb5569..7e1f0e24d177 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -11,6 +11,7 @@
#ifndef _SMC_IB_H
#define _SMC_IB_H
+#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <rdma/ib_verbs.h>
diff --git a/net/socket.c b/net/socket.c
index 985ef06792d6..eea997036ada 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3356,3 +3356,49 @@ int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
return sock->ops->shutdown(sock, how);
}
EXPORT_SYMBOL(kernel_sock_shutdown);
+
+/* This routine returns the IP overhead imposed by a socket i.e.
+ * the length of the underlying IP header, depending on whether
+ * this is an IPv4 or IPv6 socket and the length from IP options turned
+ * on at the socket.
+ */
+u32 kernel_sock_ip_overhead(struct sock *sk)
+{
+ struct inet_sock *inet;
+ struct ip_options_rcu *opt;
+ u32 overhead = 0;
+ bool owned_by_user;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct ipv6_pinfo *np;
+ struct ipv6_txoptions *optv6 = NULL;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+
+ if (!sk)
+ return overhead;
+
+ owned_by_user = sock_owned_by_user(sk);
+ switch (sk->sk_family) {
+ case AF_INET:
+ inet = inet_sk(sk);
+ overhead += sizeof(struct iphdr);
+ opt = rcu_dereference_protected(inet->inet_opt,
+ owned_by_user);
+ if (opt)
+ overhead += opt->opt.optlen;
+ return overhead;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ np = inet6_sk(sk);
+ overhead += sizeof(struct ipv6hdr);
+ if (np)
+ optv6 = rcu_dereference_protected(np->opt,
+ owned_by_user);
+ if (optv6)
+ overhead += (optv6->opt_flen + optv6->opt_nflen);
+ return overhead;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+ default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
+ return overhead;
+ }
+}
+EXPORT_SYMBOL(kernel_sock_ip_overhead);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 8931e33b6541..2b720fa35c4f 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1635,6 +1635,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv,
xprt = &svsk->sk_xprt;
svc_xprt_init(net, &svc_tcp_bc_class, xprt, serv);
+ set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
serv->sv_bc_xprt = xprt;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index c13a5c35ce14..fc8f14c7bfec 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -127,6 +127,7 @@ static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv,
xprt = &cma_xprt->sc_xprt;
svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv);
+ set_bit(XPT_CONG_CTRL, &xprt->xpt_flags);
serv->sv_bc_xprt = xprt;
dprintk("svcrdma: %s(%p)\n", __func__, xprt);
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 9be6592e4a6f..bd0aac87b41a 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -416,6 +416,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns);
+ tipc_subscrp_get(s);
list_add(&s->nameseq_list, &nseq->subscriptions);
if (!sseq)
@@ -787,6 +788,7 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s)
if (seq != NULL) {
spin_lock_bh(&seq->lock);
list_del_init(&s->nameseq_list);
+ tipc_subscrp_put(s);
if (!seq->first_free && list_empty(&seq->subscriptions)) {
hlist_del_init_rcu(&seq->ns_list);
kfree(seq->sseqs);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 7130e73bd42c..15f6ce7bf868 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2511,6 +2511,28 @@ static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
}
}
+static int tipc_socketpair(struct socket *sock1, struct socket *sock2)
+{
+ struct tipc_sock *tsk2 = tipc_sk(sock2->sk);
+ struct tipc_sock *tsk1 = tipc_sk(sock1->sk);
+ u32 onode = tipc_own_addr(sock_net(sock1->sk));
+
+ tsk1->peer.family = AF_TIPC;
+ tsk1->peer.addrtype = TIPC_ADDR_ID;
+ tsk1->peer.scope = TIPC_NODE_SCOPE;
+ tsk1->peer.addr.id.ref = tsk2->portid;
+ tsk1->peer.addr.id.node = onode;
+ tsk2->peer.family = AF_TIPC;
+ tsk2->peer.addrtype = TIPC_ADDR_ID;
+ tsk2->peer.scope = TIPC_NODE_SCOPE;
+ tsk2->peer.addr.id.ref = tsk1->portid;
+ tsk2->peer.addr.id.node = onode;
+
+ tipc_sk_finish_conn(tsk1, tsk2->portid, onode);
+ tipc_sk_finish_conn(tsk2, tsk1->portid, onode);
+ return 0;
+}
+
/* Protocol switches for the various types of TIPC sockets */
static const struct proto_ops msg_ops = {
@@ -2519,7 +2541,7 @@ static const struct proto_ops msg_ops = {
.release = tipc_release,
.bind = tipc_bind,
.connect = tipc_connect,
- .socketpair = sock_no_socketpair,
+ .socketpair = tipc_socketpair,
.accept = sock_no_accept,
.getname = tipc_getname,
.poll = tipc_poll,
@@ -2540,7 +2562,7 @@ static const struct proto_ops packet_ops = {
.release = tipc_release,
.bind = tipc_bind,
.connect = tipc_connect,
- .socketpair = sock_no_socketpair,
+ .socketpair = tipc_socketpair,
.accept = tipc_accept,
.getname = tipc_getname,
.poll = tipc_poll,
@@ -2561,7 +2583,7 @@ static const struct proto_ops stream_ops = {
.release = tipc_release,
.bind = tipc_bind,
.connect = tipc_connect,
- .socketpair = sock_no_socketpair,
+ .socketpair = tipc_socketpair,
.accept = tipc_accept,
.getname = tipc_getname,
.poll = tipc_poll,
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 271cd66e4b3b..0bf91cd3733c 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -54,8 +54,6 @@ struct tipc_subscriber {
static void tipc_subscrp_delete(struct tipc_subscription *sub);
static void tipc_subscrb_put(struct tipc_subscriber *subscriber);
-static void tipc_subscrp_put(struct tipc_subscription *subscription);
-static void tipc_subscrp_get(struct tipc_subscription *subscription);
/**
* htohl - convert value to endianness used by destination
@@ -125,7 +123,6 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
{
struct tipc_name_seq seq;
- tipc_subscrp_get(sub);
tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq);
if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper))
return;
@@ -135,7 +132,6 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref,
node);
- tipc_subscrp_put(sub);
}
static void tipc_subscrp_timeout(unsigned long data)
@@ -145,6 +141,7 @@ static void tipc_subscrp_timeout(unsigned long data)
spin_lock_bh(&subscriber->lock);
tipc_nametbl_unsubscribe(sub);
+ list_del(&sub->subscrp_list);
spin_unlock_bh(&subscriber->lock);
/* Notify subscriber of timeout */
@@ -177,20 +174,17 @@ static void tipc_subscrp_kref_release(struct kref *kref)
struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
struct tipc_subscriber *subscriber = sub->subscriber;
- spin_lock_bh(&subscriber->lock);
- list_del(&sub->subscrp_list);
atomic_dec(&tn->subscription_count);
- spin_unlock_bh(&subscriber->lock);
kfree(sub);
tipc_subscrb_put(subscriber);
}
-static void tipc_subscrp_put(struct tipc_subscription *subscription)
+void tipc_subscrp_put(struct tipc_subscription *subscription)
{
kref_put(&subscription->kref, tipc_subscrp_kref_release);
}
-static void tipc_subscrp_get(struct tipc_subscription *subscription)
+void tipc_subscrp_get(struct tipc_subscription *subscription)
{
kref_get(&subscription->kref);
}
@@ -210,11 +204,8 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber,
continue;
tipc_nametbl_unsubscribe(sub);
- tipc_subscrp_get(sub);
- spin_unlock_bh(&subscriber->lock);
+ list_del(&sub->subscrp_list);
tipc_subscrp_delete(sub);
- tipc_subscrp_put(sub);
- spin_lock_bh(&subscriber->lock);
if (s)
break;
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index ffdc214c117a..ee52957dc952 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -78,4 +78,7 @@ u32 tipc_subscrp_convert_seq_type(u32 type, int swap);
int tipc_topsrv_start(struct net *net);
void tipc_topsrv_stop(struct net *net);
+void tipc_subscrp_put(struct tipc_subscription *subscription);
+void tipc_subscrp_get(struct tipc_subscription *subscription);
+
#endif
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 928691c43408..6a7fe7660551 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -996,7 +996,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
unsigned int hash;
struct unix_address *addr;
struct hlist_head *list;
- struct path path = { NULL, NULL };
+ struct path path = { };
err = -EINVAL;
if (sunaddr->sun_family != AF_UNIX)
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 4be4fbbc0b50..10ae7823a19d 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -96,31 +96,23 @@ static int PROTOCOL_OVERRIDE = -1;
static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
{
- int err;
-
switch (vmci_error) {
case VMCI_ERROR_NO_MEM:
- err = ENOMEM;
- break;
+ return -ENOMEM;
case VMCI_ERROR_DUPLICATE_ENTRY:
case VMCI_ERROR_ALREADY_EXISTS:
- err = EADDRINUSE;
- break;
+ return -EADDRINUSE;
case VMCI_ERROR_NO_ACCESS:
- err = EPERM;
- break;
+ return -EPERM;
case VMCI_ERROR_NO_RESOURCES:
- err = ENOBUFS;
- break;
+ return -ENOBUFS;
case VMCI_ERROR_INVALID_RESOURCE:
- err = EHOSTUNREACH;
- break;
+ return -EHOSTUNREACH;
case VMCI_ERROR_INVALID_ARGS:
default:
- err = EINVAL;
+ break;
}
-
- return err > 0 ? -err : err;
+ return -EINVAL;
}
static u32 vmci_transport_peer_rid(u32 peer_cid)
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 16b6b5988be9..570a2b67ca10 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -132,12 +132,10 @@ static int wiphy_resume(struct device *dev)
/* Age scan results with time spent in suspend */
cfg80211_bss_age(rdev, get_seconds() - rdev->suspend_at);
- if (rdev->ops->resume) {
- rtnl_lock();
- if (rdev->wiphy.registered)
- ret = rdev_resume(rdev);
- rtnl_unlock();
- }
+ rtnl_lock();
+ if (rdev->wiphy.registered && rdev->ops->resume)
+ ret = rdev_resume(rdev);
+ rtnl_unlock();
return ret;
}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 8e696eafd067..4f7e62ddc17e 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -412,7 +412,14 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es
up = nla_data(rp);
ulen = xfrm_replay_state_esn_len(up);
- if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen)
+ /* Check the overall length and the internal bitmap length to avoid
+ * potential overflow. */
+ if (nla_len(rp) < ulen ||
+ xfrm_replay_state_esn_len(replay_esn) != ulen ||
+ replay_esn->bmp_len != up->bmp_len)
+ return -EINVAL;
+
+ if (up->replay_window > up->bmp_len * sizeof(__u32) * 8)
return -EINVAL;
return 0;