diff options
Diffstat (limited to 'net')
149 files changed, 3625 insertions, 1411 deletions
diff --git a/net/Makefile b/net/Makefile index 9b681550e3a3..9086ffbb5085 100644 --- a/net/Makefile +++ b/net/Makefile @@ -12,7 +12,7 @@ obj-$(CONFIG_NET) += $(tmp-y) # LLC has to be linked before the files in net/802/ obj-$(CONFIG_LLC) += llc/ -obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ +obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_INET) += ipv4/ obj-$(CONFIG_XFRM) += xfrm/ diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 71343d0fec94..495ba7cdcb04 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -679,15 +679,11 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_forw_packet *forw_packet_aggr; + struct sk_buff *skb; unsigned char *skb_buff; unsigned int skb_size; atomic_t *queue_left = own_packet ? NULL : &bat_priv->batman_queue_left; - forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing, - queue_left, bat_priv); - if (!forw_packet_aggr) - return; - if (atomic_read(&bat_priv->aggregated_ogms) && packet_len < BATADV_MAX_AGGREGATION_BYTES) skb_size = BATADV_MAX_AGGREGATION_BYTES; @@ -696,9 +692,14 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, skb_size += ETH_HLEN; - forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size); - if (!forw_packet_aggr->skb) { - batadv_forw_packet_free(forw_packet_aggr, true); + skb = netdev_alloc_skb_ip_align(NULL, skb_size); + if (!skb) + return; + + forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing, + queue_left, bat_priv, skb); + if (!forw_packet_aggr) { + kfree_skb(skb); return; } diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index ba8420d8a992..d07e89ec8467 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -395,7 +395,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, ether_addr_copy(ethhdr->h_source, mac); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): CLAIM %pM on vid %d\n", mac, - BATADV_PRINT_VID(vid)); + batadv_print_vid(vid)); break; case BATADV_CLAIM_TYPE_UNCLAIM: /* unclaim frame @@ -404,7 +404,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, ether_addr_copy(hw_src, mac); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): UNCLAIM %pM on vid %d\n", mac, - BATADV_PRINT_VID(vid)); + batadv_print_vid(vid)); break; case BATADV_CLAIM_TYPE_ANNOUNCE: /* announcement frame @@ -413,7 +413,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, ether_addr_copy(hw_src, mac); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): ANNOUNCE of %pM on vid %d\n", - ethhdr->h_source, BATADV_PRINT_VID(vid)); + ethhdr->h_source, batadv_print_vid(vid)); break; case BATADV_CLAIM_TYPE_REQUEST: /* request frame @@ -425,14 +425,14 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): REQUEST of %pM to %pM on vid %d\n", ethhdr->h_source, ethhdr->h_dest, - BATADV_PRINT_VID(vid)); + batadv_print_vid(vid)); break; case BATADV_CLAIM_TYPE_LOOPDETECT: ether_addr_copy(ethhdr->h_source, mac); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): LOOPDETECT of %pM to %pM on vid %d\n", ethhdr->h_source, ethhdr->h_dest, - BATADV_PRINT_VID(vid)); + batadv_print_vid(vid)); break; } @@ -475,9 +475,9 @@ static void batadv_bla_loopdetect_report(struct work_struct *work) batadv_info(bat_priv->soft_iface, "Possible loop on VLAN %d detected which can't be handled by BLA - please check your network setup!\n", - BATADV_PRINT_VID(backbone_gw->vid)); + batadv_print_vid(backbone_gw->vid)); snprintf(vid_str, sizeof(vid_str), "%d", - BATADV_PRINT_VID(backbone_gw->vid)); + batadv_print_vid(backbone_gw->vid)); vid_str[sizeof(vid_str) - 1] = 0; batadv_throw_uevent(bat_priv, BATADV_UEV_BLA, BATADV_UEV_LOOPDETECT, @@ -510,7 +510,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_get_backbone_gw(): not found (%pM, %d), creating new entry\n", - orig, BATADV_PRINT_VID(vid)); + orig, batadv_print_vid(vid)); entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) @@ -719,7 +719,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n", - mac, BATADV_PRINT_VID(vid)); + mac, batadv_print_vid(vid)); kref_get(&claim->refcount); hash_added = batadv_hash_add(bat_priv->bla.claim_hash, @@ -739,8 +739,8 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, goto claim_free_ref; batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_add_claim(): changing ownership for %pM, vid %d\n", - mac, BATADV_PRINT_VID(vid)); + "bla_add_claim(): changing ownership for %pM, vid %d to gw %pM\n", + mac, batadv_print_vid(vid), backbone_gw->orig); remove_crc = true; } @@ -809,7 +809,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, return; batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_del_claim(): %pM, vid %d\n", - mac, BATADV_PRINT_VID(vid)); + mac, batadv_print_vid(vid)); batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim, batadv_choose_claim, claim); @@ -849,7 +849,7 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n", - BATADV_PRINT_VID(vid), backbone_gw->orig, crc); + batadv_print_vid(vid), backbone_gw->orig, crc); spin_lock_bh(&backbone_gw->crc_lock); backbone_crc = backbone_gw->crc; @@ -859,7 +859,7 @@ static bool batadv_handle_announce(struct batadv_priv *bat_priv, u8 *an_addr, batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv, "handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n", backbone_gw->orig, - BATADV_PRINT_VID(backbone_gw->vid), + batadv_print_vid(backbone_gw->vid), backbone_crc, crc); batadv_bla_send_request(backbone_gw); @@ -904,7 +904,7 @@ static bool batadv_handle_request(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_request(): REQUEST vid %d (sent by %pM)...\n", - BATADV_PRINT_VID(vid), ethhdr->h_source); + batadv_print_vid(vid), ethhdr->h_source); batadv_bla_answer_request(bat_priv, primary_if, vid); return true; @@ -941,7 +941,7 @@ static bool batadv_handle_unclaim(struct batadv_priv *bat_priv, /* this must be an UNCLAIM frame */ batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_unclaim(): UNCLAIM %pM on vid %d (sent by %pM)...\n", - claim_addr, BATADV_PRINT_VID(vid), backbone_gw->orig); + claim_addr, batadv_print_vid(vid), backbone_gw->orig); batadv_bla_del_claim(bat_priv, claim_addr, vid); batadv_backbone_gw_put(backbone_gw); @@ -1161,7 +1161,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv, if (ret == 1) batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_process_claim(): received a claim frame from another group. From: %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", - ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, + ethhdr->h_source, batadv_print_vid(vid), hw_src, hw_dst); if (ret < 2) @@ -1197,7 +1197,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_process_claim(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", - ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, hw_dst); + ethhdr->h_source, batadv_print_vid(vid), hw_src, hw_dst); return true; } @@ -1295,10 +1295,13 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv, goto skip; batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_purge_claims(): %pM, vid %d, time out\n", - claim->addr, claim->vid); + "bla_purge_claims(): timed out.\n"); purge_now: + batadv_dbg(BATADV_DBG_BLA, bat_priv, + "bla_purge_claims(): %pM, vid %d\n", + claim->addr, claim->vid); + batadv_handle_unclaim(bat_priv, primary_if, backbone_gw->orig, claim->addr, claim->vid); @@ -1846,6 +1849,13 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, /* possible optimization: race for a claim */ /* No claim exists yet, claim it for us! */ + + batadv_dbg(BATADV_DBG_BLA, bat_priv, + "bla_rx(): Unclaimed MAC %pM found. Claim it. Local: %s\n", + ethhdr->h_source, + batadv_is_my_client(bat_priv, + ethhdr->h_source, vid) ? + "yes" : "no"); batadv_handle_claim(bat_priv, primary_if, primary_if->net_dev->dev_addr, ethhdr->h_source, vid); @@ -1963,10 +1973,22 @@ bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, /* if yes, the client has roamed and we have * to unclaim it. */ - batadv_handle_unclaim(bat_priv, primary_if, - primary_if->net_dev->dev_addr, - ethhdr->h_source, vid); - goto allow; + if (batadv_has_timed_out(claim->lasttime, 100)) { + /* only unclaim if the last claim entry is + * older than 100 ms to make sure we really + * have a roaming client here. + */ + batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_tx(): Roaming client %pM detected. Unclaim it.\n", + ethhdr->h_source); + batadv_handle_unclaim(bat_priv, primary_if, + primary_if->net_dev->dev_addr, + ethhdr->h_source, vid); + goto allow; + } else { + batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_tx(): Race for claim %pM detected. Drop packet.\n", + ethhdr->h_source); + goto handled; + } } /* check if it is a multicast/broadcast frame */ @@ -2042,7 +2064,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) backbone_crc = backbone_gw->crc; spin_unlock_bh(&backbone_gw->crc_lock); seq_printf(seq, " * %pM on %5d by %pM [%c] (%#.4x)\n", - claim->addr, BATADV_PRINT_VID(claim->vid), + claim->addr, batadv_print_vid(claim->vid), backbone_gw->orig, (is_own ? 'x' : ' '), backbone_crc); @@ -2274,7 +2296,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, " * %pM on %5d %4i.%03is (%#.4x)\n", backbone_gw->orig, - BATADV_PRINT_VID(backbone_gw->vid), secs, + batadv_print_vid(backbone_gw->vid), secs, msecs, backbone_crc); } rcu_read_unlock(); @@ -2449,3 +2471,52 @@ out: return ret; } + +#ifdef CONFIG_BATMAN_ADV_DAT +/** + * batadv_bla_check_claim - check if address is claimed + * + * @bat_priv: the bat priv with all the soft interface information + * @addr: mac address of which the claim status is checked + * @vid: the VLAN ID + * + * addr is checked if this address is claimed by the local device itself. + * + * Return: true if bla is disabled or the mac is claimed by the device, + * false if the device addr is already claimed by another gateway + */ +bool batadv_bla_check_claim(struct batadv_priv *bat_priv, + u8 *addr, unsigned short vid) +{ + struct batadv_bla_claim search_claim; + struct batadv_bla_claim *claim = NULL; + struct batadv_hard_iface *primary_if = NULL; + bool ret = true; + + if (!atomic_read(&bat_priv->bridge_loop_avoidance)) + return ret; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if) + return ret; + + /* First look if the mac address is claimed */ + ether_addr_copy(search_claim.addr, addr); + search_claim.vid = vid; + + claim = batadv_claim_hash_find(bat_priv, &search_claim); + + /* If there is a claim and we are not owner of the claim, + * return false. + */ + if (claim) { + if (!batadv_compare_eth(claim->backbone_gw->orig, + primary_if->net_dev->dev_addr)) + ret = false; + batadv_claim_put(claim); + } + + batadv_hardif_put(primary_if); + return ret; +} +#endif diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index e157986bd01c..234775748b8e 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -69,6 +69,10 @@ void batadv_bla_status_update(struct net_device *net_dev); int batadv_bla_init(struct batadv_priv *bat_priv); void batadv_bla_free(struct batadv_priv *bat_priv); int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb); +#ifdef CONFIG_BATMAN_ADV_DAT +bool batadv_bla_check_claim(struct batadv_priv *bat_priv, u8 *addr, + unsigned short vid); +#endif #define BATADV_BLA_CRC_INIT 0 #else /* ifdef CONFIG_BATMAN_ADV_BLA */ @@ -145,6 +149,13 @@ static inline int batadv_bla_backbone_dump(struct sk_buff *msg, return -EOPNOTSUPP; } +static inline +bool batadv_bla_check_claim(struct batadv_priv *bat_priv, u8 *addr, + unsigned short vid) +{ + return true; +} + #endif /* ifdef CONFIG_BATMAN_ADV_BLA */ #endif /* ifndef _NET_BATMAN_ADV_BLA_H_ */ diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 1bfd1dbc2feb..013e970eff39 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -43,6 +43,7 @@ #include <linux/workqueue.h> #include <net/arp.h> +#include "bridge_loop_avoidance.h" #include "hard-interface.h" #include "hash.h" #include "log.h" @@ -330,7 +331,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip, batadv_dbg(BATADV_DBG_DAT, bat_priv, "Entry updated: %pI4 %pM (vid: %d)\n", &dat_entry->ip, dat_entry->mac_addr, - BATADV_PRINT_VID(vid)); + batadv_print_vid(vid)); goto out; } @@ -356,7 +357,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip, } batadv_dbg(BATADV_DBG_DAT, bat_priv, "New entry added: %pI4 %pM (vid: %d)\n", - &dat_entry->ip, dat_entry->mac_addr, BATADV_PRINT_VID(vid)); + &dat_entry->ip, dat_entry->mac_addr, batadv_print_vid(vid)); out: if (dat_entry) @@ -835,7 +836,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, " * %15pI4 %14pM %4i %6i:%02i\n", &dat_entry->ip, dat_entry->mac_addr, - BATADV_PRINT_VID(dat_entry->vid), + batadv_print_vid(dat_entry->vid), last_seen_mins, last_seen_secs); } rcu_read_unlock(); @@ -1002,6 +1003,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, bool ret = false; struct batadv_dat_entry *dat_entry = NULL; struct sk_buff *skb_new; + struct net_device *soft_iface = bat_priv->soft_iface; int hdr_size = 0; unsigned short vid; @@ -1040,16 +1042,30 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, goto out; } + /* If BLA is enabled, only send ARP replies if we have claimed + * the destination for the ARP request or if no one else of + * the backbone gws belonging to our backbone has claimed the + * destination. + */ + if (!batadv_bla_check_claim(bat_priv, + dat_entry->mac_addr, vid)) { + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "Device %pM claimed by another backbone gw. Don't send ARP reply!", + dat_entry->mac_addr); + ret = true; + goto out; + } + skb_new = batadv_dat_arp_create_reply(bat_priv, ip_dst, ip_src, dat_entry->mac_addr, hw_src, vid); if (!skb_new) goto out; - skb_new->protocol = eth_type_trans(skb_new, - bat_priv->soft_iface); - bat_priv->stats.rx_packets++; - bat_priv->stats.rx_bytes += skb->len + ETH_HLEN + hdr_size; + skb_new->protocol = eth_type_trans(skb_new, soft_iface); + + soft_iface->stats.rx_packets++; + soft_iface->stats.rx_bytes += skb->len + ETH_HLEN + hdr_size; netif_rx(skb_new); batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n"); @@ -1188,6 +1204,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) { + struct batadv_dat_entry *dat_entry = NULL; u16 type; __be32 ip_src, ip_dst; u8 *hw_src, *hw_dst; @@ -1210,12 +1227,41 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, hw_dst = batadv_arp_hw_dst(skb, hdr_size); ip_dst = batadv_arp_ip_dst(skb, hdr_size); + /* If ip_dst is already in cache and has the right mac address, + * drop this frame if this ARP reply is destined for us because it's + * most probably an ARP reply generated by another node of the DHT. + * We have most probably received already a reply earlier. Delivering + * this frame would lead to doubled receive of an ARP reply. + */ + dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_src, vid); + if (dat_entry && batadv_compare_eth(hw_src, dat_entry->mac_addr)) { + batadv_dbg(BATADV_DBG_DAT, bat_priv, "Doubled ARP reply removed: ARP MSG = [src: %pM-%pI4 dst: %pM-%pI4]; dat_entry: %pM-%pI4\n", + hw_src, &ip_src, hw_dst, &ip_dst, + dat_entry->mac_addr, &dat_entry->ip); + dropped = true; + goto out; + } + /* Update our internal cache with both the IP addresses the node got * within the ARP reply */ batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid); batadv_dat_entry_add(bat_priv, ip_dst, hw_dst, vid); + /* If BLA is enabled, only forward ARP replies if we have claimed the + * source of the ARP reply or if no one else of the same backbone has + * already claimed that client. This prevents that different gateways + * to the same backbone all forward the ARP reply leading to multiple + * replies in the backbone. + */ + if (!batadv_bla_check_claim(bat_priv, hw_src, vid)) { + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "Device %pM claimed by another backbone gw. Drop ARP reply.\n", + hw_src); + dropped = true; + goto out; + } + /* if this REPLY is directed to a client of mine, let's deliver the * packet to the interface */ @@ -1228,6 +1274,8 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, out: if (dropped) kfree_skb(skb); + if (dat_entry) + batadv_dat_entry_put(dat_entry); /* if dropped == false -> deliver to the interface */ return dropped; } @@ -1256,7 +1304,7 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, /* If this packet is an ARP_REQUEST and the node already has the * information that it is going to ask, then the packet can be dropped */ - if (forw_packet->num_packets) + if (batadv_forw_packet_is_rebroadcast(forw_packet)) goto out; vid = batadv_dat_get_vid(forw_packet->skb, &hdr_size); diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h index 7a2b9f4da078..65ce97efa6b5 100644 --- a/net/batman-adv/log.h +++ b/net/batman-adv/log.h @@ -73,9 +73,10 @@ __printf(2, 3); /* possibly ratelimited debug output */ #define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \ do { \ - if (atomic_read(&(bat_priv)->log_level) & (type) && \ + struct batadv_priv *__batpriv = (bat_priv); \ + if (atomic_read(&__batpriv->log_level) & (type) && \ (!(ratelimited) || net_ratelimit())) \ - batadv_debug_log(bat_priv, fmt, ## arg); \ + batadv_debug_log(__batpriv, fmt, ## arg); \ } \ while (0) #else /* !CONFIG_BATMAN_ADV_DEBUG */ diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 5000c540614d..fb381fb26a66 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -516,6 +516,9 @@ static void batadv_recv_handler_init(void) BUILD_BUG_ON(sizeof(struct batadv_tvlv_tt_change) != 12); BUILD_BUG_ON(sizeof(struct batadv_tvlv_roam_adv) != 8); + i = FIELD_SIZEOF(struct sk_buff, cb); + BUILD_BUG_ON(sizeof(struct batadv_skb_cb) > i); + /* broadcast packet */ batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 57a8103dbce7..810f7d026f54 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2017.0" +#define BATADV_SOURCE_VERSION "2017.1" #endif /* B.A.T.M.A.N. parameters */ @@ -193,6 +193,7 @@ enum batadv_uev_type { #include <linux/percpu.h> #include <linux/types.h> +#include "packet.h" #include "types.h" struct net_device; @@ -200,8 +201,19 @@ struct packet_type; struct seq_file; struct sk_buff; -#define BATADV_PRINT_VID(vid) (((vid) & BATADV_VLAN_HAS_TAG) ? \ - (int)((vid) & VLAN_VID_MASK) : -1) +/** + * batadv_print_vid - return printable version of vid information + * @vid: the VLAN identifier + * + * Return: -1 when no VLAN is used, VLAN id otherwise + */ +static inline int batadv_print_vid(unsigned short vid) +{ + if (vid & BATADV_VLAN_HAS_TAG) + return (int)(vid & VLAN_VID_MASK); + else + return -1; +} extern struct list_head batadv_hardif_list; diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 952ba81a565b..d327670641ac 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -494,9 +494,8 @@ static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv) if (!bridged) goto update; -#if !IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING) - pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n"); -#endif + if (!IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)) + pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n"); querier4.exists = br_multicast_has_querier_anywhere(dev, ETH_P_IP); querier4.shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IP); @@ -671,7 +670,6 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv, return 0; } -#if IS_ENABLED(CONFIG_IPV6) /** * batadv_mcast_is_report_ipv6 - check for MLD reports * @skb: the ethernet frame destined for the mesh @@ -736,7 +734,6 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv, return 0; } -#endif /** * batadv_mcast_forw_mode_check - check for optimized forwarding potential @@ -765,11 +762,12 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv, case ETH_P_IP: return batadv_mcast_forw_mode_check_ipv4(bat_priv, skb, is_unsnoopable); -#if IS_ENABLED(CONFIG_IPV6) case ETH_P_IPV6: + if (!IS_ENABLED(CONFIG_IPV6)) + return -EINVAL; + return batadv_mcast_forw_mode_check_ipv6(bat_priv, skb, is_unsnoopable); -#endif default: return -EINVAL; } diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 7fd740b6e36d..e1ebe14ee2a6 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -941,15 +941,17 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct batadv_unicast_packet *unicast_packet; struct batadv_unicast_4addr_packet *unicast_4addr_packet; - u8 *orig_addr; - struct batadv_orig_node *orig_node = NULL; + u8 *orig_addr, *orig_addr_gw; + struct batadv_orig_node *orig_node = NULL, *orig_node_gw = NULL; int check, hdr_size = sizeof(*unicast_packet); enum batadv_subtype subtype; - bool is4addr; + struct ethhdr *ethhdr; int ret = NET_RX_DROP; + bool is4addr, is_gw; unicast_packet = (struct batadv_unicast_packet *)skb->data; unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; + ethhdr = eth_hdr(skb); is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR; /* the caller function should have already pulled 2 bytes */ @@ -972,6 +974,23 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, /* packet for me */ if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { + /* If this is a unicast packet from another backgone gw, + * drop it. + */ + orig_addr_gw = ethhdr->h_source; + orig_node_gw = batadv_orig_hash_find(bat_priv, orig_addr_gw); + if (orig_node_gw) { + is_gw = batadv_bla_is_backbone_gw(skb, orig_node_gw, + hdr_size); + batadv_orig_node_put(orig_node_gw); + if (is_gw) { + batadv_dbg(BATADV_DBG_BLA, bat_priv, + "recv_unicast_packet(): Dropped unicast pkt received from another backbone gw %pM.\n", + orig_addr_gw); + return NET_RX_DROP; + } + } + if (is4addr) { subtype = unicast_4addr_packet->subtype; batadv_dat_inc_counter(bat_priv, subtype); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 1489ec27daff..403df596a73d 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -482,6 +482,7 @@ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet, * @if_outgoing: The (optional) if_outgoing to be grabbed * @queue_left: The (optional) queue counter to decrease * @bat_priv: The bat_priv for the mesh of this forw_packet + * @skb: The raw packet this forwarding packet shall contain * * Allocates a forwarding packet and tries to get a reference to the * (optional) if_incoming, if_outgoing and queue_left. If queue_left @@ -493,7 +494,8 @@ struct batadv_forw_packet * batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing, atomic_t *queue_left, - struct batadv_priv *bat_priv) + struct batadv_priv *bat_priv, + struct sk_buff *skb) { struct batadv_forw_packet *forw_packet; const char *qname; @@ -525,7 +527,7 @@ batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, INIT_HLIST_NODE(&forw_packet->list); INIT_HLIST_NODE(&forw_packet->cleanup_list); - forw_packet->skb = NULL; + forw_packet->skb = skb; forw_packet->queue_left = queue_left; forw_packet->if_incoming = if_incoming; forw_packet->if_outgoing = if_outgoing; @@ -756,22 +758,23 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, if (!primary_if) goto err; + newskb = skb_copy(skb, GFP_ATOMIC); + if (!newskb) { + batadv_hardif_put(primary_if); + goto err; + } + forw_packet = batadv_forw_packet_alloc(primary_if, NULL, &bat_priv->bcast_queue_left, - bat_priv); + bat_priv, newskb); batadv_hardif_put(primary_if); if (!forw_packet) - goto err; - - newskb = skb_copy(skb, GFP_ATOMIC); - if (!newskb) goto err_packet_free; /* as we have a copy now, it is safe to decrease the TTL */ bcast_packet = (struct batadv_bcast_packet *)newskb->data; bcast_packet->ttl--; - forw_packet->skb = newskb; forw_packet->own = own_packet; INIT_DELAYED_WORK(&forw_packet->delayed_work, @@ -781,11 +784,60 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, return NETDEV_TX_OK; err_packet_free: - batadv_forw_packet_free(forw_packet, true); + kfree_skb(newskb); err: return NETDEV_TX_BUSY; } +/** + * batadv_forw_packet_bcasts_left - check if a retransmission is necessary + * @forw_packet: the forwarding packet to check + * @hard_iface: the interface to check on + * + * Checks whether a given packet has any (re)transmissions left on the provided + * interface. + * + * hard_iface may be NULL: In that case the number of transmissions this skb had + * so far is compared with the maximum amount of retransmissions independent of + * any interface instead. + * + * Return: True if (re)transmissions are left, false otherwise. + */ +static bool +batadv_forw_packet_bcasts_left(struct batadv_forw_packet *forw_packet, + struct batadv_hard_iface *hard_iface) +{ + unsigned int max; + + if (hard_iface) + max = hard_iface->num_bcasts; + else + max = BATADV_NUM_BCASTS_MAX; + + return BATADV_SKB_CB(forw_packet->skb)->num_bcasts < max; +} + +/** + * batadv_forw_packet_bcasts_inc - increment retransmission counter of a packet + * @forw_packet: the packet to increase the counter for + */ +static void +batadv_forw_packet_bcasts_inc(struct batadv_forw_packet *forw_packet) +{ + BATADV_SKB_CB(forw_packet->skb)->num_bcasts++; +} + +/** + * batadv_forw_packet_is_rebroadcast - check packet for previous transmissions + * @forw_packet: the packet to check + * + * Return: True if this packet was transmitted before, false otherwise. + */ +bool batadv_forw_packet_is_rebroadcast(struct batadv_forw_packet *forw_packet) +{ + return BATADV_SKB_CB(forw_packet->skb)->num_bcasts > 0; +} + static void batadv_send_outstanding_bcast_packet(struct work_struct *work) { struct batadv_hard_iface *hard_iface; @@ -826,7 +878,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) if (hard_iface->soft_iface != soft_iface) continue; - if (forw_packet->num_packets >= hard_iface->num_bcasts) + if (!batadv_forw_packet_bcasts_left(forw_packet, hard_iface)) continue; if (forw_packet->own) { @@ -884,10 +936,10 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) } rcu_read_unlock(); - forw_packet->num_packets++; + batadv_forw_packet_bcasts_inc(forw_packet); /* if we still have some more bcasts to send */ - if (forw_packet->num_packets < BATADV_NUM_BCASTS_MAX) { + if (batadv_forw_packet_bcasts_left(forw_packet, NULL)) { batadv_forw_packet_bcast_queue(bat_priv, forw_packet, send_time); return; diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index f21166d10323..a16b34f473ef 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -34,11 +34,13 @@ struct batadv_forw_packet * batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing, atomic_t *queue_left, - struct batadv_priv *bat_priv); + struct batadv_priv *bat_priv, + struct sk_buff *skb); bool batadv_forw_packet_steal(struct batadv_forw_packet *packet, spinlock_t *l); void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet, unsigned long send_time); +bool batadv_forw_packet_is_rebroadcast(struct batadv_forw_packet *forw_packet); int batadv_send_skb_to_orig(struct sk_buff *skb, struct batadv_orig_node *orig_node, diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index d042c99af028..b25789abf7b9 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -64,28 +64,6 @@ #include "sysfs.h" #include "translation-table.h" -static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); -static void batadv_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *info); -static u32 batadv_get_msglevel(struct net_device *dev); -static void batadv_set_msglevel(struct net_device *dev, u32 value); -static u32 batadv_get_link(struct net_device *dev); -static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data); -static void batadv_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, u64 *data); -static int batadv_get_sset_count(struct net_device *dev, int stringset); - -static const struct ethtool_ops batadv_ethtool_ops = { - .get_settings = batadv_get_settings, - .get_drvinfo = batadv_get_drvinfo, - .get_msglevel = batadv_get_msglevel, - .set_msglevel = batadv_set_msglevel, - .get_link = batadv_get_link, - .get_strings = batadv_get_strings, - .get_ethtool_stats = batadv_get_ethtool_stats, - .get_sset_count = batadv_get_sset_count, -}; - int batadv_skb_head_push(struct sk_buff *skb, unsigned int len) { int result; @@ -140,7 +118,7 @@ static u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx) static struct net_device_stats *batadv_interface_stats(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); - struct net_device_stats *stats = &bat_priv->stats; + struct net_device_stats *stats = &dev->stats; stats->tx_packets = batadv_sum_counter(bat_priv, BATADV_CNT_TX); stats->tx_bytes = batadv_sum_counter(bat_priv, BATADV_CNT_TX_BYTES); @@ -230,6 +208,9 @@ static int batadv_interface_tx(struct sk_buff *skb, if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto dropped; + /* reset control block to avoid left overs from previous users */ + memset(skb->cb, 0, sizeof(struct batadv_skb_cb)); + netif_trans_update(soft_iface); vid = batadv_get_vid(skb, 0); ethhdr = eth_hdr(skb); @@ -947,6 +928,98 @@ static const struct net_device_ops batadv_netdev_ops = { .ndo_del_slave = batadv_softif_slave_del, }; +static void batadv_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strlcpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver)); + strlcpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version)); + strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); + strlcpy(info->bus_info, "batman", sizeof(info->bus_info)); +} + +/* Inspired by drivers/net/ethernet/dlink/sundance.c:1702 + * Declare each description string in struct.name[] to get fixed sized buffer + * and compile time checking for strings longer than ETH_GSTRING_LEN. + */ +static const struct { + const char name[ETH_GSTRING_LEN]; +} batadv_counters_strings[] = { + { "tx" }, + { "tx_bytes" }, + { "tx_dropped" }, + { "rx" }, + { "rx_bytes" }, + { "forward" }, + { "forward_bytes" }, + { "mgmt_tx" }, + { "mgmt_tx_bytes" }, + { "mgmt_rx" }, + { "mgmt_rx_bytes" }, + { "frag_tx" }, + { "frag_tx_bytes" }, + { "frag_rx" }, + { "frag_rx_bytes" }, + { "frag_fwd" }, + { "frag_fwd_bytes" }, + { "tt_request_tx" }, + { "tt_request_rx" }, + { "tt_response_tx" }, + { "tt_response_rx" }, + { "tt_roam_adv_tx" }, + { "tt_roam_adv_rx" }, +#ifdef CONFIG_BATMAN_ADV_DAT + { "dat_get_tx" }, + { "dat_get_rx" }, + { "dat_put_tx" }, + { "dat_put_rx" }, + { "dat_cached_reply_tx" }, +#endif +#ifdef CONFIG_BATMAN_ADV_NC + { "nc_code" }, + { "nc_code_bytes" }, + { "nc_recode" }, + { "nc_recode_bytes" }, + { "nc_buffer" }, + { "nc_decode" }, + { "nc_decode_bytes" }, + { "nc_decode_failed" }, + { "nc_sniffed" }, +#endif +}; + +static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + if (stringset == ETH_SS_STATS) + memcpy(data, batadv_counters_strings, + sizeof(batadv_counters_strings)); +} + +static void batadv_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct batadv_priv *bat_priv = netdev_priv(dev); + int i; + + for (i = 0; i < BATADV_CNT_NUM; i++) + data[i] = batadv_sum_counter(bat_priv, i); +} + +static int batadv_get_sset_count(struct net_device *dev, int stringset) +{ + if (stringset == ETH_SS_STATS) + return BATADV_CNT_NUM; + + return -EOPNOTSUPP; +} + +static const struct ethtool_ops batadv_ethtool_ops = { + .get_drvinfo = batadv_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = batadv_get_strings, + .get_ethtool_stats = batadv_get_ethtool_stats, + .get_sset_count = batadv_get_sset_count, +}; + /** * batadv_softif_free - Deconstructor of batadv_soft_interface * @dev: Device to cleanup and remove @@ -971,8 +1044,6 @@ static void batadv_softif_free(struct net_device *dev) */ static void batadv_softif_init_early(struct net_device *dev) { - struct batadv_priv *priv = netdev_priv(dev); - ether_setup(dev); dev->netdev_ops = &batadv_netdev_ops; @@ -989,8 +1060,6 @@ static void batadv_softif_init_early(struct net_device *dev) eth_hw_addr_random(dev); dev->ethtool_ops = &batadv_ethtool_ops; - - memset(priv, 0, sizeof(*priv)); } struct net_device *batadv_softif_create(struct net *net, const char *name) @@ -1083,118 +1152,3 @@ struct rtnl_link_ops batadv_link_ops __read_mostly = { .setup = batadv_softif_init_early, .dellink = batadv_softif_destroy_netlink, }; - -/* ethtool */ -static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - cmd->supported = 0; - cmd->advertising = 0; - ethtool_cmd_speed_set(cmd, SPEED_10); - cmd->duplex = DUPLEX_FULL; - cmd->port = PORT_TP; - cmd->phy_address = 0; - cmd->transceiver = XCVR_INTERNAL; - cmd->autoneg = AUTONEG_DISABLE; - cmd->maxtxpkt = 0; - cmd->maxrxpkt = 0; - - return 0; -} - -static void batadv_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *info) -{ - strlcpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver)); - strlcpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version)); - strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); - strlcpy(info->bus_info, "batman", sizeof(info->bus_info)); -} - -static u32 batadv_get_msglevel(struct net_device *dev) -{ - return -EOPNOTSUPP; -} - -static void batadv_set_msglevel(struct net_device *dev, u32 value) -{ -} - -static u32 batadv_get_link(struct net_device *dev) -{ - return 1; -} - -/* Inspired by drivers/net/ethernet/dlink/sundance.c:1702 - * Declare each description string in struct.name[] to get fixed sized buffer - * and compile time checking for strings longer than ETH_GSTRING_LEN. - */ -static const struct { - const char name[ETH_GSTRING_LEN]; -} batadv_counters_strings[] = { - { "tx" }, - { "tx_bytes" }, - { "tx_dropped" }, - { "rx" }, - { "rx_bytes" }, - { "forward" }, - { "forward_bytes" }, - { "mgmt_tx" }, - { "mgmt_tx_bytes" }, - { "mgmt_rx" }, - { "mgmt_rx_bytes" }, - { "frag_tx" }, - { "frag_tx_bytes" }, - { "frag_rx" }, - { "frag_rx_bytes" }, - { "frag_fwd" }, - { "frag_fwd_bytes" }, - { "tt_request_tx" }, - { "tt_request_rx" }, - { "tt_response_tx" }, - { "tt_response_rx" }, - { "tt_roam_adv_tx" }, - { "tt_roam_adv_rx" }, -#ifdef CONFIG_BATMAN_ADV_DAT - { "dat_get_tx" }, - { "dat_get_rx" }, - { "dat_put_tx" }, - { "dat_put_rx" }, - { "dat_cached_reply_tx" }, -#endif -#ifdef CONFIG_BATMAN_ADV_NC - { "nc_code" }, - { "nc_code_bytes" }, - { "nc_recode" }, - { "nc_recode_bytes" }, - { "nc_buffer" }, - { "nc_decode" }, - { "nc_decode_bytes" }, - { "nc_decode_failed" }, - { "nc_sniffed" }, -#endif -}; - -static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data) -{ - if (stringset == ETH_SS_STATS) - memcpy(data, batadv_counters_strings, - sizeof(batadv_counters_strings)); -} - -static void batadv_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, u64 *data) -{ - struct batadv_priv *bat_priv = netdev_priv(dev); - int i; - - for (i = 0; i < BATADV_CNT_NUM; i++) - data[i] = batadv_sum_counter(bat_priv, i); -} - -static int batadv_get_sset_count(struct net_device *dev, int stringset) -{ - if (stringset == ETH_SS_STATS) - return BATADV_CNT_NUM; - - return -EOPNOTSUPP; -} diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index c94ebdecdc3d..556f9a865ddf 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -873,8 +873,8 @@ static int batadv_tp_send(void *arg) /* something went wrong during the preparation/transmission */ if (unlikely(err && err != BATADV_TP_REASON_CANT_SEND)) { batadv_dbg(BATADV_DBG_TP_METER, bat_priv, - "Meter: batadv_tp_send() cannot send packets (%d)\n", - err); + "Meter: %s() cannot send packets (%d)\n", + __func__, err); /* ensure nobody else tries to stop the thread now */ if (atomic_dec_and_test(&tp_vars->sending)) tp_vars->reason = err; @@ -979,7 +979,8 @@ void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, if (!tp_vars) { spin_unlock_bh(&bat_priv->tp_list_lock); batadv_dbg(BATADV_DBG_TP_METER, bat_priv, - "Meter: batadv_tp_start cannot allocate list elements\n"); + "Meter: %s cannot allocate list elements\n", + __func__); batadv_tp_batctl_error_notify(BATADV_TP_REASON_MEMORY_ERROR, dst, bat_priv, session_cookie); return; diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 6077a87d46f0..e75b4937b497 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -617,7 +617,7 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM (vid: %d): %s\n", tt_global->common.addr, - BATADV_PRINT_VID(tt_global->common.vid), message); + batadv_print_vid(tt_global->common.vid), message); batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt, batadv_choose_tt, &tt_global->common); @@ -671,7 +671,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, if (tt_local->common.flags & BATADV_TT_CLIENT_PENDING) { batadv_dbg(BATADV_DBG_TT, bat_priv, "Re-adding pending client %pM (vid: %d)\n", - addr, BATADV_PRINT_VID(vid)); + addr, batadv_print_vid(vid)); /* whatever the reason why the PENDING flag was set, * this is a client which was enqueued to be removed in * this orig_interval. Since it popped up again, the @@ -684,7 +684,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, if (tt_local->common.flags & BATADV_TT_CLIENT_ROAM) { batadv_dbg(BATADV_DBG_TT, bat_priv, "Roaming client %pM (vid: %d) came back to its original location\n", - addr, BATADV_PRINT_VID(vid)); + addr, batadv_print_vid(vid)); /* the ROAM flag is set because this client roamed away * and the node got a roaming_advertisement message. Now * that the client popped up again at its original @@ -716,7 +716,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, if (!vlan) { net_ratelimited_function(batadv_info, soft_iface, "adding TT local entry %pM to non-existent VLAN %d\n", - addr, BATADV_PRINT_VID(vid)); + addr, batadv_print_vid(vid)); kmem_cache_free(batadv_tl_cache, tt_local); tt_local = NULL; goto out; @@ -724,7 +724,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n", - addr, BATADV_PRINT_VID(vid), + addr, batadv_print_vid(vid), (u8)atomic_read(&bat_priv->tt.vn)); ether_addr_copy(tt_local->common.addr, addr); @@ -1097,7 +1097,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, " * %pM %4i [%c%c%c%c%c%c] %3u.%03u (%#.8x)\n", tt_common_entry->addr, - BATADV_PRINT_VID(tt_common_entry->vid), + batadv_print_vid(tt_common_entry->vid), ((tt_common_entry->flags & BATADV_TT_CLIENT_ROAM) ? 'R' : '.'), no_purge ? 'P' : '.', @@ -1296,7 +1296,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_TT, bat_priv, "Local tt entry (%pM, vid: %d) pending to be removed: %s\n", tt_local_entry->common.addr, - BATADV_PRINT_VID(tt_local_entry->common.vid), message); + batadv_print_vid(tt_local_entry->common.vid), message); } /** @@ -1727,7 +1727,7 @@ add_orig_entry: batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new global tt entry: %pM (vid: %d, via %pM)\n", - common->addr, BATADV_PRINT_VID(common->vid), + common->addr, batadv_print_vid(common->vid), orig_node->orig); ret = true; @@ -1835,7 +1835,7 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv, if (!vlan) { seq_printf(seq, " * Cannot retrieve VLAN %d for originator %pM\n", - BATADV_PRINT_VID(tt_common_entry->vid), + batadv_print_vid(tt_common_entry->vid), best_entry->orig_node->orig); goto print_list; } @@ -1844,7 +1844,7 @@ batadv_tt_global_print_entry(struct batadv_priv *bat_priv, seq_printf(seq, " %c %pM %4i (%3u) via %pM (%3u) (%#.8x) [%c%c%c%c]\n", '*', tt_global_entry->common.addr, - BATADV_PRINT_VID(tt_global_entry->common.vid), + batadv_print_vid(tt_global_entry->common.vid), best_entry->ttvn, best_entry->orig_node->orig, last_ttvn, vlan->tt.crc, ((flags & BATADV_TT_CLIENT_ROAM) ? 'R' : '.'), @@ -1867,7 +1867,7 @@ print_list: if (!vlan) { seq_printf(seq, " + Cannot retrieve VLAN %d for originator %pM\n", - BATADV_PRINT_VID(tt_common_entry->vid), + batadv_print_vid(tt_common_entry->vid), orig_entry->orig_node->orig); continue; } @@ -1876,7 +1876,7 @@ print_list: seq_printf(seq, " %c %pM %4d (%3u) via %pM (%3u) (%#.8x) [%c%c%c%c]\n", '+', tt_global_entry->common.addr, - BATADV_PRINT_VID(tt_global_entry->common.vid), + batadv_print_vid(tt_global_entry->common.vid), orig_entry->ttvn, orig_entry->orig_node->orig, last_ttvn, vlan->tt.crc, ((flags & BATADV_TT_CLIENT_ROAM) ? 'R' : '.'), @@ -2213,7 +2213,7 @@ batadv_tt_global_del_orig_node(struct batadv_priv *bat_priv, "Deleting %pM from global tt entry %pM (vid: %d): %s\n", orig_node->orig, tt_global_entry->common.addr, - BATADV_PRINT_VID(vid), message); + batadv_print_vid(vid), message); _batadv_tt_global_del_orig_entry(tt_global_entry, orig_entry); } @@ -2253,12 +2253,13 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv, /* its the last one, mark for roaming. */ tt_global_entry->common.flags |= BATADV_TT_CLIENT_ROAM; tt_global_entry->roam_at = jiffies; - } else + } else { /* there is another entry, we can simply delete this * one and can still use the other one. */ batadv_tt_global_del_orig_node(bat_priv, tt_global_entry, orig_node, message); + } } /** @@ -2314,10 +2315,11 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv, /* local entry exists, case 2: client roamed to us. */ batadv_tt_global_del_orig_list(tt_global_entry); batadv_tt_global_free(bat_priv, tt_global_entry, message); - } else + } else { /* no local entry exists, case 1: check for roaming */ batadv_tt_global_del_roaming(bat_priv, tt_global_entry, orig_node, message); + } out: if (tt_global_entry) @@ -2375,7 +2377,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM (vid: %d): %s\n", tt_global->common.addr, - BATADV_PRINT_VID(vid), message); + batadv_print_vid(vid), message); hlist_del_rcu(&tt_common_entry->hash_entry); batadv_tt_global_entry_put(tt_global); } @@ -2435,7 +2437,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv) batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM (vid: %d): %s\n", tt_global->common.addr, - BATADV_PRINT_VID(tt_global->common.vid), + batadv_print_vid(tt_global->common.vid), msg); hlist_del_rcu(&tt_common->hash_entry); @@ -3650,7 +3652,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, u8 *client, batadv_dbg(BATADV_DBG_TT, bat_priv, "Sending ROAMING_ADV to %pM (client %pM, vid: %d)\n", - orig_node->orig, client, BATADV_PRINT_VID(vid)); + orig_node->orig, client, batadv_print_vid(vid)); batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_TX); @@ -3773,7 +3775,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting local tt entry (%pM, vid: %d): pending\n", tt_common->addr, - BATADV_PRINT_VID(tt_common->vid)); + batadv_print_vid(tt_common->vid)); batadv_tt_local_size_dec(bat_priv, tt_common->vid); hlist_del_rcu(&tt_common->hash_entry); @@ -4017,7 +4019,7 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_TT, bat_priv, "Added temporary global client (addr: %pM, vid: %d, orig: %pM)\n", - addr, BATADV_PRINT_VID(vid), orig_node->orig); + addr, batadv_print_vid(vid), orig_node->orig); ret = true; out: return ret; diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 246f21b4973b..ea43a6449247 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1000,7 +1000,6 @@ struct batadv_priv_bat_v { * struct batadv_priv - per mesh interface data * @mesh_state: current status of the mesh (inactive/active/deactivating) * @soft_iface: net device which holds this struct as private data - * @stats: structure holding the data for the ndo_get_stats() call * @bat_counters: mesh internal traffic statistic counters (see batadv_counters) * @aggregated_ogms: bool indicating whether OGM aggregation is enabled * @bonding: bool indicating whether traffic bonding is enabled @@ -1055,7 +1054,6 @@ struct batadv_priv_bat_v { struct batadv_priv { atomic_t mesh_state; struct net_device *soft_iface; - struct net_device_stats stats; u64 __percpu *bat_counters; /* Per cpu counters */ atomic_t aggregated_ogms; atomic_t bonding; @@ -1377,9 +1375,11 @@ struct batadv_nc_packet { * relevant to batman-adv in the skb->cb buffer in skbs. * @decoded: Marks a skb as decoded, which is checked when searching for coding * opportunities in network-coding.c + * @num_bcasts: Counter for broadcast packet retransmissions */ struct batadv_skb_cb { bool decoded; + unsigned int num_bcasts; }; /** @@ -1392,7 +1392,7 @@ struct batadv_skb_cb { * @skb: bcast packet's skb buffer * @packet_len: size of aggregated OGM packet inside the skb buffer * @direct_link_flags: direct link flags for aggregated OGM packets - * @num_packets: counter for bcast packet retransmission + * @num_packets: counter for aggregated OGMv1 packets * @delayed_work: work queue callback item for packet sending * @if_incoming: pointer to incoming hard-iface or primary iface if * locally generated packet diff --git a/net/bpf/Makefile b/net/bpf/Makefile new file mode 100644 index 000000000000..27b2992a0692 --- /dev/null +++ b/net/bpf/Makefile @@ -0,0 +1 @@ +obj-y := test_run.o diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c new file mode 100644 index 000000000000..8a6d0a37c30c --- /dev/null +++ b/net/bpf/test_run.c @@ -0,0 +1,172 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/bpf.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/etherdevice.h> +#include <linux/filter.h> +#include <linux/sched/signal.h> + +static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx) +{ + u32 ret; + + preempt_disable(); + rcu_read_lock(); + ret = BPF_PROG_RUN(prog, ctx); + rcu_read_unlock(); + preempt_enable(); + + return ret; +} + +static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time) +{ + u64 time_start, time_spent = 0; + u32 ret = 0, i; + + if (!repeat) + repeat = 1; + time_start = ktime_get_ns(); + for (i = 0; i < repeat; i++) { + ret = bpf_test_run_one(prog, ctx); + if (need_resched()) { + if (signal_pending(current)) + break; + time_spent += ktime_get_ns() - time_start; + cond_resched(); + time_start = ktime_get_ns(); + } + } + time_spent += ktime_get_ns() - time_start; + do_div(time_spent, repeat); + *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent; + + return ret; +} + +static int bpf_test_finish(union bpf_attr __user *uattr, const void *data, + u32 size, u32 retval, u32 duration) +{ + void __user *data_out = u64_to_user_ptr(uattr->test.data_out); + int err = -EFAULT; + + if (data_out && copy_to_user(data_out, data, size)) + goto out; + if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size))) + goto out; + if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval))) + goto out; + if (copy_to_user(&uattr->test.duration, &duration, sizeof(duration))) + goto out; + err = 0; +out: + return err; +} + +static void *bpf_test_init(const union bpf_attr *kattr, u32 size, + u32 headroom, u32 tailroom) +{ + void __user *data_in = u64_to_user_ptr(kattr->test.data_in); + void *data; + + if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom) + return ERR_PTR(-EINVAL); + + data = kzalloc(size + headroom + tailroom, GFP_USER); + if (!data) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(data + headroom, data_in, size)) { + kfree(data); + return ERR_PTR(-EFAULT); + } + return data; +} + +int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + bool is_l2 = false, is_direct_pkt_access = false; + u32 size = kattr->test.data_size_in; + u32 repeat = kattr->test.repeat; + u32 retval, duration; + struct sk_buff *skb; + void *data; + int ret; + + data = bpf_test_init(kattr, size, NET_SKB_PAD, + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); + if (IS_ERR(data)) + return PTR_ERR(data); + + switch (prog->type) { + case BPF_PROG_TYPE_SCHED_CLS: + case BPF_PROG_TYPE_SCHED_ACT: + is_l2 = true; + /* fall through */ + case BPF_PROG_TYPE_LWT_IN: + case BPF_PROG_TYPE_LWT_OUT: + case BPF_PROG_TYPE_LWT_XMIT: + is_direct_pkt_access = true; + break; + default: + break; + } + + skb = build_skb(data, 0); + if (!skb) { + kfree(data); + return -ENOMEM; + } + + skb_reserve(skb, NET_SKB_PAD); + __skb_put(skb, size); + skb->protocol = eth_type_trans(skb, current->nsproxy->net_ns->loopback_dev); + skb_reset_network_header(skb); + + if (is_l2) + __skb_push(skb, ETH_HLEN); + if (is_direct_pkt_access) + bpf_compute_data_end(skb); + retval = bpf_test_run(prog, skb, repeat, &duration); + if (!is_l2) + __skb_push(skb, ETH_HLEN); + size = skb->len; + /* bpf program can never convert linear skb to non-linear */ + if (WARN_ON_ONCE(skb_is_nonlinear(skb))) + size = skb_headlen(skb); + ret = bpf_test_finish(uattr, skb->data, size, retval, duration); + kfree_skb(skb); + return ret; +} + +int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + u32 size = kattr->test.data_size_in; + u32 repeat = kattr->test.repeat; + struct xdp_buff xdp = {}; + u32 retval, duration; + void *data; + int ret; + + data = bpf_test_init(kattr, size, XDP_PACKET_HEADROOM, 0); + if (IS_ERR(data)) + return PTR_ERR(data); + + xdp.data_hard_start = data; + xdp.data = data + XDP_PACKET_HEADROOM; + xdp.data_end = xdp.data + size; + + retval = bpf_test_run(prog, &xdp, repeat, &duration); + if (xdp.data != data + XDP_PACKET_HEADROOM) + size = xdp.data_end - xdp.data; + ret = bpf_test_finish(uattr, xdp.data, size, retval, duration); + kfree(data); + return ret; +} diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 6e08b7199dd7..5a40a87c4f4f 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -594,6 +594,9 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, fdb->updated = now; if (unlikely(added_by_user)) fdb->added_by_user = 1; + /* Take over HW learned entry */ + if (unlikely(fdb->added_by_external_learn)) + fdb->added_by_external_learn = 0; if (unlikely(fdb_modified)) fdb_notify(br, fdb, RTM_NEWNEIGH); } @@ -854,6 +857,8 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, br_fdb_update(br, p, addr, vid, true); rcu_read_unlock(); local_bh_enable(); + } else if (ndm->ndm_flags & NTF_EXT_LEARNED) { + err = br_fdb_external_learn_add(br, p, addr, vid); } else { spin_lock_bh(&br->hash_lock); err = fdb_add_entry(br, p, addr, ndm->ndm_state, diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 8ac1770aa222..6eb52d422dd9 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -22,6 +22,7 @@ #include <linux/rtnetlink.h> #include <linux/if_ether.h> #include <linux/slab.h> +#include <net/dsa.h> #include <net/sock.h> #include <linux/if_vlan.h> #include <net/switchdev.h> diff --git a/net/can/af_can.c b/net/can/af_can.c index 5488e4a6ccd0..abf7d854a94d 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -75,9 +75,7 @@ static int stats_timer __read_mostly = 1; module_param(stats_timer, int, S_IRUGO); MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)"); -/* receive filters subscribed for 'all' CAN devices */ -struct dev_rcv_lists can_rx_alldev_list; -static DEFINE_SPINLOCK(can_rcvlists_lock); +static int can_net_id; static struct kmem_cache *rcv_cache __read_mostly; @@ -145,9 +143,6 @@ static int can_create(struct net *net, struct socket *sock, int protocol, if (protocol < 0 || protocol >= CAN_NPROTO) return -EINVAL; - if (!net_eq(net, &init_net)) - return -EAFNOSUPPORT; - cp = can_get_proto(protocol); #ifdef CONFIG_MODULES @@ -331,10 +326,11 @@ EXPORT_SYMBOL(can_send); * af_can rx path */ -static struct dev_rcv_lists *find_dev_rcv_lists(struct net_device *dev) +static struct dev_rcv_lists *find_dev_rcv_lists(struct net *net, + struct net_device *dev) { if (!dev) - return &can_rx_alldev_list; + return net->can.can_rx_alldev_list; else return (struct dev_rcv_lists *)dev->ml_priv; } @@ -467,9 +463,9 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, * -ENOMEM on missing cache mem to create subscription entry * -ENODEV unknown device */ -int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, - void (*func)(struct sk_buff *, void *), void *data, - char *ident, struct sock *sk) +int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id, + canid_t mask, void (*func)(struct sk_buff *, void *), + void *data, char *ident, struct sock *sk) { struct receiver *r; struct hlist_head *rl; @@ -481,13 +477,16 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, if (dev && dev->type != ARPHRD_CAN) return -ENODEV; + if (dev && !net_eq(net, dev_net(dev))) + return -ENODEV; + r = kmem_cache_alloc(rcv_cache, GFP_KERNEL); if (!r) return -ENOMEM; - spin_lock(&can_rcvlists_lock); + spin_lock(&net->can.can_rcvlists_lock); - d = find_dev_rcv_lists(dev); + d = find_dev_rcv_lists(net, dev); if (d) { rl = find_rcv_list(&can_id, &mask, d); @@ -510,7 +509,7 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, err = -ENODEV; } - spin_unlock(&can_rcvlists_lock); + spin_unlock(&net->can.can_rcvlists_lock); return err; } @@ -540,8 +539,9 @@ static void can_rx_delete_receiver(struct rcu_head *rp) * Description: * Removes subscription entry depending on given (subscription) values. */ -void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, - void (*func)(struct sk_buff *, void *), void *data) +void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id, + canid_t mask, void (*func)(struct sk_buff *, void *), + void *data) { struct receiver *r = NULL; struct hlist_head *rl; @@ -550,9 +550,12 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, if (dev && dev->type != ARPHRD_CAN) return; - spin_lock(&can_rcvlists_lock); + if (dev && !net_eq(net, dev_net(dev))) + return; - d = find_dev_rcv_lists(dev); + spin_lock(&net->can.can_rcvlists_lock); + + d = find_dev_rcv_lists(net, dev); if (!d) { pr_err("BUG: receive list not found for " "dev %s, id %03X, mask %03X\n", @@ -598,7 +601,7 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, } out: - spin_unlock(&can_rcvlists_lock); + spin_unlock(&net->can.can_rcvlists_lock); /* schedule the receiver item for deletion */ if (r) { @@ -696,10 +699,10 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) rcu_read_lock(); /* deliver the packet to sockets listening on all devices */ - matches = can_rcv_filter(&can_rx_alldev_list, skb); + matches = can_rcv_filter(dev_net(dev)->can.can_rx_alldev_list, skb); /* find receive list for this device */ - d = find_dev_rcv_lists(dev); + d = find_dev_rcv_lists(dev_net(dev), dev); if (d) matches += can_rcv_filter(d, skb); @@ -719,9 +722,6 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (unlikely(!net_eq(dev_net(dev), &init_net))) - goto drop; - if (WARN_ONCE(dev->type != ARPHRD_CAN || skb->len != CAN_MTU || cfd->len > CAN_MAX_DLEN, @@ -743,9 +743,6 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (unlikely(!net_eq(dev_net(dev), &init_net))) - goto drop; - if (WARN_ONCE(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU || cfd->len > CANFD_MAX_DLEN, @@ -835,9 +832,6 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct dev_rcv_lists *d; - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - if (dev->type != ARPHRD_CAN) return NOTIFY_DONE; @@ -855,7 +849,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, break; case NETDEV_UNREGISTER: - spin_lock(&can_rcvlists_lock); + spin_lock(&dev_net(dev)->can.can_rcvlists_lock); d = dev->ml_priv; if (d) { @@ -869,7 +863,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, pr_err("can: notifier: receive list not found for dev " "%s\n", dev->name); - spin_unlock(&can_rcvlists_lock); + spin_unlock(&dev_net(dev)->can.can_rcvlists_lock); break; } @@ -877,6 +871,40 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, return NOTIFY_DONE; } +static int can_pernet_init(struct net *net) +{ + net->can.can_rcvlists_lock = + __SPIN_LOCK_UNLOCKED(net->can.can_rcvlists_lock); + net->can.can_rx_alldev_list = + kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL); + + if (IS_ENABLED(CONFIG_PROC_FS)) + can_init_proc(net); + + return 0; +} + +static void can_pernet_exit(struct net *net) +{ + struct net_device *dev; + + if (IS_ENABLED(CONFIG_PROC_FS)) + can_remove_proc(net); + + /* remove created dev_rcv_lists from still registered CAN devices */ + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + if (dev->type == ARPHRD_CAN && dev->ml_priv) { + struct dev_rcv_lists *d = dev->ml_priv; + + BUG_ON(d->entries); + kfree(d); + dev->ml_priv = NULL; + } + } + rcu_read_unlock(); +} + /* * af_can module init/exit functions */ @@ -902,6 +930,13 @@ static struct notifier_block can_netdev_notifier __read_mostly = { .notifier_call = can_notifier, }; +static struct pernet_operations can_pernet_ops __read_mostly = { + .init = can_pernet_init, + .exit = can_pernet_exit, + .id = &can_net_id, + .size = 0, +}; + static __init int can_init(void) { /* check for correct padding to be able to use the structs similarly */ @@ -912,8 +947,6 @@ static __init int can_init(void) pr_info("can: controller area network core (" CAN_VERSION_STRING ")\n"); - memset(&can_rx_alldev_list, 0, sizeof(can_rx_alldev_list)); - rcv_cache = kmem_cache_create("can_receiver", sizeof(struct receiver), 0, 0, NULL); if (!rcv_cache) @@ -925,9 +958,10 @@ static __init int can_init(void) setup_timer(&can_stattimer, can_stat_update, 0); mod_timer(&can_stattimer, round_jiffies(jiffies + HZ)); } - can_init_proc(); } + register_pernet_subsys(&can_pernet_ops); + /* protocol register */ sock_register(&can_family_ops); register_netdevice_notifier(&can_netdev_notifier); @@ -939,13 +973,9 @@ static __init int can_init(void) static __exit void can_exit(void) { - struct net_device *dev; - if (IS_ENABLED(CONFIG_PROC_FS)) { if (stats_timer) del_timer_sync(&can_stattimer); - - can_remove_proc(); } /* protocol unregister */ @@ -954,19 +984,7 @@ static __exit void can_exit(void) unregister_netdevice_notifier(&can_netdev_notifier); sock_unregister(PF_CAN); - /* remove created dev_rcv_lists from still registered CAN devices */ - rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { - if (dev->type == ARPHRD_CAN && dev->ml_priv) { - - struct dev_rcv_lists *d = dev->ml_priv; - - BUG_ON(d->entries); - kfree(d); - dev->ml_priv = NULL; - } - } - rcu_read_unlock(); + unregister_pernet_subsys(&can_pernet_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ diff --git a/net/can/af_can.h b/net/can/af_can.h index b86f5129e838..f273c9d9b129 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -114,8 +114,8 @@ struct s_pstats { extern struct dev_rcv_lists can_rx_alldev_list; /* function prototypes for the CAN networklayer procfs (proc.c) */ -void can_init_proc(void); -void can_remove_proc(void); +void can_init_proc(struct net *net); +void can_remove_proc(struct net *net); void can_stat_update(unsigned long data); /* structures and variables from af_can.c needed in proc.c for reading */ diff --git a/net/can/bcm.c b/net/can/bcm.c index 95d13b233c65..1976629a8463 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -764,8 +764,8 @@ static void bcm_remove_op(struct bcm_op *op) static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op) { if (op->rx_reg_dev == dev) { - can_rx_unregister(dev, op->can_id, REGMASK(op->can_id), - bcm_rx_handler, op); + can_rx_unregister(&init_net, dev, op->can_id, + REGMASK(op->can_id), bcm_rx_handler, op); /* mark as removed subscription */ op->rx_reg_dev = NULL; @@ -808,7 +808,7 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh, } } } else - can_rx_unregister(NULL, op->can_id, + can_rx_unregister(&init_net, NULL, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op); @@ -1222,7 +1222,8 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, dev = dev_get_by_index(&init_net, ifindex); if (dev) { - err = can_rx_register(dev, op->can_id, + err = can_rx_register(&init_net, dev, + op->can_id, REGMASK(op->can_id), bcm_rx_handler, op, "bcm", sk); @@ -1232,7 +1233,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, } } else - err = can_rx_register(NULL, op->can_id, + err = can_rx_register(&init_net, NULL, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op, "bcm", sk); if (err) { @@ -1528,7 +1529,7 @@ static int bcm_release(struct socket *sock) } } } else - can_rx_unregister(NULL, op->can_id, + can_rx_unregister(&init_net, NULL, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op); diff --git a/net/can/gw.c b/net/can/gw.c index 7056a1a2bb70..3c117a33e15f 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -440,14 +440,14 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) static inline int cgw_register_filter(struct cgw_job *gwj) { - return can_rx_register(gwj->src.dev, gwj->ccgw.filter.can_id, + return can_rx_register(&init_net, gwj->src.dev, gwj->ccgw.filter.can_id, gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj, "gw", NULL); } static inline void cgw_unregister_filter(struct cgw_job *gwj) { - can_rx_unregister(gwj->src.dev, gwj->ccgw.filter.can_id, + can_rx_unregister(&init_net, gwj->src.dev, gwj->ccgw.filter.can_id, gwj->ccgw.filter.can_mask, can_can_gw_rcv, gwj); } diff --git a/net/can/proc.c b/net/can/proc.c index 85ef7bb0f176..9a8d54d57b22 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -62,17 +62,6 @@ #define CAN_PROC_RCVLIST_EFF "rcvlist_eff" #define CAN_PROC_RCVLIST_ERR "rcvlist_err" -static struct proc_dir_entry *can_dir; -static struct proc_dir_entry *pde_version; -static struct proc_dir_entry *pde_stats; -static struct proc_dir_entry *pde_reset_stats; -static struct proc_dir_entry *pde_rcvlist_all; -static struct proc_dir_entry *pde_rcvlist_fil; -static struct proc_dir_entry *pde_rcvlist_inv; -static struct proc_dir_entry *pde_rcvlist_sff; -static struct proc_dir_entry *pde_rcvlist_eff; -static struct proc_dir_entry *pde_rcvlist_err; - static int user_reset; static const char rx_list_name[][8] = { @@ -351,20 +340,21 @@ static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx, static int can_rcvlist_proc_show(struct seq_file *m, void *v) { /* double cast to prevent GCC warning */ - int idx = (int)(long)m->private; + int idx = (int)(long)PDE_DATA(m->file->f_inode); struct net_device *dev; struct dev_rcv_lists *d; + struct net *net = m->private; seq_printf(m, "\nreceive list '%s':\n", rx_list_name[idx]); rcu_read_lock(); /* receive list for 'all' CAN devices (dev == NULL) */ - d = &can_rx_alldev_list; + d = net->can.can_rx_alldev_list; can_rcvlist_proc_show_one(m, idx, NULL, d); /* receive list for registered CAN devices */ - for_each_netdev_rcu(&init_net, dev) { + for_each_netdev_rcu(net, dev) { if (dev->type == ARPHRD_CAN && dev->ml_priv) can_rcvlist_proc_show_one(m, idx, dev, dev->ml_priv); } @@ -377,7 +367,7 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v) static int can_rcvlist_proc_open(struct inode *inode, struct file *file) { - return single_open(file, can_rcvlist_proc_show, PDE_DATA(inode)); + return single_open_net(inode, file, can_rcvlist_proc_show); } static const struct file_operations can_rcvlist_proc_fops = { @@ -417,6 +407,7 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) { struct net_device *dev; struct dev_rcv_lists *d; + struct net *net = m->private; /* RX_SFF */ seq_puts(m, "\nreceive list 'rx_sff':\n"); @@ -424,11 +415,11 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) rcu_read_lock(); /* sff receive list for 'all' CAN devices (dev == NULL) */ - d = &can_rx_alldev_list; + d = net->can.can_rx_alldev_list; can_rcvlist_proc_show_array(m, NULL, d->rx_sff, ARRAY_SIZE(d->rx_sff)); /* sff receive list for registered CAN devices */ - for_each_netdev_rcu(&init_net, dev) { + for_each_netdev_rcu(net, dev) { if (dev->type == ARPHRD_CAN && dev->ml_priv) { d = dev->ml_priv; can_rcvlist_proc_show_array(m, dev, d->rx_sff, @@ -444,7 +435,7 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) static int can_rcvlist_sff_proc_open(struct inode *inode, struct file *file) { - return single_open(file, can_rcvlist_sff_proc_show, NULL); + return single_open_net(inode, file, can_rcvlist_sff_proc_show); } static const struct file_operations can_rcvlist_sff_proc_fops = { @@ -460,6 +451,7 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) { struct net_device *dev; struct dev_rcv_lists *d; + struct net *net = m->private; /* RX_EFF */ seq_puts(m, "\nreceive list 'rx_eff':\n"); @@ -467,11 +459,11 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) rcu_read_lock(); /* eff receive list for 'all' CAN devices (dev == NULL) */ - d = &can_rx_alldev_list; + d = net->can.can_rx_alldev_list; can_rcvlist_proc_show_array(m, NULL, d->rx_eff, ARRAY_SIZE(d->rx_eff)); /* eff receive list for registered CAN devices */ - for_each_netdev_rcu(&init_net, dev) { + for_each_netdev_rcu(net, dev) { if (dev->type == ARPHRD_CAN && dev->ml_priv) { d = dev->ml_priv; can_rcvlist_proc_show_array(m, dev, d->rx_eff, @@ -487,7 +479,7 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) static int can_rcvlist_eff_proc_open(struct inode *inode, struct file *file) { - return single_open(file, can_rcvlist_eff_proc_show, NULL); + return single_open_net(inode, file, can_rcvlist_eff_proc_show); } static const struct file_operations can_rcvlist_eff_proc_fops = { @@ -499,81 +491,85 @@ static const struct file_operations can_rcvlist_eff_proc_fops = { }; /* - * proc utility functions - */ - -static void can_remove_proc_readentry(const char *name) -{ - if (can_dir) - remove_proc_entry(name, can_dir); -} - -/* * can_init_proc - create main CAN proc directory and procfs entries */ -void can_init_proc(void) +void can_init_proc(struct net *net) { /* create /proc/net/can directory */ - can_dir = proc_mkdir("can", init_net.proc_net); + net->can.proc_dir = proc_net_mkdir(net, "can", net->proc_net); - if (!can_dir) { - pr_info("can: failed to create /proc/net/can.\n"); + if (!net->can.proc_dir) { + printk(KERN_INFO "can: failed to create /proc/net/can . " + "CONFIG_PROC_FS missing?\n"); return; } /* own procfs entries from the AF_CAN core */ - pde_version = proc_create(CAN_PROC_VERSION, 0644, can_dir, - &can_version_proc_fops); - pde_stats = proc_create(CAN_PROC_STATS, 0644, can_dir, - &can_stats_proc_fops); - pde_reset_stats = proc_create(CAN_PROC_RESET_STATS, 0644, can_dir, - &can_reset_stats_proc_fops); - pde_rcvlist_err = proc_create_data(CAN_PROC_RCVLIST_ERR, 0644, can_dir, - &can_rcvlist_proc_fops, (void *)RX_ERR); - pde_rcvlist_all = proc_create_data(CAN_PROC_RCVLIST_ALL, 0644, can_dir, - &can_rcvlist_proc_fops, (void *)RX_ALL); - pde_rcvlist_fil = proc_create_data(CAN_PROC_RCVLIST_FIL, 0644, can_dir, - &can_rcvlist_proc_fops, (void *)RX_FIL); - pde_rcvlist_inv = proc_create_data(CAN_PROC_RCVLIST_INV, 0644, can_dir, - &can_rcvlist_proc_fops, (void *)RX_INV); - pde_rcvlist_eff = proc_create(CAN_PROC_RCVLIST_EFF, 0644, can_dir, - &can_rcvlist_eff_proc_fops); - pde_rcvlist_sff = proc_create(CAN_PROC_RCVLIST_SFF, 0644, can_dir, - &can_rcvlist_sff_proc_fops); + net->can.pde_version = proc_create(CAN_PROC_VERSION, 0644, + net->can.proc_dir, + &can_version_proc_fops); + net->can.pde_stats = proc_create(CAN_PROC_STATS, 0644, + net->can.proc_dir, + &can_stats_proc_fops); + net->can.pde_reset_stats = proc_create(CAN_PROC_RESET_STATS, 0644, + net->can.proc_dir, + &can_reset_stats_proc_fops); + net->can.pde_rcvlist_err = proc_create_data(CAN_PROC_RCVLIST_ERR, 0644, + net->can.proc_dir, + &can_rcvlist_proc_fops, + (void *)RX_ERR); + net->can.pde_rcvlist_all = proc_create_data(CAN_PROC_RCVLIST_ALL, 0644, + net->can.proc_dir, + &can_rcvlist_proc_fops, + (void *)RX_ALL); + net->can.pde_rcvlist_fil = proc_create_data(CAN_PROC_RCVLIST_FIL, 0644, + net->can.proc_dir, + &can_rcvlist_proc_fops, + (void *)RX_FIL); + net->can.pde_rcvlist_inv = proc_create_data(CAN_PROC_RCVLIST_INV, 0644, + net->can.proc_dir, + &can_rcvlist_proc_fops, + (void *)RX_INV); + net->can.pde_rcvlist_eff = proc_create(CAN_PROC_RCVLIST_EFF, 0644, + net->can.proc_dir, + &can_rcvlist_eff_proc_fops); + net->can.pde_rcvlist_sff = proc_create(CAN_PROC_RCVLIST_SFF, 0644, + net->can.proc_dir, + &can_rcvlist_sff_proc_fops); } /* * can_remove_proc - remove procfs entries and main CAN proc directory */ -void can_remove_proc(void) +void can_remove_proc(struct net *net) { - if (pde_version) - can_remove_proc_readentry(CAN_PROC_VERSION); + if (net->can.pde_version) + remove_proc_entry(CAN_PROC_VERSION, net->can.proc_dir); - if (pde_stats) - can_remove_proc_readentry(CAN_PROC_STATS); + if (net->can.pde_stats) + remove_proc_entry(CAN_PROC_STATS, net->can.proc_dir); - if (pde_reset_stats) - can_remove_proc_readentry(CAN_PROC_RESET_STATS); + if (net->can.pde_reset_stats) + remove_proc_entry(CAN_PROC_RESET_STATS, net->can.proc_dir); - if (pde_rcvlist_err) - can_remove_proc_readentry(CAN_PROC_RCVLIST_ERR); + if (net->can.pde_rcvlist_err) + remove_proc_entry(CAN_PROC_RCVLIST_ERR, net->can.proc_dir); - if (pde_rcvlist_all) - can_remove_proc_readentry(CAN_PROC_RCVLIST_ALL); + if (net->can.pde_rcvlist_all) + remove_proc_entry(CAN_PROC_RCVLIST_ALL, net->can.proc_dir); - if (pde_rcvlist_fil) - can_remove_proc_readentry(CAN_PROC_RCVLIST_FIL); + if (net->can.pde_rcvlist_fil) + remove_proc_entry(CAN_PROC_RCVLIST_FIL, net->can.proc_dir); - if (pde_rcvlist_inv) - can_remove_proc_readentry(CAN_PROC_RCVLIST_INV); + if (net->can.pde_rcvlist_inv) + remove_proc_entry(CAN_PROC_RCVLIST_INV, net->can.proc_dir); - if (pde_rcvlist_eff) - can_remove_proc_readentry(CAN_PROC_RCVLIST_EFF); + if (net->can.pde_rcvlist_eff) + remove_proc_entry(CAN_PROC_RCVLIST_EFF, net->can.proc_dir); - if (pde_rcvlist_sff) - can_remove_proc_readentry(CAN_PROC_RCVLIST_SFF); + if (net->can.pde_rcvlist_sff) + remove_proc_entry(CAN_PROC_RCVLIST_SFF, net->can.proc_dir); - if (can_dir) - remove_proc_entry("can", init_net.proc_net); + if (net->can.proc_dir) + remove_proc_entry("can", net->proc_net); } diff --git a/net/can/raw.c b/net/can/raw.c index 6dc546a06673..864c80dbdb72 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -181,20 +181,21 @@ static void raw_rcv(struct sk_buff *oskb, void *data) kfree_skb(skb); } -static int raw_enable_filters(struct net_device *dev, struct sock *sk, - struct can_filter *filter, int count) +static int raw_enable_filters(struct net *net, struct net_device *dev, + struct sock *sk, struct can_filter *filter, + int count) { int err = 0; int i; for (i = 0; i < count; i++) { - err = can_rx_register(dev, filter[i].can_id, + err = can_rx_register(net, dev, filter[i].can_id, filter[i].can_mask, raw_rcv, sk, "raw", sk); if (err) { /* clean up successfully registered filters */ while (--i >= 0) - can_rx_unregister(dev, filter[i].can_id, + can_rx_unregister(net, dev, filter[i].can_id, filter[i].can_mask, raw_rcv, sk); break; @@ -204,57 +205,62 @@ static int raw_enable_filters(struct net_device *dev, struct sock *sk, return err; } -static int raw_enable_errfilter(struct net_device *dev, struct sock *sk, - can_err_mask_t err_mask) +static int raw_enable_errfilter(struct net *net, struct net_device *dev, + struct sock *sk, can_err_mask_t err_mask) { int err = 0; if (err_mask) - err = can_rx_register(dev, 0, err_mask | CAN_ERR_FLAG, + err = can_rx_register(net, dev, 0, err_mask | CAN_ERR_FLAG, raw_rcv, sk, "raw", sk); return err; } -static void raw_disable_filters(struct net_device *dev, struct sock *sk, - struct can_filter *filter, int count) +static void raw_disable_filters(struct net *net, struct net_device *dev, + struct sock *sk, struct can_filter *filter, + int count) { int i; for (i = 0; i < count; i++) - can_rx_unregister(dev, filter[i].can_id, filter[i].can_mask, - raw_rcv, sk); + can_rx_unregister(net, dev, filter[i].can_id, + filter[i].can_mask, raw_rcv, sk); } -static inline void raw_disable_errfilter(struct net_device *dev, +static inline void raw_disable_errfilter(struct net *net, + struct net_device *dev, struct sock *sk, can_err_mask_t err_mask) { if (err_mask) - can_rx_unregister(dev, 0, err_mask | CAN_ERR_FLAG, + can_rx_unregister(net, dev, 0, err_mask | CAN_ERR_FLAG, raw_rcv, sk); } -static inline void raw_disable_allfilters(struct net_device *dev, +static inline void raw_disable_allfilters(struct net *net, + struct net_device *dev, struct sock *sk) { struct raw_sock *ro = raw_sk(sk); - raw_disable_filters(dev, sk, ro->filter, ro->count); - raw_disable_errfilter(dev, sk, ro->err_mask); + raw_disable_filters(net, dev, sk, ro->filter, ro->count); + raw_disable_errfilter(net, dev, sk, ro->err_mask); } -static int raw_enable_allfilters(struct net_device *dev, struct sock *sk) +static int raw_enable_allfilters(struct net *net, struct net_device *dev, + struct sock *sk) { struct raw_sock *ro = raw_sk(sk); int err; - err = raw_enable_filters(dev, sk, ro->filter, ro->count); + err = raw_enable_filters(net, dev, sk, ro->filter, ro->count); if (!err) { - err = raw_enable_errfilter(dev, sk, ro->err_mask); + err = raw_enable_errfilter(net, dev, sk, ro->err_mask); if (err) - raw_disable_filters(dev, sk, ro->filter, ro->count); + raw_disable_filters(net, dev, sk, ro->filter, + ro->count); } return err; @@ -267,7 +273,7 @@ static int raw_notifier(struct notifier_block *nb, struct raw_sock *ro = container_of(nb, struct raw_sock, notifier); struct sock *sk = &ro->sk; - if (!net_eq(dev_net(dev), &init_net)) + if (!net_eq(dev_net(dev), sock_net(sk))) return NOTIFY_DONE; if (dev->type != ARPHRD_CAN) @@ -282,7 +288,7 @@ static int raw_notifier(struct notifier_block *nb, lock_sock(sk); /* remove current filters & unregister */ if (ro->bound) - raw_disable_allfilters(dev, sk); + raw_disable_allfilters(dev_net(dev), dev, sk); if (ro->count > 1) kfree(ro->filter); @@ -358,13 +364,13 @@ static int raw_release(struct socket *sock) if (ro->ifindex) { struct net_device *dev; - dev = dev_get_by_index(&init_net, ro->ifindex); + dev = dev_get_by_index(sock_net(sk), ro->ifindex); if (dev) { - raw_disable_allfilters(dev, sk); + raw_disable_allfilters(dev_net(dev), dev, sk); dev_put(dev); } } else - raw_disable_allfilters(NULL, sk); + raw_disable_allfilters(sock_net(sk), NULL, sk); } if (ro->count > 1) @@ -404,7 +410,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) if (addr->can_ifindex) { struct net_device *dev; - dev = dev_get_by_index(&init_net, addr->can_ifindex); + dev = dev_get_by_index(sock_net(sk), addr->can_ifindex); if (!dev) { err = -ENODEV; goto out; @@ -420,13 +426,13 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) ifindex = dev->ifindex; /* filters set by default/setsockopt */ - err = raw_enable_allfilters(dev, sk); + err = raw_enable_allfilters(sock_net(sk), dev, sk); dev_put(dev); } else { ifindex = 0; /* filters set by default/setsockopt */ - err = raw_enable_allfilters(NULL, sk); + err = raw_enable_allfilters(sock_net(sk), NULL, sk); } if (!err) { @@ -435,13 +441,15 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) if (ro->ifindex) { struct net_device *dev; - dev = dev_get_by_index(&init_net, ro->ifindex); + dev = dev_get_by_index(sock_net(sk), + ro->ifindex); if (dev) { - raw_disable_allfilters(dev, sk); + raw_disable_allfilters(dev_net(dev), + dev, sk); dev_put(dev); } } else - raw_disable_allfilters(NULL, sk); + raw_disable_allfilters(sock_net(sk), NULL, sk); } ro->ifindex = ifindex; ro->bound = 1; @@ -517,15 +525,16 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, lock_sock(sk); if (ro->bound && ro->ifindex) - dev = dev_get_by_index(&init_net, ro->ifindex); + dev = dev_get_by_index(sock_net(sk), ro->ifindex); if (ro->bound) { /* (try to) register the new filters */ if (count == 1) - err = raw_enable_filters(dev, sk, &sfilter, 1); + err = raw_enable_filters(sock_net(sk), dev, sk, + &sfilter, 1); else - err = raw_enable_filters(dev, sk, filter, - count); + err = raw_enable_filters(sock_net(sk), dev, sk, + filter, count); if (err) { if (count > 1) kfree(filter); @@ -533,7 +542,8 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, } /* remove old filter registrations */ - raw_disable_filters(dev, sk, ro->filter, ro->count); + raw_disable_filters(sock_net(sk), dev, sk, ro->filter, + ro->count); } /* remove old filter space */ @@ -569,18 +579,20 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, lock_sock(sk); if (ro->bound && ro->ifindex) - dev = dev_get_by_index(&init_net, ro->ifindex); + dev = dev_get_by_index(sock_net(sk), ro->ifindex); /* remove current error mask */ if (ro->bound) { /* (try to) register the new err_mask */ - err = raw_enable_errfilter(dev, sk, err_mask); + err = raw_enable_errfilter(sock_net(sk), dev, sk, + err_mask); if (err) goto out_err; /* remove old err_mask registration */ - raw_disable_errfilter(dev, sk, ro->err_mask); + raw_disable_errfilter(sock_net(sk), dev, sk, + ro->err_mask); } /* link new err_mask to the socket */ @@ -741,7 +753,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) return -EINVAL; } - dev = dev_get_by_index(&init_net, ifindex); + dev = dev_get_by_index(sock_net(sk), ifindex); if (!dev) return -ENXIO; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 38dcf1eb427d..f76bb3332613 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -7,6 +7,7 @@ #include <linux/kthread.h> #include <linux/net.h> #include <linux/nsproxy.h> +#include <linux/sched/mm.h> #include <linux/slab.h> #include <linux/socket.h> #include <linux/string.h> @@ -469,11 +470,16 @@ static int ceph_tcp_connect(struct ceph_connection *con) { struct sockaddr_storage *paddr = &con->peer_addr.in_addr; struct socket *sock; + unsigned int noio_flag; int ret; BUG_ON(con->sock); + + /* sock_create_kern() allocates with GFP_KERNEL */ + noio_flag = memalloc_noio_save(); ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family, SOCK_STREAM, IPPROTO_TCP, &sock); + memalloc_noio_restore(noio_flag); if (ret) return ret; sock->sk->sk_allocation = GFP_NOFS; diff --git a/net/core/datagram.c b/net/core/datagram.c index ea633342ab0d..4608aa245410 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -256,8 +256,12 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, } spin_unlock_irqrestore(&queue->lock, cpu_flags); - } while (sk_can_busy_loop(sk) && - sk_busy_loop(sk, flags & MSG_DONTWAIT)); + + if (!sk_can_busy_loop(sk)) + break; + + sk_busy_loop(sk, flags & MSG_DONTWAIT); + } while (!skb_queue_empty(&sk->sk_receive_queue)); error = -EAGAIN; diff --git a/net/core/dev.c b/net/core/dev.c index 7869ae3837ca..ef9fe60ee294 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5060,27 +5060,28 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock) do_softirq(); } -bool sk_busy_loop(struct sock *sk, int nonblock) +void napi_busy_loop(unsigned int napi_id, + bool (*loop_end)(void *, unsigned long), + void *loop_end_arg) { - unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0; + unsigned long start_time = loop_end ? busy_loop_current_time() : 0; int (*napi_poll)(struct napi_struct *napi, int budget); void *have_poll_lock = NULL; struct napi_struct *napi; - int rc; restart: - rc = false; napi_poll = NULL; rcu_read_lock(); - napi = napi_by_id(sk->sk_napi_id); + napi = napi_by_id(napi_id); if (!napi) goto out; preempt_disable(); for (;;) { - rc = 0; + int work = 0; + local_bh_disable(); if (!napi_poll) { unsigned long val = READ_ONCE(napi->state); @@ -5098,16 +5099,15 @@ restart: have_poll_lock = netpoll_poll_lock(napi); napi_poll = napi->poll; } - rc = napi_poll(napi, BUSY_POLL_BUDGET); - trace_napi_poll(napi, rc, BUSY_POLL_BUDGET); + work = napi_poll(napi, BUSY_POLL_BUDGET); + trace_napi_poll(napi, work, BUSY_POLL_BUDGET); count: - if (rc > 0) - __NET_ADD_STATS(sock_net(sk), - LINUX_MIB_BUSYPOLLRXPACKETS, rc); + if (work > 0) + __NET_ADD_STATS(dev_net(napi->dev), + LINUX_MIB_BUSYPOLLRXPACKETS, work); local_bh_enable(); - if (nonblock || !skb_queue_empty(&sk->sk_receive_queue) || - busy_loop_timeout(end_time)) + if (!loop_end || loop_end(loop_end_arg, start_time)) break; if (unlikely(need_resched())) { @@ -5116,9 +5116,8 @@ count: preempt_enable(); rcu_read_unlock(); cond_resched(); - rc = !skb_queue_empty(&sk->sk_receive_queue); - if (rc || busy_loop_timeout(end_time)) - return rc; + if (loop_end(loop_end_arg, start_time)) + return; goto restart; } cpu_relax(); @@ -5126,12 +5125,10 @@ count: if (napi_poll) busy_poll_stop(napi, have_poll_lock); preempt_enable(); - rc = !skb_queue_empty(&sk->sk_receive_queue); out: rcu_read_unlock(); - return rc; } -EXPORT_SYMBOL(sk_busy_loop); +EXPORT_SYMBOL(napi_busy_loop); #endif /* CONFIG_NET_RX_BUSY_POLL */ @@ -5143,10 +5140,10 @@ static void napi_hash_add(struct napi_struct *napi) spin_lock(&napi_hash_lock); - /* 0..NR_CPUS+1 range is reserved for sender_cpu use */ + /* 0..NR_CPUS range is reserved for sender_cpu use */ do { - if (unlikely(++napi_gen_id < NR_CPUS + 1)) - napi_gen_id = NR_CPUS + 1; + if (unlikely(++napi_gen_id < MIN_NAPI_ID)) + napi_gen_id = MIN_NAPI_ID; } while (napi_by_id(napi_gen_id)); napi->napi_id = napi_gen_id; diff --git a/net/core/devlink.c b/net/core/devlink.c index e9c1e6acfb6d..24b766003a61 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1493,8 +1493,686 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, if (err) return err; } + return 0; +} + +int devlink_dpipe_match_put(struct sk_buff *skb, + struct devlink_dpipe_match *match) +{ + struct devlink_dpipe_header *header = match->header; + struct devlink_dpipe_field *field = &header->fields[match->field_id]; + struct nlattr *match_attr; + + match_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_MATCH); + if (!match_attr) + return -EMSGSIZE; + + if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_MATCH_TYPE, match->type) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, match->header_index) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) || + nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global)) + goto nla_put_failure; + + nla_nest_end(skb, match_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, match_attr); + return -EMSGSIZE; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_match_put); + +static int devlink_dpipe_matches_put(struct devlink_dpipe_table *table, + struct sk_buff *skb) +{ + struct nlattr *matches_attr; + + matches_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_MATCHES); + if (!matches_attr) + return -EMSGSIZE; + + if (table->table_ops->matches_dump(table->priv, skb)) + goto nla_put_failure; + + nla_nest_end(skb, matches_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, matches_attr); + return -EMSGSIZE; +} + +int devlink_dpipe_action_put(struct sk_buff *skb, + struct devlink_dpipe_action *action) +{ + struct devlink_dpipe_header *header = action->header; + struct devlink_dpipe_field *field = &header->fields[action->field_id]; + struct nlattr *action_attr; + + action_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ACTION); + if (!action_attr) + return -EMSGSIZE; + + if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_ACTION_TYPE, action->type) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_INDEX, action->header_index) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) || + nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global)) + goto nla_put_failure; + + nla_nest_end(skb, action_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, action_attr); + return -EMSGSIZE; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_action_put); + +static int devlink_dpipe_actions_put(struct devlink_dpipe_table *table, + struct sk_buff *skb) +{ + struct nlattr *actions_attr; + + actions_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE_ACTIONS); + if (!actions_attr) + return -EMSGSIZE; + + if (table->table_ops->actions_dump(table->priv, skb)) + goto nla_put_failure; + + nla_nest_end(skb, actions_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, actions_attr); + return -EMSGSIZE; +} + +static int devlink_dpipe_table_put(struct sk_buff *skb, + struct devlink_dpipe_table *table) +{ + struct nlattr *table_attr; + + table_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE); + if (!table_attr) + return -EMSGSIZE; + + if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) || + nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table->size, + DEVLINK_ATTR_PAD)) + goto nla_put_failure; + if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED, + table->counters_enabled)) + goto nla_put_failure; + + if (devlink_dpipe_matches_put(table, skb)) + goto nla_put_failure; + + if (devlink_dpipe_actions_put(table, skb)) + goto nla_put_failure; + + nla_nest_end(skb, table_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, table_attr); + return -EMSGSIZE; +} + +static int devlink_dpipe_send_and_alloc_skb(struct sk_buff **pskb, + struct genl_info *info) +{ + int err; + + if (*pskb) { + err = genlmsg_reply(*pskb, info); + if (err) + return err; + } + *pskb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!*pskb) + return -ENOMEM; + return 0; +} + +static int devlink_dpipe_tables_fill(struct genl_info *info, + enum devlink_command cmd, int flags, + struct list_head *dpipe_tables, + const char *table_name) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_dpipe_table *table; + struct nlattr *tables_attr; + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh; + bool incomplete; + void *hdr; + int i; + int err; + + table = list_first_entry(dpipe_tables, + struct devlink_dpipe_table, list); +start_again: + err = devlink_dpipe_send_and_alloc_skb(&skb, info); + if (err) + return err; + + hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, + &devlink_nl_family, NLM_F_MULTI, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(skb, devlink)) + goto nla_put_failure; + tables_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLES); + if (!tables_attr) + goto nla_put_failure; + + i = 0; + incomplete = false; + list_for_each_entry_from(table, dpipe_tables, list) { + if (!table_name) { + err = devlink_dpipe_table_put(skb, table); + if (err) { + if (!i) + goto err_table_put; + incomplete = true; + break; + } + } else { + if (!strcmp(table->name, table_name)) { + err = devlink_dpipe_table_put(skb, table); + if (err) + break; + } + } + i++; + } + + nla_nest_end(skb, tables_attr); + genlmsg_end(skb, hdr); + if (incomplete) + goto start_again; + +send_done: + nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq, + NLMSG_DONE, 0, flags | NLM_F_MULTI); + if (!nlh) { + err = devlink_dpipe_send_and_alloc_skb(&skb, info); + if (err) + goto err_skb_send_alloc; + goto send_done; + } + + return genlmsg_reply(skb, info); + +nla_put_failure: + err = -EMSGSIZE; +err_table_put: +err_skb_send_alloc: + genlmsg_cancel(skb, hdr); + nlmsg_free(skb); + return err; +} + +static int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + const char *table_name = NULL; + + if (info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]) + table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]); + + return devlink_dpipe_tables_fill(info, DEVLINK_CMD_DPIPE_TABLE_GET, 0, + &devlink->dpipe_table_list, + table_name); +} + +static int devlink_dpipe_value_put(struct sk_buff *skb, + struct devlink_dpipe_value *value) +{ + if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE, + value->value_size, value->value)) + return -EMSGSIZE; + if (value->mask) + if (nla_put(skb, DEVLINK_ATTR_DPIPE_VALUE_MASK, + value->value_size, value->mask)) + return -EMSGSIZE; + if (value->mapping_valid) + if (nla_put_u32(skb, DEVLINK_ATTR_DPIPE_VALUE_MAPPING, + value->mapping_value)) + return -EMSGSIZE; + return 0; +} + +static int devlink_dpipe_action_value_put(struct sk_buff *skb, + struct devlink_dpipe_value *value) +{ + if (!value->action) + return -EINVAL; + if (devlink_dpipe_action_put(skb, value->action)) + return -EMSGSIZE; + if (devlink_dpipe_value_put(skb, value)) + return -EMSGSIZE; + return 0; +} + +static int devlink_dpipe_action_values_put(struct sk_buff *skb, + struct devlink_dpipe_value *values, + unsigned int values_count) +{ + struct nlattr *action_attr; + int i; + int err; + + for (i = 0; i < values_count; i++) { + action_attr = nla_nest_start(skb, + DEVLINK_ATTR_DPIPE_ACTION_VALUE); + if (!action_attr) + return -EMSGSIZE; + err = devlink_dpipe_action_value_put(skb, &values[i]); + if (err) + goto err_action_value_put; + nla_nest_end(skb, action_attr); + } + return 0; + +err_action_value_put: + nla_nest_cancel(skb, action_attr); + return err; +} + +static int devlink_dpipe_match_value_put(struct sk_buff *skb, + struct devlink_dpipe_value *value) +{ + if (!value->match) + return -EINVAL; + if (devlink_dpipe_match_put(skb, value->match)) + return -EMSGSIZE; + if (devlink_dpipe_value_put(skb, value)) + return -EMSGSIZE; + return 0; +} + +static int devlink_dpipe_match_values_put(struct sk_buff *skb, + struct devlink_dpipe_value *values, + unsigned int values_count) +{ + struct nlattr *match_attr; + int i; + int err; + + for (i = 0; i < values_count; i++) { + match_attr = nla_nest_start(skb, + DEVLINK_ATTR_DPIPE_MATCH_VALUE); + if (!match_attr) + return -EMSGSIZE; + err = devlink_dpipe_match_value_put(skb, &values[i]); + if (err) + goto err_match_value_put; + nla_nest_end(skb, match_attr); + } + return 0; + +err_match_value_put: + nla_nest_cancel(skb, match_attr); + return err; +} + +static int devlink_dpipe_entry_put(struct sk_buff *skb, + struct devlink_dpipe_entry *entry) +{ + struct nlattr *entry_attr, *matches_attr, *actions_attr; + int err; + + entry_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_ENTRY); + if (!entry_attr) + return -EMSGSIZE; + + if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_INDEX, entry->index, + DEVLINK_ATTR_PAD)) + goto nla_put_failure; + if (entry->counter_valid) + if (nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_ENTRY_COUNTER, + entry->counter, DEVLINK_ATTR_PAD)) + goto nla_put_failure; + + matches_attr = nla_nest_start(skb, + DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES); + if (!matches_attr) + goto nla_put_failure; + + err = devlink_dpipe_match_values_put(skb, entry->match_values, + entry->match_values_count); + if (err) { + nla_nest_cancel(skb, matches_attr); + goto err_match_values_put; + } + nla_nest_end(skb, matches_attr); + + actions_attr = nla_nest_start(skb, + DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES); + if (!actions_attr) + goto nla_put_failure; + + err = devlink_dpipe_action_values_put(skb, entry->action_values, + entry->action_values_count); + if (err) { + nla_nest_cancel(skb, actions_attr); + goto err_action_values_put; + } + nla_nest_end(skb, actions_attr); + nla_nest_end(skb, entry_attr); return 0; + +nla_put_failure: + err = -EMSGSIZE; +err_match_values_put: +err_action_values_put: + nla_nest_cancel(skb, entry_attr); + return err; +} + +static struct devlink_dpipe_table * +devlink_dpipe_table_find(struct list_head *dpipe_tables, + const char *table_name) +{ + struct devlink_dpipe_table *table; + + list_for_each_entry_rcu(table, dpipe_tables, list) { + if (!strcmp(table->name, table_name)) + return table; + } + return NULL; +} + +int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct devlink *devlink; + int err; + + err = devlink_dpipe_send_and_alloc_skb(&dump_ctx->skb, + dump_ctx->info); + if (err) + return err; + + dump_ctx->hdr = genlmsg_put(dump_ctx->skb, + dump_ctx->info->snd_portid, + dump_ctx->info->snd_seq, + &devlink_nl_family, NLM_F_MULTI, + dump_ctx->cmd); + if (!dump_ctx->hdr) + goto nla_put_failure; + + devlink = dump_ctx->info->user_ptr[0]; + if (devlink_nl_put_handle(dump_ctx->skb, devlink)) + goto nla_put_failure; + dump_ctx->nest = nla_nest_start(dump_ctx->skb, + DEVLINK_ATTR_DPIPE_ENTRIES); + if (!dump_ctx->nest) + goto nla_put_failure; + return 0; + +nla_put_failure: + genlmsg_cancel(dump_ctx->skb, dump_ctx->hdr); + nlmsg_free(dump_ctx->skb); + return -EMSGSIZE; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_prepare); + +int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx, + struct devlink_dpipe_entry *entry) +{ + return devlink_dpipe_entry_put(dump_ctx->skb, entry); +} +EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_append); + +int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx) +{ + nla_nest_end(dump_ctx->skb, dump_ctx->nest); + genlmsg_end(dump_ctx->skb, dump_ctx->hdr); + return 0; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_close); + +static int devlink_dpipe_entries_fill(struct genl_info *info, + enum devlink_command cmd, int flags, + struct devlink_dpipe_table *table) +{ + struct devlink_dpipe_dump_ctx dump_ctx; + struct nlmsghdr *nlh; + int err; + + dump_ctx.skb = NULL; + dump_ctx.cmd = cmd; + dump_ctx.info = info; + + err = table->table_ops->entries_dump(table->priv, + table->counters_enabled, + &dump_ctx); + if (err) + goto err_entries_dump; + +send_done: + nlh = nlmsg_put(dump_ctx.skb, info->snd_portid, info->snd_seq, + NLMSG_DONE, 0, flags | NLM_F_MULTI); + if (!nlh) { + err = devlink_dpipe_send_and_alloc_skb(&dump_ctx.skb, info); + if (err) + goto err_skb_send_alloc; + goto send_done; + } + return genlmsg_reply(dump_ctx.skb, info); + +err_entries_dump: +err_skb_send_alloc: + genlmsg_cancel(dump_ctx.skb, dump_ctx.hdr); + nlmsg_free(dump_ctx.skb); + return err; +} + +static int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_dpipe_table *table; + const char *table_name; + + if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]) + return -EINVAL; + + table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]); + table = devlink_dpipe_table_find(&devlink->dpipe_table_list, + table_name); + if (!table) + return -EINVAL; + + if (!table->table_ops->entries_dump) + return -EINVAL; + + return devlink_dpipe_entries_fill(info, DEVLINK_CMD_DPIPE_ENTRIES_GET, + 0, table); +} + +static int devlink_dpipe_fields_put(struct sk_buff *skb, + const struct devlink_dpipe_header *header) +{ + struct devlink_dpipe_field *field; + struct nlattr *field_attr; + int i; + + for (i = 0; i < header->fields_count; i++) { + field = &header->fields[i]; + field_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_FIELD); + if (!field_attr) + return -EMSGSIZE; + if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_FIELD_NAME, field->name) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_ID, field->id) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH, field->bitwidth) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE, field->mapping_type)) + goto nla_put_failure; + nla_nest_end(skb, field_attr); + } + return 0; + +nla_put_failure: + nla_nest_cancel(skb, field_attr); + return -EMSGSIZE; +} + +static int devlink_dpipe_header_put(struct sk_buff *skb, + struct devlink_dpipe_header *header) +{ + struct nlattr *fields_attr, *header_attr; + int err; + + header_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER); + if (!header) + return -EMSGSIZE; + + if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_HEADER_NAME, header->name) || + nla_put_u32(skb, DEVLINK_ATTR_DPIPE_HEADER_ID, header->id) || + nla_put_u8(skb, DEVLINK_ATTR_DPIPE_HEADER_GLOBAL, header->global)) + goto nla_put_failure; + + fields_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADER_FIELDS); + if (!fields_attr) + goto nla_put_failure; + + err = devlink_dpipe_fields_put(skb, header); + if (err) { + nla_nest_cancel(skb, fields_attr); + goto nla_put_failure; + } + nla_nest_end(skb, fields_attr); + nla_nest_end(skb, header_attr); + return 0; + +nla_put_failure: + err = -EMSGSIZE; + nla_nest_cancel(skb, header_attr); + return err; +} + +static int devlink_dpipe_headers_fill(struct genl_info *info, + enum devlink_command cmd, int flags, + struct devlink_dpipe_headers * + dpipe_headers) +{ + struct devlink *devlink = info->user_ptr[0]; + struct nlattr *headers_attr; + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh; + void *hdr; + int i, j; + int err; + + i = 0; +start_again: + err = devlink_dpipe_send_and_alloc_skb(&skb, info); + if (err) + return err; + + hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, + &devlink_nl_family, NLM_F_MULTI, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(skb, devlink)) + goto nla_put_failure; + headers_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_HEADERS); + if (!headers_attr) + goto nla_put_failure; + + j = 0; + for (; i < dpipe_headers->headers_count; i++) { + err = devlink_dpipe_header_put(skb, dpipe_headers->headers[i]); + if (err) { + if (!j) + goto err_table_put; + break; + } + j++; + } + nla_nest_end(skb, headers_attr); + genlmsg_end(skb, hdr); + if (i != dpipe_headers->headers_count) + goto start_again; + +send_done: + nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq, + NLMSG_DONE, 0, flags | NLM_F_MULTI); + if (!nlh) { + err = devlink_dpipe_send_and_alloc_skb(&skb, info); + if (err) + goto err_skb_send_alloc; + goto send_done; + } + return genlmsg_reply(skb, info); + +nla_put_failure: + err = -EMSGSIZE; +err_table_put: +err_skb_send_alloc: + genlmsg_cancel(skb, hdr); + nlmsg_free(skb); + return err; +} + +static int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + + if (!devlink->dpipe_headers) + return -EOPNOTSUPP; + return devlink_dpipe_headers_fill(info, DEVLINK_CMD_DPIPE_HEADERS_GET, + 0, devlink->dpipe_headers); +} + +static int devlink_dpipe_table_counters_set(struct devlink *devlink, + const char *table_name, + bool enable) +{ + struct devlink_dpipe_table *table; + + table = devlink_dpipe_table_find(&devlink->dpipe_table_list, + table_name); + if (!table) + return -EINVAL; + + if (table->counter_control_extern) + return -EOPNOTSUPP; + + if (!(table->counters_enabled ^ enable)) + return 0; + + table->counters_enabled = enable; + if (table->table_ops->counters_set_update) + table->table_ops->counters_set_update(table->priv, enable); + return 0; +} + +static int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + const char *table_name; + bool counters_enable; + + if (!info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME] || + !info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED]) + return -EINVAL; + + table_name = nla_data(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_NAME]); + counters_enable = !!nla_get_u8(info->attrs[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED]); + + return devlink_dpipe_table_counters_set(devlink, table_name, + counters_enable); } static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { @@ -1512,6 +2190,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 }, [DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 }, [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 }, + [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -1644,6 +2324,34 @@ static const struct genl_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, + { + .cmd = DEVLINK_CMD_DPIPE_TABLE_GET, + .doit = devlink_nl_cmd_dpipe_table_get, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET, + .doit = devlink_nl_cmd_dpipe_entries_get, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET, + .doit = devlink_nl_cmd_dpipe_headers_get, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, + .doit = devlink_nl_cmd_dpipe_table_counters_set, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, }; static struct genl_family devlink_nl_family __ro_after_init = { @@ -1680,6 +2388,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) devlink_net_set(devlink, &init_net); INIT_LIST_HEAD(&devlink->port_list); INIT_LIST_HEAD(&devlink->sb_list); + INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list); return devlink; } EXPORT_SYMBOL_GPL(devlink_alloc); @@ -1880,6 +2589,133 @@ void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index) } EXPORT_SYMBOL_GPL(devlink_sb_unregister); +/** + * devlink_dpipe_headers_register - register dpipe headers + * + * @devlink: devlink + * @dpipe_headers: dpipe header array + * + * Register the headers supported by hardware. + */ +int devlink_dpipe_headers_register(struct devlink *devlink, + struct devlink_dpipe_headers *dpipe_headers) +{ + mutex_lock(&devlink_mutex); + devlink->dpipe_headers = dpipe_headers; + mutex_unlock(&devlink_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_headers_register); + +/** + * devlink_dpipe_headers_unregister - unregister dpipe headers + * + * @devlink: devlink + * + * Unregister the headers supported by hardware. + */ +void devlink_dpipe_headers_unregister(struct devlink *devlink) +{ + mutex_lock(&devlink_mutex); + devlink->dpipe_headers = NULL; + mutex_unlock(&devlink_mutex); +} +EXPORT_SYMBOL_GPL(devlink_dpipe_headers_unregister); + +/** + * devlink_dpipe_table_counter_enabled - check if counter allocation + * required + * @devlink: devlink + * @table_name: tables name + * + * Used by driver to check if counter allocation is required. + * After counter allocation is turned on the table entries + * are updated to include counter statistics. + * + * After that point on the driver must respect the counter + * state so that each entry added to the table is added + * with a counter. + */ +bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, + const char *table_name) +{ + struct devlink_dpipe_table *table; + bool enabled; + + rcu_read_lock(); + table = devlink_dpipe_table_find(&devlink->dpipe_table_list, + table_name); + enabled = false; + if (table) + enabled = table->counters_enabled; + rcu_read_unlock(); + return enabled; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled); + +/** + * devlink_dpipe_table_register - register dpipe table + * + * @devlink: devlink + * @table_name: table name + * @table_ops: table ops + * @priv: priv + * @size: size + * @counter_control_extern: external control for counters + */ +int devlink_dpipe_table_register(struct devlink *devlink, + const char *table_name, + struct devlink_dpipe_table_ops *table_ops, + void *priv, u64 size, + bool counter_control_extern) +{ + struct devlink_dpipe_table *table; + + if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name)) + return -EEXIST; + + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + table->name = table_name; + table->table_ops = table_ops; + table->priv = priv; + table->size = size; + table->counter_control_extern = counter_control_extern; + + mutex_lock(&devlink_mutex); + list_add_tail_rcu(&table->list, &devlink->dpipe_table_list); + mutex_unlock(&devlink_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(devlink_dpipe_table_register); + +/** + * devlink_dpipe_table_unregister - unregister dpipe table + * + * @devlink: devlink + * @table_name: table name + */ +void devlink_dpipe_table_unregister(struct devlink *devlink, + const char *table_name) +{ + struct devlink_dpipe_table *table; + + mutex_lock(&devlink_mutex); + table = devlink_dpipe_table_find(&devlink->dpipe_table_list, + table_name); + if (!table) + goto unlock; + list_del_rcu(&table->list); + mutex_unlock(&devlink_mutex); + kfree_rcu(table, rcu); + return; +unlock: + mutex_unlock(&devlink_mutex); +} +EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister); + static int __init devlink_module_init(void) { return genl_register_family(&devlink_nl_family); diff --git a/net/core/filter.c b/net/core/filter.c index dfb9f61a2fd5..15e9a81ffebe 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3309,24 +3309,28 @@ static const struct bpf_verifier_ops tc_cls_act_ops = { .is_valid_access = tc_cls_act_is_valid_access, .convert_ctx_access = tc_cls_act_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, + .test_run = bpf_prog_test_run_skb, }; static const struct bpf_verifier_ops xdp_ops = { .get_func_proto = xdp_func_proto, .is_valid_access = xdp_is_valid_access, .convert_ctx_access = xdp_convert_ctx_access, + .test_run = bpf_prog_test_run_xdp, }; static const struct bpf_verifier_ops cg_skb_ops = { .get_func_proto = cg_skb_func_proto, .is_valid_access = sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, + .test_run = bpf_prog_test_run_skb, }; static const struct bpf_verifier_ops lwt_inout_ops = { .get_func_proto = lwt_inout_func_proto, .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, + .test_run = bpf_prog_test_run_skb, }; static const struct bpf_verifier_ops lwt_xmit_ops = { @@ -3334,6 +3338,7 @@ static const struct bpf_verifier_ops lwt_xmit_ops = { .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, + .test_run = bpf_prog_test_run_skb, }; static const struct bpf_verifier_ops cg_sock_ops = { diff --git a/net/core/flow.c b/net/core/flow.c index f765c11d8df5..f7f5d1932a27 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -47,7 +47,7 @@ struct flow_flush_info { static struct kmem_cache *flow_cachep __read_mostly; -#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift) +#define flow_cache_hash_size(cache) (1U << (cache)->hash_shift) #define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) static void flow_cache_new_hashrnd(unsigned long arg) @@ -99,7 +99,8 @@ static void flow_cache_gc_task(struct work_struct *work) } static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, - int deleted, struct list_head *gc_list, + unsigned int deleted, + struct list_head *gc_list, struct netns_xfrm *xfrm) { if (deleted) { @@ -114,17 +115,18 @@ static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, static void __flow_cache_shrink(struct flow_cache *fc, struct flow_cache_percpu *fcp, - int shrink_to) + unsigned int shrink_to) { struct flow_cache_entry *fle; struct hlist_node *tmp; LIST_HEAD(gc_list); - int i, deleted = 0; + unsigned int deleted = 0; struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, flow_cache_global); + unsigned int i; for (i = 0; i < flow_cache_hash_size(fc); i++) { - int saved = 0; + unsigned int saved = 0; hlist_for_each_entry_safe(fle, tmp, &fcp->hash_table[i], u.hlist) { @@ -145,7 +147,7 @@ static void __flow_cache_shrink(struct flow_cache *fc, static void flow_cache_shrink(struct flow_cache *fc, struct flow_cache_percpu *fcp) { - int shrink_to = fc->low_watermark / flow_cache_hash_size(fc); + unsigned int shrink_to = fc->low_watermark / flow_cache_hash_size(fc); __flow_cache_shrink(fc, fcp, shrink_to); } @@ -161,7 +163,7 @@ static void flow_new_hash_rnd(struct flow_cache *fc, static u32 flow_hash_code(struct flow_cache *fc, struct flow_cache_percpu *fcp, const struct flowi *key, - size_t keysize) + unsigned int keysize) { const u32 *k = (const u32 *) key; const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32); @@ -174,7 +176,7 @@ static u32 flow_hash_code(struct flow_cache *fc, * important assumptions that we can here, such as alignment. */ static int flow_key_compare(const struct flowi *key1, const struct flowi *key2, - size_t keysize) + unsigned int keysize) { const flow_compare_t *k1, *k1_lim, *k2; @@ -199,7 +201,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, struct flow_cache_percpu *fcp; struct flow_cache_entry *fle, *tfle; struct flow_cache_object *flo; - size_t keysize; + unsigned int keysize; unsigned int hash; local_bh_disable(); @@ -295,9 +297,10 @@ static void flow_cache_flush_tasklet(unsigned long data) struct flow_cache_entry *fle; struct hlist_node *tmp; LIST_HEAD(gc_list); - int i, deleted = 0; + unsigned int deleted = 0; struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, flow_cache_global); + unsigned int i; fcp = this_cpu_ptr(fc->percpu); for (i = 0; i < flow_cache_hash_size(fc); i++) { @@ -327,7 +330,7 @@ static void flow_cache_flush_tasklet(unsigned long data) static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu) { struct flow_cache_percpu *fcp; - int i; + unsigned int i; fcp = per_cpu_ptr(fc->percpu, cpu); for (i = 0; i < flow_cache_hash_size(fc); i++) @@ -402,12 +405,12 @@ void flow_cache_flush_deferred(struct net *net) static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) { struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); - size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc); + unsigned int sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc); if (!fcp->hash_table) { fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu)); if (!fcp->hash_table) { - pr_err("NET: failed to allocate flow cache sz %zu\n", sz); + pr_err("NET: failed to allocate flow cache sz %u\n", sz); return -ENOMEM; } fcp->hash_rnd_recalc = 1; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 5f3ae922fcd1..c9cf425303f8 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -159,7 +159,7 @@ __skb_flow_dissect_arp(const struct sk_buff *skb, unsigned char ar_tip[4]; } *arp_eth, _arp_eth; const struct arphdr *arp; - struct arphdr *_arp; + struct arphdr _arp; if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ARP)) return FLOW_DISSECT_RET_OUT_GOOD; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 7069f5e4a361..8ae87c591c8e 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -861,7 +861,8 @@ static void neigh_probe(struct neighbour *neigh) if (skb) skb = skb_clone(skb, GFP_ATOMIC); write_unlock(&neigh->lock); - neigh->ops->solicit(neigh, skb); + if (neigh->ops->solicit) + neigh->ops->solicit(neigh, skb); atomic_inc(&neigh->probes); kfree_skb(skb); } diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 0f9275ee5595..1c4810919a0a 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/module.h> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9c3947a43eff..58419da7961b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4116,22 +4116,25 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { - case NETDEV_UP: - case NETDEV_DOWN: - case NETDEV_PRE_UP: - case NETDEV_POST_INIT: - case NETDEV_REGISTER: - case NETDEV_CHANGE: - case NETDEV_PRE_TYPE_CHANGE: - case NETDEV_GOING_DOWN: - case NETDEV_UNREGISTER: - case NETDEV_UNREGISTER_FINAL: - case NETDEV_RELEASE: - case NETDEV_JOIN: - case NETDEV_BONDING_INFO: + case NETDEV_REBOOT: + case NETDEV_CHANGEMTU: + case NETDEV_CHANGEADDR: + case NETDEV_CHANGENAME: + case NETDEV_FEAT_CHANGE: + case NETDEV_BONDING_FAILOVER: + case NETDEV_POST_TYPE_CHANGE: + case NETDEV_NOTIFY_PEERS: + case NETDEV_CHANGEUPPER: + case NETDEV_RESEND_IGMP: + case NETDEV_PRECHANGEMTU: + case NETDEV_CHANGEINFODATA: + case NETDEV_PRECHANGEUPPER: + case NETDEV_CHANGELOWERSTATE: + case NETDEV_UDP_TUNNEL_PUSH_INFO: + case NETDEV_CHANGE_TX_QUEUE_LEN: + rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); break; default: - rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); break; } return NOTIFY_DONE; diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index fb87e78a2cc7..6bd2f8fb0476 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -20,9 +20,11 @@ #include <net/tcp.h> static siphash_key_t net_secret __read_mostly; +static siphash_key_t ts_secret __read_mostly; static __always_inline void net_secret_init(void) { + net_get_random_once(&ts_secret, sizeof(ts_secret)); net_get_random_once(&net_secret, sizeof(net_secret)); } #endif @@ -45,6 +47,23 @@ static u32 seq_scale(u32 seq) #endif #if IS_ENABLED(CONFIG_IPV6) +static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) +{ + const struct { + struct in6_addr saddr; + struct in6_addr daddr; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .saddr = *(struct in6_addr *)saddr, + .daddr = *(struct in6_addr *)daddr, + }; + + if (sysctl_tcp_timestamps != 1) + return 0; + + return siphash(&combined, offsetofend(typeof(combined), daddr), + &ts_secret); +} + u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr, __be16 sport, __be16 dport, u32 *tsoff) { @@ -63,7 +82,7 @@ u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr, net_secret_init(); hash = siphash(&combined, offsetofend(typeof(combined), dport), &net_secret); - *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0; + *tsoff = secure_tcpv6_ts_off(saddr, daddr); return seq_scale(hash); } EXPORT_SYMBOL(secure_tcpv6_seq_and_tsoff); @@ -88,6 +107,14 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET +static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr) +{ + if (sysctl_tcp_timestamps != 1) + return 0; + + return siphash_2u32((__force u32)saddr, (__force u32)daddr, + &ts_secret); +} /* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d), * but fortunately, `sport' cannot be 0 in any circumstances. If this changes, @@ -102,7 +129,7 @@ u32 secure_tcp_seq_and_tsoff(__be32 saddr, __be32 daddr, hash = siphash_3u32((__force u32)saddr, (__force u32)daddr, (__force u32)sport << 16 | (__force u32)dport, &net_secret); - *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0; + *tsoff = secure_tcp_ts_off(saddr, daddr); return seq_scale(hash); } diff --git a/net/core/sock.c b/net/core/sock.c index 1b9030ee6f4b..a06bb7a2a689 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1083,6 +1083,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, union { int val; + u64 val64; struct linger ling; struct timeval tm; } v; @@ -1328,6 +1329,25 @@ int sock_getsockopt(struct socket *sock, int level, int optname, goto lenout; } + +#ifdef CONFIG_NET_RX_BUSY_POLL + case SO_INCOMING_NAPI_ID: + v.val = READ_ONCE(sk->sk_napi_id); + + /* aggregate non-NAPI IDs down to 0 */ + if (v.val < MIN_NAPI_ID) + v.val = 0; + + break; +#endif + + case SO_COOKIE: + lv = sizeof(u64); + if (len < lv) + return -EINVAL; + v.val64 = sock_gen_cookie(sk); + break; + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). @@ -2601,7 +2621,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; - sk->sk_stamp = ktime_set(-1L, 0); + sk->sk_stamp = SK_DEFAULT_STAMP; #ifdef CONFIG_NET_RX_BUSY_POLL sk->sk_napi_id = 0; @@ -3237,3 +3257,14 @@ static int __init proto_init(void) subsys_initcall(proto_init); #endif /* PROC_FS */ + +#ifdef CONFIG_NET_RX_BUSY_POLL +bool sk_busy_loop_end(void *p, unsigned long start_time) +{ + struct sock *sk = p; + + return !skb_queue_empty(&sk->sk_receive_queue) || + sk_busy_loop_timeout(sk, start_time); +} +EXPORT_SYMBOL(sk_busy_loop_end); +#endif /* CONFIG_NET_RX_BUSY_POLL */ diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 9a1a352fd1eb..eed1ebf7f29d 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -13,9 +13,9 @@ static DEFINE_SPINLOCK(reuseport_lock); -static struct sock_reuseport *__reuseport_alloc(u16 max_socks) +static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks) { - size_t size = sizeof(struct sock_reuseport) + + unsigned int size = sizeof(struct sock_reuseport) + sizeof(struct sock *) * max_socks; struct sock_reuseport *reuse = kzalloc(size, GFP_ATOMIC); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 4ead336e14ea..7f9cc400eca0 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -408,14 +408,16 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_net_busy_poll, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, { .procname = "busy_read", .data = &sysctl_net_busy_read, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, #endif #ifdef CONFIG_NET_SCHED diff --git a/net/core/utils.c b/net/core/utils.c index 6592d7bbed39..d758880c09a7 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -51,7 +51,7 @@ EXPORT_SYMBOL(net_ratelimit); __be32 in_aton(const char *str) { - unsigned long l; + unsigned int l; unsigned int val; int i; diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 9649238eef40..aa21f49f1215 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -6,7 +6,7 @@ config HAVE_NET_DSA config NET_DSA tristate "Distributed Switch Architecture" - depends on HAVE_NET_DSA + depends on HAVE_NET_DSA && MAY_USE_DEVLINK select NET_SWITCHDEV select PHYLIB ---help--- @@ -31,4 +31,6 @@ config NET_DSA_TAG_TRAILER config NET_DSA_TAG_QCA bool +config NET_DSA_TAG_MTK + bool endif diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 31d343796251..9b1d478f3713 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -8,3 +8,4 @@ dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o +dsa_core-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index b6d4f6a23f06..1fb9cf7aaaf4 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -14,15 +14,17 @@ #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/module.h> -#include <net/dsa.h> #include <linux/of.h> #include <linux/of_mdio.h> #include <linux/of_platform.h> #include <linux/of_net.h> #include <linux/of_gpio.h> +#include <linux/netdevice.h> #include <linux/sysfs.h> #include <linux/phy_fixed.h> #include <linux/gpio/consumer.h> +#include <linux/etherdevice.h> +#include <net/dsa.h> #include "dsa_priv.h" static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb, @@ -53,6 +55,9 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { #ifdef CONFIG_NET_DSA_TAG_QCA [DSA_TAG_PROTO_QCA] = &qca_netdev_ops, #endif +#ifdef CONFIG_NET_DSA_TAG_MTK + [DSA_TAG_PROTO_MTK] = &mtk_netdev_ops, +#endif [DSA_TAG_PROTO_NONE] = &none_ops, }; @@ -896,13 +901,34 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; + struct sk_buff *nskb = NULL; if (unlikely(dst == NULL)) { kfree_skb(skb); return 0; } - return dst->rcv(skb, dev, pt, orig_dev); + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + return 0; + + nskb = dst->rcv(skb, dev, pt, orig_dev); + if (!nskb) { + kfree_skb(skb); + return 0; + } + + skb = nskb; + skb_push(skb, ETH_HLEN); + skb->pkt_type = PACKET_HOST; + skb->protocol = eth_type_trans(skb, skb->dev); + + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; + + netif_receive_skb(skb); + + return 0; } static struct packet_type dsa_pack_type __read_mostly = { diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 737be6470c7f..033b3bfb63dc 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -13,16 +13,20 @@ #include <linux/device.h> #include <linux/err.h> #include <linux/list.h> +#include <linux/netdevice.h> #include <linux/slab.h> #include <linux/rtnetlink.h> -#include <net/dsa.h> #include <linux/of.h> #include <linux/of_net.h> +#include <net/dsa.h> #include "dsa_priv.h" static LIST_HEAD(dsa_switch_trees); static DEFINE_MUTEX(dsa2_mutex); +static const struct devlink_ops dsa_devlink_ops = { +}; + static struct dsa_switch_tree *dsa_get_dst(u32 tree) { struct dsa_switch_tree *dst; @@ -222,12 +226,18 @@ static int dsa_dsa_port_apply(struct dsa_port *port, u32 index, return err; } - return 0; + memset(&ds->ports[index].devlink_port, 0, + sizeof(ds->ports[index].devlink_port)); + + return devlink_port_register(ds->devlink, + &ds->ports[index].devlink_port, + index); } static void dsa_dsa_port_unapply(struct dsa_port *port, u32 index, struct dsa_switch *ds) { + devlink_port_unregister(&ds->ports[index].devlink_port); dsa_cpu_dsa_destroy(port); } @@ -245,12 +255,17 @@ static int dsa_cpu_port_apply(struct dsa_port *port, u32 index, ds->cpu_port_mask |= BIT(index); - return 0; + memset(&ds->ports[index].devlink_port, 0, + sizeof(ds->ports[index].devlink_port)); + err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port, + index); + return err; } static void dsa_cpu_port_unapply(struct dsa_port *port, u32 index, struct dsa_switch *ds) { + devlink_port_unregister(&ds->ports[index].devlink_port); dsa_cpu_dsa_destroy(port); ds->cpu_port_mask &= ~BIT(index); @@ -275,12 +290,23 @@ static int dsa_user_port_apply(struct dsa_port *port, u32 index, return err; } + memset(&ds->ports[index].devlink_port, 0, + sizeof(ds->ports[index].devlink_port)); + err = devlink_port_register(ds->devlink, &ds->ports[index].devlink_port, + index); + if (err) + return err; + + devlink_port_type_eth_set(&ds->ports[index].devlink_port, + ds->ports[index].netdev); + return 0; } static void dsa_user_port_unapply(struct dsa_port *port, u32 index, struct dsa_switch *ds) { + devlink_port_unregister(&ds->ports[index].devlink_port); if (ds->ports[index].netdev) { dsa_slave_destroy(ds->ports[index].netdev); ds->ports[index].netdev = NULL; @@ -301,6 +327,17 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) */ ds->phys_mii_mask = ds->enabled_port_mask; + /* Add the switch to devlink before calling setup, so that setup can + * add dpipe tables + */ + ds->devlink = devlink_alloc(&dsa_devlink_ops, 0); + if (!ds->devlink) + return -ENOMEM; + + err = devlink_register(ds->devlink, ds->dev); + if (err) + return err; + err = ds->ops->setup(ds); if (err < 0) return err; @@ -381,6 +418,13 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds) mdiobus_unregister(ds->slave_mii_bus); dsa_switch_unregister_notifier(ds); + + if (ds->devlink) { + devlink_unregister(ds->devlink); + devlink_free(ds->devlink); + ds->devlink = NULL; + } + } static int dsa_dst_apply(struct dsa_switch_tree *dst) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 0706a511244e..107138a55bd8 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -17,8 +17,9 @@ struct dsa_device_ops { struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); - int (*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev); + struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev); }; struct dsa_slave_priv { @@ -85,4 +86,7 @@ extern const struct dsa_device_ops brcm_netdev_ops; /* tag_qca.c */ extern const struct dsa_device_ops qca_netdev_ops; +/* tag_mtk.c */ +extern const struct dsa_device_ops mtk_netdev_ops; + #endif diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 78128acfbf63..7693182df81e 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -17,6 +17,7 @@ #include <linux/of_mdio.h> #include <linux/mdio.h> #include <linux/list.h> +#include <net/dsa.h> #include <net/rtnetlink.h> #include <net/switchdev.h> #include <net/pkt_cls.h> diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 6456dacf9ae9..ca6e26e514f0 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -1,7 +1,8 @@ /* * Handling of a single switch chip, part of a switch fabric * - * Copyright (c) 2017 Vivien Didelot <vivien.didelot@savoirfairelinux.com> + * Copyright (c) 2017 Savoir-faire Linux Inc. + * Vivien Didelot <vivien.didelot@savoirfairelinux.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,9 +20,9 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds, if (ds->index == info->sw_index && ds->ops->port_bridge_join) return ds->ops->port_bridge_join(ds, info->port, info->br); - if (ds->index != info->sw_index) - dev_dbg(ds->dev, "crosschip DSA port %d.%d bridged to %s\n", - info->sw_index, info->port, netdev_name(info->br)); + if (ds->index != info->sw_index && ds->ops->crosschip_bridge_join) + return ds->ops->crosschip_bridge_join(ds, info->sw_index, + info->port, info->br); return 0; } @@ -32,9 +33,9 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, if (ds->index == info->sw_index && ds->ops->port_bridge_leave) ds->ops->port_bridge_leave(ds, info->port, info->br); - if (ds->index != info->sw_index) - dev_dbg(ds->dev, "crosschip DSA port %d.%d unbridged from %s\n", - info->sw_index, info->port, netdev_name(info->br)); + if (ds->index != info->sw_index && ds->ops->crosschip_bridge_leave) + ds->ops->crosschip_bridge_leave(ds, info->sw_index, info->port, + info->br); return 0; } diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 5d925b6b2bb1..2a9b52c5af86 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -12,6 +12,7 @@ #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> +#include <net/dsa.h> #include "dsa_priv.h" /* This tag length is 4 bytes, older ones were 6 bytes, we do not @@ -91,23 +92,17 @@ out_free: return NULL; } -static int brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) +static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; struct dsa_switch *ds; int source_port; u8 *brcm_tag; - if (unlikely(dst == NULL)) - goto out_drop; - ds = dst->cpu_switch; - skb = skb_unshare(skb, GFP_ATOMIC); - if (skb == NULL) - goto out; - if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN))) goto out_drop; @@ -139,22 +134,12 @@ static int brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, skb->data - ETH_HLEN - BRCM_TAG_LEN, 2 * ETH_ALEN); - skb_push(skb, ETH_HLEN); - skb->pkt_type = PACKET_HOST; skb->dev = ds->ports[source_port].netdev; - skb->protocol = eth_type_trans(skb, skb->dev); - - skb->dev->stats.rx_packets++; - skb->dev->stats.rx_bytes += skb->len; - netif_receive_skb(skb); - - return 0; + return skb; out_drop: - kfree_skb(skb); -out: - return 0; + return NULL; } const struct dsa_device_ops brcm_netdev_ops = { diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 72579ceea381..1c6633f0de01 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -11,6 +11,7 @@ #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> +#include <net/dsa.h> #include "dsa_priv.h" #define DSA_HLEN 4 @@ -67,8 +68,9 @@ out_free: return NULL; } -static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) +static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; struct dsa_switch *ds; @@ -76,13 +78,6 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, int source_device; int source_port; - if (unlikely(dst == NULL)) - goto out_drop; - - skb = skb_unshare(skb, GFP_ATOMIC); - if (skb == NULL) - goto out; - if (unlikely(!pskb_may_pull(skb, DSA_HLEN))) goto out_drop; @@ -164,21 +159,11 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, } skb->dev = ds->ports[source_port].netdev; - skb_push(skb, ETH_HLEN); - skb->pkt_type = PACKET_HOST; - skb->protocol = eth_type_trans(skb, skb->dev); - - skb->dev->stats.rx_packets++; - skb->dev->stats.rx_bytes += skb->len; - netif_receive_skb(skb); - - return 0; + return skb; out_drop: - kfree_skb(skb); -out: - return 0; + return NULL; } const struct dsa_device_ops dsa_netdev_ops = { diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index 648c051817a1..d9c668aa5e54 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -11,6 +11,7 @@ #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> +#include <net/dsa.h> #include "dsa_priv.h" #define DSA_HLEN 4 @@ -80,8 +81,9 @@ out_free: return NULL; } -static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) +static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; struct dsa_switch *ds; @@ -89,13 +91,6 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, int source_device; int source_port; - if (unlikely(dst == NULL)) - goto out_drop; - - skb = skb_unshare(skb, GFP_ATOMIC); - if (skb == NULL) - goto out; - if (unlikely(!pskb_may_pull(skb, EDSA_HLEN))) goto out_drop; @@ -183,21 +178,11 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, } skb->dev = ds->ports[source_port].netdev; - skb_push(skb, ETH_HLEN); - skb->pkt_type = PACKET_HOST; - skb->protocol = eth_type_trans(skb, skb->dev); - - skb->dev->stats.rx_packets++; - skb->dev->stats.rx_bytes += skb->len; - netif_receive_skb(skb); - - return 0; + return skb; out_drop: - kfree_skb(skb); -out: - return 0; + return NULL; } const struct dsa_device_ops edsa_netdev_ops = { diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c new file mode 100644 index 000000000000..837cdddb53f0 --- /dev/null +++ b/net/dsa/tag_mtk.c @@ -0,0 +1,100 @@ +/* + * Mediatek DSA Tag support + * Copyright (C) 2017 Landen Chao <landen.chao@mediatek.com> + * Sean Wang <sean.wang@mediatek.com> + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/etherdevice.h> +#include <net/dsa.h> +#include "dsa_priv.h" + +#define MTK_HDR_LEN 4 +#define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0) +#define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0) + +static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + u8 *mtk_tag; + + if (skb_cow_head(skb, MTK_HDR_LEN) < 0) + goto out_free; + + skb_push(skb, MTK_HDR_LEN); + + memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN); + + /* Build the tag after the MAC Source Address */ + mtk_tag = skb->data + 2 * ETH_ALEN; + mtk_tag[0] = 0; + mtk_tag[1] = (1 << p->dp->index) & MTK_HDR_XMIT_DP_BIT_MASK; + mtk_tag[2] = 0; + mtk_tag[3] = 0; + + return skb; + +out_free: + kfree_skb(skb); + return NULL; +} + +static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_switch *ds; + int port; + __be16 *phdr, hdr; + + if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN))) + goto out_drop; + + /* The MTK header is added by the switch between src addr + * and ethertype at this point, skb->data points to 2 bytes + * after src addr so header should be 2 bytes right before. + */ + phdr = (__be16 *)(skb->data - 2); + hdr = ntohs(*phdr); + + /* Remove MTK tag and recalculate checksum. */ + skb_pull_rcsum(skb, MTK_HDR_LEN); + + memmove(skb->data - ETH_HLEN, + skb->data - ETH_HLEN - MTK_HDR_LEN, + 2 * ETH_ALEN); + + /* This protocol doesn't support cascading multiple + * switches so it's safe to assume the switch is first + * in the tree. + */ + ds = dst->ds[0]; + if (!ds) + goto out_drop; + + /* Get source port information */ + port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK); + if (!ds->ports[port].netdev) + goto out_drop; + + skb->dev = ds->ports[port].netdev; + + return skb; + +out_drop: + return NULL; +} + +const struct dsa_device_ops mtk_netdev_ops = { + .xmit = mtk_tag_xmit, + .rcv = mtk_tag_rcv, +}; diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index 30240f343aea..3ba3f59f7a34 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -12,6 +12,7 @@ */ #include <linux/etherdevice.h> +#include <net/dsa.h> #include "dsa_priv.h" #define QCA_HDR_LEN 2 @@ -65,8 +66,9 @@ out_free: return NULL; } -static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) +static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; struct dsa_switch *ds; @@ -74,13 +76,6 @@ static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, int port; __be16 *phdr, hdr; - if (unlikely(!dst)) - goto out_drop; - - skb = skb_unshare(skb, GFP_ATOMIC); - if (!skb) - goto out; - if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN))) goto out_drop; @@ -114,22 +109,12 @@ static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, goto out_drop; /* Update skb & forward the frame accordingly */ - skb_push(skb, ETH_HLEN); - skb->pkt_type = PACKET_HOST; skb->dev = ds->ports[port].netdev; - skb->protocol = eth_type_trans(skb, skb->dev); - - skb->dev->stats.rx_packets++; - skb->dev->stats.rx_bytes += skb->len; - netif_receive_skb(skb); - - return 0; + return skb; out_drop: - kfree_skb(skb); -out: - return 0; + return NULL; } const struct dsa_device_ops qca_netdev_ops = { diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index 26f977176978..aafc2fc74c30 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -11,6 +11,7 @@ #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> +#include <net/dsa.h> #include "dsa_priv.h" static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) @@ -57,22 +58,17 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) return nskb; } -static int trailer_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) +static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, + struct net_device *orig_dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; struct dsa_switch *ds; u8 *trailer; int source_port; - if (unlikely(dst == NULL)) - goto out_drop; ds = dst->cpu_switch; - skb = skb_unshare(skb, GFP_ATOMIC); - if (skb == NULL) - goto out; - if (skb_linearize(skb)) goto out_drop; @@ -88,21 +84,11 @@ static int trailer_rcv(struct sk_buff *skb, struct net_device *dev, pskb_trim_rcsum(skb, skb->len - 4); skb->dev = ds->ports[source_port].netdev; - skb_push(skb, ETH_HLEN); - skb->pkt_type = PACKET_HOST; - skb->protocol = eth_type_trans(skb, skb->dev); - - skb->dev->stats.rx_packets++; - skb->dev->stats.rx_bytes += skb->len; - netif_receive_skb(skb); - - return 0; + return skb; out_drop: - kfree_skb(skb); -out: - return 0; + return NULL; } const struct dsa_device_ops trailer_netdev_ops = { diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 6b1fc6e4278e..d1a11707a126 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1599,8 +1599,9 @@ static const struct net_protocol igmp_protocol = { }; #endif -static const struct net_protocol tcp_protocol = { +static struct net_protocol tcp_protocol = { .early_demux = tcp_v4_early_demux, + .early_demux_handler = tcp_v4_early_demux, .handler = tcp_v4_rcv, .err_handler = tcp_v4_err, .no_policy = 1, @@ -1608,8 +1609,9 @@ static const struct net_protocol tcp_protocol = { .icmp_strict_tag_validation = 1, }; -static const struct net_protocol udp_protocol = { +static struct net_protocol udp_protocol = { .early_demux = udp_v4_early_demux, + .early_demux_handler = udp_v4_early_demux, .handler = udp_rcv, .err_handler = udp_err, .no_policy = 1, @@ -1720,6 +1722,8 @@ static __net_init int inet_init_net(struct net *net) net->ipv4.sysctl_ip_default_ttl = IPDEFTTL; net->ipv4.sysctl_ip_dynaddr = 0; net->ipv4.sysctl_ip_early_demux = 1; + net->ipv4.sysctl_udp_early_demux = 1; + net->ipv4.sysctl_tcp_early_demux = 1; #ifdef CONFIG_SYSCTL net->ipv4.sysctl_ip_prot_sock = PROT_SOCK; #endif diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 927f1d4b8c80..6d3602ec640c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1802,6 +1802,9 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) goto nla_put_failure; + if (!devconf) + goto out; + if ((all || type == NETCONFA_FORWARDING) && nla_put_s32(skb, NETCONFA_FORWARDING, IPV4_DEVCONF(*devconf, FORWARDING)) < 0) @@ -1823,6 +1826,7 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0) goto nla_put_failure; +out: nlmsg_end(skb, nlh); return 0; @@ -1831,8 +1835,8 @@ nla_put_failure: return -EMSGSIZE; } -void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, - struct ipv4_devconf *devconf) +void inet_netconf_notify_devconf(struct net *net, int event, int type, + int ifindex, struct ipv4_devconf *devconf) { struct sk_buff *skb; int err = -ENOBUFS; @@ -1842,7 +1846,7 @@ void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, goto errout; err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, - RTM_NEWNETCONF, 0, type); + event, 0, type); if (err < 0) { /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ WARN_ON(err == -EMSGSIZE); @@ -2021,10 +2025,12 @@ static void inet_forward_change(struct net *net) IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; IPV4_DEVCONF_DFLT(net, FORWARDING) = on; - inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); - inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORWARDING, NETCONFA_IFINDEX_DEFAULT, net->ipv4.devconf_dflt); @@ -2037,7 +2043,8 @@ static void inet_forward_change(struct net *net) in_dev = __in_dev_get_rtnl(dev); if (in_dev) { IN_DEV_CONF_SET(in_dev, FORWARDING, on); - inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORWARDING, dev->ifindex, &in_dev->cnf); } } @@ -2082,19 +2089,22 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write, if (i == IPV4_DEVCONF_RP_FILTER - 1 && new_value != old_value) { ifindex = devinet_conf_ifindex(net, cnf); - inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_RP_FILTER, ifindex, cnf); } if (i == IPV4_DEVCONF_PROXY_ARP - 1 && new_value != old_value) { ifindex = devinet_conf_ifindex(net, cnf); - inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_PROXY_NEIGH, ifindex, cnf); } if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 && new_value != old_value) { ifindex = devinet_conf_ifindex(net, cnf); - inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, ifindex, cnf); } } @@ -2129,7 +2139,7 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write, container_of(cnf, struct in_device, cnf); if (*valp) dev_disable_lro(idev->dev); - inet_netconf_notify_devconf(net, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_FORWARDING, idev->dev->ifindex, cnf); @@ -2137,7 +2147,8 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write, rtnl_unlock(); rt_cache_flush(net); } else - inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORWARDING, NETCONFA_IFINDEX_DEFAULT, net->ipv4.devconf_dflt); } @@ -2259,7 +2270,8 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, p->sysctl = t; - inet_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p); + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, + ifindex, p); return 0; free: @@ -2268,16 +2280,18 @@ out: return -ENOBUFS; } -static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) +static void __devinet_sysctl_unregister(struct net *net, + struct ipv4_devconf *cnf, int ifindex) { struct devinet_sysctl_table *t = cnf->sysctl; - if (!t) - return; + if (t) { + cnf->sysctl = NULL; + unregister_net_sysctl_table(t->sysctl_header); + kfree(t); + } - cnf->sysctl = NULL; - unregister_net_sysctl_table(t->sysctl_header); - kfree(t); + inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL); } static int devinet_sysctl_register(struct in_device *idev) @@ -2299,7 +2313,9 @@ static int devinet_sysctl_register(struct in_device *idev) static void devinet_sysctl_unregister(struct in_device *idev) { - __devinet_sysctl_unregister(&idev->cnf); + struct net *net = dev_net(idev->dev); + + __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex); neigh_sysctl_unregister(idev->arp_parms); } @@ -2374,9 +2390,9 @@ static __net_init int devinet_init_net(struct net *net) #ifdef CONFIG_SYSCTL err_reg_ctl: - __devinet_sysctl_unregister(dflt); + __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT); err_reg_dflt: - __devinet_sysctl_unregister(all); + __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL); err_reg_all: if (tbl != ctl_forward_entry) kfree(tbl); @@ -2398,8 +2414,10 @@ static __net_exit void devinet_exit_net(struct net *net) tbl = net->ipv4.forw_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.forw_hdr); - __devinet_sysctl_unregister(net->ipv4.devconf_dflt); - __devinet_sysctl_unregister(net->ipv4.devconf_all); + __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt, + NETCONFA_IFINDEX_DEFAULT); + __devinet_sysctl_unregister(net, net->ipv4.devconf_all, + NETCONFA_IFINDEX_ALL); kfree(tbl); #endif kfree(net->ipv4.devconf_dflt); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d6feabb03516..fa2dc8f692c6 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -313,6 +313,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct net_device *dev = skb->dev; + void (*edemux)(struct sk_buff *skb); /* if ingress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -329,8 +330,8 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) int protocol = iph->protocol; ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot && ipprot->early_demux) { - ipprot->early_demux(skb); + if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) { + edemux(skb); /* must reload iph, skb->head might have changed */ iph = ip_hdr(skb); } diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index fd9f34bbd740..c3b12b1c7162 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -57,6 +57,7 @@ #include <linux/export.h> #include <net/net_namespace.h> #include <net/arp.h> +#include <net/dsa.h> #include <net/ip.h> #include <net/ipconfig.h> #include <net/route.h> @@ -306,7 +307,7 @@ static void __init ic_close_devs(void) while ((d = next)) { next = d->next; dev = d->dev; - if ((!ic_dev || dev != ic_dev->dev) && !netdev_uses_dsa(dev)) { + if (d != ic_dev && !netdev_uses_dsa(dev)) { pr_debug("IP-Config: Downing %s\n", dev->name); dev_change_flags(dev, d->flags); } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c0317c940bcd..5bca64fc71b7 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -631,7 +631,7 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, in_dev = __in_dev_get_rtnl(dev); if (in_dev) { IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; - inet_netconf_notify_devconf(dev_net(dev), + inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in_dev->cnf); ip_rt_multicast_event(in_dev); @@ -820,8 +820,8 @@ static int vif_add(struct net *net, struct mr_table *mrt, return -EADDRNOTAVAIL; } IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; - inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, dev->ifindex, - &in_dev->cnf); + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, + dev->ifindex, &in_dev->cnf); ip_rt_multicast_event(in_dev); /* Fill in the VIF structures */ @@ -1282,7 +1282,8 @@ static void mrtsock_destruct(struct sock *sk) ipmr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; - inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_MC_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); RCU_INIT_POINTER(mrt->mroute_sk, NULL); @@ -1344,7 +1345,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, if (ret == 0) { rcu_assign_pointer(mrt->mroute_sk, sk); IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; - inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, + inet_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_MC_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); } diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index ef49989c93b1..da04b9c33ef3 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1249,16 +1249,6 @@ static const struct nf_conntrack_expect_policy snmp_exp_policy = { .timeout = 180, }; -static struct nf_conntrack_helper snmp_helper __read_mostly = { - .me = THIS_MODULE, - .help = help, - .expect_policy = &snmp_exp_policy, - .name = "snmp", - .tuple.src.l3num = AF_INET, - .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT), - .tuple.dst.protonum = IPPROTO_UDP, -}; - static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { .me = THIS_MODULE, .help = help, @@ -1277,22 +1267,16 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { static int __init nf_nat_snmp_basic_init(void) { - int ret = 0; - BUG_ON(nf_nat_snmp_hook != NULL); RCU_INIT_POINTER(nf_nat_snmp_hook, help); - ret = nf_conntrack_helper_register(&snmp_trap_helper); - if (ret < 0) { - nf_conntrack_helper_unregister(&snmp_helper); - return ret; - } - return ret; + return nf_conntrack_helper_register(&snmp_trap_helper); } static void __exit nf_nat_snmp_basic_fini(void) { RCU_INIT_POINTER(nf_nat_snmp_hook, NULL); + synchronize_rcu(); nf_conntrack_helper_unregister(&snmp_trap_helper); } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2af6244b83e2..ccfbce13a633 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -156,17 +156,18 @@ int ping_hash(struct sock *sk) void ping_unhash(struct sock *sk) { struct inet_sock *isk = inet_sk(sk); + pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); + write_lock_bh(&ping_table.lock); if (sk_hashed(sk)) { - write_lock_bh(&ping_table.lock); hlist_nulls_del(&sk->sk_nulls_node); sk_nulls_node_init(&sk->sk_nulls_node); sock_put(sk); isk->inet_num = 0; isk->inet_sport = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(&ping_table.lock); } + write_unlock_bh(&ping_table.lock); } EXPORT_SYMBOL_GPL(ping_unhash); diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 4b7c0ec65251..32a691b7ce2c 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -28,7 +28,7 @@ #include <linux/spinlock.h> #include <net/protocol.h> -const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; +struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly; EXPORT_SYMBOL(inet_offloads); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5dda1ef81c7e..5e1e60546fce 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2667,10 +2667,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) skb_reset_mac_header(skb); skb_reset_network_header(skb); - /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */ - ip_hdr(skb)->protocol = IPPROTO_ICMP; - skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); - src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; @@ -2680,6 +2676,15 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) else uid = (iif ? INVALID_UID : current_uid()); + /* Bugfix: need to give ip_route_input enough of an IP header to + * not gag. + */ + ip_hdr(skb)->protocol = IPPROTO_UDP; + ip_hdr(skb)->saddr = src; + ip_hdr(skb)->daddr = dst; + + skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); + memset(&fl4, 0, sizeof(fl4)); fl4.daddr = dst; fl4.saddr = src; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 711c3e2e17b1..6fb25693c00b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -24,6 +24,7 @@ #include <net/cipso_ipv4.h> #include <net/inet_frag.h> #include <net/ping.h> +#include <net/protocol.h> static int zero; static int one = 1; @@ -294,6 +295,58 @@ bad_key: return ret; } +static void proc_configure_early_demux(int enabled, int protocol) +{ + struct net_protocol *ipprot; +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_protocol *ip6prot; +#endif + + ipprot = rcu_dereference(inet_protos[protocol]); + if (ipprot) + ipprot->early_demux = enabled ? ipprot->early_demux_handler : + NULL; + +#if IS_ENABLED(CONFIG_IPV6) + ip6prot = rcu_dereference(inet6_protos[protocol]); + if (ip6prot) + ip6prot->early_demux = enabled ? ip6prot->early_demux_handler : + NULL; +#endif +} + +static int proc_tcp_early_demux(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret = 0; + + ret = proc_dointvec(table, write, buffer, lenp, ppos); + + if (write && !ret) { + int enabled = init_net.ipv4.sysctl_tcp_early_demux; + + proc_configure_early_demux(enabled, IPPROTO_TCP); + } + + return ret; +} + +static int proc_udp_early_demux(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret = 0; + + ret = proc_dointvec(table, write, buffer, lenp, ppos); + + if (write && !ret) { + int enabled = init_net.ipv4.sysctl_udp_early_demux; + + proc_configure_early_demux(enabled, IPPROTO_UDP); + } + + return ret; +} + static struct ctl_table ipv4_table[] = { { .procname = "tcp_timestamps", @@ -750,6 +803,20 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { + .procname = "udp_early_demux", + .data = &init_net.ipv4.sysctl_udp_early_demux, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_udp_early_demux + }, + { + .procname = "tcp_early_demux", + .data = &init_net.ipv4.sysctl_tcp_early_demux, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_tcp_early_demux + }, + { .procname = "ip_default_ttl", .data = &init_net.ipv4.sysctl_ip_default_ttl, .maxlen = sizeof(int), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1665948dff8c..94f0b5b50e0d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2393,7 +2393,7 @@ static int tcp_repair_options_est(struct tcp_sock *tp, u16 snd_wscale = opt.opt_val & 0xFFFF; u16 rcv_wscale = opt.opt_val >> 16; - if (snd_wscale > 14 || rcv_wscale > 14) + if (snd_wscale > TCP_MAX_WSCALE || rcv_wscale > TCP_MAX_WSCALE) return -EFBIG; tp->rx_opt.snd_wscale = snd_wscale; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a75c48f62e27..31f2765ef851 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -126,7 +126,8 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define REXMIT_LOST 1 /* retransmit packets marked lost */ #define REXMIT_NEW 2 /* FRTO-style transmit of unsent/new packets */ -static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb) +static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb, + unsigned int len) { static bool __once __read_mostly; @@ -137,8 +138,9 @@ static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb) rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif); - pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n", - dev ? dev->name : "Unknown driver"); + if (!dev || len >= dev->mtu) + pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n", + dev ? dev->name : "Unknown driver"); rcu_read_unlock(); } } @@ -161,8 +163,10 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) if (len >= icsk->icsk_ack.rcv_mss) { icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, tcp_sk(sk)->advmss); - if (unlikely(icsk->icsk_ack.rcv_mss != len)) - tcp_gro_dev_warn(sk, skb); + /* Account for possibly-removed options */ + if (unlikely(len > icsk->icsk_ack.rcv_mss + + MAX_TCP_OPTION_SPACE)) + tcp_gro_dev_warn(sk, skb, len); } else { /* Otherwise, we make more careful check taking into account, * that SACKs block is variable. @@ -874,22 +878,11 @@ static void tcp_update_reordering(struct sock *sk, const int metric, const int ts) { struct tcp_sock *tp = tcp_sk(sk); - if (metric > tp->reordering) { - int mib_idx; + int mib_idx; + if (metric > tp->reordering) { tp->reordering = min(sysctl_tcp_max_reordering, metric); - /* This exciting event is worth to be remembered. 8) */ - if (ts) - mib_idx = LINUX_MIB_TCPTSREORDER; - else if (tcp_is_reno(tp)) - mib_idx = LINUX_MIB_TCPRENOREORDER; - else if (tcp_is_fack(tp)) - mib_idx = LINUX_MIB_TCPFACKREORDER; - else - mib_idx = LINUX_MIB_TCPSACKREORDER; - - NET_INC_STATS(sock_net(sk), mib_idx); #if FASTRETRANS_DEBUG > 1 pr_debug("Disorder%d %d %u f%u s%u rr%d\n", tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, @@ -902,6 +895,18 @@ static void tcp_update_reordering(struct sock *sk, const int metric, } tp->rack.reord = 1; + + /* This exciting event is worth to be remembered. 8) */ + if (ts) + mib_idx = LINUX_MIB_TCPTSREORDER; + else if (tcp_is_reno(tp)) + mib_idx = LINUX_MIB_TCPRENOREORDER; + else if (tcp_is_fack(tp)) + mib_idx = LINUX_MIB_TCPFACKREORDER; + else + mib_idx = LINUX_MIB_TCPSACKREORDER; + + NET_INC_STATS(sock_net(sk), mib_idx); } /* This must be called before lost_out is incremented */ @@ -3759,11 +3764,12 @@ void tcp_parse_options(const struct sk_buff *skb, !estab && sysctl_tcp_window_scaling) { __u8 snd_wscale = *(__u8 *)ptr; opt_rx->wscale_ok = 1; - if (snd_wscale > 14) { - net_info_ratelimited("%s: Illegal window scaling value %d >14 received\n", + if (snd_wscale > TCP_MAX_WSCALE) { + net_info_ratelimited("%s: Illegal window scaling value %d > %u received\n", __func__, - snd_wscale); - snd_wscale = 14; + snd_wscale, + TCP_MAX_WSCALE); + snd_wscale = TCP_MAX_WSCALE; } opt_rx->snd_wscale = snd_wscale; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7482b5d11861..20cbd2f07f28 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1409,8 +1409,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) if (!nsk) goto discard; if (nsk != sk) { - sock_rps_save_rxhash(nsk, skb); - sk_mark_napi_id(nsk, skb); if (tcp_child_process(sk, nsk, skb)) { rsk = nsk; goto reset; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 1e217948be62..8f6373b0cd77 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -26,6 +26,7 @@ #include <net/tcp.h> #include <net/inet_common.h> #include <net/xfrm.h> +#include <net/busy_poll.h> int sysctl_tcp_abort_on_overflow __read_mostly; @@ -799,6 +800,9 @@ int tcp_child_process(struct sock *parent, struct sock *child, int ret = 0; int state = child->sk_state; + /* record NAPI ID of child */ + sk_mark_napi_id(child, skb); + tcp_segs_in(tcp_sk(child), skb); if (!sock_owned_by_user(child)) { ret = tcp_rcv_state_process(child, skb); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 22548b5f05cb..0e807a83c1bc 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -212,12 +212,12 @@ void tcp_select_initial_window(int __space, __u32 mss, /* If no clamp set the clamp to the max possible scaled window */ if (*window_clamp == 0) - (*window_clamp) = (65535 << 14); + (*window_clamp) = (U16_MAX << TCP_MAX_WSCALE); space = min(*window_clamp, space); /* Quantize space offering to a multiple of mss if possible. */ if (space > mss) - space = (space / mss) * mss; + space = rounddown(space, mss); /* NOTE: offering an initial window larger than 32767 * will break some buggy TCP stacks. If the admin tells us @@ -234,13 +234,11 @@ void tcp_select_initial_window(int __space, __u32 mss, (*rcv_wscale) = 0; if (wscale_ok) { - /* Set window scaling on max possible window - * See RFC1323 for an explanation of the limit to 14 - */ + /* Set window scaling on max possible window */ space = max_t(u32, space, sysctl_tcp_rmem[2]); space = max_t(u32, space, sysctl_rmem_max); space = min_t(u32, space, *window_clamp); - while (space > 65535 && (*rcv_wscale) < 14) { + while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) { space >>= 1; (*rcv_wscale)++; } @@ -253,7 +251,7 @@ void tcp_select_initial_window(int __space, __u32 mss, } /* Set the clamp no higher than max representable value */ - (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp); + (*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp); } EXPORT_SYMBOL(tcp_select_initial_window); @@ -2561,7 +2559,6 @@ u32 __tcp_select_window(struct sock *sk) /* Don't do rounding if we are using window scaling, since the * scaled window will not line up with the MSS boundary anyway. */ - window = tp->rcv_wnd; if (tp->rx_opt.rcv_wscale) { window = free_space; @@ -2569,10 +2566,9 @@ u32 __tcp_select_window(struct sock *sk) * Import case: prevent zero window announcement if * 1<<rcv_wscale > mss. */ - if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window) - window = (((window >> tp->rx_opt.rcv_wscale) + 1) - << tp->rx_opt.rcv_wscale); + window = ALIGN(window, (1 << tp->rx_opt.rcv_wscale)); } else { + window = tp->rcv_wnd; /* Get the largest window that is a nice multiple of mss. * Window clamp already applied above. * If our current window offering is within 1 mss of the @@ -2582,7 +2578,7 @@ u32 __tcp_select_window(struct sock *sk) * is too small. */ if (window <= free_space - mss || window > free_space) - window = (free_space / mss) * mss; + window = rounddown(free_space, mss); else if (mss == full_space && free_space > window + (full_space >> 1)) window = free_space; diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 4ecb38ae8504..d8acbd9f477a 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -12,7 +12,8 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb) /* Account for retransmits that are lost again */ TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT, + tcp_skb_pcount(skb)); } } diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index e2afe677a9d9..48c452959d2c 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -307,6 +307,7 @@ config IPV6_SEG6_LWTUNNEL bool "IPv6: Segment Routing Header encapsulation support" depends on IPV6 select LWTUNNEL + select DST_CACHE ---help--- Support for encapsulation of packets within an outer IPv6 header and a Segment Routing Header using the lightweight diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index dff5beb26a01..67ec87ea5fb6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -549,6 +549,9 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) goto nla_put_failure; + if (!devconf) + goto out; + if ((all || type == NETCONFA_FORWARDING) && nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0) goto nla_put_failure; @@ -567,6 +570,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, devconf->ignore_routes_with_linkdown) < 0) goto nla_put_failure; +out: nlmsg_end(skb, nlh); return 0; @@ -575,8 +579,8 @@ nla_put_failure: return -EMSGSIZE; } -void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, - struct ipv6_devconf *devconf) +void inet6_netconf_notify_devconf(struct net *net, int event, int type, + int ifindex, struct ipv6_devconf *devconf) { struct sk_buff *skb; int err = -ENOBUFS; @@ -586,7 +590,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, goto errout; err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, - RTM_NEWNETCONF, 0, type); + event, 0, type); if (err < 0) { /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */ WARN_ON(err == -EMSGSIZE); @@ -769,7 +773,8 @@ static void dev_forward_change(struct inet6_dev *idev) else addrconf_leave_anycast(ifa); } - inet6_netconf_notify_devconf(dev_net(dev), NETCONFA_FORWARDING, + inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, + NETCONFA_FORWARDING, dev->ifindex, &idev->cnf); } @@ -804,7 +809,8 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) if (p == &net->ipv6.devconf_dflt->forwarding) { if ((!newf) ^ (!old)) - inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORWARDING, NETCONFA_IFINDEX_DEFAULT, net->ipv6.devconf_dflt); rtnl_unlock(); @@ -816,13 +822,15 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) net->ipv6.devconf_dflt->forwarding = newf; if ((!newf) ^ (!old_dflt)) - inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORWARDING, NETCONFA_IFINDEX_DEFAULT, net->ipv6.devconf_dflt); addrconf_forward_change(net, newf); if ((!newf) ^ (!old)) - inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); } else if ((!newf) ^ (!old)) @@ -847,6 +855,7 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf) idev->cnf.ignore_routes_with_linkdown = newf; if (changed) inet6_netconf_notify_devconf(dev_net(dev), + RTM_NEWNETCONF, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, dev->ifindex, &idev->cnf); @@ -869,6 +878,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf) if (p == &net->ipv6.devconf_dflt->ignore_routes_with_linkdown) { if ((!newf) ^ (!old)) inet6_netconf_notify_devconf(net, + RTM_NEWNETCONF, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, NETCONFA_IFINDEX_DEFAULT, net->ipv6.devconf_dflt); @@ -881,6 +891,7 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf) addrconf_linkdown_change(net, newf); if ((!newf) ^ (!old)) inet6_netconf_notify_devconf(net, + RTM_NEWNETCONF, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); @@ -5675,17 +5686,20 @@ int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, return restart_syscall(); if (valp == &net->ipv6.devconf_dflt->proxy_ndp) - inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_PROXY_NEIGH, NETCONFA_IFINDEX_DEFAULT, net->ipv6.devconf_dflt); else if (valp == &net->ipv6.devconf_all->proxy_ndp) - inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_PROXY_NEIGH, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); else { struct inet6_dev *idev = ctl->extra1; - inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_PROXY_NEIGH, idev->dev->ifindex, &idev->cnf); } @@ -6348,7 +6362,8 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, ifindex = NETCONFA_IFINDEX_DEFAULT; else ifindex = idev->dev->ifindex; - inet6_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p); + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, + ifindex, p); return 0; free: @@ -6357,7 +6372,8 @@ out: return -ENOBUFS; } -static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) +static void __addrconf_sysctl_unregister(struct net *net, + struct ipv6_devconf *p, int ifindex) { struct ctl_table *table; @@ -6368,6 +6384,8 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) unregister_net_sysctl_table(p->sysctl_header); p->sysctl_header = NULL; kfree(table); + + inet6_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL); } static int addrconf_sysctl_register(struct inet6_dev *idev) @@ -6391,7 +6409,8 @@ static int addrconf_sysctl_register(struct inet6_dev *idev) static void addrconf_sysctl_unregister(struct inet6_dev *idev) { - __addrconf_sysctl_unregister(&idev->cnf); + __addrconf_sysctl_unregister(dev_net(idev->dev), &idev->cnf, + idev->dev->ifindex); neigh_sysctl_unregister(idev->nd_parms); } @@ -6434,7 +6453,7 @@ static int __net_init addrconf_init_net(struct net *net) #ifdef CONFIG_SYSCTL err_reg_dflt: - __addrconf_sysctl_unregister(all); + __addrconf_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL); err_reg_all: kfree(dflt); #endif @@ -6447,8 +6466,10 @@ err_alloc_all: static void __net_exit addrconf_exit_net(struct net *net) { #ifdef CONFIG_SYSCTL - __addrconf_sysctl_unregister(net->ipv6.devconf_dflt); - __addrconf_sysctl_unregister(net->ipv6.devconf_all); + __addrconf_sysctl_unregister(net, net->ipv6.devconf_dflt, + NETCONFA_IFINDEX_DEFAULT); + __addrconf_sysctl_unregister(net, net->ipv6.devconf_all, + NETCONFA_IFINDEX_ALL); #endif kfree(net->ipv6.devconf_dflt); kfree(net->ipv6.devconf_all); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a9a9553ee63d..1635d218735e 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1005,6 +1005,10 @@ static int __init inet6_init(void) if (err) goto seg6_fail; + err = igmp6_late_init(); + if (err) + goto igmp6_late_err; + #ifdef CONFIG_SYSCTL err = ipv6_sysctl_register(); if (err) @@ -1015,8 +1019,10 @@ out: #ifdef CONFIG_SYSCTL sysctl_fail: - seg6_exit(); + igmp6_late_cleanup(); #endif +igmp6_late_err: + seg6_exit(); seg6_fail: calipso_exit(); calipso_fail: diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index aacfb4bce153..b04539dd4629 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -49,6 +49,8 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + void (*edemux)(struct sk_buff *skb); + /* if ingress device is enslaved to an L3 master device pass the * skb to its handler for processing */ @@ -60,8 +62,8 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) const struct inet6_protocol *ipprot; ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); - if (ipprot && ipprot->early_demux) - ipprot->early_demux(skb); + if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) + edemux(skb); } if (!skb_valid_dst(skb)) ip6_route_input(skb); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 6ba6c900ebcf..fb4546e80c82 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -815,7 +815,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head) in6_dev = __in6_dev_get(dev); if (in6_dev) { in6_dev->cnf.mc_forwarding--; - inet6_netconf_notify_devconf(dev_net(dev), + inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); } @@ -974,7 +974,7 @@ static int mif6_add(struct net *net, struct mr6_table *mrt, in6_dev = __in6_dev_get(dev); if (in6_dev) { in6_dev->cnf.mc_forwarding++; - inet6_netconf_notify_devconf(dev_net(dev), + inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, dev->ifindex, &in6_dev->cnf); } @@ -1599,7 +1599,8 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk) write_unlock_bh(&mrt_lock); if (!err) - inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_MC_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); rtnl_unlock(); @@ -1620,7 +1621,7 @@ int ip6mr_sk_done(struct sock *sk) mrt->mroute6_sk = NULL; net->ipv6.devconf_all->mc_forwarding--; write_unlock_bh(&mrt_lock); - inet6_netconf_notify_devconf(net, + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, NETCONFA_IFINDEX_ALL, net->ipv6.devconf_all); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 1bdc703cb966..07403fa164e1 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2463,7 +2463,6 @@ static void mld_ifc_event(struct inet6_dev *idev) mld_ifc_start_timer(idev, 1); } - static void igmp6_timer_handler(unsigned long data) { struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data; @@ -2599,6 +2598,44 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev) write_unlock_bh(&idev->lock); } +static void ipv6_mc_rejoin_groups(struct inet6_dev *idev) +{ + struct ifmcaddr6 *pmc; + + ASSERT_RTNL(); + + if (mld_in_v1_mode(idev)) { + read_lock_bh(&idev->lock); + for (pmc = idev->mc_list; pmc; pmc = pmc->next) + igmp6_join_group(pmc); + read_unlock_bh(&idev->lock); + } else + mld_send_report(idev, NULL); +} + +static int ipv6_mc_netdev_event(struct notifier_block *this, + unsigned long event, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct inet6_dev *idev = __in6_dev_get(dev); + + switch (event) { + case NETDEV_RESEND_IGMP: + if (idev) + ipv6_mc_rejoin_groups(idev); + break; + default: + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block igmp6_netdev_notifier = { + .notifier_call = ipv6_mc_netdev_event, +}; + #ifdef CONFIG_PROC_FS struct igmp6_mc_iter_state { struct seq_net_private p; @@ -2970,7 +3007,17 @@ int __init igmp6_init(void) return register_pernet_subsys(&igmp6_net_ops); } +int __init igmp6_late_init(void) +{ + return register_netdevice_notifier(&igmp6_netdev_notifier); +} + void igmp6_cleanup(void) { unregister_pernet_subsys(&igmp6_net_ops); } + +void igmp6_late_cleanup(void) +{ + unregister_netdevice_notifier(&igmp6_netdev_notifier); +} diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index e3770abe688a..b5d54d4f995c 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -26,7 +26,7 @@ #include <net/protocol.h> #if IS_ENABLED(CONFIG_IPV6) -const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly; +struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly; EXPORT_SYMBOL(inet6_protos); int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 85582257d3af..a644aaecdfd3 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -26,17 +26,13 @@ #include <linux/seg6_iptunnel.h> #include <net/addrconf.h> #include <net/ip6_route.h> -#ifdef CONFIG_DST_CACHE #include <net/dst_cache.h> -#endif #ifdef CONFIG_IPV6_SEG6_HMAC #include <net/seg6_hmac.h> #endif struct seg6_lwt { -#ifdef CONFIG_DST_CACHE struct dst_cache cache; -#endif struct seg6_iptunnel_encap tuninfo[0]; }; @@ -105,7 +101,7 @@ static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) hdrlen = (osrh->hdrlen + 1) << 3; tot_len = hdrlen + sizeof(*hdr); - err = pskb_expand_head(skb, tot_len, 0, GFP_ATOMIC); + err = skb_cow_head(skb, tot_len); if (unlikely(err)) return err; @@ -156,7 +152,7 @@ static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) hdrlen = (osrh->hdrlen + 1) << 3; - err = pskb_expand_head(skb, hdrlen, 0, GFP_ATOMIC); + err = skb_cow_head(skb, hdrlen); if (unlikely(err)) return err; @@ -237,6 +233,9 @@ static int seg6_do_srh(struct sk_buff *skb) static int seg6_input(struct sk_buff *skb) { + struct dst_entry *orig_dst = skb_dst(skb); + struct dst_entry *dst = NULL; + struct seg6_lwt *slwt; int err; err = seg6_do_srh(skb); @@ -245,8 +244,26 @@ static int seg6_input(struct sk_buff *skb) return err; } + slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); + + preempt_disable(); + dst = dst_cache_get(&slwt->cache); + preempt_enable(); + skb_dst_drop(skb); - ip6_route_input(skb); + + if (!dst) { + ip6_route_input(skb); + dst = skb_dst(skb); + if (!dst->error) { + preempt_disable(); + dst_cache_set_ip6(&slwt->cache, dst, + &ipv6_hdr(skb)->saddr); + preempt_enable(); + } + } else { + skb_dst_set(skb, dst); + } return dst_input(skb); } @@ -264,11 +281,9 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); -#ifdef CONFIG_DST_CACHE preempt_disable(); dst = dst_cache_get(&slwt->cache); preempt_enable(); -#endif if (unlikely(!dst)) { struct ipv6hdr *hdr = ipv6_hdr(skb); @@ -287,11 +302,9 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) goto drop; } -#ifdef CONFIG_DST_CACHE preempt_disable(); dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); preempt_enable(); -#endif } skb_dst_drop(skb); @@ -355,13 +368,11 @@ static int seg6_build_state(struct nlattr *nla, slwt = seg6_lwt_lwtunnel(newts); -#ifdef CONFIG_DST_CACHE err = dst_cache_init(&slwt->cache, GFP_KERNEL); if (err) { kfree(newts); return err; } -#endif memcpy(&slwt->tuninfo, tuninfo, tuninfo_len); @@ -375,12 +386,10 @@ static int seg6_build_state(struct nlattr *nla, return 0; } -#ifdef CONFIG_DST_CACHE static void seg6_destroy_state(struct lwtunnel_state *lwt) { dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache); } -#endif static int seg6_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwtstate) @@ -414,9 +423,7 @@ static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) static const struct lwtunnel_encap_ops seg6_iptun_ops = { .build_state = seg6_build_state, -#ifdef CONFIG_DST_CACHE .destroy_state = seg6_destroy_state, -#endif .output = seg6_output, .input = seg6_input, .fill_encap = seg6_fill_encap_info, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0f08d718a002..8e42e8f54b70 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1293,8 +1293,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) goto discard; if (nsk != sk) { - sock_rps_save_rxhash(nsk, skb); - sk_mark_napi_id(nsk, skb); if (tcp_child_process(sk, nsk, skb)) goto reset; if (opt_skb) @@ -1925,8 +1923,9 @@ struct proto tcpv6_prot = { .diag_destroy = tcp_abort, }; -static const struct inet6_protocol tcpv6_protocol = { +static struct inet6_protocol tcpv6_protocol = { .early_demux = tcp_v6_early_demux, + .early_demux_handler = tcp_v6_early_demux, .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b793ed1d2a36..fd4b1c98a472 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1436,8 +1436,9 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, } #endif -static const struct inet6_protocol udpv6_protocol = { +static struct inet6_protocol udpv6_protocol = { .early_demux = udp_v6_early_demux, + .early_demux_handler = udp_v6_early_demux, .handler = udpv6_rcv, .err_handler = udpv6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 309062f3debe..31762f76cdb5 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1687,7 +1687,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct kcm_attach info; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - err = -EFAULT; + return -EFAULT; err = kcm_attach_ioctl(sock, &info); @@ -1697,7 +1697,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct kcm_unattach info; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - err = -EFAULT; + return -EFAULT; err = kcm_unattach_ioctl(sock, &info); @@ -1708,7 +1708,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct socket *newsock = NULL; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - err = -EFAULT; + return -EFAULT; err = kcm_clone(sock, &info, &newsock); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 8adab6335ced..e37d9554da7b 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -278,7 +278,57 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn } EXPORT_SYMBOL_GPL(l2tp_session_find); -struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) +/* Like l2tp_session_find() but takes a reference on the returned session. + * Optionally calls session->ref() too if do_ref is true. + */ +struct l2tp_session *l2tp_session_get(struct net *net, + struct l2tp_tunnel *tunnel, + u32 session_id, bool do_ref) +{ + struct hlist_head *session_list; + struct l2tp_session *session; + + if (!tunnel) { + struct l2tp_net *pn = l2tp_pernet(net); + + session_list = l2tp_session_id_hash_2(pn, session_id); + + rcu_read_lock_bh(); + hlist_for_each_entry_rcu(session, session_list, global_hlist) { + if (session->session_id == session_id) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); + rcu_read_unlock_bh(); + + return session; + } + } + rcu_read_unlock_bh(); + + return NULL; + } + + session_list = l2tp_session_id_hash(tunnel, session_id); + read_lock_bh(&tunnel->hlist_lock); + hlist_for_each_entry(session, session_list, hlist) { + if (session->session_id == session_id) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); + read_unlock_bh(&tunnel->hlist_lock); + + return session; + } + } + read_unlock_bh(&tunnel->hlist_lock); + + return NULL; +} +EXPORT_SYMBOL_GPL(l2tp_session_get); + +struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, + bool do_ref) { int hash; struct l2tp_session *session; @@ -288,6 +338,9 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) for (hash = 0; hash < L2TP_HASH_SIZE; hash++) { hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) { if (++count > nth) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); read_unlock_bh(&tunnel->hlist_lock); return session; } @@ -298,12 +351,13 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) return NULL; } -EXPORT_SYMBOL_GPL(l2tp_session_find_nth); +EXPORT_SYMBOL_GPL(l2tp_session_get_nth); /* Lookup a session by interface name. * This is very inefficient but is only used by management interfaces. */ -struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) +struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname, + bool do_ref) { struct l2tp_net *pn = l2tp_pernet(net); int hash; @@ -313,7 +367,11 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) { hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) { if (!strcmp(session->ifname, ifname)) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); rcu_read_unlock_bh(); + return session; } } @@ -323,7 +381,49 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) return NULL; } -EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname); +EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname); + +static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel, + struct l2tp_session *session) +{ + struct l2tp_session *session_walk; + struct hlist_head *g_head; + struct hlist_head *head; + struct l2tp_net *pn; + + head = l2tp_session_id_hash(tunnel, session->session_id); + + write_lock_bh(&tunnel->hlist_lock); + hlist_for_each_entry(session_walk, head, hlist) + if (session_walk->session_id == session->session_id) + goto exist; + + if (tunnel->version == L2TP_HDR_VER_3) { + pn = l2tp_pernet(tunnel->l2tp_net); + g_head = l2tp_session_id_hash_2(l2tp_pernet(tunnel->l2tp_net), + session->session_id); + + spin_lock_bh(&pn->l2tp_session_hlist_lock); + hlist_for_each_entry(session_walk, g_head, global_hlist) + if (session_walk->session_id == session->session_id) + goto exist_glob; + + hlist_add_head_rcu(&session->global_hlist, g_head); + spin_unlock_bh(&pn->l2tp_session_hlist_lock); + } + + hlist_add_head(&session->hlist, head); + write_unlock_bh(&tunnel->hlist_lock); + + return 0; + +exist_glob: + spin_unlock_bh(&pn->l2tp_session_hlist_lock); +exist: + write_unlock_bh(&tunnel->hlist_lock); + + return -EEXIST; +} /* Lookup a tunnel by id */ @@ -633,6 +733,9 @@ discard: * a data (not control) frame before coming here. Fields up to the * session-id have already been parsed and ptr points to the data * after the session-id. + * + * session->ref() must have been called prior to l2tp_recv_common(). + * session->deref() will be called automatically after skb is processed. */ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, @@ -642,14 +745,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, int offset; u32 ns, nr; - /* The ref count is increased since we now hold a pointer to - * the session. Take care to decrement the refcnt when exiting - * this function from now on... - */ - l2tp_session_inc_refcount(session); - if (session->ref) - (*session->ref)(session); - /* Parse and check optional cookie */ if (session->peer_cookie_len > 0) { if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) { @@ -802,8 +897,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, /* Try to dequeue as many skbs from reorder_q as we can. */ l2tp_recv_dequeue(session); - l2tp_session_dec_refcount(session); - return; discard: @@ -812,8 +905,6 @@ discard: if (session->deref) (*session->deref)(session); - - l2tp_session_dec_refcount(session); } EXPORT_SYMBOL(l2tp_recv_common); @@ -920,8 +1011,14 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, } /* Find the session context */ - session = l2tp_session_find(tunnel->l2tp_net, tunnel, session_id); + session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id, true); if (!session || !session->recv_skb) { + if (session) { + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + } + /* Not found? Pass to userspace to deal with */ l2tp_info(tunnel, L2TP_MSG_DATA, "%s: no session found (%u/%u). Passing up.\n", @@ -930,6 +1027,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, } l2tp_recv_common(session, skb, ptr, optr, hdrflags, length, payload_hook); + l2tp_session_dec_refcount(session); return 0; @@ -1738,6 +1836,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_set_header_len); struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) { struct l2tp_session *session; + int err; session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL); if (session != NULL) { @@ -1793,6 +1892,13 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn l2tp_session_set_header_len(session, tunnel->version); + err = l2tp_session_add_to_tunnel(tunnel, session); + if (err) { + kfree(session); + + return ERR_PTR(err); + } + /* Bump the reference count. The session context is deleted * only when this drops to zero. */ @@ -1802,28 +1908,14 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn /* Ensure tunnel socket isn't deleted */ sock_hold(tunnel->sock); - /* Add session to the tunnel's hash list */ - write_lock_bh(&tunnel->hlist_lock); - hlist_add_head(&session->hlist, - l2tp_session_id_hash(tunnel, session_id)); - write_unlock_bh(&tunnel->hlist_lock); - - /* And to the global session list if L2TPv3 */ - if (tunnel->version != L2TP_HDR_VER_2) { - struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - - spin_lock_bh(&pn->l2tp_session_hlist_lock); - hlist_add_head_rcu(&session->global_hlist, - l2tp_session_id_hash_2(pn, session_id)); - spin_unlock_bh(&pn->l2tp_session_hlist_lock); - } - /* Ignore management session in session count value */ if (session->session_id != 0) atomic_inc(&l2tp_session_count); + + return session; } - return session; + return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL_GPL(l2tp_session_create); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index aebf281d09ee..8ce7818c7a9d 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -230,11 +230,16 @@ out: return tunnel; } +struct l2tp_session *l2tp_session_get(struct net *net, + struct l2tp_tunnel *tunnel, + u32 session_id, bool do_ref); struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id); -struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); -struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname); +struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, + bool do_ref); +struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname, + bool do_ref); struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 2d6760a2ae34..d100aed3d06f 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -53,7 +53,7 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd) static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd) { - pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx); + pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); pd->session_idx++; if (pd->session == NULL) { @@ -238,10 +238,14 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v) } /* Show the tunnel or session context */ - if (pd->session == NULL) + if (!pd->session) { l2tp_dfs_seq_tunnel_show(m, pd->tunnel); - else + } else { l2tp_dfs_seq_session_show(m, pd->session); + if (pd->session->deref) + pd->session->deref(pd->session); + l2tp_session_dec_refcount(pd->session); + } out: return 0; diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 8bf18a5f66e0..138566a63123 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -30,6 +30,9 @@ #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/udp.h> #include "l2tp_core.h" @@ -204,6 +207,53 @@ static void l2tp_eth_show(struct seq_file *m, void *arg) } #endif +static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel, + struct l2tp_session *session, + struct net_device *dev) +{ + unsigned int overhead = 0; + struct dst_entry *dst; + u32 l3_overhead = 0; + + /* if the encap is UDP, account for UDP header size */ + if (tunnel->encap == L2TP_ENCAPTYPE_UDP) { + overhead += sizeof(struct udphdr); + dev->needed_headroom += sizeof(struct udphdr); + } + if (session->mtu != 0) { + dev->mtu = session->mtu; + dev->needed_headroom += session->hdr_len; + return; + } + l3_overhead = kernel_sock_ip_overhead(tunnel->sock); + if (l3_overhead == 0) { + /* L3 Overhead couldn't be identified, this could be + * because tunnel->sock was NULL or the socket's + * address family was not IPv4 or IPv6, + * dev mtu stays at 1500. + */ + return; + } + /* Adjust MTU, factor overhead - underlay L3, overlay L2 hdr + * UDP overhead, if any, was already factored in above. + */ + overhead += session->hdr_len + ETH_HLEN + l3_overhead; + + /* If PMTU discovery was enabled, use discovered MTU on L2TP device */ + dst = sk_dst_get(tunnel->sock); + if (dst) { + /* dst_mtu will use PMTU if found, else fallback to intf MTU */ + u32 pmtu = dst_mtu(dst); + + if (pmtu != 0) + dev->mtu = pmtu; + dst_release(dst); + } + session->mtu = dev->mtu - overhead; + dev->mtu = session->mtu; + dev->needed_headroom += session->hdr_len; +} + static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) { struct net_device *dev; @@ -221,12 +271,6 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p goto out; } - session = l2tp_session_find(net, tunnel, session_id); - if (session) { - rc = -EEXIST; - goto out; - } - if (cfg->ifname) { dev = dev_get_by_name(net, cfg->ifname); if (dev) { @@ -240,8 +284,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p session = l2tp_session_create(sizeof(*spriv), tunnel, session_id, peer_session_id, cfg); - if (!session) { - rc = -ENOMEM; + if (IS_ERR(session)) { + rc = PTR_ERR(session); goto out; } @@ -253,12 +297,9 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p } dev_net_set(dev, net); - if (session->mtu == 0) - session->mtu = dev->mtu - session->hdr_len; - dev->mtu = session->mtu; - dev->needed_headroom += session->hdr_len; dev->min_mtu = 0; dev->max_mtu = ETH_MAX_MTU; + l2tp_eth_adjust_mtu(tunnel, session, dev); priv = netdev_priv(dev); priv->dev = dev; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index d25038cfd64e..4d322c1b7233 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -143,19 +143,19 @@ static int l2tp_ip_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_find(net, NULL, session_id); - if (session == NULL) + session = l2tp_session_get(net, NULL, session_id, true); + if (!session) goto discard; tunnel = session->tunnel; - if (tunnel == NULL) - goto discard; + if (!tunnel) + goto discard_sess; /* Trace packet contents, if enabled */ if (tunnel->debug & L2TP_MSG_DATA) { length = min(32u, skb->len); if (!pskb_may_pull(skb, length)) - goto discard; + goto discard_sess; /* Point to L2TP header */ optr = ptr = skb->data; @@ -165,6 +165,7 @@ static int l2tp_ip_recv(struct sk_buff *skb) } l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook); + l2tp_session_dec_refcount(session); return 0; @@ -178,9 +179,10 @@ pass_up: tunnel_id = ntohl(*(__be32 *) &skb->data[4]); tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel != NULL) + if (tunnel) { sk = tunnel->sock; - else { + sock_hold(sk); + } else { struct iphdr *iph = (struct iphdr *) skb_network_header(skb); read_lock_bh(&l2tp_ip_lock); @@ -202,6 +204,12 @@ pass_up: return sk_receive_skb(sk, skb, 1); +discard_sess: + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + goto discard; + discard_put: sock_put(sk); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index a4abcbc4c09a..88b397c30d86 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -156,19 +156,19 @@ static int l2tp_ip6_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_find(net, NULL, session_id); - if (session == NULL) + session = l2tp_session_get(net, NULL, session_id, true); + if (!session) goto discard; tunnel = session->tunnel; - if (tunnel == NULL) - goto discard; + if (!tunnel) + goto discard_sess; /* Trace packet contents, if enabled */ if (tunnel->debug & L2TP_MSG_DATA) { length = min(32u, skb->len); if (!pskb_may_pull(skb, length)) - goto discard; + goto discard_sess; /* Point to L2TP header */ optr = ptr = skb->data; @@ -179,6 +179,8 @@ static int l2tp_ip6_recv(struct sk_buff *skb) l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook); + l2tp_session_dec_refcount(session); + return 0; pass_up: @@ -191,9 +193,10 @@ pass_up: tunnel_id = ntohl(*(__be32 *) &skb->data[4]); tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel != NULL) + if (tunnel) { sk = tunnel->sock; - else { + sock_hold(sk); + } else { struct ipv6hdr *iph = ipv6_hdr(skb); read_lock_bh(&l2tp_ip6_lock); @@ -215,6 +218,12 @@ pass_up: return sk_receive_skb(sk, skb, 1); +discard_sess: + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + goto discard; + discard_put: sock_put(sk); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 3620fba31786..7e3e669baac4 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -48,7 +48,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, /* Accessed under genl lock */ static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX]; -static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info) +static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info, + bool do_ref) { u32 tunnel_id; u32 session_id; @@ -59,14 +60,15 @@ static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info) if (info->attrs[L2TP_ATTR_IFNAME]) { ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]); - session = l2tp_session_find_by_ifname(net, ifname); + session = l2tp_session_get_by_ifname(net, ifname, do_ref); } else if ((info->attrs[L2TP_ATTR_SESSION_ID]) && (info->attrs[L2TP_ATTR_CONN_ID])) { tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]); tunnel = l2tp_tunnel_find(net, tunnel_id); if (tunnel) - session = l2tp_session_find(net, tunnel, session_id); + session = l2tp_session_get(net, tunnel, session_id, + do_ref); } return session; @@ -642,10 +644,12 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf session_id, peer_session_id, &cfg); if (ret >= 0) { - session = l2tp_session_find(net, tunnel, session_id); - if (session) + session = l2tp_session_get(net, tunnel, session_id, false); + if (session) { ret = l2tp_session_notify(&l2tp_nl_family, info, session, L2TP_CMD_SESSION_CREATE); + l2tp_session_dec_refcount(session); + } } out: @@ -658,7 +662,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf struct l2tp_session *session; u16 pw_type; - session = l2tp_nl_session_find(info); + session = l2tp_nl_session_get(info, true); if (session == NULL) { ret = -ENODEV; goto out; @@ -672,6 +676,10 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete) ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session); + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + out: return ret; } @@ -681,7 +689,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf int ret = 0; struct l2tp_session *session; - session = l2tp_nl_session_find(info); + session = l2tp_nl_session_get(info, false); if (session == NULL) { ret = -ENODEV; goto out; @@ -716,6 +724,8 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf ret = l2tp_session_notify(&l2tp_nl_family, info, session, L2TP_CMD_SESSION_MODIFY); + l2tp_session_dec_refcount(session); + out: return ret; } @@ -811,29 +821,34 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg; int ret; - session = l2tp_nl_session_find(info); + session = l2tp_nl_session_get(info, false); if (session == NULL) { ret = -ENODEV; - goto out; + goto err; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; - goto out; + goto err_ref; } ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, 0, session, L2TP_CMD_SESSION_GET); if (ret < 0) - goto err_out; + goto err_ref_msg; - return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); + ret = genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); -err_out: - nlmsg_free(msg); + l2tp_session_dec_refcount(session); -out: + return ret; + +err_ref_msg: + nlmsg_free(msg); +err_ref: + l2tp_session_dec_refcount(session); +err: return ret; } @@ -852,7 +867,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback goto out; } - session = l2tp_session_find_nth(tunnel, si); + session = l2tp_session_get_nth(tunnel, si, false); if (session == NULL) { ti++; tunnel = NULL; @@ -862,8 +877,11 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - session, L2TP_CMD_SESSION_GET) < 0) + session, L2TP_CMD_SESSION_GET) < 0) { + l2tp_session_dec_refcount(session); break; + } + l2tp_session_dec_refcount(session); si++; } diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 36cc56fd0418..861b255a2d51 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -450,6 +450,10 @@ static void pppol2tp_session_close(struct l2tp_session *session) static void pppol2tp_session_destruct(struct sock *sk) { struct l2tp_session *session = sk->sk_user_data; + + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); + if (session) { sk->sk_user_data = NULL; BUG_ON(session->magic != L2TP_SESSION_MAGIC); @@ -488,9 +492,6 @@ static int pppol2tp_release(struct socket *sock) l2tp_session_queue_purge(session); sock_put(sk); } - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); - release_sock(sk); /* This will delete the session context via @@ -582,6 +583,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, int error = 0; u32 tunnel_id, peer_tunnel_id; u32 session_id, peer_session_id; + bool drop_refcnt = false; int ver = 2; int fd; @@ -683,36 +685,36 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, if (tunnel->peer_tunnel_id == 0) tunnel->peer_tunnel_id = peer_tunnel_id; - /* Create session if it doesn't already exist. We handle the - * case where a session was previously created by the netlink - * interface by checking that the session doesn't already have - * a socket and its tunnel socket are what we expect. If any - * of those checks fail, return EEXIST to the caller. - */ - session = l2tp_session_find(sock_net(sk), tunnel, session_id); - if (session == NULL) { - /* Default MTU must allow space for UDP/L2TP/PPP - * headers. + session = l2tp_session_get(sock_net(sk), tunnel, session_id, false); + if (session) { + drop_refcnt = true; + ps = l2tp_session_priv(session); + + /* Using a pre-existing session is fine as long as it hasn't + * been connected yet. */ - cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD; + if (ps->sock) { + error = -EEXIST; + goto end; + } - /* Allocate and initialize a new session context. */ - session = l2tp_session_create(sizeof(struct pppol2tp_session), - tunnel, session_id, - peer_session_id, &cfg); - if (session == NULL) { - error = -ENOMEM; + /* consistency checks */ + if (ps->tunnel_sock != tunnel->sock) { + error = -EEXIST; goto end; } } else { - ps = l2tp_session_priv(session); - error = -EEXIST; - if (ps->sock != NULL) - goto end; + /* Default MTU must allow space for UDP/L2TP/PPP headers */ + cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD; + cfg.mru = cfg.mtu; - /* consistency checks */ - if (ps->tunnel_sock != tunnel->sock) + session = l2tp_session_create(sizeof(struct pppol2tp_session), + tunnel, session_id, + peer_session_id, &cfg); + if (IS_ERR(session)) { + error = PTR_ERR(session); goto end; + } } /* Associate session with its PPPoL2TP socket */ @@ -777,6 +779,8 @@ out_no_ppp: session->name); end: + if (drop_refcnt) + l2tp_session_dec_refcount(session); release_sock(sk); return error; @@ -804,12 +808,6 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i if (tunnel->sock == NULL) goto out; - /* Check that this session doesn't already exist */ - error = -EEXIST; - session = l2tp_session_find(net, tunnel, session_id); - if (session != NULL) - goto out; - /* Default MTU values. */ if (cfg->mtu == 0) cfg->mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD; @@ -817,12 +815,13 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i cfg->mru = cfg->mtu; /* Allocate and initialize a new session context. */ - error = -ENOMEM; session = l2tp_session_create(sizeof(struct pppol2tp_session), tunnel, session_id, peer_session_id, cfg); - if (session == NULL) + if (IS_ERR(session)) { + error = PTR_ERR(session); goto out; + } ps = l2tp_session_priv(session); ps->tunnel_sock = tunnel->sock; @@ -1140,11 +1139,18 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel, if (stats.session_id != 0) { /* resend to session ioctl handler */ struct l2tp_session *session = - l2tp_session_find(sock_net(sk), tunnel, stats.session_id); - if (session != NULL) - err = pppol2tp_session_ioctl(session, cmd, arg); - else + l2tp_session_get(sock_net(sk), tunnel, + stats.session_id, true); + + if (session) { + err = pppol2tp_session_ioctl(session, cmd, + arg); + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + } else { err = -EBADR; + } break; } #ifdef CONFIG_XFRM @@ -1554,7 +1560,7 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd) static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd) { - pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx); + pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); pd->session_idx++; if (pd->session == NULL) { @@ -1681,10 +1687,14 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v) /* Show the tunnel or session context. */ - if (pd->session == NULL) + if (!pd->session) { pppol2tp_seq_tunnel_show(m, pd->tunnel); - else + } else { pppol2tp_seq_session_show(m, pd->session); + if (pd->session->deref) + pd->session->deref(pd->session); + l2tp_session_dec_refcount(pd->session); + } out: return 0; @@ -1843,4 +1853,4 @@ MODULE_DESCRIPTION("PPP over L2TP over UDP"); MODULE_LICENSE("GPL"); MODULE_VERSION(PPPOL2TP_DRV_VERSION); MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OL2TP); -MODULE_ALIAS_L2TP_PWTYPE(11); +MODULE_ALIAS_L2TP_PWTYPE(7); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 40813dd3301c..5bb0c5012819 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -718,7 +718,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) ieee80211_recalc_ps(local); if (sdata->vif.type == NL80211_IFTYPE_MONITOR || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + local->ops->wake_tx_queue) { /* XXX: for AP_VLAN, actually track AP queues */ netif_tx_start_all_queues(dev); } else if (dev) { diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h index 56ccffa3f2bf..62141dcec2d6 100644 --- a/net/mac802154/ieee802154_i.h +++ b/net/mac802154/ieee802154_i.h @@ -19,6 +19,7 @@ #ifndef __IEEE802154_I_H #define __IEEE802154_I_H +#include <linux/interrupt.h> #include <linux/mutex.h> #include <linux/hrtimer.h> #include <net/cfg802154.h> diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 82589b2abf3c..5928d22ba9c8 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -24,6 +24,9 @@ #include <net/nexthop.h> #include "internal.h" +/* max memory we will use for mpls_route */ +#define MAX_MPLS_ROUTE_MEM 4096 + /* Maximum number of labels to look ahead at when selecting a path of * a multipath route */ @@ -60,10 +63,7 @@ EXPORT_SYMBOL_GPL(mpls_output_possible); static u8 *__mpls_nh_via(struct mpls_route *rt, struct mpls_nh *nh) { - u8 *nh0_via = PTR_ALIGN((u8 *)&rt->rt_nh[rt->rt_nhn], VIA_ALEN_ALIGN); - int nh_index = nh - rt->rt_nh; - - return nh0_via + rt->rt_max_alen * nh_index; + return (u8 *)nh + rt->rt_via_offset; } static const u8 *mpls_nh_via(const struct mpls_route *rt, @@ -189,21 +189,32 @@ static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb) return hash; } +static struct mpls_nh *mpls_get_nexthop(struct mpls_route *rt, u8 index) +{ + return (struct mpls_nh *)((u8 *)rt->rt_nh + index * rt->rt_nh_size); +} + +/* number of alive nexthops (rt->rt_nhn_alive) and the flags for + * a next hop (nh->nh_flags) are modified by netdev event handlers. + * Since those fields can change at any moment, use READ_ONCE to + * access both. + */ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, struct sk_buff *skb) { - int alive = ACCESS_ONCE(rt->rt_nhn_alive); u32 hash = 0; int nh_index = 0; int n = 0; + u8 alive; /* No need to look further into packet if there's only * one path */ if (rt->rt_nhn == 1) - goto out; + return rt->rt_nh; - if (alive <= 0) + alive = READ_ONCE(rt->rt_nhn_alive); + if (alive == 0) return NULL; hash = mpls_multipath_hash(rt, skb); @@ -211,7 +222,9 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, if (alive == rt->rt_nhn) goto out; for_nexthops(rt) { - if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) + unsigned int nh_flags = READ_ONCE(nh->nh_flags); + + if (nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) continue; if (n == nh_index) return nh; @@ -219,7 +232,7 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, } endfor_nexthops(rt); out: - return &rt->rt_nh[nh_index]; + return mpls_get_nexthop(rt, nh_index); } static bool mpls_egress(struct net *net, struct mpls_route *rt, @@ -458,20 +471,27 @@ struct mpls_route_config { int rc_mp_len; }; -static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen) +/* all nexthops within a route have the same size based on max + * number of labels and max via length for a hop + */ +static struct mpls_route *mpls_rt_alloc(u8 num_nh, u8 max_alen, u8 max_labels) { - u8 max_alen_aligned = ALIGN(max_alen, VIA_ALEN_ALIGN); + u8 nh_size = MPLS_NH_SIZE(max_labels, max_alen); struct mpls_route *rt; + size_t size; - rt = kzalloc(ALIGN(sizeof(*rt) + num_nh * sizeof(*rt->rt_nh), - VIA_ALEN_ALIGN) + - num_nh * max_alen_aligned, - GFP_KERNEL); - if (rt) { - rt->rt_nhn = num_nh; - rt->rt_nhn_alive = num_nh; - rt->rt_max_alen = max_alen_aligned; - } + size = sizeof(*rt) + num_nh * nh_size; + if (size > MAX_MPLS_ROUTE_MEM) + return ERR_PTR(-EINVAL); + + rt = kzalloc(size, GFP_KERNEL); + if (!rt) + return ERR_PTR(-ENOMEM); + + rt->rt_nhn = num_nh; + rt->rt_nhn_alive = num_nh; + rt->rt_nh_size = nh_size; + rt->rt_via_offset = MPLS_NH_VIA_OFF(max_labels); return rt; } @@ -676,9 +696,6 @@ static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg, return -ENOMEM; err = -EINVAL; - /* Ensure only a supported number of labels are present */ - if (cfg->rc_output_labels > MAX_NEW_LABELS) - goto errout; nh->nh_labels = cfg->rc_output_labels; for (i = 0; i < nh->nh_labels; i++) @@ -703,7 +720,7 @@ errout: static int mpls_nh_build(struct net *net, struct mpls_route *rt, struct mpls_nh *nh, int oif, struct nlattr *via, - struct nlattr *newdst) + struct nlattr *newdst, u8 max_labels) { int err = -ENOMEM; @@ -711,7 +728,7 @@ static int mpls_nh_build(struct net *net, struct mpls_route *rt, goto errout; if (newdst) { - err = nla_get_labels(newdst, MAX_NEW_LABELS, + err = nla_get_labels(newdst, max_labels, &nh->nh_labels, nh->nh_label); if (err) goto errout; @@ -736,22 +753,20 @@ errout: return err; } -static int mpls_count_nexthops(struct rtnexthop *rtnh, int len, - u8 cfg_via_alen, u8 *max_via_alen) +static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len, + u8 cfg_via_alen, u8 *max_via_alen, + u8 *max_labels) { - int nhs = 0; int remaining = len; - - if (!rtnh) { - *max_via_alen = cfg_via_alen; - return 1; - } + u8 nhs = 0; *max_via_alen = 0; + *max_labels = 0; while (rtnh_ok(rtnh, remaining)) { struct nlattr *nla, *attrs = rtnh_attrs(rtnh); int attrlen; + u8 n_labels = 0; attrlen = rtnh_attrlen(rtnh); nla = nla_find(attrs, attrlen, RTA_VIA); @@ -765,7 +780,20 @@ static int mpls_count_nexthops(struct rtnexthop *rtnh, int len, via_alen); } + nla = nla_find(attrs, attrlen, RTA_NEWDST); + if (nla && + nla_get_labels(nla, MAX_NEW_LABELS, &n_labels, NULL) != 0) + return 0; + + *max_labels = max_t(u8, *max_labels, n_labels); + + /* number of nexthops is tracked by a u8. + * Check for overflow. + */ + if (nhs == 255) + return 0; nhs++; + rtnh = rtnh_next(rtnh, &remaining); } @@ -774,13 +802,13 @@ static int mpls_count_nexthops(struct rtnexthop *rtnh, int len, } static int mpls_nh_build_multi(struct mpls_route_config *cfg, - struct mpls_route *rt) + struct mpls_route *rt, u8 max_labels) { struct rtnexthop *rtnh = cfg->rc_mp; struct nlattr *nla_via, *nla_newdst; int remaining = cfg->rc_mp_len; - int nhs = 0; int err = 0; + u8 nhs = 0; change_nexthops(rt) { int attrlen; @@ -807,7 +835,8 @@ static int mpls_nh_build_multi(struct mpls_route_config *cfg, } err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh, - rtnh->rtnh_ifindex, nla_via, nla_newdst); + rtnh->rtnh_ifindex, nla_via, nla_newdst, + max_labels); if (err) goto errout; @@ -834,7 +863,8 @@ static int mpls_route_add(struct mpls_route_config *cfg) int err = -EINVAL; u8 max_via_alen; unsigned index; - int nhs; + u8 max_labels; + u8 nhs; index = cfg->rc_label; @@ -872,22 +902,32 @@ static int mpls_route_add(struct mpls_route_config *cfg) goto errout; err = -EINVAL; - nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len, - cfg->rc_via_alen, &max_via_alen); + if (cfg->rc_mp) { + nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len, + cfg->rc_via_alen, &max_via_alen, + &max_labels); + } else { + max_via_alen = cfg->rc_via_alen; + max_labels = cfg->rc_output_labels; + nhs = 1; + } + if (nhs == 0) goto errout; err = -ENOMEM; - rt = mpls_rt_alloc(nhs, max_via_alen); - if (!rt) + rt = mpls_rt_alloc(nhs, max_via_alen, max_labels); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); goto errout; + } rt->rt_protocol = cfg->rc_protocol; rt->rt_payload_type = cfg->rc_payload_type; rt->rt_ttl_propagate = cfg->rc_ttl_propagate; if (cfg->rc_mp) - err = mpls_nh_build_multi(cfg, rt); + err = mpls_nh_build_multi(cfg, rt, max_labels); else err = mpls_nh_build_from_cfg(cfg, rt); if (err) @@ -1040,8 +1080,8 @@ static int mpls_netconf_msgsize_devconf(int type) return size; } -static void mpls_netconf_notify_devconf(struct net *net, int type, - struct mpls_dev *mdev) +static void mpls_netconf_notify_devconf(struct net *net, int event, + int type, struct mpls_dev *mdev) { struct sk_buff *skb; int err = -ENOBUFS; @@ -1050,8 +1090,7 @@ static void mpls_netconf_notify_devconf(struct net *net, int type, if (!skb) goto errout; - err = mpls_netconf_fill_devconf(skb, mdev, 0, 0, RTM_NEWNETCONF, - 0, type); + err = mpls_netconf_fill_devconf(skb, mdev, 0, 0, event, 0, type); if (err < 0) { /* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */ WARN_ON(err == -EMSGSIZE); @@ -1184,9 +1223,8 @@ static int mpls_conf_proc(struct ctl_table *ctl, int write, if (i == offsetof(struct mpls_dev, input_enabled) && val != oval) { - mpls_netconf_notify_devconf(net, - NETCONFA_INPUT, - mdev); + mpls_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_INPUT, mdev); } } @@ -1227,10 +1265,11 @@ static int mpls_dev_sysctl_register(struct net_device *dev, snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name); - mdev->sysctl = register_net_sysctl(dev_net(dev), path, table); + mdev->sysctl = register_net_sysctl(net, path, table); if (!mdev->sysctl) goto free; + mpls_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, mdev); return 0; free: @@ -1239,13 +1278,17 @@ out: return -ENOBUFS; } -static void mpls_dev_sysctl_unregister(struct mpls_dev *mdev) +static void mpls_dev_sysctl_unregister(struct net_device *dev, + struct mpls_dev *mdev) { + struct net *net = dev_net(dev); struct ctl_table *table; table = mdev->sysctl->ctl_table_arg; unregister_net_sysctl_table(mdev->sysctl); kfree(table); + + mpls_netconf_notify_devconf(net, RTM_DELNETCONF, 0, mdev); } static struct mpls_dev *mpls_add_dev(struct net_device *dev) @@ -1271,11 +1314,12 @@ static struct mpls_dev *mpls_add_dev(struct net_device *dev) u64_stats_init(&mpls_stats->syncp); } + mdev->dev = dev; + err = mpls_dev_sysctl_register(dev, mdev); if (err) goto free; - mdev->dev = dev; rcu_assign_pointer(dev->mpls_ptr, mdev); return mdev; @@ -1298,8 +1342,7 @@ static void mpls_ifdown(struct net_device *dev, int event) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); - unsigned int nh_flags = RTNH_F_DEAD | RTNH_F_LINKDOWN; - unsigned int alive; + u8 alive, deleted; unsigned index; platform_label = rtnl_dereference(net->mpls.platform_label); @@ -1310,36 +1353,48 @@ static void mpls_ifdown(struct net_device *dev, int event) continue; alive = 0; + deleted = 0; change_nexthops(rt) { + unsigned int nh_flags = nh->nh_flags; + if (rtnl_dereference(nh->nh_dev) != dev) goto next; switch (event) { case NETDEV_DOWN: case NETDEV_UNREGISTER: - nh->nh_flags |= RTNH_F_DEAD; + nh_flags |= RTNH_F_DEAD; /* fall through */ case NETDEV_CHANGE: - nh->nh_flags |= RTNH_F_LINKDOWN; + nh_flags |= RTNH_F_LINKDOWN; break; } if (event == NETDEV_UNREGISTER) RCU_INIT_POINTER(nh->nh_dev, NULL); + + if (nh->nh_flags != nh_flags) + WRITE_ONCE(nh->nh_flags, nh_flags); next: - if (!(nh->nh_flags & nh_flags)) + if (!(nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))) alive++; + if (!rtnl_dereference(nh->nh_dev)) + deleted++; } endfor_nexthops(rt); WRITE_ONCE(rt->rt_nhn_alive, alive); + + /* if there are no more nexthops, delete the route */ + if (event == NETDEV_UNREGISTER && deleted == rt->rt_nhn) + mpls_route_update(net, index, NULL, NULL); } } -static void mpls_ifup(struct net_device *dev, unsigned int nh_flags) +static void mpls_ifup(struct net_device *dev, unsigned int flags) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); unsigned index; - int alive; + u8 alive; platform_label = rtnl_dereference(net->mpls.platform_label); for (index = 0; index < net->mpls.platform_labels; index++) { @@ -1350,20 +1405,22 @@ static void mpls_ifup(struct net_device *dev, unsigned int nh_flags) alive = 0; change_nexthops(rt) { + unsigned int nh_flags = nh->nh_flags; struct net_device *nh_dev = rtnl_dereference(nh->nh_dev); - if (!(nh->nh_flags & nh_flags)) { + if (!(nh_flags & flags)) { alive++; continue; } if (nh_dev != dev) continue; alive++; - nh->nh_flags &= ~nh_flags; + nh_flags &= ~flags; + WRITE_ONCE(nh->nh_flags, flags); } endfor_nexthops(rt); - ACCESS_ONCE(rt->rt_nhn_alive) = alive; + WRITE_ONCE(rt->rt_nhn_alive, alive); } } @@ -1414,7 +1471,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, mpls_ifdown(dev, event); mdev = mpls_dev_get(dev); if (mdev) { - mpls_dev_sysctl_unregister(mdev); + mpls_dev_sysctl_unregister(dev, mdev); RCU_INIT_POINTER(dev->mpls_ptr, NULL); call_rcu(&mdev->rcu, mpls_dev_destroy_rcu); } @@ -1424,7 +1481,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, if (mdev) { int err; - mpls_dev_sysctl_unregister(mdev); + mpls_dev_sysctl_unregister(dev, mdev); err = mpls_dev_sysctl_register(dev, mdev); if (err) return notifier_from_errno(err); @@ -1484,16 +1541,18 @@ int nla_put_labels(struct sk_buff *skb, int attrtype, EXPORT_SYMBOL_GPL(nla_put_labels); int nla_get_labels(const struct nlattr *nla, - u32 max_labels, u8 *labels, u32 label[]) + u8 max_labels, u8 *labels, u32 label[]) { unsigned len = nla_len(nla); - unsigned nla_labels; struct mpls_shim_hdr *nla_label; + u8 nla_labels; bool bos; int i; - /* len needs to be an even multiple of 4 (the label size) */ - if (len & 3) + /* len needs to be an even multiple of 4 (the label size). Number + * of labels is a u8 so check for overflow. + */ + if (len & 3 || len / 4 > 255) return -EINVAL; /* Limit the number of new labels allowed */ @@ -1501,6 +1560,10 @@ int nla_get_labels(const struct nlattr *nla, if (nla_labels > max_labels) return -EINVAL; + /* when label == NULL, caller wants number of labels */ + if (!label) + goto out; + nla_label = nla_data(nla); bos = true; for (i = nla_labels - 1; i >= 0; i--, bos = false) { @@ -1524,6 +1587,7 @@ int nla_get_labels(const struct nlattr *nla, label[i] = dec.label; } +out: *labels = nla_labels; return 0; } @@ -1585,7 +1649,6 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, err = -EINVAL; rtm = nlmsg_data(nlh); - memset(cfg, 0, sizeof(*cfg)); if (rtm->rtm_family != AF_MPLS) goto errout; @@ -1684,27 +1747,43 @@ errout: static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) { - struct mpls_route_config cfg; + struct mpls_route_config *cfg; int err; - err = rtm_to_route_config(skb, nlh, &cfg); + cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); + if (!cfg) + return -ENOMEM; + + err = rtm_to_route_config(skb, nlh, cfg); if (err < 0) - return err; + goto out; + + err = mpls_route_del(cfg); +out: + kfree(cfg); - return mpls_route_del(&cfg); + return err; } static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) { - struct mpls_route_config cfg; + struct mpls_route_config *cfg; int err; - err = rtm_to_route_config(skb, nlh, &cfg); + cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); + if (!cfg) + return -ENOMEM; + + err = rtm_to_route_config(skb, nlh, cfg); if (err < 0) - return err; + goto out; + + err = mpls_route_add(cfg); +out: + kfree(cfg); - return mpls_route_add(&cfg); + return err; } static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, @@ -1761,21 +1840,23 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, } else { struct rtnexthop *rtnh; struct nlattr *mp; - int dead = 0; - int linkdown = 0; + u8 linkdown = 0; + u8 dead = 0; mp = nla_nest_start(skb, RTA_MULTIPATH); if (!mp) goto nla_put_failure; for_nexthops(rt) { + dev = rtnl_dereference(nh->nh_dev); + if (!dev) + continue; + rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); if (!rtnh) goto nla_put_failure; - dev = rtnl_dereference(nh->nh_dev); - if (dev) - rtnh->rtnh_ifindex = dev->ifindex; + rtnh->rtnh_ifindex = dev->ifindex; if (nh->nh_flags & RTNH_F_LINKDOWN) { rtnh->rtnh_flags |= RTNH_F_LINKDOWN; linkdown++; @@ -1867,6 +1948,8 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt) size_t nhsize = 0; for_nexthops(rt) { + if (!rtnl_dereference(nh->nh_dev)) + continue; nhsize += nla_total_size(sizeof(struct rtnexthop)); /* RTA_VIA */ if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC) @@ -1929,8 +2012,8 @@ static int resize_platform_label_table(struct net *net, size_t limit) /* In case the predefined labels need to be populated */ if (limit > MPLS_LABEL_IPV4NULL) { struct net_device *lo = net->loopback_dev; - rt0 = mpls_rt_alloc(1, lo->addr_len); - if (!rt0) + rt0 = mpls_rt_alloc(1, lo->addr_len, 0); + if (IS_ERR(rt0)) goto nort0; RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo); rt0->rt_protocol = RTPROT_KERNEL; @@ -1943,13 +2026,13 @@ static int resize_platform_label_table(struct net *net, size_t limit) } if (limit > MPLS_LABEL_IPV6NULL) { struct net_device *lo = net->loopback_dev; - rt2 = mpls_rt_alloc(1, lo->addr_len); - if (!rt2) + rt2 = mpls_rt_alloc(1, lo->addr_len, 0); + if (IS_ERR(rt2)) goto nort2; RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo); rt2->rt_protocol = RTPROT_KERNEL; rt2->rt_payload_type = MPT_IPV6; - rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT; + rt2->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT; rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE; rt2->rt_nh->nh_via_alen = lo->addr_len; memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr, diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 62928d8fabd1..4db6a5971322 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -2,6 +2,11 @@ #define MPLS_INTERNAL_H #include <net/mpls.h> +/* put a reasonable limit on the number of labels + * we will accept from userspace + */ +#define MAX_NEW_LABELS 30 + struct mpls_entry_decoded { u32 label; u8 ttl; @@ -64,7 +69,6 @@ struct mpls_dev { struct sk_buff; #define LABEL_NOT_SPECIFIED (1 << 20) -#define MAX_NEW_LABELS 2 /* This maximum ha length copied from the definition of struct neighbour */ #define VIA_ALEN_ALIGN sizeof(unsigned long) @@ -83,13 +87,31 @@ enum mpls_payload_type { struct mpls_nh { /* next hop label forwarding entry */ struct net_device __rcu *nh_dev; + + /* nh_flags is accessed under RCU in the packet path; it is + * modified handling netdev events with rtnl lock held + */ unsigned int nh_flags; - u32 nh_label[MAX_NEW_LABELS]; u8 nh_labels; u8 nh_via_alen; u8 nh_via_table; + u8 nh_reserved1; + + u32 nh_label[0]; }; +/* offset of via from beginning of mpls_nh */ +#define MPLS_NH_VIA_OFF(num_labels) \ + ALIGN(sizeof(struct mpls_nh) + (num_labels) * sizeof(u32), \ + VIA_ALEN_ALIGN) + +/* all nexthops within a route have the same size based on the + * max number of labels and max via length across all nexthops + */ +#define MPLS_NH_SIZE(num_labels, max_via_alen) \ + (MPLS_NH_VIA_OFF((num_labels)) + \ + ALIGN((max_via_alen), VIA_ALEN_ALIGN)) + enum mpls_ttl_propagation { MPLS_TTL_PROP_DEFAULT, MPLS_TTL_PROP_ENABLED, @@ -104,16 +126,16 @@ enum mpls_ttl_propagation { * +----------------------+ * | mpls_nh 0 | * +----------------------+ - * | ... | - * +----------------------+ - * | mpls_nh n-1 | - * +----------------------+ - * | alignment padding | + * | alignment padding | 4 bytes for odd number of labels * +----------------------+ * | via[rt_max_alen] 0 | * +----------------------+ + * | alignment padding | via's aligned on sizeof(unsigned long) + * +----------------------+ * | ... | * +----------------------+ + * | mpls_nh n-1 | + * +----------------------+ * | via[rt_max_alen] n-1 | * +----------------------+ */ @@ -123,22 +145,29 @@ struct mpls_route { /* next hop label forwarding entry */ u8 rt_payload_type; u8 rt_max_alen; u8 rt_ttl_propagate; - unsigned int rt_nhn; - unsigned int rt_nhn_alive; + u8 rt_nhn; + /* rt_nhn_alive is accessed under RCU in the packet path; it + * is modified handling netdev events with rtnl lock held + */ + u8 rt_nhn_alive; + u8 rt_nh_size; + u8 rt_via_offset; + u8 rt_reserved1; struct mpls_nh rt_nh[0]; }; #define for_nexthops(rt) { \ - int nhsel; struct mpls_nh *nh; \ - for (nhsel = 0, nh = (rt)->rt_nh; \ + int nhsel; struct mpls_nh *nh; u8 *__nh; \ + for (nhsel = 0, nh = (rt)->rt_nh, __nh = (u8 *)((rt)->rt_nh); \ nhsel < (rt)->rt_nhn; \ - nh++, nhsel++) + __nh += rt->rt_nh_size, nh = (struct mpls_nh *)__nh, nhsel++) #define change_nexthops(rt) { \ - int nhsel; struct mpls_nh *nh; \ - for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh); \ + int nhsel; struct mpls_nh *nh; u8 *__nh; \ + for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh), \ + __nh = (u8 *)((rt)->rt_nh); \ nhsel < (rt)->rt_nhn; \ - nh++, nhsel++) + __nh += rt->rt_nh_size, nh = (struct mpls_nh *)__nh, nhsel++) #define endfor_nexthops(rt) } @@ -173,7 +202,7 @@ static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev) int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels, const u32 label[]); -int nla_get_labels(const struct nlattr *nla, u32 max_labels, u8 *labels, +int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels, u32 label[]); int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table, u8 via[]); diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 22f71fce0bfb..fe00e98667cf 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -164,6 +164,7 @@ static int mpls_build_state(struct nlattr *nla, struct mpls_iptunnel_encap *tun_encap_info; struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1]; struct lwtunnel_state *newts; + u8 n_labels; int ret; ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla, @@ -175,12 +176,18 @@ static int mpls_build_state(struct nlattr *nla, return -EINVAL; - newts = lwtunnel_state_alloc(sizeof(*tun_encap_info)); + /* determine number of labels */ + if (nla_get_labels(tb[MPLS_IPTUNNEL_DST], + MAX_NEW_LABELS, &n_labels, NULL)) + return -EINVAL; + + newts = lwtunnel_state_alloc(sizeof(*tun_encap_info) + + n_labels * sizeof(u32)); if (!newts) return -ENOMEM; tun_encap_info = mpls_lwtunnel_encap(newts); - ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS, + ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], n_labels, &tun_encap_info->labels, tun_encap_info->label); if (ret) goto errout; @@ -257,7 +264,7 @@ static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) a_hdr->default_ttl != b_hdr->default_ttl) return 1; - for (l = 0; l < MAX_NEW_LABELS; l++) + for (l = 0; l < a_hdr->labels; l++) if (a_hdr->label[l] != b_hdr->label[l]) return 1; return 0; diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index da9df2d56e66..22fc32143e9c 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -290,6 +290,7 @@ void nf_conntrack_unregister_notifier(struct net *net, BUG_ON(notify != new); RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); + /* synchronize_rcu() is called from ctnetlink_exit. */ } EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); @@ -326,6 +327,7 @@ void nf_ct_expect_unregister_notifier(struct net *net, BUG_ON(notify != new); RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); + /* synchronize_rcu() is called from ctnetlink_exit. */ } EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 02bcf00c2492..008299b7f78f 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -53,7 +53,11 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); - BUG_ON(t == NULL); + if (!t) { + rcu_read_unlock(); + return NULL; + } + off = ALIGN(sizeof(struct nf_ct_ext), t->align); len = off + t->len + var_alloc_len; alloc_size = t->alloc_size + var_alloc_len; @@ -88,7 +92,10 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); - BUG_ON(t == NULL); + if (!t) { + rcu_read_unlock(); + return NULL; + } newoff = ALIGN(old->len, t->align); newlen = newoff + t->len + var_alloc_len; @@ -175,6 +182,6 @@ void nf_ct_extend_unregister(struct nf_ct_ext_type *type) RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL); update_alloc_size(type); mutex_unlock(&nf_ct_ext_type_mutex); - rcu_barrier(); /* Wait for completion of call_rcu()'s */ + synchronize_rcu(); } EXPORT_SYMBOL_GPL(nf_ct_extend_unregister); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d49cc1e03c5b..ecdc324c7785 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3442,6 +3442,7 @@ static void __exit ctnetlink_exit(void) #ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT RCU_INIT_POINTER(nfnl_ct_hook, NULL); #endif + synchronize_rcu(); } module_init(ctnetlink_init); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 94b14c5a8b17..82802e4a6640 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -903,6 +903,8 @@ static void __exit nf_nat_cleanup(void) #ifdef CONFIG_XFRM RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL); #endif + synchronize_rcu(); + for (i = 0; i < NFPROTO_NUMPROTO; i++) kfree(nf_nat_l4protos[i]); diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index de8782345c86..d45558178da5 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -32,6 +32,13 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers"); +struct nfnl_cthelper { + struct list_head list; + struct nf_conntrack_helper helper; +}; + +static LIST_HEAD(nfnl_cthelper_list); + static int nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -161,6 +168,7 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, int i, ret; struct nf_conntrack_expect_policy *expect_policy; struct nlattr *tb[NFCTH_POLICY_SET_MAX+1]; + unsigned int class_max; ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, nfnl_cthelper_expect_policy_set); @@ -170,19 +178,18 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, if (!tb[NFCTH_POLICY_SET_NUM]) return -EINVAL; - helper->expect_class_max = - ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); - - if (helper->expect_class_max != 0 && - helper->expect_class_max > NF_CT_MAX_EXPECT_CLASSES) + class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); + if (class_max == 0) + return -EINVAL; + if (class_max > NF_CT_MAX_EXPECT_CLASSES) return -EOVERFLOW; expect_policy = kzalloc(sizeof(struct nf_conntrack_expect_policy) * - helper->expect_class_max, GFP_KERNEL); + class_max, GFP_KERNEL); if (expect_policy == NULL) return -ENOMEM; - for (i=0; i<helper->expect_class_max; i++) { + for (i = 0; i < class_max; i++) { if (!tb[NFCTH_POLICY_SET+i]) goto err; @@ -191,6 +198,8 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, if (ret < 0) goto err; } + + helper->expect_class_max = class_max - 1; helper->expect_policy = expect_policy; return 0; err: @@ -203,18 +212,20 @@ nfnl_cthelper_create(const struct nlattr * const tb[], struct nf_conntrack_tuple *tuple) { struct nf_conntrack_helper *helper; + struct nfnl_cthelper *nfcth; int ret; if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN]) return -EINVAL; - helper = kzalloc(sizeof(struct nf_conntrack_helper), GFP_KERNEL); - if (helper == NULL) + nfcth = kzalloc(sizeof(*nfcth), GFP_KERNEL); + if (nfcth == NULL) return -ENOMEM; + helper = &nfcth->helper; ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) - goto err; + goto err1; strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN); helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); @@ -245,15 +256,101 @@ nfnl_cthelper_create(const struct nlattr * const tb[], ret = nf_conntrack_helper_register(helper); if (ret < 0) - goto err; + goto err2; + list_add_tail(&nfcth->list, &nfnl_cthelper_list); return 0; -err: - kfree(helper); +err2: + kfree(helper->expect_policy); +err1: + kfree(nfcth); return ret; } static int +nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy, + struct nf_conntrack_expect_policy *new_policy, + const struct nlattr *attr) +{ + struct nlattr *tb[NFCTH_POLICY_MAX + 1]; + int err; + + err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, + nfnl_cthelper_expect_pol); + if (err < 0) + return err; + + if (!tb[NFCTH_POLICY_NAME] || + !tb[NFCTH_POLICY_EXPECT_MAX] || + !tb[NFCTH_POLICY_EXPECT_TIMEOUT]) + return -EINVAL; + + if (nla_strcmp(tb[NFCTH_POLICY_NAME], policy->name)) + return -EBUSY; + + new_policy->max_expected = + ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); + new_policy->timeout = + ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT])); + + return 0; +} + +static int nfnl_cthelper_update_policy_all(struct nlattr *tb[], + struct nf_conntrack_helper *helper) +{ + struct nf_conntrack_expect_policy new_policy[helper->expect_class_max + 1]; + struct nf_conntrack_expect_policy *policy; + int i, err; + + /* Check first that all policy attributes are well-formed, so we don't + * leave things in inconsistent state on errors. + */ + for (i = 0; i < helper->expect_class_max + 1; i++) { + + if (!tb[NFCTH_POLICY_SET + i]) + return -EINVAL; + + err = nfnl_cthelper_update_policy_one(&helper->expect_policy[i], + &new_policy[i], + tb[NFCTH_POLICY_SET + i]); + if (err < 0) + return err; + } + /* Now we can safely update them. */ + for (i = 0; i < helper->expect_class_max + 1; i++) { + policy = (struct nf_conntrack_expect_policy *) + &helper->expect_policy[i]; + policy->max_expected = new_policy->max_expected; + policy->timeout = new_policy->timeout; + } + + return 0; +} + +static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper, + const struct nlattr *attr) +{ + struct nlattr *tb[NFCTH_POLICY_SET_MAX + 1]; + unsigned int class_max; + int err; + + err = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, + nfnl_cthelper_expect_policy_set); + if (err < 0) + return err; + + if (!tb[NFCTH_POLICY_SET_NUM]) + return -EINVAL; + + class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); + if (helper->expect_class_max + 1 != class_max) + return -EBUSY; + + return nfnl_cthelper_update_policy_all(tb, helper); +} + +static int nfnl_cthelper_update(const struct nlattr * const tb[], struct nf_conntrack_helper *helper) { @@ -263,8 +360,7 @@ nfnl_cthelper_update(const struct nlattr * const tb[], return -EBUSY; if (tb[NFCTH_POLICY]) { - ret = nfnl_cthelper_parse_expect_policy(helper, - tb[NFCTH_POLICY]); + ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) return ret; } @@ -293,7 +389,8 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, const char *helper_name; struct nf_conntrack_helper *cur, *helper = NULL; struct nf_conntrack_tuple tuple; - int ret = 0, i; + struct nfnl_cthelper *nlcth; + int ret = 0; if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) return -EINVAL; @@ -304,31 +401,22 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, if (ret < 0) return ret; - rcu_read_lock(); - for (i = 0; i < nf_ct_helper_hsize && !helper; i++) { - hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { + list_for_each_entry(nlcth, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + if (strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) + continue; - if (strncmp(cur->name, helper_name, - NF_CT_HELPER_NAME_LEN) != 0) - continue; + if ((tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; - if ((tuple.src.l3num != cur->tuple.src.l3num || - tuple.dst.protonum != cur->tuple.dst.protonum)) - continue; + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; - if (nlh->nlmsg_flags & NLM_F_EXCL) { - ret = -EEXIST; - goto err; - } - helper = cur; - break; - } + helper = cur; + break; } - rcu_read_unlock(); if (helper == NULL) ret = nfnl_cthelper_create(tb, &tuple); @@ -336,9 +424,6 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, ret = nfnl_cthelper_update(tb, helper); return ret; -err: - rcu_read_unlock(); - return ret; } static int @@ -377,10 +462,10 @@ nfnl_cthelper_dump_policy(struct sk_buff *skb, goto nla_put_failure; if (nla_put_be32(skb, NFCTH_POLICY_SET_NUM, - htonl(helper->expect_class_max))) + htonl(helper->expect_class_max + 1))) goto nla_put_failure; - for (i=0; i<helper->expect_class_max; i++) { + for (i = 0; i < helper->expect_class_max + 1; i++) { nest_parms2 = nla_nest_start(skb, (NFCTH_POLICY_SET+i) | NLA_F_NESTED); if (nest_parms2 == NULL) @@ -502,11 +587,12 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { - int ret = -ENOENT, i; + int ret = -ENOENT; struct nf_conntrack_helper *cur; struct sk_buff *skb2; char *helper_name = NULL; struct nf_conntrack_tuple tuple; + struct nfnl_cthelper *nlcth; bool tuple_set = false; if (nlh->nlmsg_flags & NLM_F_DUMP) { @@ -527,45 +613,39 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, tuple_set = true; } - for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { + list_for_each_entry(nlcth, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; + if (helper_name && + strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) + continue; - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + if (tuple_set && + (tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; - if (helper_name && strncmp(cur->name, helper_name, - NF_CT_HELPER_NAME_LEN) != 0) { - continue; - } - if (tuple_set && - (tuple.src.l3num != cur->tuple.src.l3num || - tuple.dst.protonum != cur->tuple.dst.protonum)) - continue; - - skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) { - ret = -ENOMEM; - break; - } + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) { + ret = -ENOMEM; + break; + } - ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid, - nlh->nlmsg_seq, - NFNL_MSG_TYPE(nlh->nlmsg_type), - NFNL_MSG_CTHELPER_NEW, cur); - if (ret <= 0) { - kfree_skb(skb2); - break; - } + ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + NFNL_MSG_CTHELPER_NEW, cur); + if (ret <= 0) { + kfree_skb(skb2); + break; + } - ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, - MSG_DONTWAIT); - if (ret > 0) - ret = 0; + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, + MSG_DONTWAIT); + if (ret > 0) + ret = 0; - /* this avoids a loop in nfnetlink. */ - return ret == -EAGAIN ? -ENOBUFS : ret; - } + /* this avoids a loop in nfnetlink. */ + return ret == -EAGAIN ? -ENOBUFS : ret; } return ret; } @@ -576,10 +656,10 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, { char *helper_name = NULL; struct nf_conntrack_helper *cur; - struct hlist_node *tmp; struct nf_conntrack_tuple tuple; bool tuple_set = false, found = false; - int i, j = 0, ret; + struct nfnl_cthelper *nlcth, *n; + int j = 0, ret; if (tb[NFCTH_NAME]) helper_name = nla_data(tb[NFCTH_NAME]); @@ -592,28 +672,27 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, tuple_set = true; } - for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i], - hnode) { - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; + j++; - j++; + if (helper_name && + strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) + continue; - if (helper_name && strncmp(cur->name, helper_name, - NF_CT_HELPER_NAME_LEN) != 0) { - continue; - } - if (tuple_set && - (tuple.src.l3num != cur->tuple.src.l3num || - tuple.dst.protonum != cur->tuple.dst.protonum)) - continue; + if (tuple_set && + (tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; - found = true; - nf_conntrack_helper_unregister(cur); - } + found = true; + nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + + list_del(&nlcth->list); + kfree(nlcth); } + /* Make sure we return success if we flush and there is no helpers */ return (found || j == 0) ? 0 : -ENOENT; } @@ -662,20 +741,16 @@ err_out: static void __exit nfnl_cthelper_exit(void) { struct nf_conntrack_helper *cur; - struct hlist_node *tmp; - int i; + struct nfnl_cthelper *nlcth, *n; nfnetlink_subsys_unregister(&nfnl_cthelper_subsys); - for (i=0; i<nf_ct_helper_hsize; i++) { - hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i], - hnode) { - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; - nf_conntrack_helper_unregister(cur); - } + nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + kfree(nlcth); } } diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index baa75f3ab7e7..57c2cdf7b691 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -646,8 +646,8 @@ static void __exit cttimeout_exit(void) #ifdef CONFIG_NF_CONNTRACK_TIMEOUT RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL); RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL); + synchronize_rcu(); #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - rcu_barrier(); } module_init(cttimeout_init); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 3ee0b8a000a4..933509ebf3d3 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -443,7 +443,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { skb_tx_error(entskb); - return NULL; + goto nlmsg_failure; } nlh = nlmsg_put(skb, 0, 0, @@ -452,7 +452,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (!nlh) { skb_tx_error(entskb); kfree_skb(skb); - return NULL; + goto nlmsg_failure; } nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = entry->state.pf; @@ -598,12 +598,17 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, } nlh->nlmsg_len = skb->len; + if (seclen) + security_release_secctx(secdata, seclen); return skb; nla_put_failure: skb_tx_error(entskb); kfree_skb(skb); net_err_ratelimited("nf_queue: error creating packet message\n"); +nlmsg_failure: + if (seclen) + security_release_secctx(secdata, seclen); return NULL; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 596eaff66649..fc232441cf23 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -78,14 +78,6 @@ struct listeners { /* state bits */ #define NETLINK_S_CONGESTED 0x0 -/* flags */ -#define NETLINK_F_KERNEL_SOCKET 0x1 -#define NETLINK_F_RECV_PKTINFO 0x2 -#define NETLINK_F_BROADCAST_SEND_ERROR 0x4 -#define NETLINK_F_RECV_NO_ENOBUFS 0x8 -#define NETLINK_F_LISTEN_ALL_NSID 0x10 -#define NETLINK_F_CAP_ACK 0x20 - static inline int netlink_is_kernel(struct sock *sk) { return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET; diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 4fdb38318977..f792f8d7f982 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -6,6 +6,14 @@ #include <linux/workqueue.h> #include <net/sock.h> +/* flags */ +#define NETLINK_F_KERNEL_SOCKET 0x1 +#define NETLINK_F_RECV_PKTINFO 0x2 +#define NETLINK_F_BROADCAST_SEND_ERROR 0x4 +#define NETLINK_F_RECV_NO_ENOBUFS 0x8 +#define NETLINK_F_LISTEN_ALL_NSID 0x10 +#define NETLINK_F_CAP_ACK 0x20 + #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) #define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long)) diff --git a/net/netlink/diag.c b/net/netlink/diag.c index a5546249fb10..8faa20b4d457 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -19,6 +19,27 @@ static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb) nlk->groups); } +static int sk_diag_put_flags(struct sock *sk, struct sk_buff *skb) +{ + struct netlink_sock *nlk = nlk_sk(sk); + u32 flags = 0; + + if (nlk->cb_running) + flags |= NDIAG_FLAG_CB_RUNNING; + if (nlk->flags & NETLINK_F_RECV_PKTINFO) + flags |= NDIAG_FLAG_PKTINFO; + if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR) + flags |= NDIAG_FLAG_BROADCAST_ERROR; + if (nlk->flags & NETLINK_F_RECV_NO_ENOBUFS) + flags |= NDIAG_FLAG_NO_ENOBUFS; + if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID) + flags |= NDIAG_FLAG_LISTEN_ALL_NSID; + if (nlk->flags & NETLINK_F_CAP_ACK) + flags |= NDIAG_FLAG_CAP_ACK; + + return nla_put_u32(skb, NETLINK_DIAG_FLAGS, flags); +} + static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct netlink_diag_req *req, u32 portid, u32 seq, u32 flags, int sk_ino) @@ -52,6 +73,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO)) goto out_nlmsg_trim; + if ((req->ndiag_show & NDIAG_SHOW_FLAGS) && + sk_diag_put_flags(sk, skb)) + goto out_nlmsg_trim; + nlmsg_end(skb, nlh); return 0; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index e0a87776a010..7b2c2fce408a 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -643,8 +643,8 @@ static bool skb_nfct_cached(struct net *net, */ if (nf_ct_is_confirmed(ct)) nf_ct_delete(ct, 0, 0); - else - nf_conntrack_put(&ct->ct_general); + + nf_conntrack_put(&ct->ct_general); nf_ct_set(skb, NULL, 0); return false; } diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 9d4bb8eb63f2..3f76cb765e5b 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -527,7 +527,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) /* Link layer. */ clear_vlan(key); - if (key->mac_proto == MAC_PROTO_NONE) { + if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) { if (unlikely(eth_type_vlan(skb->protocol))) return -EINVAL; @@ -745,7 +745,13 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key) { - return key_extract(skb, key); + int res; + + res = key_extract(skb, key); + if (!res) + key->mac_proto &= ~SW_FLOW_KEY_INVALID; + + return res; } static int key_extract_mac_proto(struct sk_buff *skb) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a0dbe7ca8f72..8489beff5c25 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3665,6 +3665,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; + if (val > INT_MAX) + return -EINVAL; po->tp_reserve = val; return 0; } @@ -4193,8 +4195,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, if (unlikely(!PAGE_ALIGNED(req->tp_block_size))) goto out; if (po->tp_version >= TPACKET_V3 && - (int)(req->tp_block_size - - BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0) + req->tp_block_size <= + BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv)) goto out; if (unlikely(req->tp_frame_size < po->tp_hdrlen + po->tp_reserve)) @@ -4205,6 +4207,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, rb->frames_per_block = req->tp_block_size / req->tp_frame_size; if (unlikely(rb->frames_per_block == 0)) goto out; + if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr)) + goto out; if (unlikely((rb->frames_per_block * req->tp_block_nr) != req->tp_frame_nr)) goto out; diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig index b83c6807a5ae..326fd97444f5 100644 --- a/net/qrtr/Kconfig +++ b/net/qrtr/Kconfig @@ -16,7 +16,7 @@ if QRTR config QRTR_SMD tristate "SMD IPC Router channels" - depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n) + depends on RPMSG || (COMPILE_TEST && RPMSG=n) ---help--- Say Y here to support SMD based ipcrouter channels. SMD is the most common transport for IPC Router. diff --git a/net/qrtr/smd.c b/net/qrtr/smd.c index 0d11132b3370..50615d5efac1 100644 --- a/net/qrtr/smd.c +++ b/net/qrtr/smd.c @@ -14,21 +14,21 @@ #include <linux/module.h> #include <linux/skbuff.h> -#include <linux/soc/qcom/smd.h> +#include <linux/rpmsg.h> #include "qrtr.h" struct qrtr_smd_dev { struct qrtr_endpoint ep; - struct qcom_smd_channel *channel; + struct rpmsg_endpoint *channel; struct device *dev; }; /* from smd to qrtr */ -static int qcom_smd_qrtr_callback(struct qcom_smd_channel *channel, - const void *data, size_t len) +static int qcom_smd_qrtr_callback(struct rpmsg_device *rpdev, + void *data, int len, void *priv, u32 addr) { - struct qrtr_smd_dev *qdev = qcom_smd_get_drvdata(channel); + struct qrtr_smd_dev *qdev = dev_get_drvdata(&rpdev->dev); int rc; if (!qdev) @@ -54,7 +54,7 @@ static int qcom_smd_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb) if (rc) goto out; - rc = qcom_smd_send(qdev->channel, skb->data, skb->len); + rc = rpmsg_send(qdev->channel, skb->data, skb->len); out: if (rc) @@ -64,57 +64,55 @@ out: return rc; } -static int qcom_smd_qrtr_probe(struct qcom_smd_device *sdev) +static int qcom_smd_qrtr_probe(struct rpmsg_device *rpdev) { struct qrtr_smd_dev *qdev; int rc; - qdev = devm_kzalloc(&sdev->dev, sizeof(*qdev), GFP_KERNEL); + qdev = devm_kzalloc(&rpdev->dev, sizeof(*qdev), GFP_KERNEL); if (!qdev) return -ENOMEM; - qdev->channel = sdev->channel; - qdev->dev = &sdev->dev; + qdev->channel = rpdev->ept; + qdev->dev = &rpdev->dev; qdev->ep.xmit = qcom_smd_qrtr_send; rc = qrtr_endpoint_register(&qdev->ep, QRTR_EP_NID_AUTO); if (rc) return rc; - qcom_smd_set_drvdata(sdev->channel, qdev); - dev_set_drvdata(&sdev->dev, qdev); + dev_set_drvdata(&rpdev->dev, qdev); - dev_dbg(&sdev->dev, "Qualcomm SMD QRTR driver probed\n"); + dev_dbg(&rpdev->dev, "Qualcomm SMD QRTR driver probed\n"); return 0; } -static void qcom_smd_qrtr_remove(struct qcom_smd_device *sdev) +static void qcom_smd_qrtr_remove(struct rpmsg_device *rpdev) { - struct qrtr_smd_dev *qdev = dev_get_drvdata(&sdev->dev); + struct qrtr_smd_dev *qdev = dev_get_drvdata(&rpdev->dev); qrtr_endpoint_unregister(&qdev->ep); - dev_set_drvdata(&sdev->dev, NULL); + dev_set_drvdata(&rpdev->dev, NULL); } -static const struct qcom_smd_id qcom_smd_qrtr_smd_match[] = { +static const struct rpmsg_device_id qcom_smd_qrtr_smd_match[] = { { "IPCRTR" }, {} }; -static struct qcom_smd_driver qcom_smd_qrtr_driver = { +static struct rpmsg_driver qcom_smd_qrtr_driver = { .probe = qcom_smd_qrtr_probe, .remove = qcom_smd_qrtr_remove, .callback = qcom_smd_qrtr_callback, - .smd_match_table = qcom_smd_qrtr_smd_match, - .driver = { + .id_table = qcom_smd_qrtr_smd_match, + .drv = { .name = "qcom_smd_qrtr", - .owner = THIS_MODULE, }, }; -module_qcom_smd_driver(qcom_smd_qrtr_driver); +module_rpmsg_driver(qcom_smd_qrtr_driver); MODULE_DESCRIPTION("Qualcomm IPC-Router SMD interface driver"); MODULE_LICENSE("GPL v2"); diff --git a/net/rds/connection.c b/net/rds/connection.c index 1fa75ab7b733..6a5ebdea7d2e 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -333,11 +333,19 @@ void rds_conn_shutdown(struct rds_conn_path *cp) rds_conn_path_reset(cp); if (!rds_conn_path_transition(cp, RDS_CONN_DISCONNECTING, + RDS_CONN_DOWN) && + !rds_conn_path_transition(cp, RDS_CONN_ERROR, RDS_CONN_DOWN)) { /* This can happen - eg when we're in the middle of tearing * down the connection, and someone unloads the rds module. - * Quite reproduceable with loopback connections. + * Quite reproducible with loopback connections. * Mostly harmless. + * + * Note that this also happens with rds-tcp because + * we could have triggered rds_conn_path_drop in irq + * mode from rds_tcp_state change on the receipt of + * a FIN, thus we need to recheck for RDS_CONN_ERROR + * here. */ rds_conn_path_error(cp, "%s: failed to transition " "to state DOWN, current state " diff --git a/net/rds/threads.c b/net/rds/threads.c index e36e333a0aa0..3e447d056d09 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -156,7 +156,7 @@ void rds_connect_worker(struct work_struct *work) struct rds_connection *conn = cp->cp_conn; int ret; - if (cp->cp_index > 1 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr) + if (cp->cp_index > 0 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr) return; clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 26a7b1db1361..7486926e60a8 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -740,6 +740,25 @@ static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, } /* + * Abort a call due to a protocol error. + */ +static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call, + struct sk_buff *skb, + const char *eproto_why, + const char *why, + u32 abort_code) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why); + return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO); +} + +#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \ + __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \ + (abort_why), (abort_code)) + +/* * conn_client.c */ extern unsigned int rxrpc_max_client_connections; diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 0ed181f53f32..1752fcf8e8f1 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -413,11 +413,11 @@ found_service: case RXRPC_CONN_REMOTELY_ABORTED: rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, - conn->remote_abort, ECONNABORTED); + conn->remote_abort, -ECONNABORTED); break; case RXRPC_CONN_LOCALLY_ABORTED: rxrpc_abort_call("CON", call, sp->hdr.seq, - conn->local_abort, ECONNABORTED); + conn->local_abort, -ECONNABORTED); break; default: BUG(); @@ -600,7 +600,7 @@ int rxrpc_reject_call(struct rxrpc_sock *rx) write_lock_bh(&call->state_lock); switch (call->state) { case RXRPC_CALL_SERVER_ACCEPTING: - __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, ECONNABORTED); + __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, -ECONNABORTED); abort = true; /* fall through */ case RXRPC_CALL_COMPLETE: diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 97a17ada4431..7a77844aab16 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -386,7 +386,7 @@ recheck_state: now = ktime_get_real(); if (ktime_before(call->expire_at, now)) { - rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME); + rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME); set_bit(RXRPC_CALL_EV_ABORT, &call->events); goto recheck_state; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index d79cd36987a9..47f7f4205653 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -486,7 +486,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) call = list_entry(rx->to_be_accepted.next, struct rxrpc_call, accept_link); list_del(&call->accept_link); - rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, ECONNRESET); + rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, -ECONNRESET); rxrpc_put_call(call, rxrpc_call_put); } @@ -494,7 +494,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) call = list_entry(rx->sock_calls.next, struct rxrpc_call, sock_link); rxrpc_get_call(call, rxrpc_call_got); - rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET); + rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, -ECONNRESET); rxrpc_send_abort_packet(call); rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index c3be03e8d098..e8dea0d49e7f 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -550,6 +550,7 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, call->cid = conn->proto.cid | channel; call->call_id = call_id; + trace_rxrpc_connect_call(call); _net("CONNECT call %08x:%08x as call %d on conn %d", call->cid, call->call_id, call->debug_id, conn->debug_id); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index b099b64366f3..46babcf82ce8 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -168,7 +168,7 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, * generate a connection-level abort */ static int rxrpc_abort_connection(struct rxrpc_connection *conn, - u32 error, u32 abort_code) + int error, u32 abort_code) { struct rxrpc_wire_header whdr; struct msghdr msg; @@ -281,14 +281,17 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, case RXRPC_PACKET_TYPE_ABORT: if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), - &wtmp, sizeof(wtmp)) < 0) + &wtmp, sizeof(wtmp)) < 0) { + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("bad_abort")); return -EPROTO; + } abort_code = ntohl(wtmp); _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code); conn->state = RXRPC_CONN_REMOTELY_ABORTED; rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, - abort_code, ECONNABORTED); + abort_code, -ECONNABORTED); return -ECONNABORTED; case RXRPC_PACKET_TYPE_CHALLENGE: @@ -327,7 +330,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, return 0; default: - _leave(" = -EPROTO [%u]", sp->hdr.type); + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("bad_conn_pkt")); return -EPROTO; } } @@ -370,7 +374,7 @@ static void rxrpc_secure_connection(struct rxrpc_connection *conn) abort: _debug("abort %d, %d", ret, abort_code); - rxrpc_abort_connection(conn, -ret, abort_code); + rxrpc_abort_connection(conn, ret, abort_code); _leave(" [aborted]"); } @@ -419,9 +423,8 @@ requeue_and_leave: goto out; protocol_error: - if (rxrpc_abort_connection(conn, -ret, abort_code) < 0) + if (rxrpc_abort_connection(conn, ret, abort_code) < 0) goto requeue_and_leave; rxrpc_free_skb(skb, rxrpc_skb_rx_freed); - _leave(" [EPROTO]"); goto out; } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 18b2ad8be8e2..45dba732a3b4 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -30,7 +30,7 @@ static void rxrpc_proto_abort(const char *why, struct rxrpc_call *call, rxrpc_seq_t seq) { - if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, EBADMSG)) { + if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG)) { set_bit(RXRPC_CALL_EV_ABORT, &call->events); rxrpc_queue_call(call); } @@ -665,6 +665,8 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, rwind = RXRPC_RXTX_BUFF_SIZE - 1; if (rwind > call->tx_winsize) wake = true; + trace_rxrpc_rx_rwind_change(call, sp->hdr.serial, + ntohl(ackinfo->rwind), wake); call->tx_winsize = rwind; } @@ -877,7 +879,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) } /* - * Process an ABORT packet. + * Process an ABORT packet directed at a call. */ static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) { @@ -892,10 +894,12 @@ static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) &wtmp, sizeof(wtmp)) >= 0) abort_code = ntohl(wtmp); + trace_rxrpc_rx_abort(call, sp->hdr.serial, abort_code); + _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); if (rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, - abort_code, ECONNABORTED)) + abort_code, -ECONNABORTED)) rxrpc_notify_socket(call); } @@ -958,7 +962,7 @@ static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn, case RXRPC_CALL_COMPLETE: break; default: - if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, ESHUTDOWN)) { + if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN)) { set_bit(RXRPC_CALL_EV_ABORT, &call->events); rxrpc_queue_call(call); } @@ -1017,8 +1021,11 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) struct rxrpc_wire_header whdr; /* dig out the RxRPC connection details */ - if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) + if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) { + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("bad_hdr")); return -EBADMSG; + } memset(sp, 0, sizeof(*sp)); sp->hdr.epoch = ntohl(whdr.epoch); diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index 7d4375e557e6..af276f173b10 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -46,7 +46,10 @@ static int none_respond_to_challenge(struct rxrpc_connection *conn, struct sk_buff *skb, u32 *_abort_code) { - *_abort_code = RX_PROTOCOL_ERROR; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("chall_none")); return -EPROTO; } @@ -54,7 +57,10 @@ static int none_verify_response(struct rxrpc_connection *conn, struct sk_buff *skb, u32 *_abort_code) { - *_abort_code = RX_PROTOCOL_ERROR; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("resp_none")); return -EPROTO; } diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index bf13b8470c9a..1ed9c0c2e94f 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -296,7 +296,7 @@ void rxrpc_peer_error_distributor(struct work_struct *work) hlist_del_init(&call->error_link); rxrpc_see_call(call); - if (rxrpc_set_call_completion(call, compl, 0, error)) + if (rxrpc_set_call_completion(call, compl, 0, -error)) rxrpc_notify_socket(call); } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 3e2f1a8e9c5b..f9caf3b77509 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -83,11 +83,11 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp); break; case RXRPC_CALL_NETWORK_ERROR: - tmp = call->error; + tmp = -call->error; ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &tmp); break; case RXRPC_CALL_LOCAL_ERROR: - tmp = call->error; + tmp = -call->error; ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp); break; default: @@ -682,14 +682,16 @@ out: return ret; short_data: + trace_rxrpc_rx_eproto(call, 0, tracepoint_string("short_data")); ret = -EBADMSG; goto out; excess_data: + trace_rxrpc_rx_eproto(call, 0, tracepoint_string("excess_data")); ret = -EMSGSIZE; goto out; call_complete: *_abort = call->abort_code; - ret = -call->error; + ret = call->error; if (call->completion == RXRPC_CALL_SUCCEEDED) { ret = 1; if (size > 0) diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 4374e7b9c7bf..1bb9b2ccc267 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -148,15 +148,13 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, u32 data_size, void *sechdr) { - struct rxrpc_skb_priv *sp; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); struct rxkad_level1_hdr hdr; struct rxrpc_crypt iv; struct scatterlist sg; u16 check; - sp = rxrpc_skb(skb); - _enter(""); check = sp->hdr.seq ^ call->call_id; @@ -323,6 +321,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_crypt iv; struct scatterlist sg[16]; struct sk_buff *trailer; + bool aborted; u32 data_size, buf; u16 check; int nsg; @@ -330,7 +329,8 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, _enter(""); if (len < 8) { - rxrpc_abort_call("V1H", call, seq, RXKADSEALEDINCON, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_hdr", "V1H", + RXKADSEALEDINCON); goto protocol_error; } @@ -355,7 +355,8 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, /* Extract the decrypted packet length */ if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) { - rxrpc_abort_call("XV1", call, seq, RXKADDATALEN, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_len", "XV1", + RXKADDATALEN); goto protocol_error; } offset += sizeof(sechdr); @@ -368,12 +369,14 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, check ^= seq ^ call->call_id; check &= 0xffff; if (check != 0) { - rxrpc_abort_call("V1C", call, seq, RXKADSEALEDINCON, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_check", "V1C", + RXKADSEALEDINCON); goto protocol_error; } if (data_size > len) { - rxrpc_abort_call("V1L", call, seq, RXKADDATALEN, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_datalen", "V1L", + RXKADDATALEN); goto protocol_error; } @@ -381,8 +384,8 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, return 0; protocol_error: - rxrpc_send_abort_packet(call); - _leave(" = -EPROTO"); + if (aborted) + rxrpc_send_abort_packet(call); return -EPROTO; nomem: @@ -403,6 +406,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_crypt iv; struct scatterlist _sg[4], *sg; struct sk_buff *trailer; + bool aborted; u32 data_size, buf; u16 check; int nsg; @@ -410,7 +414,8 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, _enter(",{%d}", skb->len); if (len < 8) { - rxrpc_abort_call("V2H", call, seq, RXKADSEALEDINCON, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_hdr", "V2H", + RXKADSEALEDINCON); goto protocol_error; } @@ -445,7 +450,8 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, /* Extract the decrypted packet length */ if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) { - rxrpc_abort_call("XV2", call, seq, RXKADDATALEN, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_len", "XV2", + RXKADDATALEN); goto protocol_error; } offset += sizeof(sechdr); @@ -458,12 +464,14 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, check ^= seq ^ call->call_id; check &= 0xffff; if (check != 0) { - rxrpc_abort_call("V2C", call, seq, RXKADSEALEDINCON, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_check", "V2C", + RXKADSEALEDINCON); goto protocol_error; } if (data_size > len) { - rxrpc_abort_call("V2L", call, seq, RXKADDATALEN, EPROTO); + aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_datalen", "V2L", + RXKADDATALEN); goto protocol_error; } @@ -471,8 +479,8 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, return 0; protocol_error: - rxrpc_send_abort_packet(call); - _leave(" = -EPROTO"); + if (aborted) + rxrpc_send_abort_packet(call); return -EPROTO; nomem: @@ -491,6 +499,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); struct rxrpc_crypt iv; struct scatterlist sg; + bool aborted; u16 cksum; u32 x, y; @@ -522,10 +531,9 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, cksum = 1; /* zero checksums are not permitted */ if (cksum != expected_cksum) { - rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO); - rxrpc_send_abort_packet(call); - _leave(" = -EPROTO [csum failed]"); - return -EPROTO; + aborted = rxrpc_abort_eproto(call, skb, "rxkad_csum", "VCK", + RXKADSEALEDINCON); + goto protocol_error; } switch (call->conn->params.security_level) { @@ -538,6 +546,11 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, default: return -ENOANO; } + +protocol_error: + if (aborted) + rxrpc_send_abort_packet(call); + return -EPROTO; } /* @@ -754,22 +767,23 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, struct rxkad_response resp __attribute__((aligned(8))); /* must be aligned for crypto */ struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + const char *eproto; u32 version, nonce, min_level, abort_code; int ret; _enter("{%d,%x}", conn->debug_id, key_serial(conn->params.key)); - if (!conn->params.key) { - _leave(" = -EPROTO [no key]"); - return -EPROTO; - } + eproto = tracepoint_string("chall_no_key"); + abort_code = RX_PROTOCOL_ERROR; + if (!conn->params.key) + goto protocol_error; + abort_code = RXKADEXPIRED; ret = key_validate(conn->params.key); - if (ret < 0) { - *_abort_code = RXKADEXPIRED; - return ret; - } + if (ret < 0) + goto other_error; + eproto = tracepoint_string("chall_short"); abort_code = RXKADPACKETSHORT; if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), &challenge, sizeof(challenge)) < 0) @@ -782,13 +796,15 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, _proto("Rx CHALLENGE %%%u { v=%u n=%u ml=%u }", sp->hdr.serial, version, nonce, min_level); + eproto = tracepoint_string("chall_ver"); abort_code = RXKADINCONSISTENCY; if (version != RXKAD_VERSION) goto protocol_error; abort_code = RXKADLEVELFAIL; + ret = -EACCES; if (conn->params.security_level < min_level) - goto protocol_error; + goto other_error; token = conn->params.key->payload.data[0]; @@ -815,28 +831,34 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, return rxkad_send_response(conn, &sp->hdr, &resp, token->kad); protocol_error: + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); + ret = -EPROTO; +other_error: *_abort_code = abort_code; - _leave(" = -EPROTO [%d]", abort_code); - return -EPROTO; + return ret; } /* * decrypt the kerberos IV ticket in the response */ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, + struct sk_buff *skb, void *ticket, size_t ticket_len, struct rxrpc_crypt *_session_key, time_t *_expiry, u32 *_abort_code) { struct skcipher_request *req; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt iv, key; struct scatterlist sg[1]; struct in_addr addr; unsigned int life; + const char *eproto; time_t issue, now; bool little_endian; int ret; + u32 abort_code; u8 *p, *q, *name, *end; _enter("{%d},{%x}", conn->debug_id, key_serial(conn->server_key)); @@ -847,11 +869,11 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, if (ret < 0) { switch (ret) { case -EKEYEXPIRED: - *_abort_code = RXKADEXPIRED; - goto error; + abort_code = RXKADEXPIRED; + goto other_error; default: - *_abort_code = RXKADNOAUTH; - goto error; + abort_code = RXKADNOAUTH; + goto other_error; } } @@ -860,13 +882,11 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, memcpy(&iv, &conn->server_key->payload.data[2], sizeof(iv)); + ret = -ENOMEM; req = skcipher_request_alloc(conn->server_key->payload.data[0], GFP_NOFS); - if (!req) { - *_abort_code = RXKADNOAUTH; - ret = -ENOMEM; - goto error; - } + if (!req) + goto temporary_error; sg_init_one(&sg[0], ticket, ticket_len); skcipher_request_set_callback(req, 0, NULL, NULL); @@ -877,11 +897,12 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, p = ticket; end = p + ticket_len; -#define Z(size) \ +#define Z(field) \ ({ \ u8 *__str = p; \ + eproto = tracepoint_string("rxkad_bad_"#field); \ q = memchr(p, 0, end - p); \ - if (!q || q - p > (size)) \ + if (!q || q - p > (field##_SZ)) \ goto bad_ticket; \ for (; p < q; p++) \ if (!isprint(*p)) \ @@ -896,17 +917,18 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, p++; /* extract the authentication name */ - name = Z(ANAME_SZ); + name = Z(ANAME); _debug("KIV ANAME: %s", name); /* extract the principal's instance */ - name = Z(INST_SZ); + name = Z(INST); _debug("KIV INST : %s", name); /* extract the principal's authentication domain */ - name = Z(REALM_SZ); + name = Z(REALM); _debug("KIV REALM: %s", name); + eproto = tracepoint_string("rxkad_bad_len"); if (end - p < 4 + 8 + 4 + 2) goto bad_ticket; @@ -941,36 +963,37 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, /* check the ticket is in date */ if (issue > now) { - *_abort_code = RXKADNOAUTH; + abort_code = RXKADNOAUTH; ret = -EKEYREJECTED; - goto error; + goto other_error; } if (issue < now - life) { - *_abort_code = RXKADEXPIRED; + abort_code = RXKADEXPIRED; ret = -EKEYEXPIRED; - goto error; + goto other_error; } *_expiry = issue + life; /* get the service name */ - name = Z(SNAME_SZ); + name = Z(SNAME); _debug("KIV SNAME: %s", name); /* get the service instance name */ - name = Z(INST_SZ); + name = Z(INST); _debug("KIV SINST: %s", name); - - ret = 0; -error: - _leave(" = %d", ret); - return ret; + return 0; bad_ticket: - *_abort_code = RXKADBADTICKET; - ret = -EBADMSG; - goto error; + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); + abort_code = RXKADBADTICKET; + ret = -EPROTO; +other_error: + *_abort_code = abort_code; + return ret; +temporary_error: + return ret; } /* @@ -1020,6 +1043,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, __attribute__((aligned(8))); /* must be aligned for crypto */ struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt session_key; + const char *eproto; time_t expiry; void *ticket; u32 abort_code, version, kvno, ticket_len, level; @@ -1028,6 +1052,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key)); + eproto = tracepoint_string("rxkad_rsp_short"); abort_code = RXKADPACKETSHORT; if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), &response, sizeof(response)) < 0) @@ -1041,40 +1066,43 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }", sp->hdr.serial, version, kvno, ticket_len); + eproto = tracepoint_string("rxkad_rsp_ver"); abort_code = RXKADINCONSISTENCY; if (version != RXKAD_VERSION) goto protocol_error; + eproto = tracepoint_string("rxkad_rsp_tktlen"); abort_code = RXKADTICKETLEN; if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN) goto protocol_error; + eproto = tracepoint_string("rxkad_rsp_unkkey"); abort_code = RXKADUNKNOWNKEY; if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5) goto protocol_error; /* extract the kerberos ticket and decrypt and decode it */ + ret = -ENOMEM; ticket = kmalloc(ticket_len, GFP_NOFS); if (!ticket) - return -ENOMEM; + goto temporary_error; + eproto = tracepoint_string("rxkad_tkt_short"); abort_code = RXKADPACKETSHORT; if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), ticket, ticket_len) < 0) goto protocol_error_free; - ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key, - &expiry, &abort_code); - if (ret < 0) { - *_abort_code = abort_code; - kfree(ticket); - return ret; - } + ret = rxkad_decrypt_ticket(conn, skb, ticket, ticket_len, &session_key, + &expiry, _abort_code); + if (ret < 0) + goto temporary_error_free; /* use the session key from inside the ticket to decrypt the * response */ rxkad_decrypt_response(conn, &response, &session_key); + eproto = tracepoint_string("rxkad_rsp_param"); abort_code = RXKADSEALEDINCON; if (ntohl(response.encrypted.epoch) != conn->proto.epoch) goto protocol_error_free; @@ -1085,6 +1113,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, csum = response.encrypted.checksum; response.encrypted.checksum = 0; rxkad_calc_response_checksum(&response); + eproto = tracepoint_string("rxkad_rsp_csum"); if (response.encrypted.checksum != csum) goto protocol_error_free; @@ -1093,11 +1122,15 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, struct rxrpc_call *call; u32 call_id = ntohl(response.encrypted.call_id[i]); + eproto = tracepoint_string("rxkad_rsp_callid"); if (call_id > INT_MAX) goto protocol_error_unlock; + eproto = tracepoint_string("rxkad_rsp_callctr"); if (call_id < conn->channels[i].call_counter) goto protocol_error_unlock; + + eproto = tracepoint_string("rxkad_rsp_callst"); if (call_id > conn->channels[i].call_counter) { call = rcu_dereference_protected( conn->channels[i].call, @@ -1109,10 +1142,12 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, } spin_unlock(&conn->channel_lock); + eproto = tracepoint_string("rxkad_rsp_seq"); abort_code = RXKADOUTOFSEQUENCE; if (ntohl(response.encrypted.inc_nonce) != conn->security_nonce + 1) goto protocol_error_free; + eproto = tracepoint_string("rxkad_rsp_level"); abort_code = RXKADLEVELFAIL; level = ntohl(response.encrypted.level); if (level > RXRPC_SECURITY_ENCRYPT) @@ -1123,10 +1158,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, * this the connection security can be handled in exactly the same way * as for a client connection */ ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno); - if (ret < 0) { - kfree(ticket); - return ret; - } + if (ret < 0) + goto temporary_error_free; kfree(ticket); _leave(" = 0"); @@ -1137,9 +1170,18 @@ protocol_error_unlock: protocol_error_free: kfree(ticket); protocol_error: + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); *_abort_code = abort_code; - _leave(" = -EPROTO [%d]", abort_code); return -EPROTO; + +temporary_error_free: + kfree(ticket); +temporary_error: + /* Ignore the response packet if we got a temporary error such as + * ENOMEM. We just want to send the challenge again. Note that we + * also come out this way if the ticket decryption fails. + */ + return ret; } /* diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 97ab214ca411..96ffa5d5733b 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -556,7 +556,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = -ESHUTDOWN; } else if (cmd == RXRPC_CMD_SEND_ABORT) { ret = 0; - if (rxrpc_abort_call("CMD", call, 0, abort_code, ECONNABORTED)) + if (rxrpc_abort_call("CMD", call, 0, abort_code, -ECONNABORTED)) ret = rxrpc_send_abort_packet(call); } else if (cmd != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; @@ -623,7 +623,8 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, read_unlock_bh(&call->state_lock); break; default: - /* Request phase complete for this client call */ + /* Request phase complete for this client call */ + trace_rxrpc_rx_eproto(call, 0, tracepoint_string("late_send")); ret = -EPROTO; break; } @@ -642,20 +643,24 @@ EXPORT_SYMBOL(rxrpc_kernel_send_data); * @error: Local error value * @why: 3-char string indicating why. * - * Allow a kernel service to abort a call, if it's still in an abortable state. + * Allow a kernel service to abort a call, if it's still in an abortable state + * and return true if the call was aborted, false if it was already complete. */ -void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, +bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, u32 abort_code, int error, const char *why) { + bool aborted; + _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why); mutex_lock(&call->user_mutex); - if (rxrpc_abort_call(why, call, 0, abort_code, error)) + aborted = rxrpc_abort_call(why, call, 0, abort_code, error); + if (aborted) rxrpc_send_abort_packet(call); mutex_unlock(&call->user_mutex); - _leave(""); + return aborted; } EXPORT_SYMBOL(rxrpc_kernel_abort_call); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 3d6b9286c203..ca193af8634a 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -508,9 +508,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, get_random_bytes(&fnew->hashrnd, 4); } - fnew->perturb_timer.function = flow_perturbation; - fnew->perturb_timer.data = (unsigned long)fnew; - init_timer_deferrable(&fnew->perturb_timer); + setup_deferrable_timer(&fnew->perturb_timer, flow_perturbation, + (unsigned long)fnew); tcf_exts_change(tp, &fnew->exts, &e); tcf_em_tree_change(tp, &fnew->ematches, &t); diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 3b86a97bc67c..593183a5b5b5 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -58,7 +58,6 @@ struct choke_sched_data { /* Variables */ struct red_vars vars; - struct tcf_proto __rcu *filter_list; struct { u32 prob_drop; /* Early probability drops */ u32 prob_mark; /* Early probability marks */ @@ -152,11 +151,6 @@ static inline void choke_set_classid(struct sk_buff *skb, u16 classid) choke_skb_cb(skb)->classid = classid; } -static u16 choke_get_classid(const struct sk_buff *skb) -{ - return choke_skb_cb(skb)->classid; -} - /* * Compare flow of two packets * Returns true only if source and destination address and port match. @@ -188,40 +182,6 @@ static bool choke_match_flow(struct sk_buff *skb1, } /* - * Classify flow using either: - * 1. pre-existing classification result in skb - * 2. fast internal classification - * 3. use TC filter based classification - */ -static bool choke_classify(struct sk_buff *skb, - struct Qdisc *sch, int *qerr) - -{ - struct choke_sched_data *q = qdisc_priv(sch); - struct tcf_result res; - struct tcf_proto *fl; - int result; - - fl = rcu_dereference_bh(q->filter_list); - result = tc_classify(skb, fl, &res, false); - if (result >= 0) { -#ifdef CONFIG_NET_CLS_ACT - switch (result) { - case TC_ACT_STOLEN: - case TC_ACT_QUEUED: - *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - case TC_ACT_SHOT: - return false; - } -#endif - choke_set_classid(skb, TC_H_MIN(res.classid)); - return true; - } - - return false; -} - -/* * Select a packet at random from queue * HACK: since queue can have holes from previous deletion; retry several * times to find a random skb but then just give up and return the head @@ -257,25 +217,15 @@ static bool choke_match_random(const struct choke_sched_data *q, return false; oskb = choke_peek_random(q, pidx); - if (rcu_access_pointer(q->filter_list)) - return choke_get_classid(nskb) == choke_get_classid(oskb); - return choke_match_flow(oskb, nskb); } static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { - int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; struct choke_sched_data *q = qdisc_priv(sch); const struct red_parms *p = &q->parms; - if (rcu_access_pointer(q->filter_list)) { - /* If using external classifiers, get result and record it. */ - if (!choke_classify(skb, sch, &ret)) - goto other_drop; /* Packet was eaten by filter */ - } - choke_skb_cb(skb)->keys_valid = 0; /* Compute average queue usage (see RED) */ q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen); @@ -339,12 +289,6 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, congestion_drop: qdisc_drop(skb, sch, to_free); return NET_XMIT_CN; - -other_drop: - if (ret & __NET_XMIT_BYPASS) - qdisc_qstats_drop(sch); - __qdisc_drop(skb, to_free); - return ret; } static struct sk_buff *choke_dequeue(struct Qdisc *sch) @@ -538,7 +482,6 @@ static void choke_destroy(struct Qdisc *sch) { struct choke_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(&q->filter_list); choke_free(q->tab); } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 42e8c8615e65..b00e02c139de 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -714,9 +714,8 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) struct sfq_sched_data *q = qdisc_priv(sch); int i; - q->perturb_timer.function = sfq_perturbation; - q->perturb_timer.data = (unsigned long)sch; - init_timer_deferrable(&q->perturb_timer); + setup_deferrable_timer(&q->perturb_timer, sfq_perturbation, + (unsigned long)sch); for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) { q->dep[i].next = i + SFQ_MAX_FLOWS; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 0439a1a68367..a9708da28eb5 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -246,6 +246,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a if (!sctp_ulpq_init(&asoc->ulpq, asoc)) goto fail_init; + if (sctp_stream_new(asoc, gfp)) + goto fail_init; + /* Assume that peer would support both address types unless we are * told otherwise. */ @@ -264,7 +267,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* AUTH related initializations */ INIT_LIST_HEAD(&asoc->endpoint_shared_keys); if (sctp_auth_asoc_copy_shkeys(ep, asoc, gfp)) - goto fail_init; + goto stream_free; asoc->active_key_id = ep->active_key_id; asoc->prsctp_enable = ep->prsctp_enable; @@ -287,6 +290,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a return asoc; +stream_free: + sctp_stream_free(asoc->stream); fail_init: sock_put(asoc->base.sk); sctp_endpoint_put(asoc->ep); @@ -1407,7 +1412,7 @@ sctp_assoc_choose_alter_transport(struct sctp_association *asoc, /* Update the association's pmtu and frag_point by going through all the * transports. This routine is called when a transport's PMTU has changed. */ -void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) +void sctp_assoc_sync_pmtu(struct sctp_association *asoc) { struct sctp_transport *t; __u32 pmtu = 0; @@ -1419,8 +1424,8 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { if (t->pmtu_pending && t->dst) { - sctp_transport_update_pmtu(sk, t, - SCTP_TRUNC4(dst_mtu(t->dst))); + sctp_transport_update_pmtu( + t, SCTP_TRUNC4(dst_mtu(t->dst))); t->pmtu_pending = 0; } if (!pmtu || (t->pathmtu < pmtu)) diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index e3621cb4827f..697721a7a3f1 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -306,14 +306,24 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) && time_after(jiffies, chunk->msg->expires_at)) { - if (chunk->sent_count) + struct sctp_stream_out *streamout = + &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream]; + + if (chunk->sent_count) { chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++; - else + streamout->abandoned_sent[SCTP_PR_INDEX(TTL)]++; + } else { chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; + streamout->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; + } return 1; } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && chunk->sent_count > chunk->sinfo.sinfo_timetolive) { + struct sctp_stream_out *streamout = + &chunk->asoc->stream->out[chunk->sinfo.sinfo_stream]; + chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; + streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++; return 1; } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) && chunk->msg->expires_at && diff --git a/net/sctp/input.c b/net/sctp/input.c index 2a28ab20487f..0e06a278d2a9 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -401,10 +401,10 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, if (t->param_flags & SPP_PMTUD_ENABLE) { /* Update transports view of the MTU */ - sctp_transport_update_pmtu(sk, t, pmtu); + sctp_transport_update_pmtu(t, pmtu); /* Update association pmtu. */ - sctp_assoc_sync_pmtu(sk, asoc); + sctp_assoc_sync_pmtu(asoc); } /* Retransmit with the new pmtu setting. diff --git a/net/sctp/output.c b/net/sctp/output.c index 1224421036b3..1409a875ad8e 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -86,43 +86,53 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, { struct sctp_transport *tp = packet->transport; struct sctp_association *asoc = tp->asoc; + struct sock *sk; pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); - packet->vtag = vtag; - if (asoc && tp->dst) { - struct sock *sk = asoc->base.sk; - - rcu_read_lock(); - if (__sk_dst_get(sk) != tp->dst) { - dst_hold(tp->dst); - sk_setup_caps(sk, tp->dst); - } - - if (sk_can_gso(sk)) { - struct net_device *dev = tp->dst->dev; + /* do the following jobs only once for a flush schedule */ + if (!sctp_packet_empty(packet)) + return; - packet->max_size = dev->gso_max_size; - } else { - packet->max_size = asoc->pathmtu; - } - rcu_read_unlock(); + /* set packet max_size with pathmtu */ + packet->max_size = tp->pathmtu; + if (!asoc) + return; - } else { - packet->max_size = tp->pathmtu; + /* update dst or transport pathmtu if in need */ + sk = asoc->base.sk; + if (!sctp_transport_dst_check(tp)) { + sctp_transport_route(tp, NULL, sctp_sk(sk)); + if (asoc->param_flags & SPP_PMTUD_ENABLE) + sctp_assoc_sync_pmtu(asoc); + } else if (!sctp_transport_pmtu_check(tp)) { + if (asoc->param_flags & SPP_PMTUD_ENABLE) + sctp_assoc_sync_pmtu(asoc); } - if (ecn_capable && sctp_packet_empty(packet)) { - struct sctp_chunk *chunk; + /* If there a is a prepend chunk stick it on the list before + * any other chunks get appended. + */ + if (ecn_capable) { + struct sctp_chunk *chunk = sctp_get_ecne_prepend(asoc); - /* If there a is a prepend chunk stick it on the list before - * any other chunks get appended. - */ - chunk = sctp_get_ecne_prepend(asoc); if (chunk) sctp_packet_append_chunk(packet, chunk); } + + if (!tp->dst) + return; + + /* set packet max_size with gso_max_size if gso is enabled*/ + rcu_read_lock(); + if (__sk_dst_get(sk) != tp->dst) { + dst_hold(tp->dst); + sk_setup_caps(sk, tp->dst); + } + packet->max_size = sk_can_gso(sk) ? tp->dst->dev->gso_max_size + : asoc->pathmtu; + rcu_read_unlock(); } /* Initialize the packet structure. */ @@ -582,12 +592,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) sh->vtag = htonl(packet->vtag); sh->checksum = 0; - /* update dst if in need */ - if (!sctp_transport_dst_check(tp)) { - sctp_transport_route(tp, NULL, sctp_sk(sk)); - if (asoc && asoc->param_flags & SPP_PMTUD_ENABLE) - sctp_assoc_sync_pmtu(sk, asoc); - } + /* drop packet if no dst */ dst = dst_clone(tp->dst); if (!dst) { IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); @@ -704,7 +709,7 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, */ if ((sctp_sk(asoc->base.sk)->nodelay || inflight == 0) && - !chunk->msg->force_delay) + !asoc->force_delay) /* Nothing unacked */ return SCTP_XMIT_OK; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 025ccff67072..fe4c3d462f6e 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -353,6 +353,8 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, struct sctp_chunk *chk, *temp; list_for_each_entry_safe(chk, temp, queue, transmitted_list) { + struct sctp_stream_out *streamout; + if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) continue; @@ -361,8 +363,10 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, sctp_insert_list(&asoc->outqueue.abandoned, &chk->transmitted_list); + streamout = &asoc->stream->out[chk->sinfo.sinfo_stream]; asoc->sent_cnt_removable--; asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; + streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; if (!chk->tsn_gap_acked) { if (chk->transport) @@ -396,6 +400,12 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, q->out_qlen -= chk->skb->len; asoc->sent_cnt_removable--; asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; + if (chk->sinfo.sinfo_stream < asoc->stream->outcnt) { + struct sctp_stream_out *streamout = + &asoc->stream->out[chk->sinfo.sinfo_stream]; + + streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; + } msg_len -= SCTP_DATA_SNDSIZE(chk) + sizeof(struct sk_buff) + @@ -1026,8 +1036,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) /* RFC 2960 6.5 Every DATA chunk MUST carry a valid * stream identifier. */ - if (chunk->sinfo.sinfo_stream >= - asoc->c.sinit_num_ostreams) { + if (chunk->sinfo.sinfo_stream >= asoc->stream->outcnt) { /* Mark as failed send. */ sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM); diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 206377fe91ec..a0b29d43627f 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -361,8 +361,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) sctp_seq_dump_remote_addrs(seq, assoc); seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d " "%8d %8d %8d %8d", - assoc->hbinterval, assoc->c.sinit_max_instreams, - assoc->c.sinit_num_ostreams, assoc->max_retrans, + assoc->hbinterval, assoc->stream->incnt, + assoc->stream->outcnt, assoc->max_retrans, assoc->init_retries, assoc->shutdown_retries, assoc->rtx_data_chunks, atomic_read(&sk->sk_wmem_alloc), diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 969a30c7bb54..118faff6a332 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2460,15 +2460,10 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, * association. */ if (!asoc->temp) { - int error; - - asoc->stream = sctp_stream_new(asoc->c.sinit_max_instreams, - asoc->c.sinit_num_ostreams, gfp); - if (!asoc->stream) + if (sctp_stream_init(asoc, gfp)) goto clean_up; - error = sctp_assoc_set_id(asoc, gfp); - if (error) + if (sctp_assoc_set_id(asoc, gfp)) goto clean_up; } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index ab1374fa5ab0..4f5e6cfc7f60 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3955,7 +3955,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, /* Silently discard the chunk if stream-id is not valid */ sctp_walk_fwdtsn(skip, chunk) { - if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams) + if (ntohs(skip->stream) >= asoc->stream->incnt) goto discard_noforce; } @@ -4026,7 +4026,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( /* Silently discard the chunk if stream-id is not valid */ sctp_walk_fwdtsn(skip, chunk) { - if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams) + if (ntohs(skip->stream) >= asoc->stream->incnt) goto gen_shutdown; } @@ -6362,7 +6362,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, * and discard the DATA chunk. */ sid = ntohs(data_hdr->stream); - if (sid >= asoc->c.sinit_max_instreams) { + if (sid >= asoc->stream->incnt) { /* Mark tsn as received even though we drop it */ sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 72cc3ecf6516..8e56df8d175d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1907,7 +1907,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) } if (asoc->pmtu_pending) - sctp_assoc_pending_pmtu(sk, asoc); + sctp_assoc_pending_pmtu(asoc); /* If fragmentation is disabled and the message length exceeds the * association fragmentation point, return EMSGSIZE. The I-D @@ -1920,7 +1920,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) } /* Check for invalid stream. */ - if (sinfo->sinfo_stream >= asoc->c.sinit_num_ostreams) { + if (sinfo->sinfo_stream >= asoc->stream->outcnt) { err = -EINVAL; goto out_free; } @@ -1965,7 +1965,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) err = PTR_ERR(datamsg); goto out_free; } - datamsg->force_delay = !!(msg->msg_flags & MSG_MORE); + asoc->force_delay = !!(msg->msg_flags & MSG_MORE); /* Now send the (possibly) fragmented message. */ list_for_each_entry(chunk, &datamsg->chunks, frag_list) { @@ -2435,7 +2435,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, if ((params->spp_flags & SPP_PMTUD_DISABLE) && params->spp_pathmtu) { if (trans) { trans->pathmtu = params->spp_pathmtu; - sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc); + sctp_assoc_sync_pmtu(asoc); } else if (asoc) { asoc->pathmtu = params->spp_pathmtu; } else { @@ -2451,7 +2451,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, (trans->param_flags & ~SPP_PMTUD) | pmtud_change; if (update) { sctp_transport_pmtu(trans, sctp_opt2sk(sp)); - sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc); + sctp_assoc_sync_pmtu(asoc); } } else if (asoc) { asoc->param_flags = @@ -4497,8 +4497,8 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, info->sctpi_rwnd = asoc->a_rwnd; info->sctpi_unackdata = asoc->unack_data; info->sctpi_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); - info->sctpi_instrms = asoc->c.sinit_max_instreams; - info->sctpi_outstrms = asoc->c.sinit_num_ostreams; + info->sctpi_instrms = asoc->stream->incnt; + info->sctpi_outstrms = asoc->stream->outcnt; list_for_each(pos, &asoc->base.inqueue.in_chunk_list) info->sctpi_inqueue++; list_for_each(pos, &asoc->outqueue.out_chunk_list) @@ -4727,8 +4727,8 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, status.sstat_unackdata = asoc->unack_data; status.sstat_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); - status.sstat_instrms = asoc->c.sinit_max_instreams; - status.sstat_outstrms = asoc->c.sinit_num_ostreams; + status.sstat_instrms = asoc->stream->incnt; + status.sstat_outstrms = asoc->stream->outcnt; status.sstat_fragmentation_point = asoc->frag_point; status.sstat_primary.spinfo_assoc_id = sctp_assoc2id(transport->asoc); memcpy(&status.sstat_primary.spinfo_address, &transport->ipaddr, @@ -6576,6 +6576,61 @@ out: return retval; } +static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_stream_out *streamout; + struct sctp_association *asoc; + struct sctp_prstatus params; + int retval = -EINVAL; + int policy; + + if (len < sizeof(params)) + goto out; + + len = sizeof(params); + if (copy_from_user(¶ms, optval, len)) { + retval = -EFAULT; + goto out; + } + + policy = params.sprstat_policy; + if (policy & ~SCTP_PR_SCTP_MASK) + goto out; + + asoc = sctp_id2assoc(sk, params.sprstat_assoc_id); + if (!asoc || params.sprstat_sid >= asoc->stream->outcnt) + goto out; + + streamout = &asoc->stream->out[params.sprstat_sid]; + if (policy == SCTP_PR_SCTP_NONE) { + params.sprstat_abandoned_unsent = 0; + params.sprstat_abandoned_sent = 0; + for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) { + params.sprstat_abandoned_unsent += + streamout->abandoned_unsent[policy]; + params.sprstat_abandoned_sent += + streamout->abandoned_sent[policy]; + } + } else { + params.sprstat_abandoned_unsent = + streamout->abandoned_unsent[__SCTP_PR_INDEX(policy)]; + params.sprstat_abandoned_sent = + streamout->abandoned_sent[__SCTP_PR_INDEX(policy)]; + } + + if (put_user(len, optlen) || copy_to_user(optval, ¶ms, len)) { + retval = -EFAULT; + goto out; + } + + retval = 0; + +out: + return retval; +} + static int sctp_getsockopt_reconfig_supported(struct sock *sk, int len, char __user *optval, int __user *optlen) @@ -6825,6 +6880,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_pr_assocstatus(sk, len, optval, optlen); break; + case SCTP_PR_STREAM_STATUS: + retval = sctp_getsockopt_pr_streamstatus(sk, len, optval, + optlen); + break; case SCTP_RECONFIG_SUPPORTED: retval = sctp_getsockopt_reconfig_supported(sk, len, optval, optlen); @@ -7518,9 +7577,12 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, if (sk->sk_shutdown & RCV_SHUTDOWN) break; - if (sk_can_busy_loop(sk) && - sk_busy_loop(sk, noblock)) - continue; + if (sk_can_busy_loop(sk)) { + sk_busy_loop(sk, noblock); + + if (!skb_queue_empty(&sk->sk_receive_queue)) + continue; + } /* User doesn't want to wait. */ error = -EAGAIN; diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 961d0a1e99d1..eff6008a32ba 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -35,33 +35,60 @@ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> -struct sctp_stream *sctp_stream_new(__u16 incnt, __u16 outcnt, gfp_t gfp) +int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp) { struct sctp_stream *stream; int i; stream = kzalloc(sizeof(*stream), gfp); if (!stream) - return NULL; + return -ENOMEM; - stream->outcnt = outcnt; + stream->outcnt = asoc->c.sinit_num_ostreams; stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp); if (!stream->out) { kfree(stream); - return NULL; + return -ENOMEM; } for (i = 0; i < stream->outcnt; i++) stream->out[i].state = SCTP_STREAM_OPEN; - stream->incnt = incnt; + asoc->stream = stream; + + return 0; +} + +int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp) +{ + struct sctp_stream *stream = asoc->stream; + int i; + + /* Initial stream->out size may be very big, so free it and alloc + * a new one with new outcnt to save memory. + */ + kfree(stream->out); + stream->outcnt = asoc->c.sinit_num_ostreams; + stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp); + if (!stream->out) + goto nomem; + + for (i = 0; i < stream->outcnt; i++) + stream->out[i].state = SCTP_STREAM_OPEN; + + stream->incnt = asoc->c.sinit_max_instreams; stream->in = kcalloc(stream->incnt, sizeof(*stream->in), gfp); if (!stream->in) { kfree(stream->out); - kfree(stream); - return NULL; + goto nomem; } - return stream; + return 0; + +nomem: + asoc->stream = NULL; + kfree(stream); + + return -ENOMEM; } void sctp_stream_free(struct sctp_stream *stream) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 3379668af368..721eeebfcd8a 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -251,14 +251,13 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } -void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 pmtu) +void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) { - struct dst_entry *dst; + struct dst_entry *dst = sctp_transport_dst_check(t); if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n", - __func__, pmtu, - SCTP_DEFAULT_MINSEGMENT); + __func__, pmtu, SCTP_DEFAULT_MINSEGMENT); /* Use default minimum segment size and disable * pmtu discovery on this transport. */ @@ -267,17 +266,13 @@ void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 p t->pathmtu = pmtu; } - dst = sctp_transport_dst_check(t); - if (!dst) - t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); - if (dst) { - dst->ops->update_pmtu(dst, sk, NULL, pmtu); - + dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu); dst = sctp_transport_dst_check(t); - if (!dst) - t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); } + + if (!dst) + t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk); } /* Caches the dst entry and source address for a transport's destination diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index a95f74bb5569..7e1f0e24d177 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -11,6 +11,7 @@ #ifndef _SMC_IB_H #define _SMC_IB_H +#include <linux/interrupt.h> #include <linux/if_ether.h> #include <rdma/ib_verbs.h> diff --git a/net/socket.c b/net/socket.c index 985ef06792d6..eea997036ada 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3356,3 +3356,49 @@ int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) return sock->ops->shutdown(sock, how); } EXPORT_SYMBOL(kernel_sock_shutdown); + +/* This routine returns the IP overhead imposed by a socket i.e. + * the length of the underlying IP header, depending on whether + * this is an IPv4 or IPv6 socket and the length from IP options turned + * on at the socket. + */ +u32 kernel_sock_ip_overhead(struct sock *sk) +{ + struct inet_sock *inet; + struct ip_options_rcu *opt; + u32 overhead = 0; + bool owned_by_user; +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6_pinfo *np; + struct ipv6_txoptions *optv6 = NULL; +#endif /* IS_ENABLED(CONFIG_IPV6) */ + + if (!sk) + return overhead; + + owned_by_user = sock_owned_by_user(sk); + switch (sk->sk_family) { + case AF_INET: + inet = inet_sk(sk); + overhead += sizeof(struct iphdr); + opt = rcu_dereference_protected(inet->inet_opt, + owned_by_user); + if (opt) + overhead += opt->opt.optlen; + return overhead; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + np = inet6_sk(sk); + overhead += sizeof(struct ipv6hdr); + if (np) + optv6 = rcu_dereference_protected(np->opt, + owned_by_user); + if (optv6) + overhead += (optv6->opt_flen + optv6->opt_nflen); + return overhead; +#endif /* IS_ENABLED(CONFIG_IPV6) */ + default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */ + return overhead; + } +} +EXPORT_SYMBOL(kernel_sock_ip_overhead); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 8931e33b6541..2b720fa35c4f 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1635,6 +1635,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv, xprt = &svsk->sk_xprt; svc_xprt_init(net, &svc_tcp_bc_class, xprt, serv); + set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags); serv->sv_bc_xprt = xprt; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index c13a5c35ce14..fc8f14c7bfec 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -127,6 +127,7 @@ static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv, xprt = &cma_xprt->sc_xprt; svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv); + set_bit(XPT_CONG_CTRL, &xprt->xpt_flags); serv->sv_bc_xprt = xprt; dprintk("svcrdma: %s(%p)\n", __func__, xprt); diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 9be6592e4a6f..bd0aac87b41a 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -416,6 +416,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns); + tipc_subscrp_get(s); list_add(&s->nameseq_list, &nseq->subscriptions); if (!sseq) @@ -787,6 +788,7 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) if (seq != NULL) { spin_lock_bh(&seq->lock); list_del_init(&s->nameseq_list); + tipc_subscrp_put(s); if (!seq->first_free && list_empty(&seq->subscriptions)) { hlist_del_init_rcu(&seq->ns_list); kfree(seq->sseqs); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 7130e73bd42c..15f6ce7bf868 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2511,6 +2511,28 @@ static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } } +static int tipc_socketpair(struct socket *sock1, struct socket *sock2) +{ + struct tipc_sock *tsk2 = tipc_sk(sock2->sk); + struct tipc_sock *tsk1 = tipc_sk(sock1->sk); + u32 onode = tipc_own_addr(sock_net(sock1->sk)); + + tsk1->peer.family = AF_TIPC; + tsk1->peer.addrtype = TIPC_ADDR_ID; + tsk1->peer.scope = TIPC_NODE_SCOPE; + tsk1->peer.addr.id.ref = tsk2->portid; + tsk1->peer.addr.id.node = onode; + tsk2->peer.family = AF_TIPC; + tsk2->peer.addrtype = TIPC_ADDR_ID; + tsk2->peer.scope = TIPC_NODE_SCOPE; + tsk2->peer.addr.id.ref = tsk1->portid; + tsk2->peer.addr.id.node = onode; + + tipc_sk_finish_conn(tsk1, tsk2->portid, onode); + tipc_sk_finish_conn(tsk2, tsk1->portid, onode); + return 0; +} + /* Protocol switches for the various types of TIPC sockets */ static const struct proto_ops msg_ops = { @@ -2519,7 +2541,7 @@ static const struct proto_ops msg_ops = { .release = tipc_release, .bind = tipc_bind, .connect = tipc_connect, - .socketpair = sock_no_socketpair, + .socketpair = tipc_socketpair, .accept = sock_no_accept, .getname = tipc_getname, .poll = tipc_poll, @@ -2540,7 +2562,7 @@ static const struct proto_ops packet_ops = { .release = tipc_release, .bind = tipc_bind, .connect = tipc_connect, - .socketpair = sock_no_socketpair, + .socketpair = tipc_socketpair, .accept = tipc_accept, .getname = tipc_getname, .poll = tipc_poll, @@ -2561,7 +2583,7 @@ static const struct proto_ops stream_ops = { .release = tipc_release, .bind = tipc_bind, .connect = tipc_connect, - .socketpair = sock_no_socketpair, + .socketpair = tipc_socketpair, .accept = tipc_accept, .getname = tipc_getname, .poll = tipc_poll, diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 271cd66e4b3b..0bf91cd3733c 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -54,8 +54,6 @@ struct tipc_subscriber { static void tipc_subscrp_delete(struct tipc_subscription *sub); static void tipc_subscrb_put(struct tipc_subscriber *subscriber); -static void tipc_subscrp_put(struct tipc_subscription *subscription); -static void tipc_subscrp_get(struct tipc_subscription *subscription); /** * htohl - convert value to endianness used by destination @@ -125,7 +123,6 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, { struct tipc_name_seq seq; - tipc_subscrp_get(sub); tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq); if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper)) return; @@ -135,7 +132,6 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref, node); - tipc_subscrp_put(sub); } static void tipc_subscrp_timeout(unsigned long data) @@ -145,6 +141,7 @@ static void tipc_subscrp_timeout(unsigned long data) spin_lock_bh(&subscriber->lock); tipc_nametbl_unsubscribe(sub); + list_del(&sub->subscrp_list); spin_unlock_bh(&subscriber->lock); /* Notify subscriber of timeout */ @@ -177,20 +174,17 @@ static void tipc_subscrp_kref_release(struct kref *kref) struct tipc_net *tn = net_generic(sub->net, tipc_net_id); struct tipc_subscriber *subscriber = sub->subscriber; - spin_lock_bh(&subscriber->lock); - list_del(&sub->subscrp_list); atomic_dec(&tn->subscription_count); - spin_unlock_bh(&subscriber->lock); kfree(sub); tipc_subscrb_put(subscriber); } -static void tipc_subscrp_put(struct tipc_subscription *subscription) +void tipc_subscrp_put(struct tipc_subscription *subscription) { kref_put(&subscription->kref, tipc_subscrp_kref_release); } -static void tipc_subscrp_get(struct tipc_subscription *subscription) +void tipc_subscrp_get(struct tipc_subscription *subscription) { kref_get(&subscription->kref); } @@ -210,11 +204,8 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, continue; tipc_nametbl_unsubscribe(sub); - tipc_subscrp_get(sub); - spin_unlock_bh(&subscriber->lock); + list_del(&sub->subscrp_list); tipc_subscrp_delete(sub); - tipc_subscrp_put(sub); - spin_lock_bh(&subscriber->lock); if (s) break; diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index ffdc214c117a..ee52957dc952 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -78,4 +78,7 @@ u32 tipc_subscrp_convert_seq_type(u32 type, int swap); int tipc_topsrv_start(struct net *net); void tipc_topsrv_stop(struct net *net); +void tipc_subscrp_put(struct tipc_subscription *subscription); +void tipc_subscrp_get(struct tipc_subscription *subscription); + #endif diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 928691c43408..6a7fe7660551 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -996,7 +996,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) unsigned int hash; struct unix_address *addr; struct hlist_head *list; - struct path path = { NULL, NULL }; + struct path path = { }; err = -EINVAL; if (sunaddr->sun_family != AF_UNIX) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 4be4fbbc0b50..10ae7823a19d 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -96,31 +96,23 @@ static int PROTOCOL_OVERRIDE = -1; static s32 vmci_transport_error_to_vsock_error(s32 vmci_error) { - int err; - switch (vmci_error) { case VMCI_ERROR_NO_MEM: - err = ENOMEM; - break; + return -ENOMEM; case VMCI_ERROR_DUPLICATE_ENTRY: case VMCI_ERROR_ALREADY_EXISTS: - err = EADDRINUSE; - break; + return -EADDRINUSE; case VMCI_ERROR_NO_ACCESS: - err = EPERM; - break; + return -EPERM; case VMCI_ERROR_NO_RESOURCES: - err = ENOBUFS; - break; + return -ENOBUFS; case VMCI_ERROR_INVALID_RESOURCE: - err = EHOSTUNREACH; - break; + return -EHOSTUNREACH; case VMCI_ERROR_INVALID_ARGS: default: - err = EINVAL; + break; } - - return err > 0 ? -err : err; + return -EINVAL; } static u32 vmci_transport_peer_rid(u32 peer_cid) diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 16b6b5988be9..570a2b67ca10 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -132,12 +132,10 @@ static int wiphy_resume(struct device *dev) /* Age scan results with time spent in suspend */ cfg80211_bss_age(rdev, get_seconds() - rdev->suspend_at); - if (rdev->ops->resume) { - rtnl_lock(); - if (rdev->wiphy.registered) - ret = rdev_resume(rdev); - rtnl_unlock(); - } + rtnl_lock(); + if (rdev->wiphy.registered && rdev->ops->resume) + ret = rdev_resume(rdev); + rtnl_unlock(); return ret; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8e696eafd067..4f7e62ddc17e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -412,7 +412,14 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es up = nla_data(rp); ulen = xfrm_replay_state_esn_len(up); - if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) + /* Check the overall length and the internal bitmap length to avoid + * potential overflow. */ + if (nla_len(rp) < ulen || + xfrm_replay_state_esn_len(replay_esn) != ulen || + replay_esn->bmp_len != up->bmp_len) + return -EINVAL; + + if (up->replay_window > up->bmp_len * sizeof(__u32) * 8) return -EINVAL; return 0; |