diff options
277 files changed, 8858 insertions, 3321 deletions
diff --git a/.clang-format b/.clang-format index f49620f506f1..3a4c8220df2f 100644 --- a/.clang-format +++ b/.clang-format @@ -366,14 +366,14 @@ ForEachMacros: - 'rhl_for_each_entry_rcu' - 'rhl_for_each_rcu' - 'rht_for_each' - - 'rht_for_each_continue' + - 'rht_for_each_from' - 'rht_for_each_entry' - - 'rht_for_each_entry_continue' + - 'rht_for_each_entry_from' - 'rht_for_each_entry_rcu' - - 'rht_for_each_entry_rcu_continue' + - 'rht_for_each_entry_rcu_from' - 'rht_for_each_entry_safe' - 'rht_for_each_rcu' - - 'rht_for_each_rcu_continue' + - 'rht_for_each_rcu_from' - '__rq_for_each_bio' - 'rq_for_each_segment' - 'scsi_for_each_prot_sg' diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index acdfb5d2bcaa..5eedc6941ce5 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -81,6 +81,11 @@ fib_multipath_hash_policy - INTEGER 0 - Layer 3 1 - Layer 4 +fib_sync_mem - UNSIGNED INTEGER + Amount of dirty memory from fib entries that can be backlogged before + synchronize_rcu is forced. + Default: 512kB Minimum: 64kB Maximum: 64MB + ip_forward_update_priority - INTEGER Whether to update SKB priority from "TOS" field in IPv4 header after it is forwarded. The new SKB priority is mapped from TOS field value @@ -1918,6 +1923,16 @@ echo_ignore_all - BOOLEAN requests sent to it over the IPv6 protocol. Default: 0 +echo_ignore_multicast - BOOLEAN + If set non-zero, then the kernel will ignore all ICMP ECHO + requests sent to it over the IPv6 protocol via multicast. + Default: 0 + +echo_ignore_anycast - BOOLEAN + If set non-zero, then the kernel will ignore all ICMP ECHO + requests sent to it over the IPv6 protocol destined to anycast address. + Default: 0 + xfrm6_gc_thresh - INTEGER The threshold at which we will start garbage collecting for IPv6 destination cache entries. 
At twice this value the system will diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 90ba9f4c03f3..92b8aafb8bb4 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1999,22 +1999,18 @@ out: static const struct genl_ops nbd_connect_genl_ops[] = { { .cmd = NBD_CMD_CONNECT, - .policy = nbd_attr_policy, .doit = nbd_genl_connect, }, { .cmd = NBD_CMD_DISCONNECT, - .policy = nbd_attr_policy, .doit = nbd_genl_disconnect, }, { .cmd = NBD_CMD_RECONFIGURE, - .policy = nbd_attr_policy, .doit = nbd_genl_reconfigure, }, { .cmd = NBD_CMD_STATUS, - .policy = nbd_attr_policy, .doit = nbd_genl_status, }, }; @@ -2031,6 +2027,7 @@ static struct genl_family nbd_genl_family __ro_after_init = { .ops = nbd_connect_genl_ops, .n_ops = ARRAY_SIZE(nbd_connect_genl_ops), .maxattr = NBD_ATTR_MAX, + .policy = nbd_attr_policy, .mcgrps = nbd_mcast_grps, .n_mcgrps = ARRAY_SIZE(nbd_mcast_grps), }; diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index a922db58be14..2b07032dbdda 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -423,8 +423,7 @@ tx_finish: static u16 hfi1_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); struct opa_vnic_skb_mdata *mdata; diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c index ae70cd18903e..aeff68f582d3 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c @@ -95,8 +95,7 @@ static netdev_tx_t opa_netdev_start_xmit(struct sk_buff *skb, } static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct opa_vnic_adapter *adapter = 
opa_vnic_priv(netdev); struct opa_vnic_skb_mdata *mdata; @@ -106,8 +105,7 @@ static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb, mdata = skb_push(skb, sizeof(*mdata)); mdata->entropy = opa_vnic_calc_entropy(skb); mdata->vl = opa_vnic_get_vl(adapter, skb); - rc = adapter->rn_ops->ndo_select_queue(netdev, skb, - sb_dev, fallback); + rc = adapter->rn_ops->ndo_select_queue(netdev, skb, sb_dev); skb_pull(skb, sizeof(*mdata)); return rc; } diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index a7b275ea5de1..7e0f419c14f8 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -1888,8 +1888,9 @@ static u32 isdn_ppp_mp_get_seq(int short_seq, return seq; } -struct sk_buff *isdn_ppp_mp_discard(ippp_bundle *mp, - struct sk_buff *from, struct sk_buff *to) +static struct sk_buff *isdn_ppp_mp_discard(ippp_bundle *mp, + struct sk_buff *from, + struct sk_buff *to) { if (from) while (from != to) { @@ -1900,8 +1901,8 @@ struct sk_buff *isdn_ppp_mp_discard(ippp_bundle *mp, return from; } -void isdn_ppp_mp_reassembly(isdn_net_dev *net_dev, isdn_net_local *lp, - struct sk_buff *from, struct sk_buff *to) +static void isdn_ppp_mp_reassembly(isdn_net_dev *net_dev, isdn_net_local *lp, + struct sk_buff *from, struct sk_buff *to) { ippp_bundle *mp = net_dev->pb; int proto; diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 7a96d168efc4..bc42f131f47c 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -505,6 +505,7 @@ source "drivers/net/hyperv/Kconfig" config NETDEVSIM tristate "Simulated networking device" depends on DEBUG_FS + select NET_DEVLINK help This driver is a developer testing tool and software model that can be used to test various control path networking APIs, especially diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b59708c35faf..8ddbada9e281 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4114,8 +4114,7 @@ 
static inline int bond_slave_override(struct bonding *bond, static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { /* This helper function exists to help dev_pick_tx get the correct * destination queue. Using a helper function skips a call to diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index c8e3f05e1d72..4ccb3239f5f7 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1188,10 +1188,11 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) if (ret) goto out_mdio; - pr_info("Starfighter 2 top: %x.%02x, core: %x.%02x base: 0x%p, IRQs: %d, %d\n", - priv->hw_params.top_rev >> 8, priv->hw_params.top_rev & 0xff, - priv->hw_params.core_rev >> 8, priv->hw_params.core_rev & 0xff, - priv->core, priv->irq0, priv->irq1); + dev_info(&pdev->dev, + "Starfighter 2 top: %x.%02x, core: %x.%02x, IRQs: %d, %d\n", + priv->hw_params.top_rev >> 8, priv->hw_params.top_rev & 0xff, + priv->hw_params.core_rev >> 8, priv->hw_params.core_rev & 0xff, + priv->irq0, priv->irq1); return 0; diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index f4e2db44ad91..65da6709a173 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -4631,14 +4631,6 @@ static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip, return 0; } -static void mv88e6xxx_ports_cmode_init(struct mv88e6xxx_chip *chip) -{ - int i; - - for (i = 0; i < mv88e6xxx_num_ports(chip); i++) - chip->ports[i].cmode = MV88E6XXX_PORT_STS_CMODE_INVALID; -} - static enum dsa_tag_protocol mv88e6xxx_get_tag_protocol(struct dsa_switch *ds, int port) { @@ -4675,8 +4667,6 @@ static const char *mv88e6xxx_drv_probe(struct device *dsa_dev, if (err) goto free; - mv88e6xxx_ports_cmode_init(chip); - mutex_lock(&chip->reg_lock); err = mv88e6xxx_switch_reset(chip); mutex_unlock(&chip->reg_lock); @@ -4915,7 +4905,6 @@ static int 
mv88e6xxx_probe(struct mdio_device *mdiodev) if (err) goto out; - mv88e6xxx_ports_cmode_init(chip); mv88e6xxx_phy_init(chip); if (chip->info->ops->get_eeprom) { diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h index c7bed263a0f4..39c85e98fb92 100644 --- a/drivers/net/dsa/mv88e6xxx/port.h +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -52,7 +52,6 @@ #define MV88E6185_PORT_STS_CMODE_1000BASE_X 0x0005 #define MV88E6185_PORT_STS_CMODE_PHY 0x0006 #define MV88E6185_PORT_STS_CMODE_DISABLED 0x0007 -#define MV88E6XXX_PORT_STS_CMODE_INVALID 0xff /* Offset 0x01: MAC (or PCS or Physical) Control Register */ #define MV88E6XXX_PORT_MAC_CTL 0x01 diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index a6eacf2099c3..71c8cac6e44e 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2258,8 +2258,7 @@ error_drop_packet: } static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { u16 qid; /* we suspect that this is good for in--kernel network services that @@ -2269,7 +2268,7 @@ static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb, if (skb_rx_queue_recorded(skb)) qid = skb_get_rx_queue(skb); else - qid = fallback(dev, skb, NULL); + qid = netdev_pick_tx(dev, skb, NULL); return qid; } diff --git a/drivers/net/ethernet/aquantia/Kconfig b/drivers/net/ethernet/aquantia/Kconfig index 7d623e90dc19..12472c5bb34d 100644 --- a/drivers/net/ethernet/aquantia/Kconfig +++ b/drivers/net/ethernet/aquantia/Kconfig @@ -17,7 +17,8 @@ if NET_VENDOR_AQUANTIA config AQTION tristate "aQuantia AQtion(tm) Support" - depends on PCI && X86_64 + depends on PCI + depends on X86_64 || ARM64 || COMPILE_TEST ---help--- This enables the support for the aQuantia AQtion(tm) Ethernet card. 
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h index 3944ce7f0870..8f35c3f883f0 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h @@ -16,7 +16,7 @@ #define AQ_CFG_TCS_DEF 1U #define AQ_CFG_TXDS_DEF 4096U -#define AQ_CFG_RXDS_DEF 1024U +#define AQ_CFG_RXDS_DEF 2048U #define AQ_CFG_IS_POLLING_DEF 0U @@ -34,10 +34,16 @@ #define AQ_CFG_TCS_MAX 8U #define AQ_CFG_TX_FRAME_MAX (16U * 1024U) -#define AQ_CFG_RX_FRAME_MAX (4U * 1024U) +#define AQ_CFG_RX_FRAME_MAX (2U * 1024U) #define AQ_CFG_TX_CLEAN_BUDGET 256U +#define AQ_CFG_RX_REFILL_THRES 32U + +#define AQ_CFG_RX_HDR_SIZE 256U + +#define AQ_CFG_RX_PAGEORDER 0U + /* LRO */ #define AQ_CFG_IS_LRO_DEF 1U diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index ff83667410bd..059df86e8e37 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -73,6 +73,7 @@ void aq_nic_cfg_start(struct aq_nic_s *self) cfg->tx_itr = aq_itr_tx; cfg->rx_itr = aq_itr_rx; + cfg->rxpageorder = AQ_CFG_RX_PAGEORDER; cfg->is_rss = AQ_CFG_IS_RSS_DEF; cfg->num_rss_queues = AQ_CFG_NUM_RSS_QUEUES_DEF; cfg->aq_rss.base_cpu_number = AQ_CFG_RSS_BASE_CPU_NUM_DEF; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index 8e34c1e49bf2..b1372430f62f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -31,6 +31,7 @@ struct aq_nic_cfg_s { u32 itr; u16 rx_itr; u16 tx_itr; + u32 rxpageorder; u32 num_rss_queues; u32 mtu; u32 flow_control; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index e2ffb159cbe2..c64e2fb5a4f1 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c 
@@ -12,10 +12,89 @@ #include "aq_ring.h" #include "aq_nic.h" #include "aq_hw.h" +#include "aq_hw_utils.h" #include <linux/netdevice.h> #include <linux/etherdevice.h> +static inline void aq_free_rxpage(struct aq_rxpage *rxpage, struct device *dev) +{ + unsigned int len = PAGE_SIZE << rxpage->order; + + dma_unmap_page(dev, rxpage->daddr, len, DMA_FROM_DEVICE); + + /* Drop the ref for being in the ring. */ + __free_pages(rxpage->page, rxpage->order); + rxpage->page = NULL; +} + +static int aq_get_rxpage(struct aq_rxpage *rxpage, unsigned int order, + struct device *dev) +{ + struct page *page; + dma_addr_t daddr; + int ret = -ENOMEM; + + page = dev_alloc_pages(order); + if (unlikely(!page)) + goto err_exit; + + daddr = dma_map_page(dev, page, 0, PAGE_SIZE << order, + DMA_FROM_DEVICE); + + if (unlikely(dma_mapping_error(dev, daddr))) + goto free_page; + + rxpage->page = page; + rxpage->daddr = daddr; + rxpage->order = order; + rxpage->pg_off = 0; + + return 0; + +free_page: + __free_pages(page, order); + +err_exit: + return ret; +} + +static int aq_get_rxpages(struct aq_ring_s *self, struct aq_ring_buff_s *rxbuf, + int order) +{ + int ret; + + if (rxbuf->rxdata.page) { + /* One means ring is the only user and can reuse */ + if (page_ref_count(rxbuf->rxdata.page) > 1) { + /* Try reuse buffer */ + rxbuf->rxdata.pg_off += AQ_CFG_RX_FRAME_MAX; + if (rxbuf->rxdata.pg_off + AQ_CFG_RX_FRAME_MAX <= + (PAGE_SIZE << order)) { + self->stats.rx.pg_flips++; + } else { + /* Buffer exhausted. 
We have other users and + * should release this page and realloc + */ + aq_free_rxpage(&rxbuf->rxdata, + aq_nic_get_dev(self->aq_nic)); + self->stats.rx.pg_losts++; + } + } else { + rxbuf->rxdata.pg_off = 0; + self->stats.rx.pg_reuses++; + } + } + + if (!rxbuf->rxdata.page) { + ret = aq_get_rxpage(&rxbuf->rxdata, order, + aq_nic_get_dev(self->aq_nic)); + return ret; + } + + return 0; +} + static struct aq_ring_s *aq_ring_alloc(struct aq_ring_s *self, struct aq_nic_s *aq_nic) { @@ -81,6 +160,11 @@ struct aq_ring_s *aq_ring_rx_alloc(struct aq_ring_s *self, self->idx = idx; self->size = aq_nic_cfg->rxds; self->dx_size = aq_nic_cfg->aq_hw_caps->rxd_size; + self->page_order = fls(AQ_CFG_RX_FRAME_MAX / PAGE_SIZE + + (AQ_CFG_RX_FRAME_MAX % PAGE_SIZE ? 1 : 0)) - 1; + + if (aq_nic_cfg->rxpageorder > self->page_order) + self->page_order = aq_nic_cfg->rxpageorder; self = aq_ring_alloc(self, aq_nic); if (!self) { @@ -201,22 +285,21 @@ int aq_ring_rx_clean(struct aq_ring_s *self, int budget) { struct net_device *ndev = aq_nic_get_ndev(self->aq_nic); - int err = 0; bool is_rsc_completed = true; + int err = 0; for (; (self->sw_head != self->hw_head) && budget; self->sw_head = aq_ring_next_dx(self, self->sw_head), --budget, ++(*work_done)) { struct aq_ring_buff_s *buff = &self->buff_ring[self->sw_head]; + struct aq_ring_buff_s *buff_ = NULL; struct sk_buff *skb = NULL; unsigned int next_ = 0U; unsigned int i = 0U; - struct aq_ring_buff_s *buff_ = NULL; + u16 hdr_len; - if (buff->is_error) { - __free_pages(buff->page, 0); + if (buff->is_error) continue; - } if (buff->is_cleaned) continue; @@ -246,45 +329,66 @@ int aq_ring_rx_clean(struct aq_ring_s *self, } } + dma_sync_single_range_for_cpu(aq_nic_get_dev(self->aq_nic), + buff->rxdata.daddr, + buff->rxdata.pg_off, + buff->len, DMA_FROM_DEVICE); + /* for single fragment packets use build_skb() */ if (buff->is_eop && buff->len <= AQ_CFG_RX_FRAME_MAX - AQ_SKB_ALIGN) { - skb = build_skb(page_address(buff->page), + skb = 
build_skb(aq_buf_vaddr(&buff->rxdata), AQ_CFG_RX_FRAME_MAX); if (unlikely(!skb)) { err = -ENOMEM; goto err_exit; } - skb_put(skb, buff->len); + page_ref_inc(buff->rxdata.page); } else { - skb = netdev_alloc_skb(ndev, ETH_HLEN); + skb = napi_alloc_skb(napi, AQ_CFG_RX_HDR_SIZE); if (unlikely(!skb)) { err = -ENOMEM; goto err_exit; } - skb_put(skb, ETH_HLEN); - memcpy(skb->data, page_address(buff->page), ETH_HLEN); - skb_add_rx_frag(skb, 0, buff->page, ETH_HLEN, - buff->len - ETH_HLEN, - SKB_TRUESIZE(buff->len - ETH_HLEN)); + hdr_len = buff->len; + if (hdr_len > AQ_CFG_RX_HDR_SIZE) + hdr_len = eth_get_headlen(aq_buf_vaddr(&buff->rxdata), + AQ_CFG_RX_HDR_SIZE); + + memcpy(__skb_put(skb, hdr_len), aq_buf_vaddr(&buff->rxdata), + ALIGN(hdr_len, sizeof(long))); + + if (buff->len - hdr_len > 0) { + skb_add_rx_frag(skb, 0, buff->rxdata.page, + buff->rxdata.pg_off + hdr_len, + buff->len - hdr_len, + AQ_CFG_RX_FRAME_MAX); + page_ref_inc(buff->rxdata.page); + } if (!buff->is_eop) { - for (i = 1U, next_ = buff->next, - buff_ = &self->buff_ring[next_]; - true; next_ = buff_->next, - buff_ = &self->buff_ring[next_], ++i) { - skb_add_rx_frag(skb, i, - buff_->page, 0, + buff_ = buff; + i = 1U; + do { + next_ = buff_->next, + buff_ = &self->buff_ring[next_]; + + dma_sync_single_range_for_cpu( + aq_nic_get_dev(self->aq_nic), + buff_->rxdata.daddr, + buff_->rxdata.pg_off, buff_->len, - SKB_TRUESIZE(buff->len - - ETH_HLEN)); + DMA_FROM_DEVICE); + skb_add_rx_frag(skb, i++, + buff_->rxdata.page, + buff_->rxdata.pg_off, + buff_->len, + AQ_CFG_RX_FRAME_MAX); + page_ref_inc(buff_->rxdata.page); buff_->is_cleaned = 1; - - if (buff_->is_eop) - break; - } + } while (!buff_->is_eop); } } @@ -310,12 +414,15 @@ err_exit: int aq_ring_rx_fill(struct aq_ring_s *self) { - unsigned int pages_order = fls(AQ_CFG_RX_FRAME_MAX / PAGE_SIZE + - (AQ_CFG_RX_FRAME_MAX % PAGE_SIZE ? 
1 : 0)) - 1; + unsigned int page_order = self->page_order; struct aq_ring_buff_s *buff = NULL; int err = 0; int i = 0; + if (aq_ring_avail_dx(self) < min_t(unsigned int, AQ_CFG_RX_REFILL_THRES, + self->size / 2)) + return err; + for (i = aq_ring_avail_dx(self); i--; self->sw_tail = aq_ring_next_dx(self, self->sw_tail)) { buff = &self->buff_ring[self->sw_tail]; @@ -323,30 +430,15 @@ int aq_ring_rx_fill(struct aq_ring_s *self) buff->flags = 0U; buff->len = AQ_CFG_RX_FRAME_MAX; - buff->page = alloc_pages(GFP_ATOMIC | __GFP_COMP, pages_order); - if (!buff->page) { - err = -ENOMEM; + err = aq_get_rxpages(self, buff, page_order); + if (err) goto err_exit; - } - - buff->pa = dma_map_page(aq_nic_get_dev(self->aq_nic), - buff->page, 0, - AQ_CFG_RX_FRAME_MAX, DMA_FROM_DEVICE); - - if (dma_mapping_error(aq_nic_get_dev(self->aq_nic), buff->pa)) { - err = -ENOMEM; - goto err_exit; - } + buff->pa = aq_buf_daddr(&buff->rxdata); buff = NULL; } err_exit: - if (err < 0) { - if (buff && buff->page) - __free_pages(buff->page, 0); - } - return err; } @@ -359,10 +451,7 @@ void aq_ring_rx_deinit(struct aq_ring_s *self) self->sw_head = aq_ring_next_dx(self, self->sw_head)) { struct aq_ring_buff_s *buff = &self->buff_ring[self->sw_head]; - dma_unmap_page(aq_nic_get_dev(self->aq_nic), buff->pa, - AQ_CFG_RX_FRAME_MAX, DMA_FROM_DEVICE); - - __free_pages(buff->page, 0); + aq_free_rxpage(&buff->rxdata, aq_nic_get_dev(self->aq_nic)); } err_exit:; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h index ac1329f4051d..cfffc301e746 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h @@ -17,6 +17,13 @@ struct page; struct aq_nic_cfg_s; +struct aq_rxpage { + struct page *page; + dma_addr_t daddr; + unsigned int order; + unsigned int pg_off; +}; + /* TxC SOP DX EOP * +----------+----------+----------+----------- * 8bytes|len l3,l4 | pa | pa | pa @@ -31,28 +38,21 @@ struct 
aq_nic_cfg_s; */ struct __packed aq_ring_buff_s { union { + /* RX/TX */ + dma_addr_t pa; /* RX */ struct { u32 rss_hash; u16 next; u8 is_hash_l4; u8 rsvd1; - struct page *page; + struct aq_rxpage rxdata; }; /* EOP */ struct { dma_addr_t pa_eop; struct sk_buff *skb; }; - /* DX */ - struct { - dma_addr_t pa; - }; - /* SOP */ - struct { - dma_addr_t pa_sop; - u32 len_pkt_sop; - }; /* TxC */ struct { u32 mss; @@ -91,6 +91,9 @@ struct aq_ring_stats_rx_s { u64 bytes; u64 lro_packets; u64 jumbo_packets; + u64 pg_losts; + u64 pg_flips; + u64 pg_reuses; }; struct aq_ring_stats_tx_s { @@ -116,6 +119,7 @@ struct aq_ring_s { unsigned int size; /* descriptors number */ unsigned int dx_size; /* TX or RX descriptor size, */ /* stored here for fater math */ + unsigned int page_order; union aq_ring_stats_s stats; dma_addr_t dx_ring_pa; }; @@ -126,6 +130,16 @@ struct aq_ring_param_s { cpumask_t affinity_mask; }; +static inline void *aq_buf_vaddr(struct aq_rxpage *rxpage) +{ + return page_to_virt(rxpage->page) + rxpage->pg_off; +} + +static inline dma_addr_t aq_buf_daddr(struct aq_rxpage *rxpage) +{ + return rxpage->daddr + rxpage->pg_off; +} + static inline unsigned int aq_ring_next_dx(struct aq_ring_s *self, unsigned int dx) { diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c index d335c334fa56..a2e4ca1782ae 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c @@ -353,6 +353,9 @@ void aq_vec_add_stats(struct aq_vec_s *self, stats_rx->errors += rx->errors; stats_rx->jumbo_packets += rx->jumbo_packets; stats_rx->lro_packets += rx->lro_packets; + stats_rx->pg_losts += rx->pg_losts; + stats_rx->pg_flips += rx->pg_flips; + stats_rx->pg_reuses += rx->pg_reuses; stats_tx->packets += tx->packets; stats_tx->bytes += tx->bytes; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index 
f6f8338153a2..65ffaa7ad69e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -619,8 +619,6 @@ err_exit: static int hw_atl_a0_hw_ring_rx_receive(struct aq_hw_s *self, struct aq_ring_s *ring) { - struct device *ndev = aq_nic_get_dev(ring->aq_nic); - for (; ring->hw_head != ring->sw_tail; ring->hw_head = aq_ring_next_dx(ring, ring->hw_head)) { struct aq_ring_buff_s *buff = NULL; @@ -687,8 +685,6 @@ static int hw_atl_a0_hw_ring_rx_receive(struct aq_hw_s *self, is_err &= ~0x18U; is_err &= ~0x04U; - dma_unmap_page(ndev, buff->pa, buff->len, DMA_FROM_DEVICE); - if (is_err || rxd_wb->type & 0x1000U) { /* status error or DMA error */ buff->is_error = 1U; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index b31dba1b1a55..7e95804e2180 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -259,7 +259,13 @@ static int hw_atl_b0_hw_offload_set(struct aq_hw_s *self, hw_atl_rpo_lro_time_base_divider_set(self, 0x61AU); hw_atl_rpo_lro_inactive_interval_set(self, 0); - hw_atl_rpo_lro_max_coalescing_interval_set(self, 2); + /* the LRO timebase divider is 5 uS (0x61a), + * which is multiplied by 50(0x32) + * to get a maximum coalescing interval of 250 uS, + * which is the default value + */ + hw_atl_rpo_lro_max_coalescing_interval_set(self, 50); + hw_atl_rpo_lro_qsessions_lim_set(self, 1U); @@ -273,6 +279,10 @@ static int hw_atl_b0_hw_offload_set(struct aq_hw_s *self, hw_atl_rpo_lro_en_set(self, aq_nic_cfg->is_lro ? 0xFFFFFFFFU : 0U); + hw_atl_itr_rsc_en_set(self, + aq_nic_cfg->is_lro ? 
0xFFFFFFFFU : 0U); + + hw_atl_itr_rsc_delay_set(self, 1U); } return aq_hw_err_from_flags(self); } @@ -654,8 +664,6 @@ err_exit: static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, struct aq_ring_s *ring) { - struct device *ndev = aq_nic_get_dev(ring->aq_nic); - for (; ring->hw_head != ring->sw_tail; ring->hw_head = aq_ring_next_dx(ring, ring->hw_head)) { struct aq_ring_buff_s *buff = NULL; @@ -697,8 +705,6 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, buff->is_cso_err = 0U; } - dma_unmap_page(ndev, buff->pa, buff->len, DMA_FROM_DEVICE); - if ((rx_stat & BIT(0)) || rxd_wb->type & 0x1000U) { /* MAC error or DMA error */ buff->is_error = 1U; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h index b318eefd36ae..ea98a08d7820 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h @@ -78,7 +78,7 @@ #define HW_ATL_B0_TC_MAX 1U #define HW_ATL_B0_RSS_MAX 8U -#define HW_ATL_B0_LRO_RXD_MAX 2U +#define HW_ATL_B0_LRO_RXD_MAX 16U #define HW_ATL_B0_RS_SLIP_ENABLED 0U /* (256k -1(max pay_len) - 54(header)) */ diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c index 0722b8e01964..9442deff98a8 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c @@ -315,6 +315,21 @@ void hw_atl_itr_res_irq_set(struct aq_hw_s *aq_hw, u32 res_irq) HW_ATL_ITR_RES_SHIFT, res_irq); } +/* set RSC interrupt */ +void hw_atl_itr_rsc_en_set(struct aq_hw_s *aq_hw, u32 enable) +{ + aq_hw_write_reg(aq_hw, HW_ATL_ITR_RSC_EN_ADR, enable); +} + +/* set RSC delay */ +void hw_atl_itr_rsc_delay_set(struct aq_hw_s *aq_hw, u32 delay) +{ + aq_hw_write_reg_bit(aq_hw, HW_ATL_ITR_RSC_DELAY_ADR, + HW_ATL_ITR_RSC_DELAY_MSK, + 
HW_ATL_ITR_RSC_DELAY_SHIFT, + delay); +} + /* rdm */ void hw_atl_rdm_cpu_id_set(struct aq_hw_s *aq_hw, u32 cpuid, u32 dca) { diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h index d46351890b16..4cfa4bd80ad3 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h @@ -152,6 +152,12 @@ u32 hw_atl_itr_res_irq_get(struct aq_hw_s *aq_hw); /* set reset interrupt */ void hw_atl_itr_res_irq_set(struct aq_hw_s *aq_hw, u32 res_irq); +/* set RSC interrupt */ +void hw_atl_itr_rsc_en_set(struct aq_hw_s *aq_hw, u32 enable); + +/* set RSC delay */ +void hw_atl_itr_rsc_delay_set(struct aq_hw_s *aq_hw, u32 delay); + /* rdm */ /* set cpu id */ diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h index fb45bc2d99cf..430bbd45b2f0 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h @@ -95,6 +95,19 @@ #define HW_ATL_ITR_RES_MSK 0x80000000 /* lower bit position of bitfield itr_reset */ #define HW_ATL_ITR_RES_SHIFT 31 + +/* register address for bitfield rsc_en */ +#define HW_ATL_ITR_RSC_EN_ADR 0x00002200 + +/* register address for bitfield rsc_delay */ +#define HW_ATL_ITR_RSC_DELAY_ADR 0x00002204 +/* bitmask for bitfield rsc_delay */ +#define HW_ATL_ITR_RSC_DELAY_MSK 0x0000000f +/* width of bitfield rsc_delay */ +#define HW_ATL_ITR_RSC_DELAY_WIDTH 4 +/* lower bit position of bitfield rsc_delay */ +#define HW_ATL_ITR_RSC_DELAY_SHIFT 0 + /* register address for bitfield dca{d}_cpuid[7:0] */ #define HW_ATL_RDM_DCADCPUID_ADR(dca) (0x00006100 + (dca) * 0x4) /* bitmask for bitfield dca{d}_cpuid[7:0] */ diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index 716bfbba59cf..461b2c0b2ed6 100644 --- 
a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -196,6 +196,7 @@ config BNXT depends on PCI select FW_LOADER select LIBCRC32C + select NET_DEVLINK ---help--- This driver supports Broadcom NetXtreme-C/E 10/25/40/50 gigabit Ethernet cards. To compile this driver as a module, choose M here: diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index bc3ac369cbe3..dfe46dacf5cf 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2274,8 +2274,7 @@ static const struct ethtool_ops bcm_sysport_ethtool_ops = { }; static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct bcm_sysport_priv *priv = netdev_priv(dev); u16 queue = skb_get_queue_mapping(skb); @@ -2283,7 +2282,7 @@ static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb, unsigned int q, port; if (!netdev_uses_dsa(dev)) - return fallback(dev, skb, NULL); + return netdev_pick_tx(dev, skb, NULL); /* DSA tagging layer will have configured the correct queue */ q = BRCM_TAG_GET_QUEUE(queue); @@ -2291,7 +2290,7 @@ static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb, tx_ring = priv->ring_map[q + port * priv->per_port_num_tx_queues]; if (unlikely(!tx_ring)) - return fallback(dev, skb, NULL); + return netdev_pick_tx(dev, skb, NULL); return tx_ring->index; } @@ -2599,11 +2598,11 @@ static int bcm_sysport_probe(struct platform_device *pdev) priv->rev = topctrl_readl(priv, REV_CNTL) & REV_MASK; dev_info(&pdev->dev, - "Broadcom SYSTEMPORT%s" REV_FMT - " at 0x%p (irqs: %d, %d, TXQs: %d, RXQs: %d)\n", + "Broadcom SYSTEMPORT%s " REV_FMT + " (irqs: %d, %d, TXQs: %d, RXQs: %d)\n", priv->is_lite ? 
" Lite" : "", (priv->rev >> 8) & 0xff, priv->rev & 0xff, - priv->base, priv->irq0, priv->irq1, txq, rxq); + priv->irq0, priv->irq1, txq, rxq); return 0; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index ecb1bd7eb508..6012fe61735e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -1909,8 +1909,7 @@ void bnx2x_netif_stop(struct bnx2x *bp, int disable_hw) } u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct bnx2x *bp = netdev_priv(dev); @@ -1932,7 +1931,7 @@ u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb, } /* select a non-FCoE queue */ - return fallback(dev, skb, NULL) % + return netdev_pick_tx(dev, skb, NULL) % (BNX2X_NUM_ETH_QUEUES(bp) * bp->max_cos); } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 2462e7aa0c5d..7f8df08a7a4c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -498,8 +498,7 @@ int bnx2x_set_vf_spoofchk(struct net_device *dev, int idx, bool val); /* select_queue callback */ u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback); + struct net_device *sb_dev); static inline void bnx2x_update_rx_prod(struct bnx2x *bp, struct bnx2x_fastpath *fp, diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h index d9057c8bbeef..78326a6c0aba 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h @@ -3024,7 +3024,7 @@ struct afex_stats { #define BCM_5710_FW_MAJOR_VERSION 7 #define BCM_5710_FW_MINOR_VERSION 13 -#define BCM_5710_FW_REVISION_VERSION 1 +#define 
BCM_5710_FW_REVISION_VERSION 11 #define BCM_5710_FW_ENGINEERING_VERSION 0 #define BCM_5710_FW_COMPILE_FLAGS 1 @@ -3639,8 +3639,10 @@ struct client_init_rx_data { #define CLIENT_INIT_RX_DATA_TPA_EN_IPV6_SHIFT 1 #define CLIENT_INIT_RX_DATA_TPA_MODE (0x1<<2) #define CLIENT_INIT_RX_DATA_TPA_MODE_SHIFT 2 -#define CLIENT_INIT_RX_DATA_RESERVED5 (0x1F<<3) -#define CLIENT_INIT_RX_DATA_RESERVED5_SHIFT 3 +#define CLIENT_INIT_RX_DATA_TPA_OVER_VLAN_DISABLE (0x1<<3) +#define CLIENT_INIT_RX_DATA_TPA_OVER_VLAN_DISABLE_SHIFT 3 +#define CLIENT_INIT_RX_DATA_RESERVED5 (0xF<<4) +#define CLIENT_INIT_RX_DATA_RESERVED5_SHIFT 4 u8 vmqueue_mode_en_flg; u8 extra_data_over_sgl_en_flg; u8 cache_line_alignment_log_size; @@ -3831,7 +3833,7 @@ struct eth_classify_cmd_header { */ struct eth_classify_header { u8 rule_cnt; - u8 reserved0; + u8 warning_on_error; __le16 reserved1; __le32 echo; }; @@ -4752,6 +4754,8 @@ struct tpa_update_ramrod_data { __le32 sge_page_base_hi; __le16 sge_pause_thr_low; __le16 sge_pause_thr_high; + u8 tpa_over_vlan_disable; + u8 reserved[7]; }; @@ -4946,7 +4950,7 @@ struct fairness_vars_per_port { u32 upper_bound; u32 fair_threshold; u32 fairness_timeout; - u32 reserved0; + u32 size_thr; }; /* @@ -5415,7 +5419,9 @@ struct function_start_data { u8 sd_vlan_force_pri_val; u8 c2s_pri_tt_valid; u8 c2s_pri_default; - u8 reserved2[6]; + u8 tx_vlan_filtering_enable; + u8 tx_vlan_filtering_use_pvid; + u8 reserved2[4]; struct c2s_pri_trans_table_entry c2s_pri_trans_table; }; @@ -5448,7 +5454,8 @@ struct function_update_data { u8 reserved1; __le16 sd_vlan_tag; __le16 sd_vlan_eth_type; - __le16 reserved0; + u8 tx_vlan_filtering_pvid_change_flg; + u8 reserved0; __le32 reserved2; }; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index e1feb97bcd81..ab6fd05c462b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -9,6 +9,7 @@ #include <linux/pci.h> 
#include <linux/netdevice.h> +#include <net/devlink.h> #include "bnxt_hsi.h" #include "bnxt.h" #include "bnxt_vfr.h" @@ -228,6 +229,8 @@ int bnxt_dl_register(struct bnxt *bp) goto err_dl_unreg; } + devlink_port_attrs_set(&bp->dl_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, + bp->pf.port_id, false, 0); rc = devlink_port_register(dl, &bp->dl_port, bp->pf.port_id); if (rc) { netdev_err(bp->dev, "devlink_port_register failed"); diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 1522aee81884..a44171fddf47 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4360,8 +4360,7 @@ static int __maybe_unused macb_resume(struct device *dev) static int __maybe_unused macb_runtime_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *netdev = platform_get_drvdata(pdev); + struct net_device *netdev = dev_get_drvdata(dev); struct macb *bp = netdev_priv(netdev); if (!(device_may_wakeup(&bp->dev->dev))) { @@ -4377,8 +4376,7 @@ static int __maybe_unused macb_runtime_suspend(struct device *dev) static int __maybe_unused macb_runtime_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct net_device *netdev = platform_get_drvdata(pdev); + struct net_device *netdev = dev_get_drvdata(dev); struct macb *bp = netdev_priv(netdev); if (!(device_may_wakeup(&bp->dev->dev))) { diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig index 6650e2a5f171..7612ab6b286d 100644 --- a/drivers/net/ethernet/cavium/Kconfig +++ b/drivers/net/ethernet/cavium/Kconfig @@ -68,6 +68,7 @@ config LIQUIDIO imply PTP_1588_CLOCK select FW_LOADER select LIBCRC32C + select NET_DEVLINK ---help--- This driver supports Cavium LiquidIO Intelligent Server Adapters based on CN66XX, CN68XX and CN23XX chips. 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 89179e316687..3339f1f4bcdd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -979,8 +979,7 @@ freeout: } static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { int txq; @@ -1022,7 +1021,7 @@ static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb, return txq; } - return fallback(dev, skb, NULL) % dev->real_num_tx_queues; + return netdev_pick_tx(dev, skb, NULL) % dev->real_num_tx_queues; } static int closest_timer(const struct sge *s, int time) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index dc339dc1adb2..2055c97dc22b 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -435,7 +435,7 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv, percpu_stats->rx_packets++; percpu_stats->rx_bytes += dpaa2_fd_get_len(fd); - napi_gro_receive(&ch->napi, skb); + list_add_tail(&skb->list, ch->rx_list); return; @@ -1113,12 +1113,16 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget) struct dpaa2_eth_fq *fq, *txc_fq = NULL; struct netdev_queue *nq; int store_cleaned, work_done; + struct list_head rx_list; int err; ch = container_of(napi, struct dpaa2_eth_channel, napi); ch->xdp.res = 0; priv = ch->priv; + INIT_LIST_HEAD(&rx_list); + ch->rx_list = &rx_list; + do { err = pull_channel(ch); if (unlikely(err)) @@ -1162,6 +1166,8 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget) work_done = max(rx_cleaned, 1); out: + netif_receive_skb_list(ch->rx_list); + if (txc_fq && txc_fq->dq_frames) { nq = netdev_get_tx_queue(priv->net_dev, txc_fq->flowid); netdev_tx_completed_queue(nq, txc_fq->dq_frames, diff --git 
a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index 7879622aa3e6..a11ebfdc4a23 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -334,6 +334,7 @@ struct dpaa2_eth_channel { struct dpaa2_eth_ch_stats stats; struct dpaa2_eth_ch_xdp xdp; struct xdp_rxq_info xdp_rxq; + struct list_head *rx_list; }; struct dpaa2_eth_dist_fields { diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 60e7d7ae3787..e37a0ca0db89 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1964,8 +1964,7 @@ static void hns_nic_get_stats64(struct net_device *ndev, static u16 hns_nic_select_queue(struct net_device *ndev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct ethhdr *eth_hdr = (struct ethhdr *)skb->data; struct hns_nic_priv *priv = netdev_priv(ndev); @@ -1975,7 +1974,7 @@ hns_nic_select_queue(struct net_device *ndev, struct sk_buff *skb, is_multicast_ether_addr(eth_hdr->h_dest)) return 0; else - return fallback(ndev, skb, NULL); + return netdev_pick_tx(ndev, skb, NULL); } static const struct net_device_ops hns_nic_netdev_ops = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 162cb9afa0e7..21085c4bf66b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -827,12 +827,12 @@ static void hns3_set_l2l3l4_len(struct sk_buff *skb, u8 ol4_proto, */ static bool hns3_tunnel_csum_bug(struct sk_buff *skb) { -#define IANA_VXLAN_PORT 4789 union l4_hdr_info l4; l4.hdr = skb_transport_header(skb); - if (!(!skb->encapsulation && l4.udp->dest == htons(IANA_VXLAN_PORT))) + if (!(!skb->encapsulation && + l4.udp->dest == htons(IANA_VXLAN_UDP_PORT))) return false; 
skb_checksum_help(skb); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 3a093a92eac5..722bb3124bb6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -411,7 +411,7 @@ static void hclge_destroy_queue(struct hclge_cmq_ring *ring) spin_unlock(&ring->lock); } -void hclge_destroy_cmd_queue(struct hclge_hw *hw) +static void hclge_destroy_cmd_queue(struct hclge_hw *hw) { hclge_destroy_queue(&hw->cmq.csq); hclge_destroy_queue(&hw->cmq.crq); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 8bc28e6f465f..65bdc689a4ce 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -404,7 +404,7 @@ void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state) } } -void hclgevf_update_link_mode(struct hclgevf_dev *hdev) +static void hclgevf_update_link_mode(struct hclgevf_dev *hdev) { #define HCLGEVF_ADVERTISING 0 #define HCLGEVF_SUPPORTED 1 diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index dd71d5db7274..d86b0e5895a6 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -93,7 +93,7 @@ struct ibmveth_stat { #define IBMVETH_STAT_OFF(stat) offsetof(struct ibmveth_adapter, stat) #define IBMVETH_GET_STAT(a, off) *((u64 *)(((unsigned long)(a)) + off)) -struct ibmveth_stat ibmveth_stats[] = { +static struct ibmveth_stat ibmveth_stats[] = { { "replenish_task_cycles", IBMVETH_STAT_OFF(replenish_task_cycles) }, { "replenish_no_mem", IBMVETH_STAT_OFF(replenish_no_mem) }, { "replenish_add_buff_failure", diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 5ecbb1adcf3b..25b8e04ef11a 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ 
b/drivers/net/ethernet/ibm/ibmvnic.c @@ -120,6 +120,7 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *); static void release_crq_queue(struct ibmvnic_adapter *); static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p); static int init_crq_queue(struct ibmvnic_adapter *adapter); +static int send_query_phys_parms(struct ibmvnic_adapter *adapter); struct ibmvnic_stat { char name[ETH_GSTRING_LEN]; @@ -2278,23 +2279,20 @@ static const struct net_device_ops ibmvnic_netdev_ops = { static int ibmvnic_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { - u32 supported, advertising; + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + int rc; - supported = (SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | - SUPPORTED_FIBRE); - advertising = (ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg | - ADVERTISED_FIBRE); - cmd->base.speed = SPEED_1000; - cmd->base.duplex = DUPLEX_FULL; + rc = send_query_phys_parms(adapter); + if (rc) { + adapter->speed = SPEED_UNKNOWN; + adapter->duplex = DUPLEX_UNKNOWN; + } + cmd->base.speed = adapter->speed; + cmd->base.duplex = adapter->duplex; cmd->base.port = PORT_FIBRE; cmd->base.phy_address = 0; cmd->base.autoneg = AUTONEG_ENABLE; - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - supported); - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, - advertising); - return 0; } @@ -4278,6 +4276,73 @@ out: } } +static int send_query_phys_parms(struct ibmvnic_adapter *adapter) +{ + union ibmvnic_crq crq; + int rc; + + memset(&crq, 0, sizeof(crq)); + crq.query_phys_parms.first = IBMVNIC_CRQ_CMD; + crq.query_phys_parms.cmd = QUERY_PHYS_PARMS; + init_completion(&adapter->fw_done); + rc = ibmvnic_send_crq(adapter, &crq); + if (rc) + return rc; + wait_for_completion(&adapter->fw_done); + return adapter->fw_done_rc ? 
-EIO : 0; +} + +static int handle_query_phys_parms_rsp(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + int rc; + + rc = crq->query_phys_parms_rsp.rc.code; + if (rc) { + netdev_err(netdev, "Error %d in QUERY_PHYS_PARMS\n", rc); + return rc; + } + switch (cpu_to_be32(crq->query_phys_parms_rsp.speed)) { + case IBMVNIC_10MBPS: + adapter->speed = SPEED_10; + break; + case IBMVNIC_100MBPS: + adapter->speed = SPEED_100; + break; + case IBMVNIC_1GBPS: + adapter->speed = SPEED_1000; + break; + case IBMVNIC_10GBP: + adapter->speed = SPEED_10000; + break; + case IBMVNIC_25GBPS: + adapter->speed = SPEED_25000; + break; + case IBMVNIC_40GBPS: + adapter->speed = SPEED_40000; + break; + case IBMVNIC_50GBPS: + adapter->speed = SPEED_50000; + break; + case IBMVNIC_100GBPS: + adapter->speed = SPEED_100000; + break; + default: + netdev_warn(netdev, "Unknown speed 0x%08x\n", + cpu_to_be32(crq->query_phys_parms_rsp.speed)); + adapter->speed = SPEED_UNKNOWN; + } + if (crq->query_phys_parms_rsp.flags1 & IBMVNIC_FULL_DUPLEX) + adapter->duplex = DUPLEX_FULL; + else if (crq->query_phys_parms_rsp.flags1 & IBMVNIC_HALF_DUPLEX) + adapter->duplex = DUPLEX_HALF; + else + adapter->duplex = DUPLEX_UNKNOWN; + + return rc; +} + static void ibmvnic_handle_crq(union ibmvnic_crq *crq, struct ibmvnic_adapter *adapter) { @@ -4426,6 +4491,10 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, case GET_VPD_RSP: handle_vpd_rsp(crq, adapter); break; + case QUERY_PHYS_PARMS_RSP: + adapter->fw_done_rc = handle_query_phys_parms_rsp(crq, adapter); + complete(&adapter->fw_done); + break; default: netdev_err(netdev, "Got an invalid cmd type 0x%02x\n", gen_crq->cmd); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index f2018dbebfa5..d5260a206708 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -377,11 +377,16 @@ struct ibmvnic_phys_parms { u8 flags2; #define 
IBMVNIC_LOGICAL_LNK_ACTIVE 0x80 __be32 speed; -#define IBMVNIC_AUTONEG 0x80 -#define IBMVNIC_10MBPS 0x40 -#define IBMVNIC_100MBPS 0x20 -#define IBMVNIC_1GBPS 0x10 -#define IBMVNIC_10GBPS 0x08 +#define IBMVNIC_AUTONEG 0x80000000 +#define IBMVNIC_10MBPS 0x40000000 +#define IBMVNIC_100MBPS 0x20000000 +#define IBMVNIC_1GBPS 0x10000000 +#define IBMVNIC_10GBP 0x08000000 +#define IBMVNIC_40GBPS 0x04000000 +#define IBMVNIC_100GBPS 0x02000000 +#define IBMVNIC_25GBPS 0x01000000 +#define IBMVNIC_50GBPS 0x00800000 +#define IBMVNIC_200GBPS 0x00400000 __be32 mtu; struct ibmvnic_rc rc; } __packed __aligned(8); @@ -999,6 +1004,9 @@ struct ibmvnic_adapter { int phys_link_state; int logical_link_state; + u32 speed; + u8 duplex; + /* login data */ struct ibmvnic_login_buffer *login_buf; dma_addr_t login_buf_token; diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 0fd268070fb4..a65d5a9ba7db 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -2797,7 +2797,7 @@ static int e100_set_features(struct net_device *netdev, netdev->features = features; e100_exec_cb(nic, NULL, e100_configure); - return 0; + return 1; } static const struct net_device_ops e100_netdev_ops = { diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 8fe9af0e2ab7..a7c76732849f 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -820,7 +820,7 @@ static int e1000_set_features(struct net_device *netdev, else e1000_reset(adapter); - return 0; + return 1; } static const struct net_device_ops e1000_netdev_ops = { diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 7acc61e4f645..745c1242a2d9 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -7003,7 +7003,7 @@ static int e1000_set_features(struct net_device *netdev, else 
e1000e_reset(adapter); - return 0; + return 1; } static const struct net_device_ops e1000e_netdev_ops = { @@ -7350,7 +7350,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP); - if (pci_dev_run_wake(pdev)) + if (pci_dev_run_wake(pdev) && hw->mac.type < e1000_pch_cnp) pm_runtime_put_noidle(&pdev->dev); return 0; diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 89440775aea1..b819689da7e2 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -42,10 +42,21 @@ extern const char ice_drv_ver[]; #define ICE_BAR0 0 -#define ICE_DFLT_NUM_DESC 128 #define ICE_REQ_DESC_MULTIPLE 32 #define ICE_MIN_NUM_DESC ICE_REQ_DESC_MULTIPLE #define ICE_MAX_NUM_DESC 8160 +/* set default number of Rx/Tx descriptors to the minimum between + * ICE_MAX_NUM_DESC and the number of descriptors to fill up an entire page + */ +#define ICE_DFLT_NUM_RX_DESC min_t(u16, ICE_MAX_NUM_DESC, \ + ALIGN(PAGE_SIZE / \ + sizeof(union ice_32byte_rx_desc), \ + ICE_REQ_DESC_MULTIPLE)) +#define ICE_DFLT_NUM_TX_DESC min_t(u16, ICE_MAX_NUM_DESC, \ + ALIGN(PAGE_SIZE / \ + sizeof(struct ice_tx_desc), \ + ICE_REQ_DESC_MULTIPLE)) + #define ICE_DFLT_TRAFFIC_CLASS BIT(0) #define ICE_INT_NAME_STR_LEN (IFNAMSIZ + 16) #define ICE_ETHTOOL_FWVER_LEN 32 @@ -114,6 +125,23 @@ extern const char ice_drv_ver[]; #define ice_for_each_q_vector(vsi, i) \ for ((i) = 0; (i) < (vsi)->num_q_vectors; (i)++) +#define ICE_UCAST_PROMISC_BITS (ICE_PROMISC_UCAST_TX | ICE_PROMISC_MCAST_TX | \ + ICE_PROMISC_UCAST_RX | ICE_PROMISC_MCAST_RX) + +#define ICE_UCAST_VLAN_PROMISC_BITS (ICE_PROMISC_UCAST_TX | \ + ICE_PROMISC_MCAST_TX | \ + ICE_PROMISC_UCAST_RX | \ + ICE_PROMISC_MCAST_RX | \ + ICE_PROMISC_VLAN_TX | \ + ICE_PROMISC_VLAN_RX) + +#define ICE_MCAST_PROMISC_BITS (ICE_PROMISC_MCAST_TX | ICE_PROMISC_MCAST_RX) + +#define ICE_MCAST_VLAN_PROMISC_BITS (ICE_PROMISC_MCAST_TX | \ + 
ICE_PROMISC_MCAST_RX | \ + ICE_PROMISC_VLAN_TX | \ + ICE_PROMISC_VLAN_RX) + struct ice_tc_info { u16 qoffset; u16 qcount_tx; @@ -247,6 +275,7 @@ struct ice_vsi { u8 irqs_ready; u8 current_isup; /* Sync 'link up' logging */ u8 stat_offsets_loaded; + u8 vlan_ena; /* queue information */ u8 tx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ @@ -257,26 +286,33 @@ struct ice_vsi { u16 num_txq; /* Used Tx queues */ u16 alloc_rxq; /* Allocated Rx queues */ u16 num_rxq; /* Used Rx queues */ - u16 num_desc; + u16 num_rx_desc; + u16 num_tx_desc; struct ice_tc_cfg tc_cfg; } ____cacheline_internodealigned_in_smp; /* struct that defines an interrupt vector */ struct ice_q_vector { struct ice_vsi *vsi; - cpumask_t affinity_mask; - struct napi_struct napi; - struct ice_ring_container rx; - struct ice_ring_container tx; - struct irq_affinity_notify affinity_notify; + u16 v_idx; /* index in the vsi->q_vector array. */ - u8 num_ring_tx; /* total number of Tx rings in vector */ u8 num_ring_rx; /* total number of Rx rings in vector */ - char name[ICE_INT_NAME_STR_LEN]; + u8 num_ring_tx; /* total number of Tx rings in vector */ + u8 itr_countdown; /* when 0 should adjust adaptive ITR */ /* in usecs, need to use ice_intrl_to_usecs_reg() before writing this * value to the device */ u8 intrl; + + struct napi_struct napi; + + struct ice_ring_container rx; + struct ice_ring_container tx; + + cpumask_t affinity_mask; + struct irq_affinity_notify affinity_notify; + + char name[ICE_INT_NAME_STR_LEN]; } ____cacheline_internodealigned_in_smp; enum ice_pf_flags { @@ -355,8 +391,9 @@ struct ice_netdev_priv { * @vsi: pointer to vsi struct, can be NULL * @q_vector: pointer to q_vector, can be NULL */ -static inline void ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi, - struct ice_q_vector *q_vector) +static inline void +ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi, + struct ice_q_vector *q_vector) { u32 vector = (vsi && q_vector) ? 
vsi->hw_base_vector + q_vector->v_idx : ((struct ice_pf *)hw->back)->hw_oicr_idx; diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 242c78469181..8ff438968199 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -953,8 +953,9 @@ struct ice_aqc_set_phy_cfg_data { __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */ __le64 phy_type_high; /* Use values from ICE_PHY_TYPE_HIGH_* */ u8 caps; -#define ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY BIT(0) -#define ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY BIT(1) +#define ICE_AQ_PHY_ENA_VALID_MASK ICE_M(0xef, 0) +#define ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY BIT(0) +#define ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY BIT(1) #define ICE_AQ_PHY_ENA_LOW_POWER BIT(2) #define ICE_AQ_PHY_ENA_LINK BIT(3) #define ICE_AQ_PHY_ENA_AUTO_LINK_UPDT BIT(5) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 63f003441300..5e7a31421c0d 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -262,7 +262,7 @@ static enum ice_media_type ice_get_media_type(struct ice_port_info *pi) * * Get Link Status (0x607). Returns the link status of the adapter. 
*/ -static enum ice_status +enum ice_status ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, struct ice_link_status *link, struct ice_sq_cd *cd) { @@ -331,7 +331,7 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, /* flag cleared so calling functions don't call AQ again */ pi->phy.get_link_info = false; - return status; + return 0; } /** @@ -358,22 +358,22 @@ static void ice_init_flex_flags(struct ice_hw *hw, enum ice_rxdid prof_id) */ case ICE_RXDID_FLEX_NIC: case ICE_RXDID_FLEX_NIC_2: - ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_RXFLG_PKT_FRG, - ICE_RXFLG_UDP_GRE, ICE_RXFLG_PKT_DSI, - ICE_RXFLG_FIN, idx++); + ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_PKT_FRG, + ICE_FLG_UDP_GRE, ICE_FLG_PKT_DSI, + ICE_FLG_FIN, idx++); /* flex flag 1 is not used for flexi-flag programming, skipping * these four FLG64 bits. */ - ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_RXFLG_SYN, ICE_RXFLG_RST, - ICE_RXFLG_PKT_DSI, ICE_RXFLG_PKT_DSI, idx++); - ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_RXFLG_PKT_DSI, - ICE_RXFLG_PKT_DSI, ICE_RXFLG_EVLAN_x8100, - ICE_RXFLG_EVLAN_x9100, idx++); - ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_RXFLG_VLAN_x8100, - ICE_RXFLG_TNL_VLAN, ICE_RXFLG_TNL_MAC, - ICE_RXFLG_TNL0, idx++); - ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_RXFLG_TNL1, ICE_RXFLG_TNL2, - ICE_RXFLG_PKT_DSI, ICE_RXFLG_PKT_DSI, idx); + ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_SYN, ICE_FLG_RST, + ICE_FLG_PKT_DSI, ICE_FLG_PKT_DSI, idx++); + ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_PKT_DSI, + ICE_FLG_PKT_DSI, ICE_FLG_EVLAN_x8100, + ICE_FLG_EVLAN_x9100, idx++); + ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_VLAN_x8100, + ICE_FLG_TNL_VLAN, ICE_FLG_TNL_MAC, + ICE_FLG_TNL0, idx++); + ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_TNL1, ICE_FLG_TNL2, + ICE_FLG_PKT_DSI, ICE_FLG_PKT_DSI, idx); break; default: @@ -1100,8 +1100,9 @@ const struct ice_ctx_ele ice_tlan_ctx_info[] = { * * Dumps debug log about control command with descriptor contents. 
*/ -void ice_debug_cq(struct ice_hw *hw, u32 __maybe_unused mask, void *desc, - void *buf, u16 buf_len) +void +ice_debug_cq(struct ice_hw *hw, u32 __maybe_unused mask, void *desc, void *buf, + u16 buf_len) { struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc; u16 len; @@ -1415,13 +1416,15 @@ void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res) } /** - * ice_get_guar_num_vsi - determine number of guar VSI for a PF + * ice_get_num_per_func - determine number of resources per PF * @hw: pointer to the hw structure + * @max: value to be evenly split between each PF * * Determine the number of valid functions by going through the bitmap returned - * from parsing capabilities and use this to calculate the number of VSI per PF. + * from parsing capabilities and use this to calculate the number of resources + * per PF based on the max value passed in. */ -static u32 ice_get_guar_num_vsi(struct ice_hw *hw) +static u32 ice_get_num_per_func(struct ice_hw *hw, u32 max) { u8 funcs; @@ -1432,7 +1435,7 @@ static u32 ice_get_guar_num_vsi(struct ice_hw *hw) if (!funcs) return 0; - return ICE_MAX_VSI / funcs; + return max / funcs; } /** @@ -1512,7 +1515,8 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count, "HW caps: Dev.VSI cnt = %d\n", dev_p->num_vsi_allocd_to_host); } else if (func_p) { - func_p->guar_num_vsi = ice_get_guar_num_vsi(hw); + func_p->guar_num_vsi = + ice_get_num_per_func(hw, ICE_MAX_VSI); ice_debug(hw, ICE_DBG_INIT, "HW caps: Func.VSI cnt = %d\n", number); @@ -1617,8 +1621,8 @@ ice_aq_discover_caps(struct ice_hw *hw, void *buf, u16 buf_size, u32 *cap_count, * @hw: pointer to the hardware structure * @opc: capabilities type to discover - pass in the command opcode */ -static enum ice_status ice_discover_caps(struct ice_hw *hw, - enum ice_adminq_opc opc) +static enum ice_status +ice_discover_caps(struct ice_hw *hw, enum ice_adminq_opc opc) { enum ice_status status; u32 cap_count; @@ -1929,6 +1933,15 @@ ice_aq_set_phy_cfg(struct ice_hw *hw, u8 
lport, if (!cfg) return ICE_ERR_PARAM; + /* Ensure that only valid bits of cfg->caps can be turned on. */ + if (cfg->caps & ~ICE_AQ_PHY_ENA_VALID_MASK) { + ice_debug(hw, ICE_DBG_PHY, + "Invalid bit is set in ice_aqc_set_phy_cfg_data->caps : 0x%x\n", + cfg->caps); + + cfg->caps &= ICE_AQ_PHY_ENA_VALID_MASK; + } + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_phy_cfg); desc.params.set_phy.lport_num = lport; desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); @@ -2027,8 +2040,10 @@ ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update) /* clear the old pause settings */ cfg.caps = pcaps->caps & ~(ICE_AQC_PHY_EN_TX_LINK_PAUSE | ICE_AQC_PHY_EN_RX_LINK_PAUSE); + /* set the new capabilities */ cfg.caps |= pause_mask; + /* If the capabilities have changed, then set the new config */ if (cfg.caps != pcaps->caps) { int retry_count, retry_max = 10; @@ -2136,6 +2151,32 @@ ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link, } /** + * ice_aq_set_event_mask + * @hw: pointer to the HW struct + * @port_num: port number of the physical function + * @mask: event mask to be set + * @cd: pointer to command details structure or NULL + * + * Set event mask (0x0613) + */ +enum ice_status +ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask, + struct ice_sq_cd *cd) +{ + struct ice_aqc_set_event_mask *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.set_event_mask; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_event_mask); + + cmd->lport_num = port_num; + + cmd->event_mask = cpu_to_le16(mask); + return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); +} + +/** * ice_aq_set_port_id_led * @pi: pointer to the port information * @is_orig_mode: is this LED set to original mode (by the net-list) @@ -2534,8 +2575,8 @@ do_aq: * @dest_ctx: the context to be written to * @ce_info: a description of the struct to be filled */ -static void ice_write_byte(u8 *src_ctx, u8 *dest_ctx, - const struct ice_ctx_ele *ce_info) +static void 
+ice_write_byte(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) { u8 src_byte, dest_byte, mask; u8 *from, *dest; @@ -2573,8 +2614,8 @@ static void ice_write_byte(u8 *src_ctx, u8 *dest_ctx, * @dest_ctx: the context to be written to * @ce_info: a description of the struct to be filled */ -static void ice_write_word(u8 *src_ctx, u8 *dest_ctx, - const struct ice_ctx_ele *ce_info) +static void +ice_write_word(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) { u16 src_word, mask; __le16 dest_word; @@ -2616,8 +2657,8 @@ static void ice_write_word(u8 *src_ctx, u8 *dest_ctx, * @dest_ctx: the context to be written to * @ce_info: a description of the struct to be filled */ -static void ice_write_dword(u8 *src_ctx, u8 *dest_ctx, - const struct ice_ctx_ele *ce_info) +static void +ice_write_dword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) { u32 src_dword, mask; __le32 dest_dword; @@ -2667,8 +2708,8 @@ static void ice_write_dword(u8 *src_ctx, u8 *dest_ctx, * @dest_ctx: the context to be written to * @ce_info: a description of the struct to be filled */ -static void ice_write_qword(u8 *src_ctx, u8 *dest_ctx, - const struct ice_ctx_ele *ce_info) +static void +ice_write_qword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info) { u64 src_qword, mask; __le64 dest_qword; @@ -2908,7 +2949,7 @@ ice_cfg_vsi_qs(struct ice_port_info *pi, u16 vsi_handle, u8 tc_bitmap, mutex_lock(&pi->sched_lock); - for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { + ice_for_each_traffic_class(i) { /* configuration is possible only if TC node is present */ if (!ice_sched_get_tc_node(pi, i)) continue; @@ -3012,8 +3053,9 @@ void ice_replay_post(struct ice_hw *hw) * @prev_stat: ptr to previous loaded stat value * @cur_stat: ptr to current stat value */ -void ice_stat_update40(struct ice_hw *hw, u32 hireg, u32 loreg, - bool prev_stat_loaded, u64 *prev_stat, u64 *cur_stat) +void +ice_stat_update40(struct ice_hw *hw, u32 hireg, u32 loreg, + bool prev_stat_loaded, 
u64 *prev_stat, u64 *cur_stat) { u64 new_data; @@ -3043,8 +3085,9 @@ void ice_stat_update40(struct ice_hw *hw, u32 hireg, u32 loreg, * @prev_stat: ptr to previous loaded stat value * @cur_stat: ptr to current stat value */ -void ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, - u64 *prev_stat, u64 *cur_stat) +void +ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, + u64 *prev_stat, u64 *cur_stat) { u32 new_data; diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index d7c7c2ed8823..fbdfdee353bc 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -9,8 +9,8 @@ #include "ice_switch.h" #include <linux/avf/virtchnl.h> -void ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf, - u16 buf_len); +void +ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf, u16 buf_len); enum ice_status ice_init_hw(struct ice_hw *hw); void ice_deinit_hw(struct ice_hw *hw); enum ice_status ice_check_reset(struct ice_hw *hw); @@ -28,8 +28,8 @@ ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res, enum ice_aq_res_access_type access, u32 timeout); void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res); enum ice_status ice_init_nvm(struct ice_hw *hw); -enum ice_status ice_read_sr_buf(struct ice_hw *hw, u16 offset, u16 *words, - u16 *data); +enum ice_status +ice_read_sr_buf(struct ice_hw *hw, u16 offset, u16 *words, u16 *data); enum ice_status ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, struct ice_aq_desc *desc, void *buf, u16 buf_size, @@ -89,6 +89,12 @@ enum ice_status ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link, struct ice_sq_cd *cd); enum ice_status +ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, + struct ice_link_status *link, struct ice_sq_cd *cd); +enum ice_status +ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask, + struct ice_sq_cd *cd); 
+enum ice_status ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode, struct ice_sq_cd *cd); @@ -106,8 +112,10 @@ ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_qgrps, enum ice_status ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle); void ice_replay_post(struct ice_hw *hw); void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf); -void ice_stat_update40(struct ice_hw *hw, u32 hireg, u32 loreg, - bool prev_stat_loaded, u64 *prev_stat, u64 *cur_stat); -void ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, - u64 *prev_stat, u64 *cur_stat); +void +ice_stat_update40(struct ice_hw *hw, u32 hireg, u32 loreg, + bool prev_stat_loaded, u64 *prev_stat, u64 *cur_stat); +void +ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, + u64 *prev_stat, u64 *cur_stat); #endif /* _ICE_COMMON_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index eb8d149e317c..4a1920e8f168 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -1156,8 +1156,9 @@ ice_get_settings_link_down(struct ethtool_link_ksettings *ks, * * Reports speed/duplex settings based on media_type */ -static int ice_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *ks) +static int +ice_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *ks) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_link_status *hw_link_info; @@ -1400,13 +1401,12 @@ ice_set_link_ksettings(struct net_device *netdev, return -EOPNOTSUPP; /* Check if this is lan vsi */ - for (idx = 0 ; idx < pf->num_alloc_vsi ; idx++) { + ice_for_each_vsi(pf, idx) if (pf->vsi[idx]->type == ICE_VSI_PF) { if (np->vsi != pf->vsi[idx]) return -EOPNOTSUPP; break; } - } if (p->phy.media_type != ICE_MEDIA_BASET && p->phy.media_type != ICE_MEDIA_FIBER && @@ -1566,8 +1566,9 @@ done: * * Returns 
Success if the command is supported. */ -static int ice_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, - u32 __always_unused *rule_locs) +static int +ice_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, + u32 __always_unused *rule_locs) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; @@ -2024,8 +2025,9 @@ out: * Returns -EINVAL if the table specifies an invalid queue id, otherwise * returns 0 after programming the table. */ -static int ice_set_rxfh(struct net_device *netdev, const u32 *indir, - const u8 *key, const u8 hfunc) +static int +ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, + const u8 hfunc) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; @@ -2180,8 +2182,9 @@ ice_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) return __ice_get_coalesce(netdev, ec, -1); } -static int ice_get_per_q_coalesce(struct net_device *netdev, u32 q_num, - struct ethtool_coalesce *ec) +static int +ice_get_per_q_coalesce(struct net_device *netdev, u32 q_num, + struct ethtool_coalesce *ec) { return __ice_get_coalesce(netdev, ec, q_num); } @@ -2325,8 +2328,9 @@ ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) return __ice_set_coalesce(netdev, ec, -1); } -static int ice_set_per_q_coalesce(struct net_device *netdev, u32 q_num, - struct ethtool_coalesce *ec) +static int +ice_set_per_q_coalesce(struct net_device *netdev, u32 q_num, + struct ethtool_coalesce *ec) { return __ice_set_coalesce(netdev, ec, q_num); } diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 6bf5cc064270..af6f32358363 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -106,6 +106,16 @@ #define VPGEN_VFRTRIG_VFSWR_M BIT(0) #define PFHMC_ERRORDATA 0x00520500 #define PFHMC_ERRORINFO 0x00520400 +#define GLINT_CTL 0x0016CC54 
+#define GLINT_CTL_DIS_AUTOMASK_M BIT(0) +#define GLINT_CTL_ITR_GRAN_200_S 16 +#define GLINT_CTL_ITR_GRAN_200_M ICE_M(0xF, 16) +#define GLINT_CTL_ITR_GRAN_100_S 20 +#define GLINT_CTL_ITR_GRAN_100_M ICE_M(0xF, 20) +#define GLINT_CTL_ITR_GRAN_50_S 24 +#define GLINT_CTL_ITR_GRAN_50_M ICE_M(0xF, 24) +#define GLINT_CTL_ITR_GRAN_25_S 28 +#define GLINT_CTL_ITR_GRAN_25_M ICE_M(0xF, 28) #define GLINT_DYN_CTL(_INT) (0x00160000 + ((_INT) * 4)) #define GLINT_DYN_CTL_INTENA_M BIT(0) #define GLINT_DYN_CTL_CLEARPBA_M BIT(1) @@ -168,6 +178,8 @@ #define VPINT_ALLOC_PCI_LAST_S 12 #define VPINT_ALLOC_PCI_LAST_M ICE_M(0x7FF, 12) #define VPINT_ALLOC_PCI_VALID_M BIT(31) +#define VPINT_MBX_CTL(_VSI) (0x0016A000 + ((_VSI) * 4)) +#define VPINT_MBX_CTL_CAUSE_ENA_M BIT(30) #define GLLAN_RCTL_0 0x002941F8 #define QRX_CONTEXT(_i, _QRX) (0x00280000 + ((_i) * 8192 + (_QRX) * 4)) #define QRX_CTRL(_QRX) (0x00120000 + ((_QRX) * 4)) diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index ef4c79b5aa32..a8c3fe87d7aa 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -208,23 +208,23 @@ enum ice_flex_rx_mdid { ICE_RX_MDID_HASH_HIGH, }; -/* Rx Flag64 packet flag bits */ -enum ice_rx_flg64_bits { - ICE_RXFLG_PKT_DSI = 0, - ICE_RXFLG_EVLAN_x8100 = 15, - ICE_RXFLG_EVLAN_x9100, - ICE_RXFLG_VLAN_x8100, - ICE_RXFLG_TNL_MAC = 22, - ICE_RXFLG_TNL_VLAN, - ICE_RXFLG_PKT_FRG, - ICE_RXFLG_FIN = 32, - ICE_RXFLG_SYN, - ICE_RXFLG_RST, - ICE_RXFLG_TNL0 = 38, - ICE_RXFLG_TNL1, - ICE_RXFLG_TNL2, - ICE_RXFLG_UDP_GRE, - ICE_RXFLG_RSVD = 63 +/* RX/TX Flag64 packet flag bits */ +enum ice_flg64_bits { + ICE_FLG_PKT_DSI = 0, + ICE_FLG_EVLAN_x8100 = 15, + ICE_FLG_EVLAN_x9100, + ICE_FLG_VLAN_x8100, + ICE_FLG_TNL_MAC = 22, + ICE_FLG_TNL_VLAN, + ICE_FLG_PKT_FRG, + ICE_FLG_FIN = 32, + ICE_FLG_SYN, + ICE_FLG_RST, + ICE_FLG_TNL0 = 38, + ICE_FLG_TNL1, + ICE_FLG_TNL2, + ICE_FLG_UDP_GRE, + ICE_FLG_RSVD = 63 }; /* 
for ice_32byte_rx_flex_desc.ptype_flexi_flags0 member */ @@ -342,12 +342,12 @@ enum ice_tx_desc_cmd_bits { ICE_TX_DESC_CMD_EOP = 0x0001, ICE_TX_DESC_CMD_RS = 0x0002, ICE_TX_DESC_CMD_IL2TAG1 = 0x0008, - ICE_TX_DESC_CMD_IIPT_IPV6 = 0x0020, /* 2 BITS */ - ICE_TX_DESC_CMD_IIPT_IPV4 = 0x0040, /* 2 BITS */ - ICE_TX_DESC_CMD_IIPT_IPV4_CSUM = 0x0060, /* 2 BITS */ - ICE_TX_DESC_CMD_L4T_EOFT_TCP = 0x0100, /* 2 BITS */ - ICE_TX_DESC_CMD_L4T_EOFT_SCTP = 0x0200, /* 2 BITS */ - ICE_TX_DESC_CMD_L4T_EOFT_UDP = 0x0300, /* 2 BITS */ + ICE_TX_DESC_CMD_IIPT_IPV6 = 0x0020, + ICE_TX_DESC_CMD_IIPT_IPV4 = 0x0040, + ICE_TX_DESC_CMD_IIPT_IPV4_CSUM = 0x0060, + ICE_TX_DESC_CMD_L4T_EOFT_TCP = 0x0100, + ICE_TX_DESC_CMD_L4T_EOFT_SCTP = 0x0200, + ICE_TX_DESC_CMD_L4T_EOFT_UDP = 0x0300, }; #define ICE_TXD_QW1_OFFSET_S 16 diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index fa61203bee26..45e361f72057 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -175,17 +175,14 @@ static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena) int i; for (i = 0; i < ICE_Q_WAIT_MAX_RETRY; i++) { - u32 rx_reg = rd32(&pf->hw, QRX_CTRL(pf_q)); - - if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M)) - break; + if (ena == !!(rd32(&pf->hw, QRX_CTRL(pf_q)) & + QRX_CTRL_QENA_STAT_M)) + return 0; usleep_range(20, 40); } - if (i >= ICE_Q_WAIT_MAX_RETRY) - return -ETIMEDOUT; - return 0; + return -ETIMEDOUT; } /** @@ -279,25 +276,50 @@ err_txrings: } /** - * ice_vsi_set_num_qs - Set num queues, descriptors and vectors for a VSI + * ice_vsi_set_num_desc - Set number of descriptors for queues on this VSI * @vsi: the VSI being configured + */ +static void ice_vsi_set_num_desc(struct ice_vsi *vsi) +{ + switch (vsi->type) { + case ICE_VSI_PF: + vsi->num_rx_desc = ICE_DFLT_NUM_RX_DESC; + vsi->num_tx_desc = ICE_DFLT_NUM_TX_DESC; + break; + default: + dev_dbg(&vsi->back->pdev->dev, + "Not setting number of Tx/Rx descriptors for VSI 
type %d\n", + vsi->type); + break; + } +} + +/** + * ice_vsi_set_num_qs - Set number of queues, descriptors and vectors for a VSI + * @vsi: the VSI being configured + * @vf_id: Id of the VF being configured * * Return 0 on success and a negative value on error */ -static void ice_vsi_set_num_qs(struct ice_vsi *vsi) +static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) { struct ice_pf *pf = vsi->back; + struct ice_vf *vf = NULL; + + if (vsi->type == ICE_VSI_VF) + vsi->vf_id = vf_id; + switch (vsi->type) { case ICE_VSI_PF: vsi->alloc_txq = pf->num_lan_tx; vsi->alloc_rxq = pf->num_lan_rx; - vsi->num_desc = ALIGN(ICE_DFLT_NUM_DESC, ICE_REQ_DESC_MULTIPLE); vsi->num_q_vectors = max_t(int, pf->num_lan_rx, pf->num_lan_tx); break; case ICE_VSI_VF: - vsi->alloc_txq = pf->num_vf_qps; - vsi->alloc_rxq = pf->num_vf_qps; + vf = &pf->vf[vsi->vf_id]; + vsi->alloc_txq = vf->num_vf_qs; + vsi->alloc_rxq = vf->num_vf_qs; /* pf->num_vf_msix includes (VF miscellaneous vector + * data queue interrupts). Since vsi->num_q_vectors is number * of queues vectors, subtract 1 from the original vector @@ -310,6 +332,8 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi) vsi->type); break; } + + ice_vsi_set_num_desc(vsi); } /** @@ -455,10 +479,12 @@ static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data) * ice_vsi_alloc - Allocates the next available struct VSI in the PF * @pf: board private structure * @type: type of VSI + * @vf_id: Id of the VF being configured * * returns a pointer to a VSI on success, NULL on failure. 
*/ -static struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type) +static struct ice_vsi * +ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id) { struct ice_vsi *vsi = NULL; @@ -484,7 +510,10 @@ static struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type) vsi->idx = pf->next_vsi; vsi->work_lmt = ICE_DFLT_IRQ_WORK; - ice_vsi_set_num_qs(vsi); + if (type == ICE_VSI_VF) + ice_vsi_set_num_qs(vsi, vf_id); + else + ice_vsi_set_num_qs(vsi, ICE_INVAL_VFID); switch (vsi->type) { case ICE_VSI_PF: @@ -579,11 +608,10 @@ err_scatter: /** * __ice_vsi_get_qs - helper function for assigning queues from PF to VSI - * @qs_cfg: gathered variables needed for PF->VSI queues assignment + * @qs_cfg: gathered variables needed for pf->vsi queues assignment * - * This is an internal function for assigning queues from the PF to VSI and - * initially tries to find contiguous space. If it is not successful to find - * contiguous space, then it tries with the scatter approach. + * This function first tries to find contiguous space. If it is not successful, + * it tries with the scatter approach. 
* * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap */ @@ -827,7 +855,7 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) /* find the (rounded up) power-of-2 of qcount */ pow = order_base_2(qcount_rx); - for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { + ice_for_each_traffic_class(i) { if (!(vsi->tc_cfg.ena_tc & BIT(i))) { /* TC is not enabled */ vsi->tc_cfg.tc_info[i].qoffset = 0; @@ -852,7 +880,18 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) tx_count += tx_numq_tc; ctxt->info.tc_mapping[i] = cpu_to_le16(qmap); } - vsi->num_rxq = offset; + + /* if offset is non-zero, means it is calculated correctly based on + * enabled TCs for a given VSI otherwise qcount_rx will always + * be correct and non-zero because it is based off - VSI's + * allocated Rx queues which is at least 1 (hence qcount_tx will be + * at least 1) + */ + if (offset) + vsi->num_rxq = offset; + else + vsi->num_rxq = qcount_rx; + vsi->num_txq = tx_count; if (vsi->type == ICE_VSI_VF && vsi->num_txq != vsi->num_rxq) { @@ -923,6 +962,7 @@ static int ice_vsi_init(struct ice_vsi *vsi) if (!ctxt) return -ENOMEM; + ctxt->info = vsi->info; switch (vsi->type) { case ICE_VSI_PF: ctxt->flags = ICE_AQ_VSI_TYPE_PF; @@ -948,6 +988,14 @@ static int ice_vsi_init(struct ice_vsi *vsi) ctxt->info.sw_id = vsi->port_info->sw_id; ice_vsi_setup_q_map(vsi, ctxt); + /* Enable MAC Antispoof with new VSI being initialized or updated */ + if (vsi->type == ICE_VSI_VF && pf->vf[vsi->vf_id].spoofchk) { + ctxt->info.valid_sections |= + cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID); + ctxt->info.sec_flags |= + ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF; + } + ret = ice_add_vsi(hw, vsi->idx, ctxt, NULL); if (ret) { dev_err(&pf->pdev->dev, @@ -1215,7 +1263,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->ring_active = false; ring->vsi = vsi; ring->dev = &pf->pdev->dev; - ring->count = vsi->num_desc; + ring->count = vsi->num_tx_desc; 
vsi->tx_rings[i] = ring; } @@ -1234,7 +1282,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->vsi = vsi; ring->netdev = vsi->netdev; ring->dev = &pf->pdev->dev; - ring->count = vsi->num_desc; + ring->count = vsi->num_rx_desc; vsi->rx_rings[i] = ring; } @@ -1640,7 +1688,7 @@ ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, int offset) num_q_grps = 1; /* set up and configure the Tx queues for each enabled TC */ - for (tc = 0; tc < ICE_MAX_TRAFFIC_CLASS; tc++) { + ice_for_each_traffic_class(tc) { if (!(vsi->tc_cfg.ena_tc & BIT(tc))) break; @@ -1717,6 +1765,37 @@ static u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran) } /** + * ice_cfg_itr_gran - set the ITR granularity to 2 usecs if not already set + * @hw: board specific structure + */ +static void ice_cfg_itr_gran(struct ice_hw *hw) +{ + u32 regval = rd32(hw, GLINT_CTL); + + /* no need to update global register if ITR gran is already set */ + if (!(regval & GLINT_CTL_DIS_AUTOMASK_M) && + (((regval & GLINT_CTL_ITR_GRAN_200_M) >> + GLINT_CTL_ITR_GRAN_200_S) == ICE_ITR_GRAN_US) && + (((regval & GLINT_CTL_ITR_GRAN_100_M) >> + GLINT_CTL_ITR_GRAN_100_S) == ICE_ITR_GRAN_US) && + (((regval & GLINT_CTL_ITR_GRAN_50_M) >> + GLINT_CTL_ITR_GRAN_50_S) == ICE_ITR_GRAN_US) && + (((regval & GLINT_CTL_ITR_GRAN_25_M) >> + GLINT_CTL_ITR_GRAN_25_S) == ICE_ITR_GRAN_US)) + return; + + regval = ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_200_S) & + GLINT_CTL_ITR_GRAN_200_M) | + ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_100_S) & + GLINT_CTL_ITR_GRAN_100_M) | + ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_50_S) & + GLINT_CTL_ITR_GRAN_50_M) | + ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_25_S) & + GLINT_CTL_ITR_GRAN_25_M); + wr32(hw, GLINT_CTL, regval); +} + +/** * ice_cfg_itr - configure the initial interrupt throttle values * @hw: pointer to the HW structure * @q_vector: interrupt vector that's being configured @@ -1728,6 +1807,8 @@ static u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran) static void ice_cfg_itr(struct ice_hw *hw, struct 
ice_q_vector *q_vector, u16 vector) { + ice_cfg_itr_gran(hw); + if (q_vector->num_ring_rx) { struct ice_ring_container *rc = &q_vector->rx; @@ -1738,7 +1819,6 @@ ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector, u16 vector) rc->target_itr = ITR_TO_REG(rc->itr_setting); rc->next_update = jiffies + 1; rc->current_itr = rc->target_itr; - rc->latency_range = ICE_LOW_LATENCY; wr32(hw, GLINT_ITR(rc->itr_idx, vector), ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S); } @@ -1753,7 +1833,6 @@ ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector, u16 vector) rc->target_itr = ITR_TO_REG(rc->itr_setting); rc->next_update = jiffies + 1; rc->current_itr = rc->target_itr; - rc->latency_range = ICE_LOW_LATENCY; wr32(hw, GLINT_ITR(rc->itr_idx, vector), ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S); } @@ -2025,8 +2104,9 @@ err_alloc_q_ids: * @rst_src: reset source * @rel_vmvf_num: Relative id of VF/VM */ -int ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, - enum ice_disq_rst_src rst_src, u16 rel_vmvf_num) +int +ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, + u16 rel_vmvf_num) { return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings, 0); @@ -2036,10 +2116,11 @@ int ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, * ice_cfg_vlan_pruning - enable or disable VLAN pruning on the VSI * @vsi: VSI to enable or disable VLAN pruning on * @ena: set to true to enable VLAN pruning and false to disable it + * @vlan_promisc: enable valid security flags if not in VLAN promiscuous mode * * returns 0 if VSI is updated, negative otherwise */ -int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena) +int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc) { struct ice_vsi_ctx *ctxt; struct device *dev; @@ -2067,8 +2148,10 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena) ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; } - ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID 
| - ICE_AQ_VSI_PROP_SW_VALID); + if (!vlan_promisc) + ctxt->info.valid_sections = + cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID | + ICE_AQ_VSI_PROP_SW_VALID); status = ice_update_vsi(&vsi->back->hw, vsi->idx, ctxt, NULL); if (status) { @@ -2112,7 +2195,11 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, struct ice_vsi *vsi; int ret, i; - vsi = ice_vsi_alloc(pf, type); + if (type == ICE_VSI_VF) + vsi = ice_vsi_alloc(pf, type, vf_id); + else + vsi = ice_vsi_alloc(pf, type, ICE_INVAL_VFID); + if (!vsi) { dev_err(dev, "could not allocate VSI\n"); return NULL; @@ -2596,6 +2683,7 @@ int ice_vsi_release(struct ice_vsi *vsi) int ice_vsi_rebuild(struct ice_vsi *vsi) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; + struct ice_vf *vf = NULL; struct ice_pf *pf; int ret, i; @@ -2603,16 +2691,38 @@ int ice_vsi_rebuild(struct ice_vsi *vsi) return -EINVAL; pf = vsi->back; + if (vsi->type == ICE_VSI_VF) + vf = &pf->vf[vsi->vf_id]; + ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); ice_vsi_free_q_vectors(vsi); - ice_free_res(vsi->back->sw_irq_tracker, vsi->sw_base_vector, vsi->idx); - ice_free_res(vsi->back->hw_irq_tracker, vsi->hw_base_vector, vsi->idx); - vsi->sw_base_vector = 0; + + if (vsi->type != ICE_VSI_VF) { + /* reclaim SW interrupts back to the common pool */ + ice_free_res(pf->sw_irq_tracker, vsi->sw_base_vector, vsi->idx); + pf->num_avail_sw_msix += vsi->num_q_vectors; + vsi->sw_base_vector = 0; + /* reclaim HW interrupts back to the common pool */ + ice_free_res(pf->hw_irq_tracker, vsi->hw_base_vector, + vsi->idx); + pf->num_avail_hw_msix += vsi->num_q_vectors; + } else { + /* Reclaim VF resources back to the common pool for reset and + * and rebuild, with vector reassignment + */ + ice_free_res(pf->hw_irq_tracker, vf->first_vector_idx, + vsi->idx); + pf->num_avail_hw_msix += pf->num_vf_msix; + } vsi->hw_base_vector = 0; + ice_vsi_clear_rings(vsi); ice_vsi_free_arrays(vsi, false); ice_dev_onetime_setup(&vsi->back->hw); - ice_vsi_set_num_qs(vsi); + if (vsi->type 
== ICE_VSI_VF) + ice_vsi_set_num_qs(vsi, vf->vf_id); + else + ice_vsi_set_num_qs(vsi, ICE_INVAL_VFID); ice_vsi_set_tc_cfg(vsi); /* Initialize VSI struct elements and create VSI in FW */ diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 7988a53729a9..519ef59e9e43 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -35,7 +35,7 @@ int ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, u16 rel_vmvf_num); -int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena); +int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc); void ice_vsi_delete(struct ice_vsi *vsi); @@ -70,8 +70,6 @@ void ice_vsi_free_rx_rings(struct ice_vsi *vsi); void ice_vsi_free_tx_rings(struct ice_vsi *vsi); -int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc); - int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena); #endif /* !_ICE_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 47cc3f905b7f..f7073e046979 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -168,6 +168,39 @@ static bool ice_vsi_fltr_changed(struct ice_vsi *vsi) } /** + * ice_cfg_promisc - Enable or disable promiscuous mode for a given PF + * @vsi: the VSI being configured + * @promisc_m: mask of promiscuous config bits + * @set_promisc: enable or disable promisc flag request + * + */ +static int ice_cfg_promisc(struct ice_vsi *vsi, u8 promisc_m, bool set_promisc) +{ + struct ice_hw *hw = &vsi->back->hw; + enum ice_status status = 0; + + if (vsi->type != ICE_VSI_PF) + return 0; + + if (vsi->vlan_ena) { + status = ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_m, + set_promisc); + } else { + if (set_promisc) + status = ice_set_vsi_promisc(hw, vsi->idx, promisc_m, + 0); + else + status = ice_clear_vsi_promisc(hw, vsi->idx, promisc_m, + 0); + } + + if (status) + return 
-EIO; + + return 0; +} + +/** * ice_vsi_sync_fltr - Update the VSI filter list to the HW * @vsi: ptr to the VSI * @@ -182,6 +215,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) struct ice_hw *hw = &pf->hw; enum ice_status status = 0; u32 changed_flags = 0; + u8 promisc_m; int err = 0; if (!vsi->netdev) @@ -226,7 +260,11 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) /* Add mac addresses in the sync list */ status = ice_add_mac(hw, &vsi->tmp_sync_list); ice_free_fltr_list(dev, &vsi->tmp_sync_list); - if (status) { + /* If filter is added successfully or already exists, do not go into + * 'if' condition and report it as error. Instead continue processing + * rest of the function. + */ + if (status && status != ICE_ERR_ALREADY_EXISTS) { netdev_err(netdev, "Failed to add MAC filters\n"); /* If there is no more space for new umac filters, vsi * should go into promiscuous mode. There should be some @@ -245,8 +283,35 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) } } /* check for changes in promiscuous modes */ - if (changed_flags & IFF_ALLMULTI) - netdev_warn(netdev, "Unsupported configuration\n"); + if (changed_flags & IFF_ALLMULTI) { + if (vsi->current_netdev_flags & IFF_ALLMULTI) { + if (vsi->vlan_ena) + promisc_m = ICE_MCAST_VLAN_PROMISC_BITS; + else + promisc_m = ICE_MCAST_PROMISC_BITS; + + err = ice_cfg_promisc(vsi, promisc_m, true); + if (err) { + netdev_err(netdev, "Error setting Multicast promiscuous mode on VSI %i\n", + vsi->vsi_num); + vsi->current_netdev_flags &= ~IFF_ALLMULTI; + goto out_promisc; + } + } else if (!(vsi->current_netdev_flags & IFF_ALLMULTI)) { + if (vsi->vlan_ena) + promisc_m = ICE_MCAST_VLAN_PROMISC_BITS; + else + promisc_m = ICE_MCAST_PROMISC_BITS; + + err = ice_cfg_promisc(vsi, promisc_m, false); + if (err) { + netdev_err(netdev, "Error clearing Multicast promiscuous mode on VSI %i\n", + vsi->vsi_num); + vsi->current_netdev_flags |= IFF_ALLMULTI; + goto out_promisc; + } + } + } if (((changed_flags & IFF_PROMISC) || 
promisc_forced_on) || test_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags)) { @@ -322,7 +387,7 @@ static void ice_sync_fltr_subtask(struct ice_pf *pf) clear_bit(ICE_FLAG_FLTR_SYNC, pf->flags); - for (v = 0; v < pf->num_alloc_vsi; v++) + ice_for_each_vsi(pf, v) if (pf->vsi[v] && ice_vsi_fltr_changed(pf->vsi[v]) && ice_vsi_sync_fltr(pf->vsi[v])) { /* come back and try again later */ @@ -342,6 +407,10 @@ ice_prepare_for_reset(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; + /* already prepared for reset */ + if (test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) + return; + /* Notify VFs of impending reset */ if (ice_check_sq_alive(hw, &hw->mailboxq)) ice_vc_notify_reset(pf); @@ -394,6 +463,7 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) ice_rebuild(pf); clear_bit(__ICE_PREPARED_FOR_RESET, pf->state); clear_bit(__ICE_PFR_REQ, pf->state); + ice_reset_all_vfs(pf, true); } } @@ -416,10 +486,15 @@ static void ice_reset_subtask(struct ice_pf *pf) * for the reset now), poll for reset done, rebuild and return. 
*/ if (test_bit(__ICE_RESET_OICR_RECV, pf->state)) { - clear_bit(__ICE_GLOBR_RECV, pf->state); - clear_bit(__ICE_CORER_RECV, pf->state); - if (!test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) - ice_prepare_for_reset(pf); + /* Perform the largest reset requested */ + if (test_and_clear_bit(__ICE_CORER_RECV, pf->state)) + reset_type = ICE_RESET_CORER; + if (test_and_clear_bit(__ICE_GLOBR_RECV, pf->state)) + reset_type = ICE_RESET_GLOBR; + /* return if no valid reset type requested */ + if (reset_type == ICE_RESET_INVAL) + return; + ice_prepare_for_reset(pf); /* make sure we are ready to rebuild */ if (ice_check_reset(&pf->hw)) { @@ -436,6 +511,7 @@ static void ice_reset_subtask(struct ice_pf *pf) clear_bit(__ICE_PFR_REQ, pf->state); clear_bit(__ICE_CORER_REQ, pf->state); clear_bit(__ICE_GLOBR_REQ, pf->state); + ice_reset_all_vfs(pf, true); } return; @@ -519,6 +595,9 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) case ICE_FC_RX_PAUSE: fc = "RX"; break; + case ICE_FC_NONE: + fc = "None"; + break; default: fc = "Unknown"; break; @@ -635,19 +714,70 @@ static void ice_watchdog_subtask(struct ice_pf *pf) pf->serv_tmr_prev = jiffies; - if (ice_link_event(pf, pf->hw.port_info)) - dev_dbg(&pf->pdev->dev, "ice_link_event failed\n"); - /* Update the stats for active netdevs so the network stack * can look at updated numbers whenever it cares to */ ice_update_pf_stats(pf); - for (i = 0; i < pf->num_alloc_vsi; i++) + ice_for_each_vsi(pf, i) if (pf->vsi[i] && pf->vsi[i]->netdev) ice_update_vsi_stats(pf->vsi[i]); } /** + * ice_init_link_events - enable/initialize link events + * @pi: pointer to the port_info instance + * + * Returns -EIO on failure, 0 on success + */ +static int ice_init_link_events(struct ice_port_info *pi) +{ + u16 mask; + + mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA | + ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL)); + + if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) { + dev_dbg(ice_hw_to_dev(pi->hw), + "Failed to set link 
event mask for port %d\n", + pi->lport); + return -EIO; + } + + if (ice_aq_get_link_info(pi, true, NULL, NULL)) { + dev_dbg(ice_hw_to_dev(pi->hw), + "Failed to enable link events for port %d\n", + pi->lport); + return -EIO; + } + + return 0; +} + +/** + * ice_handle_link_event - handle link event via ARQ + * @pf: pf that the link event is associated with + * + * Return -EINVAL if port_info is null + * Return status on success + */ +static int ice_handle_link_event(struct ice_pf *pf) +{ + struct ice_port_info *port_info; + int status; + + port_info = pf->hw.port_info; + if (!port_info) + return -EINVAL; + + status = ice_link_event(pf, port_info); + if (status) + dev_dbg(&pf->pdev->dev, + "Could not process link event, error %d\n", status); + + return status; +} + +/** * __ice_clean_ctrlq - helper function to clean controlq rings * @pf: ptr to struct ice_pf * @q_type: specific Control queue type @@ -750,6 +880,11 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) opcode = le16_to_cpu(event.desc.opcode); switch (opcode) { + case ice_aqc_opc_get_link_status: + if (ice_handle_link_event(pf)) + dev_err(&pf->pdev->dev, + "Could not handle link event\n"); + break; case ice_mbx_opc_send_msg_to_pf: ice_vc_process_vf_msg(pf, &event); break; @@ -877,6 +1012,18 @@ static void ice_service_task_stop(struct ice_pf *pf) } /** + * ice_service_task_restart - restart service task and schedule works + * @pf: board private structure + * + * This function is needed for suspend and resume works (e.g WoL scenario) + */ +static void ice_service_task_restart(struct ice_pf *pf) +{ + clear_bit(__ICE_SERVICE_DIS, pf->state); + ice_service_task_schedule(pf); +} + +/** * ice_service_timer - timer callback to schedule service task * @t: pointer to timer_list */ @@ -1111,8 +1258,9 @@ static void ice_set_ctrlq_len(struct ice_hw *hw) * This is a callback function used by the irq_set_affinity_notifier function * so that we may register to receive changes to the irq affinity 
masks. */ -static void ice_irq_affinity_notify(struct irq_affinity_notify *notify, - const cpumask_t *mask) +static void +ice_irq_affinity_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) { struct ice_q_vector *q_vector = container_of(notify, struct ice_q_vector, affinity_notify); @@ -1184,10 +1332,9 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) /* skip this unused q_vector */ continue; } - err = devm_request_irq(&pf->pdev->dev, - pf->msix_entries[base + vector].vector, - vsi->irq_handler, 0, q_vector->name, - q_vector); + err = devm_request_irq(&pf->pdev->dev, irq_num, + vsi->irq_handler, 0, + q_vector->name, q_vector); if (err) { netdev_err(vsi->netdev, "MSIX request_irq failed, error: %d\n", err); @@ -1656,11 +1803,13 @@ ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) * * net_device_ops implementation for adding vlan ids */ -static int ice_vlan_rx_add_vid(struct net_device *netdev, - __always_unused __be16 proto, u16 vid) +static int +ice_vlan_rx_add_vid(struct net_device *netdev, __always_unused __be16 proto, + u16 vid) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; + int ret; if (vid >= VLAN_N_VID) { netdev_err(netdev, "VLAN id requested %d is out of range %d\n", @@ -1673,8 +1822,7 @@ static int ice_vlan_rx_add_vid(struct net_device *netdev, /* Enable VLAN pruning when VLAN 0 is added */ if (unlikely(!vid)) { - int ret = ice_cfg_vlan_pruning(vsi, true); - + ret = ice_cfg_vlan_pruning(vsi, true, false); if (ret) return ret; } @@ -1683,7 +1831,13 @@ static int ice_vlan_rx_add_vid(struct net_device *netdev, * needed to continue allowing all untagged packets since VLAN prune * list is applied to all packets by the switch */ - return ice_vsi_add_vlan(vsi, vid); + ret = ice_vsi_add_vlan(vsi, vid); + if (!ret) { + vsi->vlan_ena = true; + set_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); + } + + return ret; } /** @@ -1694,12 +1848,13 @@ static int 
ice_vlan_rx_add_vid(struct net_device *netdev, * * net_device_ops implementation for removing vlan ids */ -static int ice_vlan_rx_kill_vid(struct net_device *netdev, - __always_unused __be16 proto, u16 vid) +static int +ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto, + u16 vid) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; - int status; + int ret; if (vsi->info.pvid) return -EINVAL; @@ -1707,15 +1862,17 @@ static int ice_vlan_rx_kill_vid(struct net_device *netdev, /* Make sure ice_vsi_kill_vlan is successful before updating VLAN * information */ - status = ice_vsi_kill_vlan(vsi, vid); - if (status) - return status; + ret = ice_vsi_kill_vlan(vsi, vid); + if (ret) + return ret; /* Disable VLAN pruning when VLAN 0 is removed */ if (unlikely(!vid)) - status = ice_cfg_vlan_pruning(vsi, false); + ret = ice_cfg_vlan_pruning(vsi, false, false); - return status; + vsi->vlan_ena = false; + set_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); + return ret; } /** @@ -2033,23 +2190,6 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf) } /** - * ice_verify_itr_gran - verify driver's assumption of ITR granularity - * @pf: pointer to the PF structure - * - * There is no error returned here because the driver will be able to handle a - * different ITR granularity, but interrupt moderation will not be accurate if - * the driver's assumptions are not verified. This assumption is made so we can - * use constants in the hot path instead of accessing structure members. - */ -static void ice_verify_itr_gran(struct ice_pf *pf) -{ - if (pf->hw.itr_gran != (ICE_ITR_GRAN_S << 1)) - dev_warn(&pf->pdev->dev, - "%d ITR granularity assumption is invalid, actual ITR granularity is %d. 
Interrupt moderation will be inaccurate!\n", - (ICE_ITR_GRAN_S << 1), pf->hw.itr_gran); -} - -/** * ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines * @pf: pointer to the PF structure * @@ -2072,9 +2212,10 @@ static void ice_verify_cacheline_size(struct ice_pf *pf) * * Returns 0 on success, negative on failure */ -static int ice_probe(struct pci_dev *pdev, - const struct pci_device_id __always_unused *ent) +static int +ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) { + struct device *dev = &pdev->dev; struct ice_pf *pf; struct ice_hw *hw; int err; @@ -2086,20 +2227,20 @@ static int ice_probe(struct pci_dev *pdev, err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), pci_name(pdev)); if (err) { - dev_err(&pdev->dev, "BAR0 I/O map error %d\n", err); + dev_err(dev, "BAR0 I/O map error %d\n", err); return err; } - pf = devm_kzalloc(&pdev->dev, sizeof(*pf), GFP_KERNEL); + pf = devm_kzalloc(dev, sizeof(*pf), GFP_KERNEL); if (!pf) return -ENOMEM; /* set up for high or low dma */ - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); if (err) - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); if (err) { - dev_err(&pdev->dev, "DMA configuration failed: 0x%x\n", err); + dev_err(dev, "DMA configuration failed: 0x%x\n", err); return err; } @@ -2133,12 +2274,12 @@ static int ice_probe(struct pci_dev *pdev, err = ice_init_hw(hw); if (err) { - dev_err(&pdev->dev, "ice_init_hw failed: %d\n", err); + dev_err(dev, "ice_init_hw failed: %d\n", err); err = -EIO; goto err_exit_unroll; } - dev_info(&pdev->dev, "firmware %d.%d.%05d api %d.%d\n", + dev_info(dev, "firmware %d.%d.%05d api %d.%d\n", hw->fw_maj_ver, hw->fw_min_ver, hw->fw_build, hw->api_maj_ver, hw->api_min_ver); @@ -2152,8 +2293,8 @@ static int ice_probe(struct pci_dev *pdev, goto err_init_pf_unroll; } - pf->vsi = 
devm_kcalloc(&pdev->dev, pf->num_alloc_vsi, - sizeof(*pf->vsi), GFP_KERNEL); + pf->vsi = devm_kcalloc(dev, pf->num_alloc_vsi, sizeof(*pf->vsi), + GFP_KERNEL); if (!pf->vsi) { err = -ENOMEM; goto err_init_pf_unroll; @@ -2161,8 +2302,7 @@ static int ice_probe(struct pci_dev *pdev, err = ice_init_interrupt_scheme(pf); if (err) { - dev_err(&pdev->dev, - "ice_init_interrupt_scheme failed: %d\n", err); + dev_err(dev, "ice_init_interrupt_scheme failed: %d\n", err); err = -EIO; goto err_init_interrupt_unroll; } @@ -2178,15 +2318,13 @@ static int ice_probe(struct pci_dev *pdev, if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { err = ice_req_irq_msix_misc(pf); if (err) { - dev_err(&pdev->dev, - "setup of misc vector failed: %d\n", err); + dev_err(dev, "setup of misc vector failed: %d\n", err); goto err_init_interrupt_unroll; } } /* create switch struct for the switch element created by FW on boot */ - pf->first_sw = devm_kzalloc(&pdev->dev, sizeof(*pf->first_sw), - GFP_KERNEL); + pf->first_sw = devm_kzalloc(dev, sizeof(*pf->first_sw), GFP_KERNEL); if (!pf->first_sw) { err = -ENOMEM; goto err_msix_misc_unroll; @@ -2204,8 +2342,7 @@ static int ice_probe(struct pci_dev *pdev, err = ice_setup_pf_sw(pf); if (err) { - dev_err(&pdev->dev, - "probe failed due to setup pf switch:%d\n", err); + dev_err(dev, "probe failed due to setup pf switch:%d\n", err); goto err_alloc_sw_unroll; } @@ -2214,8 +2351,13 @@ static int ice_probe(struct pci_dev *pdev, /* since everything is good, start the service timer */ mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period)); + err = ice_init_link_events(pf->hw.port_info); + if (err) { + dev_err(dev, "ice_init_link_events failed: %d\n", err); + goto err_alloc_sw_unroll; + } + ice_verify_cacheline_size(pf); - ice_verify_itr_gran(pf); return 0; @@ -2227,7 +2369,7 @@ err_msix_misc_unroll: ice_free_irq_msix_misc(pf); err_init_interrupt_unroll: ice_clear_interrupt_scheme(pf); - devm_kfree(&pdev->dev, pf->vsi); + devm_kfree(dev, pf->vsi); 
err_init_pf_unroll: ice_deinit_pf(pf); ice_deinit_hw(hw); @@ -2272,6 +2414,136 @@ static void ice_remove(struct pci_dev *pdev) pci_disable_pcie_error_reporting(pdev); } +/** + * ice_pci_err_detected - warning that PCI error has been detected + * @pdev: PCI device information struct + * @err: the type of PCI error + * + * Called to warn that something happened on the PCI bus and the error handling + * is in progress. Allows the driver to gracefully prepare/handle PCI errors. + */ +static pci_ers_result_t +ice_pci_err_detected(struct pci_dev *pdev, enum pci_channel_state err) +{ + struct ice_pf *pf = pci_get_drvdata(pdev); + + if (!pf) { + dev_err(&pdev->dev, "%s: unrecoverable device error %d\n", + __func__, err); + return PCI_ERS_RESULT_DISCONNECT; + } + + if (!test_bit(__ICE_SUSPENDED, pf->state)) { + ice_service_task_stop(pf); + + if (!test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) { + set_bit(__ICE_PFR_REQ, pf->state); + ice_prepare_for_reset(pf); + } + } + + return PCI_ERS_RESULT_NEED_RESET; +} + +/** + * ice_pci_err_slot_reset - a PCI slot reset has just happened + * @pdev: PCI device information struct + * + * Called to determine if the driver can recover from the PCI slot reset by + * using a register read to determine if the device is recoverable. 
+ */ +static pci_ers_result_t ice_pci_err_slot_reset(struct pci_dev *pdev) +{ + struct ice_pf *pf = pci_get_drvdata(pdev); + pci_ers_result_t result; + int err; + u32 reg; + + err = pci_enable_device_mem(pdev); + if (err) { + dev_err(&pdev->dev, + "Cannot re-enable PCI device after reset, error %d\n", + err); + result = PCI_ERS_RESULT_DISCONNECT; + } else { + pci_set_master(pdev); + pci_restore_state(pdev); + pci_save_state(pdev); + pci_wake_from_d3(pdev, false); + + /* Check for life */ + reg = rd32(&pf->hw, GLGEN_RTRIG); + if (!reg) + result = PCI_ERS_RESULT_RECOVERED; + else + result = PCI_ERS_RESULT_DISCONNECT; + } + + err = pci_cleanup_aer_uncorrect_error_status(pdev); + if (err) + dev_dbg(&pdev->dev, + "pci_cleanup_aer_uncorrect_error_status failed, error %d\n", + err); + /* non-fatal, continue */ + + return result; +} + +/** + * ice_pci_err_resume - restart operations after PCI error recovery + * @pdev: PCI device information struct + * + * Called to allow the driver to bring things back up after PCI error and/or + * reset recovery have finished + */ +static void ice_pci_err_resume(struct pci_dev *pdev) +{ + struct ice_pf *pf = pci_get_drvdata(pdev); + + if (!pf) { + dev_err(&pdev->dev, + "%s failed, device is unrecoverable\n", __func__); + return; + } + + if (test_bit(__ICE_SUSPENDED, pf->state)) { + dev_dbg(&pdev->dev, "%s failed to resume normal operations!\n", + __func__); + return; + } + + ice_do_reset(pf, ICE_RESET_PFR); + ice_service_task_restart(pf); + mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period)); +} + +/** + * ice_pci_err_reset_prepare - prepare device driver for PCI reset + * @pdev: PCI device information struct + */ +static void ice_pci_err_reset_prepare(struct pci_dev *pdev) +{ + struct ice_pf *pf = pci_get_drvdata(pdev); + + if (!test_bit(__ICE_SUSPENDED, pf->state)) { + ice_service_task_stop(pf); + + if (!test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) { + set_bit(__ICE_PFR_REQ, pf->state); + ice_prepare_for_reset(pf); 
+ } + } +} + +/** + * ice_pci_err_reset_done - PCI reset done, device driver reset can begin + * @pdev: PCI device information struct + */ +static void ice_pci_err_reset_done(struct pci_dev *pdev) +{ + ice_pci_err_resume(pdev); +} + /* ice_pci_tbl - PCI Device ID Table * * Wildcard entries (PCI_ANY_ID) should come last @@ -2289,12 +2561,21 @@ static const struct pci_device_id ice_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, ice_pci_tbl); +static const struct pci_error_handlers ice_pci_err_handler = { + .error_detected = ice_pci_err_detected, + .slot_reset = ice_pci_err_slot_reset, + .reset_prepare = ice_pci_err_reset_prepare, + .reset_done = ice_pci_err_reset_done, + .resume = ice_pci_err_resume +}; + static struct pci_driver ice_driver = { .name = KBUILD_MODNAME, .id_table = ice_pci_tbl, .probe = ice_probe, .remove = ice_remove, .sriov_configure = ice_sriov_configure, + .err_handler = &ice_pci_err_handler }; /** @@ -2512,9 +2793,10 @@ ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[], * @addr: the MAC address entry being added * @vid: VLAN id */ -static int ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[], - struct net_device *dev, const unsigned char *addr, - __always_unused u16 vid) +static int +ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr, + __always_unused u16 vid) { int err; @@ -2538,8 +2820,8 @@ static int ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[], * @netdev: ptr to the netdev being adjusted * @features: the feature set that the stack is suggesting */ -static int ice_set_features(struct net_device *netdev, - netdev_features_t features) +static int +ice_set_features(struct net_device *netdev, netdev_features_t features) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; @@ -2666,7 +2948,7 @@ static int ice_up_complete(struct ice_vsi *vsi) ice_service_task_schedule(pf); - return err; + return 0; } 
/** @@ -2693,8 +2975,8 @@ int ice_up(struct ice_vsi *vsi) * This function fetches stats from the ring considering the atomic operations * that needs to be performed to read u64 values in 32 bit machine. */ -static void ice_fetch_u64_stats_per_ring(struct ice_ring *ring, u64 *pkts, - u64 *bytes) +static void +ice_fetch_u64_stats_per_ring(struct ice_ring *ring, u64 *pkts, u64 *bytes) { unsigned int start; *pkts = 0; @@ -3276,7 +3558,7 @@ static void ice_vsi_release_all(struct ice_pf *pf) if (!pf->vsi) return; - for (i = 0; i < pf->num_alloc_vsi; i++) { + ice_for_each_vsi(pf, i) { if (!pf->vsi[i]) continue; @@ -3375,16 +3657,12 @@ static int ice_vsi_rebuild_all(struct ice_pf *pf) int i; /* loop through pf->vsi array and reinit the VSI if found */ - for (i = 0; i < pf->num_alloc_vsi; i++) { + ice_for_each_vsi(pf, i) { int err; if (!pf->vsi[i]) continue; - /* VF VSI rebuild isn't supported yet */ - if (pf->vsi[i]->type == ICE_VSI_VF) - continue; - err = ice_vsi_rebuild(pf->vsi[i]); if (err) { dev_err(&pf->pdev->dev, @@ -3412,7 +3690,7 @@ static int ice_vsi_replay_all(struct ice_pf *pf) int i; /* loop through pf->vsi array and replay the VSI if found */ - for (i = 0; i < pf->num_alloc_vsi; i++) { + ice_for_each_vsi(pf, i) { if (!pf->vsi[i]) continue; @@ -3521,9 +3799,7 @@ static void ice_rebuild(struct ice_pf *pf) goto err_vsi_rebuild; } - ice_reset_all_vfs(pf, true); - - for (i = 0; i < pf->num_alloc_vsi; i++) { + ice_for_each_vsi(pf, i) { bool link_up; if (!pf->vsi[i] || pf->vsi[i]->type != ICE_VSI_PF) diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 56049739a250..e0218f4c8f0b 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -276,7 +276,8 @@ ice_sched_remove_elems(struct ice_hw *hw, struct ice_sched_node *parent, status = ice_aq_delete_sched_elems(hw, 1, buf, buf_size, &num_groups_removed, NULL); if (status || num_groups_removed != 1) - ice_debug(hw, 
ICE_DBG_SCHED, "remove elements failed\n"); + ice_debug(hw, ICE_DBG_SCHED, "remove node failed FW error %d\n", + hw->adminq.sq_last_status); devm_kfree(ice_hw_to_dev(hw), buf); return status; @@ -360,12 +361,8 @@ void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node) node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT && node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF) { u32 teid = le32_to_cpu(node->info.node_teid); - enum ice_status status; - status = ice_sched_remove_elems(hw, node->parent, 1, &teid); - if (status) - ice_debug(hw, ICE_DBG_SCHED, - "remove element failed %d\n", status); + ice_sched_remove_elems(hw, node->parent, 1, &teid); } parent = node->parent; /* root has no parent */ @@ -697,7 +694,8 @@ ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node, status = ice_aq_add_sched_elems(hw, 1, buf, buf_size, &num_groups_added, NULL); if (status || num_groups_added != 1) { - ice_debug(hw, ICE_DBG_SCHED, "add elements failed\n"); + ice_debug(hw, ICE_DBG_SCHED, "add node failed FW Error %d\n", + hw->adminq.sq_last_status); devm_kfree(ice_hw_to_dev(hw), buf); return ICE_ERR_CFG; } @@ -1271,42 +1269,6 @@ ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle, } /** - * ice_sched_rm_vsi_child_nodes - remove VSI child nodes from the tree - * @pi: port information structure - * @vsi_node: pointer to the VSI node - * @num_nodes: pointer to the num nodes that needs to be removed per layer - * @owner: node owner (lan or rdma) - * - * This function removes the VSI child nodes from the tree. It gets called for - * lan and rdma separately. 
- */ -static void -ice_sched_rm_vsi_child_nodes(struct ice_port_info *pi, - struct ice_sched_node *vsi_node, u16 *num_nodes, - u8 owner) -{ - struct ice_sched_node *node, *next; - u8 i, qgl, vsil; - u16 num; - - qgl = ice_sched_get_qgrp_layer(pi->hw); - vsil = ice_sched_get_vsi_layer(pi->hw); - - for (i = qgl; i > vsil; i--) { - num = num_nodes[i]; - node = ice_sched_get_first_node(pi->hw, vsi_node, i); - while (node && num) { - next = node->sibling; - if (node->owner == owner && !node->num_children) { - ice_free_sched_node(pi, node); - num--; - } - node = next; - } - } -} - -/** * ice_sched_calc_vsi_support_nodes - calculate number of VSI support nodes * @hw: pointer to the hw struct * @tc_node: pointer to TC node @@ -1446,7 +1408,6 @@ static enum ice_status ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 new_numqs, u8 owner) { - u16 prev_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 }; u16 new_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 }; struct ice_sched_node *vsi_node; struct ice_sched_node *tc_node; @@ -1454,7 +1415,6 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle, enum ice_status status = 0; struct ice_hw *hw = pi->hw; u16 prev_numqs; - u8 i; tc_node = ice_sched_get_tc_node(pi, tc); if (!tc_node) @@ -1473,36 +1433,25 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle, else return ICE_ERR_PARAM; - /* num queues are not changed */ - if (prev_numqs == new_numqs) + /* num queues are not changed or less than the previous number */ + if (new_numqs <= prev_numqs) return status; - - /* calculate number of nodes based on prev/new number of qs */ - if (prev_numqs) - ice_sched_calc_vsi_child_nodes(hw, prev_numqs, prev_num_nodes); - if (new_numqs) ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes); - - if (prev_numqs > new_numqs) { - for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++) - new_num_nodes[i] = prev_num_nodes[i] - new_num_nodes[i]; - - 
ice_sched_rm_vsi_child_nodes(pi, vsi_node, new_num_nodes, - owner); - } else { - for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++) - new_num_nodes[i] -= prev_num_nodes[i]; - - status = ice_sched_add_vsi_child_nodes(pi, vsi_handle, tc_node, - new_num_nodes, owner); - if (status) - return status; - } - + /* Keep the max number of queue configuration all the time. Update the + * tree only if number of queues > previous number of queues. This may + * leave some extra nodes in the tree if number of queues < previous + * number but that wouldn't harm anything. Removing those extra nodes + * may complicate the code if those nodes are part of SRL or + * individually rate limited. + */ + status = ice_sched_add_vsi_child_nodes(pi, vsi_handle, tc_node, + new_num_nodes, owner); + if (status) + return status; vsi_ctx->sched.max_lanq[tc] = new_numqs; - return status; + return 0; } /** @@ -1527,6 +1476,7 @@ ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs, enum ice_status status = 0; struct ice_hw *hw = pi->hw; + ice_debug(pi->hw, ICE_DBG_SCHED, "add/config VSI %d\n", vsi_handle); tc_node = ice_sched_get_tc_node(pi, tc); if (!tc_node) return ICE_ERR_PARAM; @@ -1646,8 +1596,9 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner) { enum ice_status status = ICE_ERR_PARAM; struct ice_vsi_ctx *vsi_ctx; - u8 i, j = 0; + u8 i; + ice_debug(pi->hw, ICE_DBG_SCHED, "removing VSI %d\n", vsi_handle); if (!ice_is_vsi_valid(pi->hw, vsi_handle)) return status; mutex_lock(&pi->sched_lock); @@ -1655,8 +1606,9 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner) if (!vsi_ctx) goto exit_sched_rm_vsi_cfg; - for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { + ice_for_each_traffic_class(i) { struct ice_sched_node *vsi_node, *tc_node; + u8 j = 0; tc_node = ice_sched_get_tc_node(pi, i); if (!tc_node) diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 09d1c314b68f..7dcd9ddf54f7 
100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -322,8 +322,8 @@ struct ice_vsi_ctx *ice_get_vsi_ctx(struct ice_hw *hw, u16 vsi_handle) * * save the VSI context entry for a given VSI handle */ -static void ice_save_vsi_ctx(struct ice_hw *hw, u16 vsi_handle, - struct ice_vsi_ctx *vsi) +static void +ice_save_vsi_ctx(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi) { hw->vsi_ctx[vsi_handle] = vsi; } @@ -398,7 +398,7 @@ ice_add_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx, tmp_vsi_ctx->vsi_num = vsi_ctx->vsi_num; } - return status; + return 0; } /** @@ -643,21 +643,43 @@ static void ice_fill_sw_info(struct ice_hw *hw, struct ice_fltr_info *fi) fi->fltr_act == ICE_FWD_TO_VSI_LIST || fi->fltr_act == ICE_FWD_TO_Q || fi->fltr_act == ICE_FWD_TO_QGRP)) { - fi->lb_en = true; - /* Do not set lan_en to TRUE if + /* Setting LB for prune actions will result in replicated + * packets to the internal switch that will be dropped. + */ + if (fi->lkup_type != ICE_SW_LKUP_VLAN) + fi->lb_en = true; + + /* Set lan_en to TRUE if * 1. The switch is a VEB AND * 2 - * 2.1 The lookup is MAC with unicast addr for MAC, OR - * 2.2 The lookup is MAC_VLAN with unicast addr for MAC + * 2.1 The lookup is a directional lookup like ethertype, + * promiscuous, ethertype-mac, promiscuous-vlan + * and default-port OR + * 2.2 The lookup is VLAN, OR + * 2.3 The lookup is MAC with mcast or bcast addr for MAC, OR + * 2.4 The lookup is MAC_VLAN with mcast or bcast addr for MAC. * - * In all other cases, the LAN enable has to be set to true. + * OR + * + * The switch is a VEPA. + * + * In all other cases, the LAN enable has to be set to false. 
*/ - if (!(hw->evb_veb && - ((fi->lkup_type == ICE_SW_LKUP_MAC && - is_unicast_ether_addr(fi->l_data.mac.mac_addr)) || - (fi->lkup_type == ICE_SW_LKUP_MAC_VLAN && - is_unicast_ether_addr(fi->l_data.mac_vlan.mac_addr))))) + if (hw->evb_veb) { + if (fi->lkup_type == ICE_SW_LKUP_ETHERTYPE || + fi->lkup_type == ICE_SW_LKUP_PROMISC || + fi->lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC || + fi->lkup_type == ICE_SW_LKUP_PROMISC_VLAN || + fi->lkup_type == ICE_SW_LKUP_DFLT || + fi->lkup_type == ICE_SW_LKUP_VLAN || + (fi->lkup_type == ICE_SW_LKUP_MAC && + !is_unicast_ether_addr(fi->l_data.mac.mac_addr)) || + (fi->lkup_type == ICE_SW_LKUP_MAC_VLAN && + !is_unicast_ether_addr(fi->l_data.mac.mac_addr))) + fi->lan_en = true; + } else { fi->lan_en = true; + } } } @@ -2190,6 +2212,291 @@ ice_add_to_vsi_fltr_list(struct ice_hw *hw, u16 vsi_handle, } /** + * ice_determine_promisc_mask + * @fi: filter info to parse + * + * Helper function to determine which ICE_PROMISC_ mask corresponds + * to given filter into. + */ +static u8 ice_determine_promisc_mask(struct ice_fltr_info *fi) +{ + u16 vid = fi->l_data.mac_vlan.vlan_id; + u8 *macaddr = fi->l_data.mac.mac_addr; + bool is_tx_fltr = false; + u8 promisc_mask = 0; + + if (fi->flag == ICE_FLTR_TX) + is_tx_fltr = true; + + if (is_broadcast_ether_addr(macaddr)) + promisc_mask |= is_tx_fltr ? + ICE_PROMISC_BCAST_TX : ICE_PROMISC_BCAST_RX; + else if (is_multicast_ether_addr(macaddr)) + promisc_mask |= is_tx_fltr ? + ICE_PROMISC_MCAST_TX : ICE_PROMISC_MCAST_RX; + else if (is_unicast_ether_addr(macaddr)) + promisc_mask |= is_tx_fltr ? + ICE_PROMISC_UCAST_TX : ICE_PROMISC_UCAST_RX; + if (vid) + promisc_mask |= is_tx_fltr ? 
+ ICE_PROMISC_VLAN_TX : ICE_PROMISC_VLAN_RX; + + return promisc_mask; +} + +/** + * ice_remove_promisc - Remove promisc based filter rules + * @hw: pointer to the hardware structure + * @recp_id: recipe id for which the rule needs to removed + * @v_list: list of promisc entries + */ +static enum ice_status +ice_remove_promisc(struct ice_hw *hw, u8 recp_id, + struct list_head *v_list) +{ + struct ice_fltr_list_entry *v_list_itr, *tmp; + + list_for_each_entry_safe(v_list_itr, tmp, v_list, list_entry) { + v_list_itr->status = + ice_remove_rule_internal(hw, recp_id, v_list_itr); + if (v_list_itr->status) + return v_list_itr->status; + } + return 0; +} + +/** + * ice_clear_vsi_promisc - clear specified promiscuous mode(s) for given VSI + * @hw: pointer to the hardware structure + * @vsi_handle: VSI handle to clear mode + * @promisc_mask: mask of promiscuous config bits to clear + * @vid: VLAN ID to clear VLAN promiscuous + */ +enum ice_status +ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, + u16 vid) +{ + struct ice_switch_info *sw = hw->switch_info; + struct ice_fltr_list_entry *fm_entry, *tmp; + struct list_head remove_list_head; + struct ice_fltr_mgmt_list_entry *itr; + struct list_head *rule_head; + struct mutex *rule_lock; /* Lock to protect filter rule list */ + enum ice_status status = 0; + u8 recipe_id; + + if (!ice_is_vsi_valid(hw, vsi_handle)) + return ICE_ERR_PARAM; + + if (vid) + recipe_id = ICE_SW_LKUP_PROMISC_VLAN; + else + recipe_id = ICE_SW_LKUP_PROMISC; + + rule_head = &sw->recp_list[recipe_id].filt_rules; + rule_lock = &sw->recp_list[recipe_id].filt_rule_lock; + + INIT_LIST_HEAD(&remove_list_head); + + mutex_lock(rule_lock); + list_for_each_entry(itr, rule_head, list_entry) { + u8 fltr_promisc_mask = 0; + + if (!ice_vsi_uses_fltr(itr, vsi_handle)) + continue; + + fltr_promisc_mask |= + ice_determine_promisc_mask(&itr->fltr_info); + + /* Skip if filter is not completely specified by given mask */ + if (fltr_promisc_mask & 
~promisc_mask) + continue; + + status = ice_add_entry_to_vsi_fltr_list(hw, vsi_handle, + &remove_list_head, + &itr->fltr_info); + if (status) { + mutex_unlock(rule_lock); + goto free_fltr_list; + } + } + mutex_unlock(rule_lock); + + status = ice_remove_promisc(hw, recipe_id, &remove_list_head); + +free_fltr_list: + list_for_each_entry_safe(fm_entry, tmp, &remove_list_head, list_entry) { + list_del(&fm_entry->list_entry); + devm_kfree(ice_hw_to_dev(hw), fm_entry); + } + + return status; +} + +/** + * ice_set_vsi_promisc - set given VSI to given promiscuous mode(s) + * @hw: pointer to the hardware structure + * @vsi_handle: VSI handle to configure + * @promisc_mask: mask of promiscuous config bits + * @vid: VLAN ID to set VLAN promiscuous + */ +enum ice_status +ice_set_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, u16 vid) +{ + enum { UCAST_FLTR = 1, MCAST_FLTR, BCAST_FLTR }; + struct ice_fltr_list_entry f_list_entry; + struct ice_fltr_info new_fltr; + enum ice_status status = 0; + bool is_tx_fltr; + u16 hw_vsi_id; + int pkt_type; + u8 recipe_id; + + if (!ice_is_vsi_valid(hw, vsi_handle)) + return ICE_ERR_PARAM; + hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle); + + memset(&new_fltr, 0, sizeof(new_fltr)); + + if (promisc_mask & (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX)) { + new_fltr.lkup_type = ICE_SW_LKUP_PROMISC_VLAN; + new_fltr.l_data.mac_vlan.vlan_id = vid; + recipe_id = ICE_SW_LKUP_PROMISC_VLAN; + } else { + new_fltr.lkup_type = ICE_SW_LKUP_PROMISC; + recipe_id = ICE_SW_LKUP_PROMISC; + } + + /* Separate filters must be set for each direction/packet type + * combination, so we will loop over the mask value, store the + * individual type, and clear it out in the input mask as it + * is found. 
+ */ + while (promisc_mask) { + u8 *mac_addr; + + pkt_type = 0; + is_tx_fltr = false; + + if (promisc_mask & ICE_PROMISC_UCAST_RX) { + promisc_mask &= ~ICE_PROMISC_UCAST_RX; + pkt_type = UCAST_FLTR; + } else if (promisc_mask & ICE_PROMISC_UCAST_TX) { + promisc_mask &= ~ICE_PROMISC_UCAST_TX; + pkt_type = UCAST_FLTR; + is_tx_fltr = true; + } else if (promisc_mask & ICE_PROMISC_MCAST_RX) { + promisc_mask &= ~ICE_PROMISC_MCAST_RX; + pkt_type = MCAST_FLTR; + } else if (promisc_mask & ICE_PROMISC_MCAST_TX) { + promisc_mask &= ~ICE_PROMISC_MCAST_TX; + pkt_type = MCAST_FLTR; + is_tx_fltr = true; + } else if (promisc_mask & ICE_PROMISC_BCAST_RX) { + promisc_mask &= ~ICE_PROMISC_BCAST_RX; + pkt_type = BCAST_FLTR; + } else if (promisc_mask & ICE_PROMISC_BCAST_TX) { + promisc_mask &= ~ICE_PROMISC_BCAST_TX; + pkt_type = BCAST_FLTR; + is_tx_fltr = true; + } + + /* Check for VLAN promiscuous flag */ + if (promisc_mask & ICE_PROMISC_VLAN_RX) { + promisc_mask &= ~ICE_PROMISC_VLAN_RX; + } else if (promisc_mask & ICE_PROMISC_VLAN_TX) { + promisc_mask &= ~ICE_PROMISC_VLAN_TX; + is_tx_fltr = true; + } + + /* Set filter DA based on packet type */ + mac_addr = new_fltr.l_data.mac.mac_addr; + if (pkt_type == BCAST_FLTR) { + eth_broadcast_addr(mac_addr); + } else if (pkt_type == MCAST_FLTR || + pkt_type == UCAST_FLTR) { + /* Use the dummy ether header DA */ + ether_addr_copy(mac_addr, dummy_eth_header); + if (pkt_type == MCAST_FLTR) + mac_addr[0] |= 0x1; /* Set multicast bit */ + } + + /* Need to reset this to zero for all iterations */ + new_fltr.flag = 0; + if (is_tx_fltr) { + new_fltr.flag |= ICE_FLTR_TX; + new_fltr.src = hw_vsi_id; + } else { + new_fltr.flag |= ICE_FLTR_RX; + new_fltr.src = hw->port_info->lport; + } + + new_fltr.fltr_act = ICE_FWD_TO_VSI; + new_fltr.vsi_handle = vsi_handle; + new_fltr.fwd_id.hw_vsi_id = hw_vsi_id; + f_list_entry.fltr_info = new_fltr; + + status = ice_add_rule_internal(hw, recipe_id, &f_list_entry); + if (status) + goto set_promisc_exit; + } + 
+set_promisc_exit: + return status; +} + +/** + * ice_set_vlan_vsi_promisc + * @hw: pointer to the hardware structure + * @vsi_handle: VSI handle to configure + * @promisc_mask: mask of promiscuous config bits + * @rm_vlan_promisc: Clear VLANs VSI promisc mode + * + * Configure VSI with all associated VLANs to given promiscuous mode(s) + */ +enum ice_status +ice_set_vlan_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, + bool rm_vlan_promisc) +{ + struct ice_switch_info *sw = hw->switch_info; + struct ice_fltr_list_entry *list_itr, *tmp; + struct list_head vsi_list_head; + struct list_head *vlan_head; + struct mutex *vlan_lock; /* Lock to protect filter rule list */ + enum ice_status status; + u16 vlan_id; + + INIT_LIST_HEAD(&vsi_list_head); + vlan_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock; + vlan_head = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rules; + mutex_lock(vlan_lock); + status = ice_add_to_vsi_fltr_list(hw, vsi_handle, vlan_head, + &vsi_list_head); + mutex_unlock(vlan_lock); + if (status) + goto free_fltr_list; + + list_for_each_entry(list_itr, &vsi_list_head, list_entry) { + vlan_id = list_itr->fltr_info.l_data.vlan.vlan_id; + if (rm_vlan_promisc) + status = ice_clear_vsi_promisc(hw, vsi_handle, + promisc_mask, vlan_id); + else + status = ice_set_vsi_promisc(hw, vsi_handle, + promisc_mask, vlan_id); + if (status) + break; + } + +free_fltr_list: + list_for_each_entry_safe(list_itr, tmp, &vsi_list_head, list_entry) { + list_del(&list_itr->list_entry); + devm_kfree(ice_hw_to_dev(hw), list_itr); + } + return status; +} + +/** * ice_remove_vsi_lkup_fltr - Remove lookup type filters for a VSI * @hw: pointer to the hardware structure * @vsi_handle: VSI handle to remove filters from @@ -2224,12 +2531,14 @@ ice_remove_vsi_lkup_fltr(struct ice_hw *hw, u16 vsi_handle, case ICE_SW_LKUP_VLAN: ice_remove_vlan(hw, &remove_list_head); break; + case ICE_SW_LKUP_PROMISC: + case ICE_SW_LKUP_PROMISC_VLAN: + ice_remove_promisc(hw, lkup, 
&remove_list_head); + break; case ICE_SW_LKUP_MAC_VLAN: case ICE_SW_LKUP_ETHERTYPE: case ICE_SW_LKUP_ETHERTYPE_MAC: - case ICE_SW_LKUP_PROMISC: case ICE_SW_LKUP_DFLT: - case ICE_SW_LKUP_PROMISC_VLAN: case ICE_SW_LKUP_LAST: default: ice_debug(hw, ICE_DBG_SW, "Unsupported lookup type %d\n", lkup); diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index d5ef0bd58bf9..e4ce0720b871 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -178,6 +178,17 @@ struct ice_fltr_mgmt_list_entry { u8 counter_index; }; +enum ice_promisc_flags { + ICE_PROMISC_UCAST_RX = 0x1, + ICE_PROMISC_UCAST_TX = 0x2, + ICE_PROMISC_MCAST_RX = 0x4, + ICE_PROMISC_MCAST_TX = 0x8, + ICE_PROMISC_BCAST_RX = 0x10, + ICE_PROMISC_BCAST_TX = 0x20, + ICE_PROMISC_VLAN_RX = 0x40, + ICE_PROMISC_VLAN_TX = 0x80, +}; + /* VSI related commands */ enum ice_status ice_add_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx, @@ -199,10 +210,22 @@ enum ice_status ice_update_sw_rule_bridge_mode(struct ice_hw *hw); enum ice_status ice_add_mac(struct ice_hw *hw, struct list_head *m_lst); enum ice_status ice_remove_mac(struct ice_hw *hw, struct list_head *m_lst); void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_handle); -enum ice_status ice_add_vlan(struct ice_hw *hw, struct list_head *m_list); +enum ice_status +ice_add_vlan(struct ice_hw *hw, struct list_head *m_list); enum ice_status ice_remove_vlan(struct ice_hw *hw, struct list_head *v_list); + +/* Promisc/defport setup for VSIs */ enum ice_status ice_cfg_dflt_vsi(struct ice_hw *hw, u16 vsi_handle, bool set, u8 direction); +enum ice_status +ice_set_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, + u16 vid); +enum ice_status +ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, + u16 vid); +enum ice_status +ice_set_vlan_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, + bool rm_vlan_promisc); 
enum ice_status ice_init_def_sw_recp(struct ice_hw *hw); u16 ice_get_hw_vsi_num(struct ice_hw *hw, u16 vsi_handle); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index c289d97f477d..f2462799154a 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -100,8 +100,8 @@ void ice_free_tx_ring(struct ice_ring *tx_ring) * * Returns true if there's any budget left (e.g. the clean is finished) */ -static bool ice_clean_tx_irq(struct ice_vsi *vsi, struct ice_ring *tx_ring, - int napi_budget) +static bool +ice_clean_tx_irq(struct ice_vsi *vsi, struct ice_ring *tx_ring, int napi_budget) { unsigned int total_bytes = 0, total_pkts = 0; unsigned int budget = vsi->work_lmt; @@ -236,9 +236,9 @@ int ice_setup_tx_ring(struct ice_ring *tx_ring) if (!tx_ring->tx_buf) return -ENOMEM; - /* round up to nearest 4K */ + /* round up to nearest page */ tx_ring->size = ALIGN(tx_ring->count * sizeof(struct ice_tx_desc), - 4096); + PAGE_SIZE); tx_ring->desc = dmam_alloc_coherent(dev, tx_ring->size, &tx_ring->dma, GFP_KERNEL); if (!tx_ring->desc) { @@ -282,8 +282,17 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring) if (!rx_buf->page) continue; - dma_unmap_page(dev, rx_buf->dma, PAGE_SIZE, DMA_FROM_DEVICE); - __free_pages(rx_buf->page, 0); + /* Invalidate cache lines that may have been written to by + * device so that we avoid corrupting memory. 
+ */ + dma_sync_single_range_for_cpu(dev, rx_buf->dma, + rx_buf->page_offset, + ICE_RXBUF_2048, DMA_FROM_DEVICE); + + /* free resources associated with mapping */ + dma_unmap_page_attrs(dev, rx_buf->dma, PAGE_SIZE, + DMA_FROM_DEVICE, ICE_RX_DMA_ATTR); + __page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias); rx_buf->page = NULL; rx_buf->page_offset = 0; @@ -339,9 +348,9 @@ int ice_setup_rx_ring(struct ice_ring *rx_ring) if (!rx_ring->rx_buf) return -ENOMEM; - /* round up to nearest 4K */ - rx_ring->size = rx_ring->count * sizeof(union ice_32byte_rx_desc); - rx_ring->size = ALIGN(rx_ring->size, 4096); + /* round up to nearest page */ + rx_ring->size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), + PAGE_SIZE); rx_ring->desc = dmam_alloc_coherent(dev, rx_ring->size, &rx_ring->dma, GFP_KERNEL); if (!rx_ring->desc) { @@ -389,8 +398,8 @@ static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val) * Returns true if the page was successfully allocated or * reused. */ -static bool ice_alloc_mapped_page(struct ice_ring *rx_ring, - struct ice_rx_buf *bi) +static bool +ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi) { struct page *page = bi->page; dma_addr_t dma; @@ -409,7 +418,8 @@ static bool ice_alloc_mapped_page(struct ice_ring *rx_ring, } /* map page for use */ - dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); + dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE, + DMA_FROM_DEVICE, ICE_RX_DMA_ATTR); /* if mapping failed free memory back to system since * there isn't much point in holding memory we can't use @@ -423,6 +433,8 @@ static bool ice_alloc_mapped_page(struct ice_ring *rx_ring, bi->dma = dma; bi->page = page; bi->page_offset = 0; + page_ref_add(page, USHRT_MAX - 1); + bi->pagecnt_bias = USHRT_MAX; return true; } @@ -452,6 +464,12 @@ bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count) if (!ice_alloc_mapped_page(rx_ring, bi)) goto no_bufs; + /* sync the buffer for use by the 
device */ + dma_sync_single_range_for_device(rx_ring->dev, bi->dma, + bi->page_offset, + ICE_RXBUF_2048, + DMA_FROM_DEVICE); + /* Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ @@ -497,61 +515,43 @@ static bool ice_page_is_reserved(struct page *page) } /** - * ice_add_rx_frag - Add contents of Rx buffer to sk_buff - * @rx_buf: buffer containing page to add - * @rx_desc: descriptor containing length of buffer written by hardware - * @skb: sk_buf to place the data into + * ice_rx_buf_adjust_pg_offset - Prepare Rx buffer for reuse + * @rx_buf: Rx buffer to adjust + * @size: Size of adjustment * - * This function will add the data contained in rx_buf->page to the skb. - * This is done either through a direct copy if the data in the buffer is - * less than the skb header size, otherwise it will just attach the page as - * a frag to the skb. - * - * The function will then update the page offset if necessary and return - * true if the buffer can be reused by the adapter. + * Update the offset within page so that Rx buf will be ready to be reused. 
+ * For systems with PAGE_SIZE < 8192 this function will flip the page offset + * so the second half of page assigned to Rx buffer will be used, otherwise + * the offset is moved by the @size bytes */ -static bool ice_add_rx_frag(struct ice_rx_buf *rx_buf, - union ice_32b_rx_flex_desc *rx_desc, - struct sk_buff *skb) +static void +ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size) { #if (PAGE_SIZE < 8192) - unsigned int truesize = ICE_RXBUF_2048; + /* flip page offset to other buffer */ + rx_buf->page_offset ^= size; #else - unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048; - unsigned int truesize; -#endif /* PAGE_SIZE < 8192) */ - - struct page *page; - unsigned int size; - - size = le16_to_cpu(rx_desc->wb.pkt_len) & - ICE_RX_FLX_DESC_PKT_LEN_M; - - page = rx_buf->page; + /* move offset up to the next cache line */ + rx_buf->page_offset += size; +#endif +} +/** + * ice_can_reuse_rx_page - Determine if page can be reused for another Rx + * @rx_buf: buffer containing the page + * + * If page is reusable, we have a green light for calling ice_reuse_rx_page, + * which will assign the current buffer to the buffer that next_to_alloc is + * pointing to; otherwise, the DMA mapping needs to be destroyed and + * page freed + */ +static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) +{ #if (PAGE_SIZE >= 8192) - truesize = ALIGN(size, L1_CACHE_BYTES); -#endif /* PAGE_SIZE >= 8192) */ - - /* will the data fit in the skb we allocated? 
if so, just - * copy it as it is pretty small anyway - */ - if (size <= ICE_RX_HDR_SIZE && !skb_is_nonlinear(skb)) { - unsigned char *va = page_address(page) + rx_buf->page_offset; - - memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); - - /* page is not reserved, we can reuse buffer as-is */ - if (likely(!ice_page_is_reserved(page))) - return true; - - /* this page cannot be reused so discard it */ - __free_pages(page, 0); - return false; - } - - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - rx_buf->page_offset, size, truesize); + unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048; +#endif + unsigned int pagecnt_bias = rx_buf->pagecnt_bias; + struct page *page = rx_buf->page; /* avoid re-using remote pages */ if (unlikely(ice_page_is_reserved(page))) @@ -559,36 +559,61 @@ static bool ice_add_rx_frag(struct ice_rx_buf *rx_buf, #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely(page_count(page) != 1)) + if (unlikely((page_count(page) - pagecnt_bias) > 1)) return false; - - /* flip page offset to other buffer */ - rx_buf->page_offset ^= truesize; #else - /* move offset up to the next cache line */ - rx_buf->page_offset += truesize; - if (rx_buf->page_offset > last_offset) return false; #endif /* PAGE_SIZE < 8192) */ - /* Even if we own the page, we are not allowed to use atomic_set() - * This would break get_page_unless_zero() users. + /* If we have drained the page fragment pool we need to update + * the pagecnt_bias and page count so that we fully restock the + * number of references the driver holds. 
*/ - get_page(rx_buf->page); + if (unlikely(pagecnt_bias == 1)) { + page_ref_add(page, USHRT_MAX - 1); + rx_buf->pagecnt_bias = USHRT_MAX; + } return true; } /** + * ice_add_rx_frag - Add contents of Rx buffer to sk_buff as a frag + * @rx_buf: buffer containing page to add + * @skb: sk_buff to place the data into + * @size: packet length from rx_desc + * + * This function will add the data contained in rx_buf->page to the skb. + * It will just attach the page as a frag to the skb. + * The function will then update the page offset. + */ +static void +ice_add_rx_frag(struct ice_rx_buf *rx_buf, struct sk_buff *skb, + unsigned int size) +{ +#if (PAGE_SIZE >= 8192) + unsigned int truesize = SKB_DATA_ALIGN(size); +#else + unsigned int truesize = ICE_RXBUF_2048; +#endif + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page, + rx_buf->page_offset, size, truesize); + + /* page is being used so we must update the page offset */ + ice_rx_buf_adjust_pg_offset(rx_buf, truesize); +} + +/** * ice_reuse_rx_page - page flip buffer and store it back on the ring * @rx_ring: Rx descriptor ring to store buffers on * @old_buf: donor buffer to have page reused * * Synchronizes page for reuse by the adapter */ -static void ice_reuse_rx_page(struct ice_ring *rx_ring, - struct ice_rx_buf *old_buf) +static void +ice_reuse_rx_page(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf) { u16 nta = rx_ring->next_to_alloc; struct ice_rx_buf *new_buf; @@ -599,121 +624,132 @@ static void ice_reuse_rx_page(struct ice_ring *rx_ring, nta++; rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; - /* transfer page from old buffer to new buffer */ - *new_buf = *old_buf; + /* Transfer page from old buffer to new buffer. + * Move each member individually to avoid possible store + * forwarding stalls and unnecessary copy of skb. 
+ */ + new_buf->dma = old_buf->dma; + new_buf->page = old_buf->page; + new_buf->page_offset = old_buf->page_offset; + new_buf->pagecnt_bias = old_buf->pagecnt_bias; } /** - * ice_fetch_rx_buf - Allocate skb and populate it + * ice_get_rx_buf - Fetch Rx buffer and synchronize data for use * @rx_ring: Rx descriptor ring to transact packets on - * @rx_desc: descriptor containing info written by hardware + * @skb: skb to be used + * @size: size of buffer to add to skb * - * This function allocates an skb on the fly, and populates it with the page - * data from the current receive descriptor, taking care to set up the skb - * correctly, as well as handling calling the page recycle function if - * necessary. + * This function will pull an Rx buffer from the ring and synchronize it + * for use by the CPU. */ -static struct sk_buff *ice_fetch_rx_buf(struct ice_ring *rx_ring, - union ice_32b_rx_flex_desc *rx_desc) +static struct ice_rx_buf * +ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb, + const unsigned int size) { struct ice_rx_buf *rx_buf; - struct sk_buff *skb; - struct page *page; rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean]; - page = rx_buf->page; - prefetchw(page); + prefetchw(rx_buf->page); + *skb = rx_buf->skb; + + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma, + rx_buf->page_offset, size, + DMA_FROM_DEVICE); - skb = rx_buf->skb; + /* We have pulled a buffer for use, so decrement pagecnt_bias */ + rx_buf->pagecnt_bias--; - if (likely(!skb)) { - u8 *page_addr = page_address(page) + rx_buf->page_offset; + return rx_buf; +} - /* prefetch first cache line of first page */ - prefetch(page_addr); +/** + * ice_construct_skb - Allocate skb and populate it + * @rx_ring: Rx descriptor ring to transact packets on + * @rx_buf: Rx buffer to pull data from + * @size: the length of the packet + * + * This function allocates an skb. 
It then populates it with the page + * data from the current receive descriptor, taking care to set up the + * skb correctly. + */ +static struct sk_buff * +ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, + unsigned int size) +{ + void *va = page_address(rx_buf->page) + rx_buf->page_offset; + unsigned int headlen; + struct sk_buff *skb; + + /* prefetch first cache line of first page */ + prefetch(va); #if L1_CACHE_BYTES < 128 - prefetch((void *)(page_addr + L1_CACHE_BYTES)); + prefetch((u8 *)va + L1_CACHE_BYTES); #endif /* L1_CACHE_BYTES */ - /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - ICE_RX_HDR_SIZE, - GFP_ATOMIC | __GFP_NOWARN); - if (unlikely(!skb)) { - rx_ring->rx_stats.alloc_buf_failed++; - return NULL; - } - - /* we will be copying header into skb->data in - * pskb_may_pull so it is in our interest to prefetch - * it now to avoid a possible cache miss - */ - prefetchw(skb->data); + /* allocate a skb to store the frags */ + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE, + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!skb)) + return NULL; - skb_record_rx_queue(skb, rx_ring->q_index); - } else { - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma, - rx_buf->page_offset, - ICE_RXBUF_2048, - DMA_FROM_DEVICE); + skb_record_rx_queue(skb, rx_ring->q_index); + /* Determine available headroom for copy */ + headlen = size; + if (headlen > ICE_RX_HDR_SIZE) + headlen = eth_get_headlen(va, ICE_RX_HDR_SIZE); - rx_buf->skb = NULL; - } + /* align pull length to size of long to optimize memcpy performance */ + memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); - /* pull page into skb */ - if (ice_add_rx_frag(rx_buf, rx_desc, skb)) { - /* hand second half of page back to the ring */ - ice_reuse_rx_page(rx_ring, rx_buf); - rx_ring->rx_stats.page_reuse_count++; + /* if we exhaust the linear part then add what is 
left as a frag */ + size -= headlen; + if (size) { +#if (PAGE_SIZE >= 8192) + unsigned int truesize = SKB_DATA_ALIGN(size); +#else + unsigned int truesize = ICE_RXBUF_2048; +#endif + skb_add_rx_frag(skb, 0, rx_buf->page, + rx_buf->page_offset + headlen, size, truesize); + /* buffer is used by skb, update page_offset */ + ice_rx_buf_adjust_pg_offset(rx_buf, truesize); } else { - /* we are not reusing the buffer so unmap it */ - dma_unmap_page(rx_ring->dev, rx_buf->dma, PAGE_SIZE, - DMA_FROM_DEVICE); + /* buffer is unused, reset bias back to rx_buf; data was copied + * onto skb's linear part so there's no need for adjusting + * page offset and we can reuse this buffer as-is + */ + rx_buf->pagecnt_bias++; } - /* clear contents of buffer_info */ - rx_buf->page = NULL; - return skb; } /** - * ice_pull_tail - ice specific version of skb_pull_tail - * @skb: pointer to current skb being adjusted + * ice_put_rx_buf - Clean up used buffer and either recycle or free + * @rx_ring: Rx descriptor ring to transact packets on + * @rx_buf: Rx buffer to pull data from * - * This function is an ice specific version of __pskb_pull_tail. The - * main difference between this version and the original function is that - * this function can make several assumptions about the state of things - * that allow for significant optimizations versus the standard function. - * As a result we can do things like drop a frag and maintain an accurate - * truesize for the skb. + * This function will clean up the contents of the rx_buf. It will + * either recycle the buffer or unmap it and free the associated resources. 
*/ -static void ice_pull_tail(struct sk_buff *skb) +static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; - unsigned int pull_len; - unsigned char *va; - - /* it is valid to use page_address instead of kmap since we are - * working with pages allocated out of the lomem pool per - * alloc_page(GFP_ATOMIC) - */ - va = skb_frag_address(frag); - - /* we need the header to contain the greater of either ETH_HLEN or - * 60 bytes if the skb->len is less than 60 for skb_pad. - */ - pull_len = eth_get_headlen(va, ICE_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); + /* hand second half of page back to the ring */ + if (ice_can_reuse_rx_page(rx_buf)) { + ice_reuse_rx_page(rx_ring, rx_buf); + rx_ring->rx_stats.page_reuse_count++; + } else { + /* we are not reusing the buffer so unmap it */ + dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma, PAGE_SIZE, + DMA_FROM_DEVICE, ICE_RX_DMA_ATTR); + __page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias); + } - /* update all of the pointers */ - skb_frag_size_sub(frag, pull_len); - frag->page_offset += pull_len; - skb->data_len -= pull_len; - skb->tail += pull_len; + /* clear contents of buffer_info */ + rx_buf->page = NULL; + rx_buf->skb = NULL; } /** @@ -730,10 +766,6 @@ static void ice_pull_tail(struct sk_buff *skb) */ static bool ice_cleanup_headers(struct sk_buff *skb) { - /* place header in linear portion of buffer */ - if (skb_is_nonlinear(skb)) - ice_pull_tail(skb); - /* if eth_skb_pad returns an error the skb was freed */ if (eth_skb_pad(skb)) return true; @@ -751,8 +783,8 @@ static bool ice_cleanup_headers(struct sk_buff *skb) * The status_error_len doesn't need to be shifted because it begins * at offset zero. 
*/ -static bool ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, - const u16 stat_err_bits) +static bool +ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits) { return !!(rx_desc->wb.status_error0 & cpu_to_le16(stat_err_bits)); @@ -769,9 +801,9 @@ static bool ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, * sk_buff in the next buffer to be chained and return true indicating * that this is in fact a non-EOP buffer. */ -static bool ice_is_non_eop(struct ice_ring *rx_ring, - union ice_32b_rx_flex_desc *rx_desc, - struct sk_buff *skb) +static bool +ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, + struct sk_buff *skb) { u32 ntc = rx_ring->next_to_clean + 1; @@ -838,8 +870,9 @@ ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, * * skb->protocol must be set before this function is called */ -static void ice_rx_csum(struct ice_vsi *vsi, struct sk_buff *skb, - union ice_32b_rx_flex_desc *rx_desc, u8 ptype) +static void +ice_rx_csum(struct ice_vsi *vsi, struct sk_buff *skb, + union ice_32b_rx_flex_desc *rx_desc, u8 ptype) { struct ice_rx_ptype_decoded decoded; u32 rx_error, rx_status; @@ -909,9 +942,10 @@ checksum_fail: * order to populate the hash, checksum, VLAN, protocol, and * other fields within the skb. */ -static void ice_process_skb_fields(struct ice_ring *rx_ring, - union ice_32b_rx_flex_desc *rx_desc, - struct sk_buff *skb, u8 ptype) +static void +ice_process_skb_fields(struct ice_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc, + struct sk_buff *skb, u8 ptype) { ice_rx_hash(rx_ring, rx_desc, skb, ptype); @@ -930,13 +964,12 @@ static void ice_process_skb_fields(struct ice_ring *rx_ring, * This function sends the completed packet (via. 
skb) up the stack using * gro receive functions (with/without vlan tag) */ -static void ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, - u16 vlan_tag) +static void +ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag) { if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && - (vlan_tag & VLAN_VID_MASK)) { + (vlan_tag & VLAN_VID_MASK)) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); - } napi_gro_receive(&rx_ring->q_vector->napi, skb); } @@ -961,7 +994,9 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) /* start the loop to process RX packets bounded by 'budget' */ while (likely(total_rx_pkts < (unsigned int)budget)) { union ice_32b_rx_flex_desc *rx_desc; + struct ice_rx_buf *rx_buf; struct sk_buff *skb; + unsigned int size; u16 stat_err_bits; u16 vlan_tag = 0; u8 rx_ptype; @@ -991,11 +1026,24 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) */ dma_rmb(); + size = le16_to_cpu(rx_desc->wb.pkt_len) & + ICE_RX_FLX_DESC_PKT_LEN_M; + + rx_buf = ice_get_rx_buf(rx_ring, &skb, size); /* allocate (if needed) and populate skb */ - skb = ice_fetch_rx_buf(rx_ring, rx_desc); - if (!skb) + if (skb) + ice_add_rx_frag(rx_buf, skb, size); + else + skb = ice_construct_skb(rx_ring, rx_buf, size); + + /* exit if we failed to retrieve a buffer */ + if (!skb) { + rx_ring->rx_stats.alloc_buf_failed++; + rx_buf->pagecnt_bias++; break; + } + ice_put_rx_buf(rx_ring, rx_buf); cleaned_count++; /* skip if it is NOP desc */ @@ -1048,18 +1096,257 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) return failure ? 
budget : (int)total_rx_pkts; } +static unsigned int ice_itr_divisor(struct ice_port_info *pi) +{ + switch (pi->phy.link_info.link_speed) { + case ICE_AQ_LINK_SPEED_40GB: + return ICE_ITR_ADAPTIVE_MIN_INC * 1024; + case ICE_AQ_LINK_SPEED_25GB: + case ICE_AQ_LINK_SPEED_20GB: + return ICE_ITR_ADAPTIVE_MIN_INC * 512; + case ICE_AQ_LINK_SPEED_100MB: + return ICE_ITR_ADAPTIVE_MIN_INC * 32; + default: + return ICE_ITR_ADAPTIVE_MIN_INC * 256; + } +} + +/** + * ice_update_itr - update the adaptive ITR value based on statistics + * @q_vector: structure containing interrupt and ring information + * @rc: structure containing ring performance data + * + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time + * while increasing bulk throughput. + */ +static void +ice_update_itr(struct ice_q_vector *q_vector, struct ice_ring_container *rc) +{ + unsigned int avg_wire_size, packets, bytes, itr; + unsigned long next_update = jiffies; + bool container_is_rx; + + if (!rc->ring || !ITR_IS_DYNAMIC(rc->itr_setting)) + return; + + /* If itr_countdown is set it means we programmed an ITR within + * the last 4 interrupt cycles. This has a side effect of us + * potentially firing an early interrupt. In order to work around + * this we need to throw out any data received for a few + * interrupts following the update. + */ + if (q_vector->itr_countdown) { + itr = rc->target_itr; + goto clear_counts; + } + + container_is_rx = (&q_vector->rx == rc); + /* For Rx we want to push the delay up and default to low latency. + * for Tx we want to pull the delay down and default to high latency. + */ + itr = container_is_rx ? 
+ ICE_ITR_ADAPTIVE_MIN_USECS | ICE_ITR_ADAPTIVE_LATENCY : + ICE_ITR_ADAPTIVE_MAX_USECS | ICE_ITR_ADAPTIVE_LATENCY; + + /* If we didn't update within up to 1 - 2 jiffies we can assume + * that either packets are coming in so slow there hasn't been + * any work, or that there is so much work that NAPI is dealing + * with interrupt moderation and we don't need to do anything. + */ + if (time_after(next_update, rc->next_update)) + goto clear_counts; + + packets = rc->total_pkts; + bytes = rc->total_bytes; + + if (container_is_rx) { + /* If Rx there are 1 to 4 packets and bytes are less than + * 9000 assume insufficient data to use bulk rate limiting + * approach unless Tx is already in bulk rate limiting. We + * are likely latency driven. + */ + if (packets && packets < 4 && bytes < 9000 && + (q_vector->tx.target_itr & ICE_ITR_ADAPTIVE_LATENCY)) { + itr = ICE_ITR_ADAPTIVE_LATENCY; + goto adjust_by_size; + } + } else if (packets < 4) { + /* If we have Tx and Rx ITR maxed and Tx ITR is running in + * bulk mode and we are receiving 4 or fewer packets just + * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so + * that the Rx can relax. + */ + if (rc->target_itr == ICE_ITR_ADAPTIVE_MAX_USECS && + (q_vector->rx.target_itr & ICE_ITR_MASK) == + ICE_ITR_ADAPTIVE_MAX_USECS) + goto clear_counts; + } else if (packets > 32) { + /* If we have processed over 32 packets in a single interrupt + * for Tx assume we need to switch over to "bulk" mode. + */ + rc->target_itr &= ~ICE_ITR_ADAPTIVE_LATENCY; + } + + /* We have no packets to actually measure against. This means + * either one of the other queues on this vector is active or + * we are a Tx queue doing TSO with too high of an interrupt rate. + * + * Between 4 and 56 we can assume that our current interrupt delay + * is only slightly too low. As such we should increase it by a small + * fixed amount. 
+ */ + if (packets < 56) { + itr = rc->target_itr + ICE_ITR_ADAPTIVE_MIN_INC; + if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) { + itr &= ICE_ITR_ADAPTIVE_LATENCY; + itr += ICE_ITR_ADAPTIVE_MAX_USECS; + } + goto clear_counts; + } + + if (packets <= 256) { + itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr); + itr &= ICE_ITR_MASK; + + /* Between 56 and 112 is our "goldilocks" zone where we are + * working out "just right". Just report that our current + * ITR is good for us. + */ + if (packets <= 112) + goto clear_counts; + + /* If packet count is 128 or greater we are likely looking + * at a slight overrun of the delay we want. Try halving + * our delay to see if that will cut the number of packets + * in half per interrupt. + */ + itr >>= 1; + itr &= ICE_ITR_MASK; + if (itr < ICE_ITR_ADAPTIVE_MIN_USECS) + itr = ICE_ITR_ADAPTIVE_MIN_USECS; + + goto clear_counts; + } + + /* The paths below assume we are dealing with a bulk ITR since + * number of packets is greater than 256. We are just going to have + * to compute a value and try to bring the count under control, + * though for smaller packet sizes there isn't much we can do as + * NAPI polling will likely be kicking in sooner rather than later. + */ + itr = ICE_ITR_ADAPTIVE_BULK; + +adjust_by_size: + /* If packet counts are 256 or greater we can assume we have a gross + * overestimation of what the rate should be. Instead of trying to fine + * tune it just use the formula below to try and dial in an exact value + * gives the current packet size of the frame. 
+ */ + avg_wire_size = bytes / packets; + + /* The following is a crude approximation of: + * wmem_default / (size + overhead) = desired_pkts_per_int + * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate + * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value + * + * Assuming wmem_default is 212992 and overhead is 640 bytes per + * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the + * formula down to + * + * (170 * (size + 24)) / (size + 640) = ITR + * + * We first do some math on the packet size and then finally bitshift + * by 8 after rounding up. We also have to account for PCIe link speed + * difference as ITR scales based on this. + */ + if (avg_wire_size <= 60) { + /* Start at 250k ints/sec */ + avg_wire_size = 4096; + } else if (avg_wire_size <= 380) { + /* 250K ints/sec to 60K ints/sec */ + avg_wire_size *= 40; + avg_wire_size += 1696; + } else if (avg_wire_size <= 1084) { + /* 60K ints/sec to 36K ints/sec */ + avg_wire_size *= 15; + avg_wire_size += 11452; + } else if (avg_wire_size <= 1980) { + /* 36K ints/sec to 30K ints/sec */ + avg_wire_size *= 5; + avg_wire_size += 22420; + } else { + /* plateau at a limit of 30K ints/sec */ + avg_wire_size = 32256; + } + + /* If we are in low latency mode halve our delay which doubles the + * rate to somewhere between 100K to 16K ints/sec + */ + if (itr & ICE_ITR_ADAPTIVE_LATENCY) + avg_wire_size >>= 1; + + /* Resultant value is 256 times larger than it needs to be. This + * gives us room to adjust the value as needed to either increase + * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc. + * + * Use addition as we have already recorded the new latency flag + * for the ITR value. 
+ */ + itr += DIV_ROUND_UP(avg_wire_size, + ice_itr_divisor(q_vector->vsi->port_info)) * + ICE_ITR_ADAPTIVE_MIN_INC; + + if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) { + itr &= ICE_ITR_ADAPTIVE_LATENCY; + itr += ICE_ITR_ADAPTIVE_MAX_USECS; + } + +clear_counts: + /* write back value */ + rc->target_itr = itr; + + /* next update should occur within next jiffy */ + rc->next_update = next_update + 1; + + rc->total_bytes = 0; + rc->total_pkts = 0; +} + /** * ice_buildreg_itr - build value for writing to the GLINT_DYN_CTL register * @itr_idx: interrupt throttling index - * @reg_itr: interrupt throttling value adjusted based on ITR granularity + * @itr: interrupt throttling value in usecs */ -static u32 ice_buildreg_itr(int itr_idx, u16 reg_itr) +static u32 ice_buildreg_itr(u16 itr_idx, u16 itr) { + /* The itr value is reported in microseconds, and the register value is + * recorded in 2 microsecond units. For this reason we only need to + * shift by the GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S to apply this + * granularity as a shift instead of division. The mask makes sure the + * ITR value is never odd so we don't accidentally write into the field + * prior to the ITR field. + */ + itr &= ICE_ITR_MASK; + return GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M | (itr_idx << GLINT_DYN_CTL_ITR_INDX_S) | - (reg_itr << GLINT_DYN_CTL_INTERVAL_S); + (itr << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S)); } +/* The act of updating the ITR will cause it to immediately trigger. In order + * to prevent this from throwing off adaptive update statistics we defer the + * update so that it can only happen so often. So after either Tx or Rx are + * updated we make the adaptive scheme wait until either the ITR completely + * expires via the next_update expiration or we have been through at least + * 3 interrupts. 
+ */ +#define ITR_COUNTDOWN_START 3 + /** * ice_update_ena_itr - Update ITR and re-enable MSIX interrupt * @vsi: the VSI associated with the q_vector @@ -1068,10 +1355,14 @@ static u32 ice_buildreg_itr(int itr_idx, u16 reg_itr) static void ice_update_ena_itr(struct ice_vsi *vsi, struct ice_q_vector *q_vector) { - struct ice_hw *hw = &vsi->back->hw; - struct ice_ring_container *rc; + struct ice_ring_container *tx = &q_vector->tx; + struct ice_ring_container *rx = &q_vector->rx; u32 itr_val; + /* This will do nothing if dynamic updates are not enabled */ + ice_update_itr(q_vector, tx); + ice_update_itr(q_vector, rx); + /* This block of logic allows us to get away with only updating * one ITR value with each interrupt. The idea is to perform a * pseudo-lazy update with the following criteria. @@ -1080,35 +1371,36 @@ ice_update_ena_itr(struct ice_vsi *vsi, struct ice_q_vector *q_vector) * 2. If we must reduce an ITR that is given highest priority. * 3. We then give priority to increasing ITR based on amount. 
*/ - if (q_vector->rx.target_itr < q_vector->rx.current_itr) { - rc = &q_vector->rx; + if (rx->target_itr < rx->current_itr) { /* Rx ITR needs to be reduced, this is highest priority */ - itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr); - rc->current_itr = rc->target_itr; - } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) || - ((q_vector->rx.target_itr - q_vector->rx.current_itr) < - (q_vector->tx.target_itr - q_vector->tx.current_itr))) { - rc = &q_vector->tx; + itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr); + rx->current_itr = rx->target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if ((tx->target_itr < tx->current_itr) || + ((rx->target_itr - rx->current_itr) < + (tx->target_itr - tx->current_itr))) { /* Tx ITR needs to be reduced, this is second priority * Tx ITR needs to be increased more than Rx, fourth priority */ - itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr); - rc->current_itr = rc->target_itr; - } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) { - rc = &q_vector->rx; + itr_val = ice_buildreg_itr(tx->itr_idx, tx->target_itr); + tx->current_itr = tx->target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if (rx->current_itr != rx->target_itr) { /* Rx ITR needs to be increased, third priority */ - itr_val = ice_buildreg_itr(rc->itr_idx, rc->target_itr); - rc->current_itr = rc->target_itr; + itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr); + rx->current_itr = rx->target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; } else { /* Still have to re-enable the interrupts */ itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0); + if (q_vector->itr_countdown) + q_vector->itr_countdown--; } - if (!test_bit(__ICE_DOWN, vsi->state)) { - int vector = vsi->hw_base_vector + q_vector->v_idx; - - wr32(hw, GLINT_DYN_CTL(vector), itr_val); - } + if (!test_bit(__ICE_DOWN, vsi->state)) + wr32(&vsi->back->hw, + GLINT_DYN_CTL(vsi->hw_base_vector + q_vector->v_idx), + itr_val); } /** 
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index fc358ea81816..60131b84b021 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -47,6 +47,9 @@ #define ICE_TX_FLAGS_VLAN_M 0xffff0000 #define ICE_TX_FLAGS_VLAN_S 16 +#define ICE_RX_DMA_ATTR \ + (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) + struct ice_tx_buf { struct ice_tx_desc *next_to_watch; struct sk_buff *skb; @@ -73,6 +76,7 @@ struct ice_rx_buf { dma_addr_t dma; struct page *page; unsigned int page_offset; + u16 pagecnt_bias; }; struct ice_q_stats { @@ -124,10 +128,17 @@ enum ice_rx_dtype { #define ICE_ITR_DYNAMIC 0x8000 /* used as flag for itr_setting */ #define ITR_IS_DYNAMIC(setting) (!!((setting) & ICE_ITR_DYNAMIC)) #define ITR_TO_REG(setting) ((setting) & ~ICE_ITR_DYNAMIC) -#define ICE_ITR_GRAN_S 1 /* Assume ITR granularity is 2us */ +#define ICE_ITR_GRAN_S 1 /* ITR granularity is always 2us */ +#define ICE_ITR_GRAN_US BIT(ICE_ITR_GRAN_S) #define ICE_ITR_MASK 0x1FFE /* ITR register value alignment mask */ #define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~ICE_ITR_MASK) +#define ICE_ITR_ADAPTIVE_MIN_INC 0x0002 +#define ICE_ITR_ADAPTIVE_MIN_USECS 0x0002 +#define ICE_ITR_ADAPTIVE_MAX_USECS 0x00FA +#define ICE_ITR_ADAPTIVE_LATENCY 0x8000 +#define ICE_ITR_ADAPTIVE_BULK 0x0000 + #define ICE_DFLT_INTRL 0 /* Legacy or Advanced Mode Queue */ @@ -173,21 +184,13 @@ struct ice_ring { u16 next_to_alloc; } ____cacheline_internodealigned_in_smp; -enum ice_latency_range { - ICE_LOWEST_LATENCY = 0, - ICE_LOW_LATENCY = 1, - ICE_BULK_LATENCY = 2, - ICE_ULTRA_LATENCY = 3, -}; - struct ice_ring_container { /* head of linked-list of rings */ struct ice_ring *ring; unsigned long next_update; /* jiffies value of next queue update */ unsigned int total_bytes; /* total bytes processed this int */ unsigned int total_pkts; /* total packets processed this int */ - enum ice_latency_range latency_range; - int itr_idx; /* 
index in the interrupt vector */ + u16 itr_idx; /* index in the interrupt vector */ u16 target_itr; /* value in usecs divided by the hw->itr_gran */ u16 current_itr; /* value in usecs divided by the hw->itr_gran */ /* high bit set means dynamic ITR, rest is used to store user diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 17086d5b5c33..3a4e67484487 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -24,6 +24,7 @@ static inline bool ice_is_tc_ena(u8 bitmap, u8 tc) /* debug masks - set these bits in hw->debug_mask to control output */ #define ICE_DBG_INIT BIT_ULL(1) #define ICE_DBG_LINK BIT_ULL(4) +#define ICE_DBG_PHY BIT_ULL(5) #define ICE_DBG_QCTX BIT_ULL(6) #define ICE_DBG_NVM BIT_ULL(7) #define ICE_DBG_LAN BIT_ULL(8) @@ -209,6 +210,9 @@ struct ice_nvm_info { #define ICE_MAX_TRAFFIC_CLASS 8 #define ICE_TXSCHED_MAX_BRANCHES ICE_MAX_TRAFFIC_CLASS +#define ice_for_each_traffic_class(_i) \ + for ((_i) = 0; (_i) < ICE_MAX_TRAFFIC_CLASS; (_i)++) + struct ice_sched_node { struct ice_sched_node *parent; struct ice_sched_node *sibling; /* next sibling in the same layer */ @@ -247,7 +251,6 @@ struct ice_sched_vsi_info { struct ice_sched_node *ag_node[ICE_MAX_TRAFFIC_CLASS]; struct list_head list_entry; u16 max_lanq[ICE_MAX_TRAFFIC_CLASS]; - u16 vsi_id; }; /* driver defines the policy */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 57155b4a59dc..84e51a0a0795 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -5,6 +5,37 @@ #include "ice_lib.h" /** + * ice_err_to_virt err - translate errors for VF return code + * @ice_err: error return code + */ +static enum virtchnl_status_code ice_err_to_virt_err(enum ice_status ice_err) +{ + switch (ice_err) { + case ICE_SUCCESS: + return VIRTCHNL_STATUS_SUCCESS; + case ICE_ERR_BAD_PTR: + 
case ICE_ERR_INVAL_SIZE: + case ICE_ERR_DEVICE_NOT_SUPPORTED: + case ICE_ERR_PARAM: + case ICE_ERR_CFG: + return VIRTCHNL_STATUS_ERR_PARAM; + case ICE_ERR_NO_MEMORY: + return VIRTCHNL_STATUS_ERR_NO_MEMORY; + case ICE_ERR_NOT_READY: + case ICE_ERR_RESET_FAILED: + case ICE_ERR_FW_API_VER: + case ICE_ERR_AQ_ERROR: + case ICE_ERR_AQ_TIMEOUT: + case ICE_ERR_AQ_FULL: + case ICE_ERR_AQ_NO_WORK: + case ICE_ERR_AQ_EMPTY: + return VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; + default: + return VIRTCHNL_STATUS_ERR_NOT_SUPPORTED; + } +} + +/** * ice_vc_vf_broadcast - Broadcast a message to all VFs on PF * @pf: pointer to the PF structure * @v_opcode: operation code @@ -14,7 +45,7 @@ */ static void ice_vc_vf_broadcast(struct ice_pf *pf, enum virtchnl_ops v_opcode, - enum ice_status v_retval, u8 *msg, u16 msglen) + enum virtchnl_status_code v_retval, u8 *msg, u16 msglen) { struct ice_hw *hw = &pf->hw; struct ice_vf *vf = pf->vf; @@ -104,7 +135,8 @@ static void ice_vc_notify_vf_link_state(struct ice_vf *vf) ice_set_pfe_link(vf, &pfe, ls->link_speed, ls->link_info & ICE_AQ_LINK_UP); - ice_aq_send_msg_to_vf(hw, vf->vf_id, VIRTCHNL_OP_EVENT, 0, (u8 *)&pfe, + ice_aq_send_msg_to_vf(hw, vf->vf_id, VIRTCHNL_OP_EVENT, + VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe, sizeof(pfe), NULL); } @@ -343,11 +375,41 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr) } /** - * ice_vsi_set_pvid - Set port VLAN id for the VSI - * @vsi: the VSI being changed + * ice_vsi_set_pvid_fill_ctxt - Set VSI ctxt for add pvid + * @ctxt: the vsi ctxt to fill * @vid: the VLAN id to set as a PVID */ -static int ice_vsi_set_pvid(struct ice_vsi *vsi, u16 vid) +static void ice_vsi_set_pvid_fill_ctxt(struct ice_vsi_ctx *ctxt, u16 vid) +{ + ctxt->info.vlan_flags = (ICE_AQ_VSI_VLAN_MODE_UNTAGGED | + ICE_AQ_VSI_PVLAN_INSERT_PVID | + ICE_AQ_VSI_VLAN_EMOD_STR); + ctxt->info.pvid = cpu_to_le16(vid); + ctxt->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; + ctxt->info.valid_sections = 
cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID | + ICE_AQ_VSI_PROP_SW_VALID); +} + +/** + * ice_vsi_kill_pvid_fill_ctxt - Set VSI ctx for remove pvid + * @ctxt: the VSI ctxt to fill + */ +static void ice_vsi_kill_pvid_fill_ctxt(struct ice_vsi_ctx *ctxt) +{ + ctxt->info.vlan_flags = ICE_AQ_VSI_VLAN_EMOD_NOTHING; + ctxt->info.vlan_flags |= ICE_AQ_VSI_VLAN_MODE_ALL; + ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; + ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID | + ICE_AQ_VSI_PROP_SW_VALID); +} + +/** + * ice_vsi_manage_pvid - Enable or disable port VLAN for VSI + * @vsi: the VSI to update + * @vid: the VLAN id to set as a PVID + * @enable: true for enable pvid false for disable + */ +static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 vid, bool enable) { struct device *dev = &vsi->back->pdev->dev; struct ice_hw *hw = &vsi->back->hw; @@ -359,46 +421,27 @@ static int ice_vsi_set_pvid(struct ice_vsi *vsi, u16 vid) if (!ctxt) return -ENOMEM; - ctxt->info.vlan_flags = (ICE_AQ_VSI_VLAN_MODE_UNTAGGED | - ICE_AQ_VSI_PVLAN_INSERT_PVID | - ICE_AQ_VSI_VLAN_EMOD_STR); - ctxt->info.pvid = cpu_to_le16(vid); - ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID); + ctxt->info = vsi->info; + if (enable) + ice_vsi_set_pvid_fill_ctxt(ctxt, vid); + else + ice_vsi_kill_pvid_fill_ctxt(ctxt); status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { - dev_info(dev, "update VSI for VLAN insert failed, err %d aq_err %d\n", + dev_info(dev, "update VSI for port VLAN failed, err %d aq_err %d\n", status, hw->adminq.sq_last_status); ret = -EIO; goto out; } - vsi->info.pvid = ctxt->info.pvid; - vsi->info.vlan_flags = ctxt->info.vlan_flags; + vsi->info = ctxt->info; out: devm_kfree(dev, ctxt); return ret; } /** - * ice_vsi_kill_pvid - Remove port VLAN id from the VSI - * @vsi: the VSI being changed - */ -static int ice_vsi_kill_pvid(struct ice_vsi *vsi) -{ - struct ice_pf *pf = vsi->back; - - if (ice_vsi_manage_vlan_stripping(vsi, false)) { - 
dev_err(&pf->pdev->dev, "Error removing Port VLAN on VSI %i\n", - vsi->vsi_num); - return -ENODEV; - } - - vsi->info.pvid = 0; - return 0; -} - -/** * ice_vf_vsi_setup - Set up a VF VSI * @pf: board private structure * @pi: pointer to the port_info instance @@ -446,8 +489,10 @@ static int ice_alloc_vsi_res(struct ice_vf *vf) vsi->hw_base_vector += 1; /* Check if port VLAN exist before, and restore it accordingly */ - if (vf->port_vlan_id) - ice_vsi_set_pvid(vsi, vf->port_vlan_id); + if (vf->port_vlan_id) { + ice_vsi_manage_pvid(vsi, vf->port_vlan_id, true); + ice_vsi_add_vlan(vsi, vf->port_vlan_id & ICE_VLAN_M); + } eth_broadcast_addr(broadcast); @@ -484,6 +529,8 @@ ice_alloc_vsi_res_exit: */ static int ice_alloc_vf_res(struct ice_vf *vf) { + struct ice_pf *pf = vf->pf; + int tx_rx_queue_left; int status; /* setup VF VSI and necessary resources */ @@ -491,6 +538,15 @@ static int ice_alloc_vf_res(struct ice_vf *vf) if (status) goto ice_alloc_vf_res_exit; + /* Update number of VF queues, in case VF had requested for queue + * changes + */ + tx_rx_queue_left = min_t(int, pf->q_left_tx, pf->q_left_rx); + tx_rx_queue_left += ICE_DFLT_QS_PER_VF; + if (vf->num_req_qs && vf->num_req_qs <= tx_rx_queue_left && + vf->num_req_qs != vf->num_vf_qs) + vf->num_vf_qs = vf->num_req_qs; + if (vf->trusted) set_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps); else @@ -548,6 +604,10 @@ static void ice_ena_vf_mappings(struct ice_vf *vf) wr32(hw, GLINT_VECT2FUNC(v), reg); } + /* Map mailbox interrupt. We put an explicit 0 here to remind us that + * VF admin queue interrupts will go to VF MSI-X vector 0. 
+ */ + wr32(hw, VPINT_MBX_CTL(abs_vf_id), VPINT_MBX_CTL_CAUSE_ENA_M | 0); /* set regardless of mapping mode */ wr32(hw, VPLAN_TXQ_MAPENA(vf->vf_id), VPLAN_TXQ_MAPENA_TX_ENA_M); @@ -750,6 +810,47 @@ static void ice_cleanup_and_realloc_vf(struct ice_vf *vf) } /** + * ice_vf_set_vsi_promisc - set given VF VSI to given promiscuous mode(s) + * @vf: pointer to the VF info + * @vsi: the VSI being configured + * @promisc_m: mask of promiscuous config bits + * @rm_promisc: promisc flag request from the VF to remove or add filter + * + * This function configures VF VSI promiscuous mode, based on the VF requests, + * for Unicast, Multicast and VLAN + */ +static enum ice_status +ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m, + bool rm_promisc) +{ + struct ice_pf *pf = vf->pf; + enum ice_status status = 0; + struct ice_hw *hw; + + hw = &pf->hw; + if (vf->num_vlan) { + status = ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_m, + rm_promisc); + } else if (vf->port_vlan_id) { + if (rm_promisc) + status = ice_clear_vsi_promisc(hw, vsi->idx, promisc_m, + vf->port_vlan_id); + else + status = ice_set_vsi_promisc(hw, vsi->idx, promisc_m, + vf->port_vlan_id); + } else { + if (rm_promisc) + status = ice_clear_vsi_promisc(hw, vsi->idx, promisc_m, + 0); + else + status = ice_set_vsi_promisc(hw, vsi->idx, promisc_m, + 0); + } + + return status; +} + +/** * ice_reset_all_vfs - reset all allocated VFs in one go * @pf: pointer to the PF structure * @is_vflr: true if VFLR was issued, false if not @@ -764,6 +865,7 @@ static void ice_cleanup_and_realloc_vf(struct ice_vf *vf) bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) { struct ice_hw *hw = &pf->hw; + struct ice_vf *vf; int v, i; /* If we don't have any VFs, then there is nothing to reset */ @@ -778,12 +880,17 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) for (v = 0; v < pf->num_alloc_vfs; v++) ice_trigger_vf_reset(&pf->vf[v], is_vflr); - /* Call Disable LAN Tx queue AQ call with VFR bit set 
and 0 - * queues to inform Firmware about VF reset. - */ - for (v = 0; v < pf->num_alloc_vfs; v++) - ice_dis_vsi_txq(pf->vsi[0]->port_info, 0, NULL, NULL, - ICE_VF_RESET, v, NULL); + for (v = 0; v < pf->num_alloc_vfs; v++) { + struct ice_vsi *vsi; + + vf = &pf->vf[v]; + vsi = pf->vsi[vf->lan_vsi_idx]; + if (test_bit(ICE_VF_STATE_ENA, vf->vf_states)) { + ice_vsi_stop_lan_tx_rings(vsi, ICE_VF_RESET, vf->vf_id); + ice_vsi_stop_rx_rings(vsi); + clear_bit(ICE_VF_STATE_ENA, vf->vf_states); + } + } /* HW requires some time to make sure it can flush the FIFO for a VF * when it resets it. Poll the VPGEN_VFRSTAT register for each VF in @@ -796,9 +903,9 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) /* Check each VF in sequence */ while (v < pf->num_alloc_vfs) { - struct ice_vf *vf = &pf->vf[v]; u32 reg; + vf = &pf->vf[v]; reg = rd32(hw, VPGEN_VFRSTAT(vf->vf_id)); if (!(reg & VPGEN_VFRSTAT_VFRD_M)) break; @@ -818,8 +925,18 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) usleep_range(10000, 20000); /* free VF resources to begin resetting the VSI state */ - for (v = 0; v < pf->num_alloc_vfs; v++) - ice_free_vf_res(&pf->vf[v]); + for (v = 0; v < pf->num_alloc_vfs; v++) { + vf = &pf->vf[v]; + + ice_free_vf_res(vf); + + /* Free VF queues as well, and reallocate later. + * If a given VF has different number of queues + * configured, the request for update will come + * via mailbox communication. 
+ */ + vf->num_vf_qs = 0; + } if (ice_check_avail_res(pf)) { dev_err(&pf->pdev->dev, @@ -828,8 +945,15 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) } /* Finish the reset on each VF */ - for (v = 0; v < pf->num_alloc_vfs; v++) - ice_cleanup_and_realloc_vf(&pf->vf[v]); + for (v = 0; v < pf->num_alloc_vfs; v++) { + vf = &pf->vf[v]; + + vf->num_vf_qs = pf->num_vf_qps; + dev_dbg(&pf->pdev->dev, + "VF-id %d has %d queues configured\n", + vf->vf_id, vf->num_vf_qs); + ice_cleanup_and_realloc_vf(vf); + } ice_flush(hw); clear_bit(__ICE_VF_DIS, pf->state); @@ -847,9 +971,10 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) { struct ice_pf *pf = vf->pf; - struct ice_hw *hw = &pf->hw; struct ice_vsi *vsi; + struct ice_hw *hw; bool rsd = false; + u8 promisc_m; u32 reg; int i; @@ -875,6 +1000,7 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) vf->vf_id, NULL); } + hw = &pf->hw; /* poll VPGEN_VFRSTAT reg to make sure * that reset is complete */ @@ -900,6 +1026,21 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) usleep_range(10000, 20000); + /* disable promiscuous modes in case they were enabled + * ignore any error if disabling process failed + */ + if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) || + test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) { + if (vf->port_vlan_id || vf->num_vlan) + promisc_m = ICE_UCAST_VLAN_PROMISC_BITS; + else + promisc_m = ICE_UCAST_PROMISC_BITS; + + vsi = pf->vsi[vf->lan_vsi_idx]; + if (ice_vf_set_vsi_promisc(vf, vsi, promisc_m, true)) + dev_err(&pf->pdev->dev, "disabling promiscuous mode failed\n"); + } + /* free VF resources to begin resetting the VSI state */ ice_free_vf_res(vf); @@ -938,7 +1079,7 @@ void ice_vc_notify_reset(struct ice_pf *pf) pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING; pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM; - ice_vc_vf_broadcast(pf, VIRTCHNL_OP_EVENT, ICE_SUCCESS, + ice_vc_vf_broadcast(pf, VIRTCHNL_OP_EVENT, 
VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe, sizeof(struct virtchnl_pf_event)); } @@ -961,8 +1102,9 @@ static void ice_vc_notify_vf_reset(struct ice_vf *vf) pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING; pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM; - ice_aq_send_msg_to_vf(&vf->pf->hw, vf->vf_id, VIRTCHNL_OP_EVENT, 0, - (u8 *)&pfe, sizeof(pfe), NULL); + ice_aq_send_msg_to_vf(&vf->pf->hw, vf->vf_id, VIRTCHNL_OP_EVENT, + VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe, sizeof(pfe), + NULL); } /** @@ -1012,7 +1154,7 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs) pf->num_alloc_vfs = num_alloc_vfs; /* VF resources get allocated during reset */ - if (!ice_reset_all_vfs(pf, false)) + if (!ice_reset_all_vfs(pf, true)) goto err_unroll_sriov; goto err_unroll_intr; @@ -1182,8 +1324,9 @@ static void ice_vc_dis_vf(struct ice_vf *vf) * * send msg to VF */ -static int ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, - enum ice_status v_retval, u8 *msg, u16 msglen) +static int +ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, + enum virtchnl_status_code v_retval, u8 *msg, u16 msglen) { enum ice_status aq_ret; struct ice_pf *pf; @@ -1243,8 +1386,8 @@ static int ice_vc_get_ver_msg(struct ice_vf *vf, u8 *msg) if (VF_IS_V10(&vf->vf_ver)) info.minor = VIRTCHNL_VERSION_MINOR_NO_VF_CAPS; - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_VERSION, ICE_SUCCESS, - (u8 *)&info, + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_VERSION, + VIRTCHNL_STATUS_SUCCESS, (u8 *)&info, sizeof(struct virtchnl_version_info)); } @@ -1257,15 +1400,15 @@ static int ice_vc_get_ver_msg(struct ice_vf *vf, u8 *msg) */ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) { + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct virtchnl_vf_resource *vfres = NULL; - enum ice_status aq_ret = 0; struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; int len = 0; int ret; if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto err; } @@ 
-1273,7 +1416,7 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres = devm_kzalloc(&pf->pdev->dev, len, GFP_KERNEL); if (!vfres) { - aq_ret = ICE_ERR_NO_MEMORY; + v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; len = 0; goto err; } @@ -1286,6 +1429,11 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->vf_cap_flags = VIRTCHNL_VF_OFFLOAD_L2; vsi = pf->vsi[vf->lan_vsi_idx]; + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + if (!vsi->info.pvid) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_VLAN; @@ -1336,7 +1484,7 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) err: /* send the response back to the VF */ - ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_VF_RESOURCES, aq_ret, + ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_VF_RESOURCES, v_ret, (u8 *)vfres, len); devm_kfree(&pf->pdev->dev, vfres); @@ -1368,7 +1516,7 @@ static struct ice_vsi *ice_find_vsi_from_id(struct ice_pf *pf, u16 id) { int i; - for (i = 0; i < pf->num_alloc_vsi; i++) + ice_for_each_vsi(pf, i) if (pf->vsi[i] && pf->vsi[i]->vsi_num == id) return pf->vsi[i]; @@ -1416,42 +1564,42 @@ static bool ice_vc_isvalid_q_id(struct ice_vf *vf, u16 vsi_id, u8 qid) */ static int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg) { + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct virtchnl_rss_key *vrk = (struct virtchnl_rss_key *)msg; + struct ice_pf *pf = vf->pf; struct ice_vsi *vsi = NULL; - enum ice_status aq_ret; - int ret; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } if (!ice_vc_isvalid_vsi_id(vf, vrk->vsi_id)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - vsi = ice_find_vsi_from_id(vf->pf, vrk->vsi_id); + vsi = pf->vsi[vf->lan_vsi_idx]; if (!vsi) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } if (vrk->key_len != ICE_VSIQF_HKEY_ARRAY_SIZE) { - aq_ret = 
ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - ret = ice_set_rss(vsi, vrk->key, NULL, 0); - aq_ret = ret ? ICE_ERR_PARAM : ICE_SUCCESS; + if (ice_set_rss(vsi, vrk->key, NULL, 0)) + v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; error_param: - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_KEY, aq_ret, + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_KEY, v_ret, NULL, 0); } @@ -1465,40 +1613,40 @@ error_param: static int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg) { struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg; + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct ice_pf *pf = vf->pf; struct ice_vsi *vsi = NULL; - enum ice_status aq_ret; - int ret; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } if (!ice_vc_isvalid_vsi_id(vf, vrl->vsi_id)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - vsi = ice_find_vsi_from_id(vf->pf, vrl->vsi_id); + vsi = pf->vsi[vf->lan_vsi_idx]; if (!vsi) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } if (vrl->lut_entries != ICE_VSIQF_HLUT_ARRAY_SIZE) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - ret = ice_set_rss(vsi, NULL, vrl->lut, ICE_VSIQF_HLUT_ARRAY_SIZE); - aq_ret = ret ? 
ICE_ERR_PARAM : ICE_SUCCESS; + if (ice_set_rss(vsi, NULL, vrl->lut, ICE_VSIQF_HLUT_ARRAY_SIZE)) + v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; error_param: - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_LUT, aq_ret, + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_LUT, v_ret, NULL, 0); } @@ -1511,25 +1659,26 @@ error_param: */ static int ice_vc_get_stats_msg(struct ice_vf *vf, u8 *msg) { + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct virtchnl_queue_select *vqs = (struct virtchnl_queue_select *)msg; - enum ice_status aq_ret = 0; + struct ice_pf *pf = vf->pf; struct ice_eth_stats stats; struct ice_vsi *vsi; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - vsi = ice_find_vsi_from_id(vf->pf, vqs->vsi_id); + vsi = pf->vsi[vf->lan_vsi_idx]; if (!vsi) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1540,7 +1689,7 @@ static int ice_vc_get_stats_msg(struct ice_vf *vf, u8 *msg) error_param: /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_STATS, aq_ret, + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_STATS, v_ret, (u8 *)&stats, sizeof(stats)); } @@ -1553,29 +1702,30 @@ error_param: */ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) { + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct virtchnl_queue_select *vqs = (struct virtchnl_queue_select *)msg; - enum ice_status aq_ret = 0; + struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } if (!vqs->rx_queues && 
!vqs->tx_queues) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - vsi = ice_find_vsi_from_id(vf->pf, vqs->vsi_id); + vsi = pf->vsi[vf->lan_vsi_idx]; if (!vsi) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1584,15 +1734,15 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) * programmed using ice_vsi_cfg_txqs */ if (ice_vsi_start_rx_rings(vsi)) - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; /* Set flag to indicate that queues are enabled */ - if (!aq_ret) + if (v_ret == VIRTCHNL_STATUS_SUCCESS) set_bit(ICE_VF_STATE_ENA, vf->vf_states); error_param: /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES, aq_ret, + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES, v_ret, NULL, 0); } @@ -1606,30 +1756,31 @@ error_param: */ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) { + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct virtchnl_queue_select *vqs = (struct virtchnl_queue_select *)msg; - enum ice_status aq_ret = 0; + struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) && !test_bit(ICE_VF_STATE_ENA, vf->vf_states)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } if (!vqs->rx_queues && !vqs->tx_queues) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - vsi = ice_find_vsi_from_id(vf->pf, vqs->vsi_id); + vsi = pf->vsi[vf->lan_vsi_idx]; if (!vsi) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1637,23 +1788,23 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) dev_err(&vsi->back->pdev->dev, "Failed to stop tx rings on VSI %d\n", vsi->vsi_num); - aq_ret = ICE_ERR_PARAM; + v_ret = 
VIRTCHNL_STATUS_ERR_PARAM; } if (ice_vsi_stop_rx_rings(vsi)) { dev_err(&vsi->back->pdev->dev, "Failed to stop rx rings on VSI %d\n", vsi->vsi_num); - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; } /* Clear enabled queues flag */ - if (!aq_ret) + if (v_ret == VIRTCHNL_STATUS_SUCCESS) clear_bit(ICE_VF_STATE_ENA, vf->vf_states); error_param: /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_QUEUES, aq_ret, + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_QUEUES, v_ret, NULL, 0); } @@ -1666,18 +1817,18 @@ error_param: */ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) { + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct virtchnl_irq_map_info *irqmap_info = (struct virtchnl_irq_map_info *)msg; u16 vsi_id, vsi_q_id, vector_id; struct virtchnl_vector_map *map; struct ice_vsi *vsi = NULL; struct ice_pf *pf = vf->pf; - enum ice_status aq_ret = 0; unsigned long qmap; int i; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1689,13 +1840,13 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) /* validate msg params */ if (!(vector_id < pf->hw.func_caps.common_cap .num_msix_vectors) || !ice_vc_isvalid_vsi_id(vf, vsi_id)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - vsi = ice_find_vsi_from_id(vf->pf, vsi_id); + vsi = pf->vsi[vf->lan_vsi_idx]; if (!vsi) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1705,7 +1856,7 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) struct ice_q_vector *q_vector; if (!ice_vc_isvalid_q_id(vf, vsi_id, vsi_q_id)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } q_vector = vsi->q_vectors[i]; @@ -1719,7 +1870,7 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) struct ice_q_vector *q_vector; if 
(!ice_vc_isvalid_q_id(vf, vsi_id, vsi_q_id)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } q_vector = vsi->q_vectors[i]; @@ -1733,7 +1884,7 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) ice_vsi_cfg_msix(vsi); error_param: /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_IRQ_MAP, aq_ret, + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_IRQ_MAP, v_ret, NULL, 0); } @@ -1746,26 +1897,34 @@ error_param: */ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) { + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct virtchnl_vsi_queue_config_info *qci = (struct virtchnl_vsi_queue_config_info *)msg; struct virtchnl_queue_pair_info *qpi; - enum ice_status aq_ret = 0; + struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; int i; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - vsi = ice_find_vsi_from_id(vf->pf, qci->vsi_id); + vsi = pf->vsi[vf->lan_vsi_idx]; if (!vsi) { - aq_ret = ICE_ERR_PARAM; + goto error_param; + } + + if (qci->num_queue_pairs > ICE_MAX_BASE_QS_PER_VF) { + dev_err(&pf->pdev->dev, + "VF-%d requesting more than supported number of queues: %d\n", + vf->vf_id, qci->num_queue_pairs); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1775,7 +1934,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) qpi->rxq.vsi_id != qci->vsi_id || qpi->rxq.queue_id != qpi->txq.queue_id || !ice_vc_isvalid_q_id(vf, qci->vsi_id, qpi->txq.queue_id)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } /* copy Tx queue info from VF into VSI */ @@ -1785,13 +1944,13 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr; vsi->rx_rings[i]->count = 
qpi->rxq.ring_len; if (qpi->rxq.databuffer_size > ((16 * 1024) - 128)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } vsi->rx_buf_len = qpi->rxq.databuffer_size; if (qpi->rxq.max_pkt_size >= (16 * 1024) || qpi->rxq.max_pkt_size < 64) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } vsi->max_frame = qpi->rxq.max_pkt_size; @@ -1802,15 +1961,16 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) */ vsi->num_txq = qci->num_queue_pairs; vsi->num_rxq = qci->num_queue_pairs; + /* All queues of VF VSI are in TC 0 */ + vsi->tc_cfg.tc_info[0].qcount_tx = qci->num_queue_pairs; + vsi->tc_cfg.tc_info[0].qcount_rx = qci->num_queue_pairs; - if (!ice_vsi_cfg_lan_txqs(vsi) && !ice_vsi_cfg_rxqs(vsi)) - aq_ret = 0; - else - aq_ret = ICE_ERR_PARAM; + if (ice_vsi_cfg_lan_txqs(vsi) || ice_vsi_cfg_rxqs(vsi)) + v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; error_param: /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, aq_ret, + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, v_ret, NULL, 0); } @@ -1852,11 +2012,11 @@ static bool ice_can_vf_change_mac(struct ice_vf *vf) static int ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) { + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct virtchnl_ether_addr_list *al = (struct virtchnl_ether_addr_list *)msg; struct ice_pf *pf = vf->pf; enum virtchnl_ops vc_op; - enum ice_status ret; LIST_HEAD(mac_list); struct ice_vsi *vsi; int mac_count = 0; @@ -1869,19 +2029,27 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) || !ice_vc_isvalid_vsi_id(vf, al->vsi_id)) { - ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto handle_mac_exit; } if (set && !ice_is_vf_trusted(vf) && (vf->num_mac + al->num_elements) > ICE_MAX_MACADDR_PER_VF) { dev_err(&pf->pdev->dev, - "Can't add more MAC addresses, because VF 
is not trusted, switch the VF to trusted mode in order to add more functionalities\n"); - ret = ICE_ERR_PARAM; + "Can't add more MAC addresses, because VF-%d is not trusted, switch the VF to trusted mode in order to add more functionalities\n", + vf->vf_id); + /* There is no need to let VF know about not being trusted + * to add more MAC addr, so we can just return success message. + */ + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto handle_mac_exit; } vsi = pf->vsi[vf->lan_vsi_idx]; + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto handle_mac_exit; + } for (i = 0; i < al->num_elements; i++) { u8 *maddr = al->list[i].addr; @@ -1893,40 +2061,39 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) * already added. Just continue. */ dev_info(&pf->pdev->dev, - "mac %pM already set for VF %d\n", + "MAC %pM already set for VF %d\n", maddr, vf->vf_id); continue; } else { /* VF can't remove dflt_lan_addr/bcast mac */ dev_err(&pf->pdev->dev, - "can't remove mac %pM for VF %d\n", + "VF can't remove default MAC address or MAC %pM programmed by PF for VF %d\n", maddr, vf->vf_id); - ret = ICE_ERR_PARAM; - goto handle_mac_exit; + continue; } } /* check for the invalid cases and bail if necessary */ if (is_zero_ether_addr(maddr)) { dev_err(&pf->pdev->dev, - "invalid mac %pM provided for VF %d\n", + "invalid MAC %pM provided for VF %d\n", maddr, vf->vf_id); - ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto handle_mac_exit; } if (is_unicast_ether_addr(maddr) && !ice_can_vf_change_mac(vf)) { dev_err(&pf->pdev->dev, - "can't change unicast mac for untrusted VF %d\n", + "can't change unicast MAC for untrusted VF %d\n", vf->vf_id); - ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto handle_mac_exit; } /* get here if maddr is multicast or if VF can change mac */ if (ice_add_mac_to_list(vsi, &mac_list, al->list[i].addr)) { - ret = ICE_ERR_NO_MEMORY; + v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; goto handle_mac_exit; } mac_count++; @@ -1934,14 
+2101,14 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) /* program the updated filter list */ if (set) - ret = ice_add_mac(&pf->hw, &mac_list); + v_ret = ice_err_to_virt_err(ice_add_mac(&pf->hw, &mac_list)); else - ret = ice_remove_mac(&pf->hw, &mac_list); + v_ret = ice_err_to_virt_err(ice_remove_mac(&pf->hw, &mac_list)); - if (ret) { + if (v_ret) { dev_err(&pf->pdev->dev, - "can't update mac filters for VF %d, error %d\n", - vf->vf_id, ret); + "can't update MAC filters for VF %d, error %d\n", + vf->vf_id, v_ret); } else { if (set) vf->num_mac += mac_count; @@ -1952,7 +2119,7 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) handle_mac_exit: ice_free_fltr_list(&pf->pdev->dev, &mac_list); /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, vc_op, ret, NULL, 0); + return ice_vc_send_msg_to_vf(vf, vc_op, v_ret, NULL, 0); } /** @@ -1991,35 +2158,38 @@ static int ice_vc_del_mac_addr_msg(struct ice_vf *vf, u8 *msg) */ static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg) { + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct virtchnl_vf_res_request *vfres = (struct virtchnl_vf_res_request *)msg; int req_queues = vfres->num_queue_pairs; - enum ice_status aq_ret = 0; struct ice_pf *pf = vf->pf; + int max_allowed_vf_queues; int tx_rx_queue_left; int cur_queues; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - cur_queues = pf->num_vf_qps; + cur_queues = vf->num_vf_qs; tx_rx_queue_left = min_t(int, pf->q_left_tx, pf->q_left_rx); + max_allowed_vf_queues = tx_rx_queue_left + cur_queues; if (req_queues <= 0) { dev_err(&pf->pdev->dev, "VF %d tried to request %d queues. 
Ignoring.\n", vf->vf_id, req_queues); - } else if (req_queues > ICE_MAX_QS_PER_VF) { + } else if (req_queues > ICE_MAX_BASE_QS_PER_VF) { dev_err(&pf->pdev->dev, "VF %d tried to request more than %d queues.\n", - vf->vf_id, ICE_MAX_QS_PER_VF); - vfres->num_queue_pairs = ICE_MAX_QS_PER_VF; + vf->vf_id, ICE_MAX_BASE_QS_PER_VF); + vfres->num_queue_pairs = ICE_MAX_BASE_QS_PER_VF; } else if (req_queues - cur_queues > tx_rx_queue_left) { dev_warn(&pf->pdev->dev, "VF %d requested %d more queues, but only %d left.\n", vf->vf_id, req_queues - cur_queues, tx_rx_queue_left); - vfres->num_queue_pairs = tx_rx_queue_left + cur_queues; + vfres->num_queue_pairs = min_t(int, max_allowed_vf_queues, + ICE_MAX_BASE_QS_PER_VF); } else { /* request is successful, then reset VF */ vf->num_req_qs = req_queues; @@ -2033,7 +2203,7 @@ static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg) error_param: /* send the response to the VF */ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES, - aq_ret, (u8 *)vfres, sizeof(*vfres)); + v_ret, (u8 *)vfres, sizeof(*vfres)); } /** @@ -2093,11 +2263,12 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, VLAN_VID_MASK)); if (vlan_id || qos) { - ret = ice_vsi_set_pvid(vsi, vlanprio); + ret = ice_vsi_manage_pvid(vsi, vlanprio, true); if (ret) goto error_set_pvid; } else { - ice_vsi_kill_pvid(vsi); + ice_vsi_manage_pvid(vsi, 0, false); + vsi->info.pvid = 0; } if (vlan_id) { @@ -2129,48 +2300,57 @@ error_set_pvid: */ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) { + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct virtchnl_vlan_filter_list *vfl = (struct virtchnl_vlan_filter_list *)msg; - enum ice_status aq_ret = 0; struct ice_pf *pf = vf->pf; + bool vlan_promisc = false; struct ice_vsi *vsi; + struct ice_hw *hw; + int status = 0; + u8 promisc_m; int i; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; 
goto error_param; } if (!ice_vc_isvalid_vsi_id(vf, vfl->vsi_id)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } if (add_v && !ice_is_vf_trusted(vf) && vf->num_vlan >= ICE_MAX_VLAN_PER_VF) { dev_info(&pf->pdev->dev, - "VF is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n"); - aq_ret = ICE_ERR_PARAM; + "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n", + vf->vf_id); + /* There is no need to let VF know about being not trusted, + * so we can just return success message here + */ + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } for (i = 0; i < vfl->num_elements; i++) { if (vfl->vlan_id[i] > ICE_MAX_VLANID) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; dev_err(&pf->pdev->dev, "invalid VF VLAN id %d\n", vfl->vlan_id[i]); goto error_param; } } - vsi = ice_find_vsi_from_id(vf->pf, vfl->vsi_id); + hw = &pf->hw; + vsi = pf->vsi[vf->lan_vsi_idx]; if (!vsi) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } if (vsi->info.pvid) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -2178,23 +2358,47 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) dev_err(&pf->pdev->dev, "%sable VLAN stripping failed for VSI %i\n", add_v ? 
"en" : "dis", vsi->vsi_num); - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } + if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) || + test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) + vlan_promisc = true; + if (add_v) { for (i = 0; i < vfl->num_elements; i++) { u16 vid = vfl->vlan_id[i]; - if (!ice_vsi_add_vlan(vsi, vid)) { - vf->num_vlan++; + if (ice_vsi_add_vlan(vsi, vid)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } - /* Enable VLAN pruning when VLAN 0 is added */ - if (unlikely(!vid)) - if (ice_cfg_vlan_pruning(vsi, true)) - aq_ret = ICE_ERR_PARAM; + vf->num_vlan++; + /* Enable VLAN pruning when VLAN is added */ + if (!vlan_promisc) { + status = ice_cfg_vlan_pruning(vsi, true, false); + if (status) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + dev_err(&pf->pdev->dev, + "Enable VLAN pruning on VLAN ID: %d failed error-%d\n", + vid, status); + goto error_param; + } } else { - aq_ret = ICE_ERR_PARAM; + /* Enable Ucast/Mcast VLAN promiscuous mode */ + promisc_m = ICE_PROMISC_VLAN_TX | + ICE_PROMISC_VLAN_RX; + + status = ice_set_vsi_promisc(hw, vsi->idx, + promisc_m, vid); + if (status) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + dev_err(&pf->pdev->dev, + "Enable Unicast/multicast promiscuous mode on VLAN ID:%d failed error-%d\n", + vid, status); + } } } } else { @@ -2204,12 +2408,22 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) /* Make sure ice_vsi_kill_vlan is successful before * updating VLAN information */ - if (!ice_vsi_kill_vlan(vsi, vid)) { - vf->num_vlan--; + if (ice_vsi_kill_vlan(vsi, vid)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + vf->num_vlan--; + /* Disable VLAN pruning when removing VLAN */ + ice_cfg_vlan_pruning(vsi, false, false); - /* Disable VLAN pruning when removing VLAN 0 */ - if (unlikely(!vid)) - ice_cfg_vlan_pruning(vsi, false); + /* Disable Unicast/Multicast VLAN promiscuous mode */ + if (vlan_promisc) { + promisc_m = ICE_PROMISC_VLAN_TX 
| + ICE_PROMISC_VLAN_RX; + + ice_clear_vsi_promisc(hw, vsi->idx, + promisc_m, vid); } } } @@ -2217,10 +2431,10 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) error_param: /* send the response to the VF */ if (add_v) - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_VLAN, aq_ret, + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_VLAN, v_ret, NULL, 0); else - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_VLAN, aq_ret, + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_VLAN, v_ret, NULL, 0); } @@ -2256,22 +2470,22 @@ static int ice_vc_remove_vlan_msg(struct ice_vf *vf, u8 *msg) */ static int ice_vc_ena_vlan_stripping(struct ice_vf *vf) { - enum ice_status aq_ret = 0; + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } vsi = pf->vsi[vf->lan_vsi_idx]; if (ice_vsi_manage_vlan_stripping(vsi, true)) - aq_ret = ICE_ERR_AQ_ERROR; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; error_param: return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING, - aq_ret, NULL, 0); + v_ret, NULL, 0); } /** @@ -2282,22 +2496,27 @@ error_param: */ static int ice_vc_dis_vlan_stripping(struct ice_vf *vf) { - enum ice_status aq_ret = 0; + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - aq_ret = ICE_ERR_PARAM; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } vsi = pf->vsi[vf->lan_vsi_idx]; + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + if (ice_vsi_manage_vlan_stripping(vsi, false)) - aq_ret = ICE_ERR_AQ_ERROR; + v_ret = VIRTCHNL_STATUS_ERR_PARAM; error_param: return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING, - aq_ret, NULL, 0); + v_ret, NULL, 0); } /** @@ -2333,7 +2552,7 @@ void 
ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) /* Perform basic checks on the msg */ err = virtchnl_vc_validate_vf_msg(&vf->vf_ver, v_opcode, msg, msglen); if (err) { - if (err == VIRTCHNL_ERR_PARAM) + if (err == VIRTCHNL_STATUS_ERR_PARAM) err = -EPERM; else err = -EINVAL; @@ -2355,7 +2574,8 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) error_handler: if (err) { - ice_vc_send_msg_to_vf(vf, v_opcode, ICE_ERR_PARAM, NULL, 0); + ice_vc_send_msg_to_vf(vf, v_opcode, VIRTCHNL_STATUS_ERR_PARAM, + NULL, 0); dev_err(&pf->pdev->dev, "Invalid message from VF %d, opcode %d, len %d, error %d\n", vf_id, v_opcode, msglen, err); return; @@ -2418,7 +2638,8 @@ error_handler: default: dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n", v_opcode, vf_id); - err = ice_vc_send_msg_to_vf(vf, v_opcode, ICE_ERR_NOT_IMPL, + err = ice_vc_send_msg_to_vf(vf, v_opcode, + VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, NULL, 0); break; } @@ -2427,7 +2648,7 @@ error_handler: * as it is busy with pending work. */ dev_info(&pf->pdev->dev, - "PF failed to honor VF %d, opcode %d\n, error %d\n", + "PF failed to honor VF %d, opcode %d, error %d\n", vf_id, v_opcode, err); } } @@ -2440,8 +2661,8 @@ error_handler: * * return VF configuration */ -int ice_get_vf_cfg(struct net_device *netdev, int vf_id, - struct ifla_vf_info *ivi) +int +ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; @@ -2587,7 +2808,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) ether_addr_copy(vf->dflt_lan_addr.addr, mac); vf->pf_set_mac = true; netdev_info(netdev, - "mac on VF %d set to %pM\n. VF driver will be reinitialized\n", + "MAC on VF %d set to %pM. 
VF driver will be reinitialized\n", vf_id, mac); ice_vc_dis_vf(vf); @@ -2690,7 +2911,8 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) ice_set_pfe_link(vf, &pfe, ls->link_speed, vf->link_up); /* Notify the VF of its new link state */ - ice_aq_send_msg_to_vf(hw, vf->vf_id, VIRTCHNL_OP_EVENT, 0, (u8 *)&pfe, + ice_aq_send_msg_to_vf(hw, vf->vf_id, VIRTCHNL_OP_EVENT, + VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe, sizeof(pfe), NULL); return 0; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h index 01470a8ee03a..932e2ab3380b 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h @@ -70,6 +70,7 @@ struct ice_vf { u8 spoofchk; u16 num_mac; u16 num_vlan; + u16 num_vf_qs; /* num of queue configured per VF */ u8 num_req_qs; /* num of queue pairs requested by VF */ }; @@ -77,8 +78,8 @@ struct ice_vf { void ice_process_vflr_event(struct ice_pf *pf); int ice_sriov_configure(struct pci_dev *pdev, int num_vfs); int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac); -int ice_get_vf_cfg(struct net_device *netdev, int vf_id, - struct ifla_vf_info *ivi); +int +ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi); void ice_free_vfs(struct ice_pf *pf); void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event); @@ -86,11 +87,9 @@ void ice_vc_notify_link_state(struct ice_pf *pf); void ice_vc_notify_reset(struct ice_pf *pf); bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr); -int ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, - u16 vlan_id, u8 qos, __be16 vlan_proto); - -int ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, - int max_tx_rate); +int +ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, + __be16 vlan_proto); int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted); @@ -162,12 +161,5 @@ 
ice_set_vf_link_state(struct net_device __always_unused *netdev, return -EOPNOTSUPP; } -static inline int -ice_set_vf_bw(struct net_device __always_unused *netdev, - int __always_unused vf_id, int __always_unused min_tx_rate, - int __always_unused max_tx_rate) -{ - return -EOPNOTSUPP; -} #endif /* CONFIG_PCI_IOV */ #endif /* _ICE_VIRTCHNL_PF_H_ */ diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index c57671068245..c645d9e648e0 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -3158,8 +3158,8 @@ static int igb_set_eee(struct net_device *netdev, } else if (!edata->eee_enabled) { dev_err(&adapter->pdev->dev, "Setting EEE options are not supported with EEE disabled\n"); - return -EINVAL; - } + return -EINVAL; + } adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised); if (hw->dev_spec._82575.eee_disable != !edata->eee_enabled) { diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 69b230c53fed..bea7175d171b 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2480,7 +2480,7 @@ static int igb_set_features(struct net_device *netdev, else igb_reset(adapter); - return 0; + return 1; } static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], @@ -3452,6 +3452,9 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) break; } } + + dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP); + pm_runtime_put_noidle(&pdev->dev); return 0; diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 80faccc34cda..0f5534ce27b0 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -29,9 +29,15 @@ unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter); void igc_set_flag_queue_pairs(struct igc_adapter *adapter, const u32 
max_rss_queues); int igc_reinit_queues(struct igc_adapter *adapter); +void igc_write_rss_indir_tbl(struct igc_adapter *adapter); bool igc_has_link(struct igc_adapter *adapter); void igc_reset(struct igc_adapter *adapter); int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx); +int igc_add_mac_steering_filter(struct igc_adapter *adapter, + const u8 *addr, u8 queue, u8 flags); +int igc_del_mac_steering_filter(struct igc_adapter *adapter, + const u8 *addr, u8 queue, u8 flags); +void igc_update_stats(struct igc_adapter *adapter); extern char igc_driver_name[]; extern char igc_driver_version[]; @@ -51,6 +57,13 @@ extern char igc_driver_version[]; #define IGC_FLAG_VLAN_PROMISC BIT(15) #define IGC_FLAG_RX_LEGACY BIT(16) +#define IGC_FLAG_RSS_FIELD_IPV4_UDP BIT(6) +#define IGC_FLAG_RSS_FIELD_IPV6_UDP BIT(7) + +#define IGC_MRQC_ENABLE_RSS_MQ 0x00000002 +#define IGC_MRQC_RSS_FIELD_IPV4_UDP 0x00400000 +#define IGC_MRQC_RSS_FIELD_IPV6_UDP 0x00800000 + #define IGC_START_ITR 648 /* ~6000 ints/sec */ #define IGC_4K_ITR 980 #define IGC_20K_ITR 196 @@ -284,15 +297,50 @@ struct igc_q_vector { struct igc_ring ring[0] ____cacheline_internodealigned_in_smp; }; +#define MAX_ETYPE_FILTER (4 - 1) + +enum igc_filter_match_flags { + IGC_FILTER_FLAG_ETHER_TYPE = 0x1, + IGC_FILTER_FLAG_VLAN_TCI = 0x2, + IGC_FILTER_FLAG_SRC_MAC_ADDR = 0x4, + IGC_FILTER_FLAG_DST_MAC_ADDR = 0x8, +}; + +/* RX network flow classification data structure */ +struct igc_nfc_input { + /* Byte layout in order, all values with MSB first: + * match_flags - 1 byte + * etype - 2 bytes + * vlan_tci - 2 bytes + */ + u8 match_flags; + __be16 etype; + __be16 vlan_tci; + u8 src_addr[ETH_ALEN]; + u8 dst_addr[ETH_ALEN]; +}; + +struct igc_nfc_filter { + struct hlist_node nfc_node; + struct igc_nfc_input filter; + unsigned long cookie; + u16 etype_reg_index; + u16 sw_idx; + u16 action; +}; + struct igc_mac_addr { u8 addr[ETH_ALEN]; u8 queue; u8 state; /* bitmask */ }; -#define IGC_MAC_STATE_DEFAULT 0x1 -#define 
IGC_MAC_STATE_MODIFIED 0x2 -#define IGC_MAC_STATE_IN_USE 0x4 +#define IGC_MAC_STATE_DEFAULT 0x1 +#define IGC_MAC_STATE_IN_USE 0x2 +#define IGC_MAC_STATE_SRC_ADDR 0x4 +#define IGC_MAC_STATE_QUEUE_STEERING 0x8 + +#define IGC_MAX_RXNFC_FILTERS 16 /* Board specific private data structure */ struct igc_adapter { @@ -356,12 +404,22 @@ struct igc_adapter { u16 tx_ring_count; u16 rx_ring_count; + u32 tx_hwtstamp_timeouts; + u32 tx_hwtstamp_skipped; + u32 rx_hwtstamp_cleared; u32 *shadow_vfta; u32 rss_queues; + u32 rss_indir_tbl_init; + + /* RX network flow classification support */ + struct hlist_head nfc_filter_list; + struct hlist_head cls_flower_list; + unsigned int nfc_filter_count; /* lock for RX network flow classification filter */ spinlock_t nfc_lock; + bool etype_bitmap[MAX_ETYPE_FILTER]; struct igc_mac_addr *mac_table; @@ -447,6 +505,10 @@ static inline s32 igc_read_phy_reg(struct igc_hw *hw, u32 offset, u16 *data) /* forward declaration */ void igc_reinit_locked(struct igc_adapter *); +int igc_add_filter(struct igc_adapter *adapter, + struct igc_nfc_filter *input); +int igc_erase_filter(struct igc_adapter *adapter, + struct igc_nfc_filter *input); #define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring)) diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h index 76d4991d7284..58d1109d7f3f 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.h +++ b/drivers/net/ethernet/intel/igc/igc_base.h @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2018 Intel Corporation */ -#ifndef _IGC_BASE_H -#define _IGC_BASE_H +#ifndef _IGC_BASE_H_ +#define _IGC_BASE_H_ /* forward declaration */ void igc_rx_fifo_flush_base(struct igc_hw *hw); diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 7d1bdcd1225a..a9a30268de59 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -310,6 +310,12 @@ 
IGC_RXDEXT_STATERR_CXE | \ IGC_RXDEXT_STATERR_RXE) +#define IGC_MRQC_RSS_FIELD_IPV4_TCP 0x00010000 +#define IGC_MRQC_RSS_FIELD_IPV4 0x00020000 +#define IGC_MRQC_RSS_FIELD_IPV6_TCP_EX 0x00040000 +#define IGC_MRQC_RSS_FIELD_IPV6 0x00100000 +#define IGC_MRQC_RSS_FIELD_IPV6_TCP 0x00200000 + /* Header split receive */ #define IGC_RFCTL_IPV6_EX_DIS 0x00010000 #define IGC_RFCTL_LEF 0x00040000 @@ -325,6 +331,10 @@ #define I225_RXPBSIZE_DEFAULT 0x000000A2 /* RXPBSIZE default */ #define I225_TXPBSIZE_DEFAULT 0x04000014 /* TXPBSIZE default */ +/* Receive Checksum Control */ +#define IGC_RXCSUM_CRCOFL 0x00000800 /* CRC32 offload enable */ +#define IGC_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */ + /* GPY211 - I225 defines */ #define GPY_MMD_MASK 0xFFFF0000 #define GPY_MMD_SHIFT 16 @@ -390,4 +400,11 @@ #define IGC_N0_QUEUE -1 +#define IGC_MAX_MAC_HDR_LEN 127 +#define IGC_MAX_NETWORK_HDR_LEN 511 + +#define IGC_VLAPQF_QUEUE_SEL(_n, q_idx) ((q_idx) << ((_n) * 4)) +#define IGC_VLAPQF_P_VALID(_n) (0x1 << (3 + (_n) * 4)) +#define IGC_VLAPQF_QUEUE_MASK 0x03 + #endif /* _IGC_DEFINES_H_ */ diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index eff37a6c0afa..ac98f1d96892 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -2,10 +2,120 @@ /* Copyright (c) 2018 Intel Corporation */ /* ethtool support for igc */ +#include <linux/if_vlan.h> #include <linux/pm_runtime.h> #include "igc.h" +/* forward declaration */ +struct igc_stats { + char stat_string[ETH_GSTRING_LEN]; + int sizeof_stat; + int stat_offset; +}; + +#define IGC_STAT(_name, _stat) { \ + .stat_string = _name, \ + .sizeof_stat = FIELD_SIZEOF(struct igc_adapter, _stat), \ + .stat_offset = offsetof(struct igc_adapter, _stat) \ +} + +static const struct igc_stats igc_gstrings_stats[] = { + IGC_STAT("rx_packets", stats.gprc), + IGC_STAT("tx_packets", stats.gptc), + IGC_STAT("rx_bytes", stats.gorc), + 
IGC_STAT("tx_bytes", stats.gotc), + IGC_STAT("rx_broadcast", stats.bprc), + IGC_STAT("tx_broadcast", stats.bptc), + IGC_STAT("rx_multicast", stats.mprc), + IGC_STAT("tx_multicast", stats.mptc), + IGC_STAT("multicast", stats.mprc), + IGC_STAT("collisions", stats.colc), + IGC_STAT("rx_crc_errors", stats.crcerrs), + IGC_STAT("rx_no_buffer_count", stats.rnbc), + IGC_STAT("rx_missed_errors", stats.mpc), + IGC_STAT("tx_aborted_errors", stats.ecol), + IGC_STAT("tx_carrier_errors", stats.tncrs), + IGC_STAT("tx_window_errors", stats.latecol), + IGC_STAT("tx_abort_late_coll", stats.latecol), + IGC_STAT("tx_deferred_ok", stats.dc), + IGC_STAT("tx_single_coll_ok", stats.scc), + IGC_STAT("tx_multi_coll_ok", stats.mcc), + IGC_STAT("tx_timeout_count", tx_timeout_count), + IGC_STAT("rx_long_length_errors", stats.roc), + IGC_STAT("rx_short_length_errors", stats.ruc), + IGC_STAT("rx_align_errors", stats.algnerrc), + IGC_STAT("tx_tcp_seg_good", stats.tsctc), + IGC_STAT("tx_tcp_seg_failed", stats.tsctfc), + IGC_STAT("rx_flow_control_xon", stats.xonrxc), + IGC_STAT("rx_flow_control_xoff", stats.xoffrxc), + IGC_STAT("tx_flow_control_xon", stats.xontxc), + IGC_STAT("tx_flow_control_xoff", stats.xofftxc), + IGC_STAT("rx_long_byte_count", stats.gorc), + IGC_STAT("tx_dma_out_of_sync", stats.doosync), + IGC_STAT("tx_smbus", stats.mgptc), + IGC_STAT("rx_smbus", stats.mgprc), + IGC_STAT("dropped_smbus", stats.mgpdc), + IGC_STAT("os2bmc_rx_by_bmc", stats.o2bgptc), + IGC_STAT("os2bmc_tx_by_bmc", stats.b2ospc), + IGC_STAT("os2bmc_tx_by_host", stats.o2bspc), + IGC_STAT("os2bmc_rx_by_host", stats.b2ogprc), + IGC_STAT("tx_hwtstamp_timeouts", tx_hwtstamp_timeouts), + IGC_STAT("tx_hwtstamp_skipped", tx_hwtstamp_skipped), + IGC_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared), +}; + +#define IGC_NETDEV_STAT(_net_stat) { \ + .stat_string = __stringify(_net_stat), \ + .sizeof_stat = FIELD_SIZEOF(struct rtnl_link_stats64, _net_stat), \ + .stat_offset = offsetof(struct rtnl_link_stats64, _net_stat) \ +} + 
+static const struct igc_stats igc_gstrings_net_stats[] = { + IGC_NETDEV_STAT(rx_errors), + IGC_NETDEV_STAT(tx_errors), + IGC_NETDEV_STAT(tx_dropped), + IGC_NETDEV_STAT(rx_length_errors), + IGC_NETDEV_STAT(rx_over_errors), + IGC_NETDEV_STAT(rx_frame_errors), + IGC_NETDEV_STAT(rx_fifo_errors), + IGC_NETDEV_STAT(tx_fifo_errors), + IGC_NETDEV_STAT(tx_heartbeat_errors) +}; + +enum igc_diagnostics_results { + TEST_REG = 0, + TEST_EEP, + TEST_IRQ, + TEST_LOOP, + TEST_LINK +}; + +static const char igc_gstrings_test[][ETH_GSTRING_LEN] = { + [TEST_REG] = "Register test (offline)", + [TEST_EEP] = "Eeprom test (offline)", + [TEST_IRQ] = "Interrupt test (offline)", + [TEST_LOOP] = "Loopback test (offline)", + [TEST_LINK] = "Link test (on/offline)" +}; + +#define IGC_TEST_LEN (sizeof(igc_gstrings_test) / ETH_GSTRING_LEN) + +#define IGC_GLOBAL_STATS_LEN \ + (sizeof(igc_gstrings_stats) / sizeof(struct igc_stats)) +#define IGC_NETDEV_STATS_LEN \ + (sizeof(igc_gstrings_net_stats) / sizeof(struct igc_stats)) +#define IGC_RX_QUEUE_STATS_LEN \ + (sizeof(struct igc_rx_queue_stats) / sizeof(u64)) +#define IGC_TX_QUEUE_STATS_LEN 3 /* packets, bytes, restart_queue */ +#define IGC_QUEUE_STATS_LEN \ + ((((struct igc_adapter *)netdev_priv(netdev))->num_rx_queues * \ + IGC_RX_QUEUE_STATS_LEN) + \ + (((struct igc_adapter *)netdev_priv(netdev))->num_tx_queues * \ + IGC_TX_QUEUE_STATS_LEN)) +#define IGC_STATS_LEN \ + (IGC_GLOBAL_STATS_LEN + IGC_NETDEV_STATS_LEN + IGC_QUEUE_STATS_LEN) + static const char igc_priv_flags_strings[][ETH_GSTRING_LEN] = { #define IGC_PRIV_FLAGS_LEGACY_RX BIT(0) "legacy-rx", @@ -545,6 +655,127 @@ static int igc_set_pauseparam(struct net_device *netdev, return retval; } +static void igc_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + u8 *p = data; + int i; + + switch (stringset) { + case ETH_SS_TEST: + memcpy(data, *igc_gstrings_test, + IGC_TEST_LEN * ETH_GSTRING_LEN); + break; + case ETH_SS_STATS: 
+ for (i = 0; i < IGC_GLOBAL_STATS_LEN; i++) { + memcpy(p, igc_gstrings_stats[i].stat_string, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < IGC_NETDEV_STATS_LEN; i++) { + memcpy(p, igc_gstrings_net_stats[i].stat_string, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < adapter->num_tx_queues; i++) { + sprintf(p, "tx_queue_%u_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_queue_%u_bytes", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_queue_%u_restart", i); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < adapter->num_rx_queues; i++) { + sprintf(p, "rx_queue_%u_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_bytes", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_drops", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_csum_err", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_alloc_failed", i); + p += ETH_GSTRING_LEN; + } + /* BUG_ON(p - data != IGC_STATS_LEN * ETH_GSTRING_LEN); */ + break; + case ETH_SS_PRIV_FLAGS: + memcpy(data, igc_priv_flags_strings, + IGC_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN); + break; + } +} + +static int igc_get_sset_count(struct net_device *netdev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return IGC_STATS_LEN; + case ETH_SS_TEST: + return IGC_TEST_LEN; + case ETH_SS_PRIV_FLAGS: + return IGC_PRIV_FLAGS_STR_LEN; + default: + return -ENOTSUPP; + } +} + +static void igc_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct rtnl_link_stats64 *net_stats = &adapter->stats64; + unsigned int start; + struct igc_ring *ring; + int i, j; + char *p; + + spin_lock(&adapter->stats64_lock); + igc_update_stats(adapter); + + for (i = 0; i < IGC_GLOBAL_STATS_LEN; i++) { + p = (char *)adapter + igc_gstrings_stats[i].stat_offset; + data[i] = (igc_gstrings_stats[i].sizeof_stat == + sizeof(u64)) ? 
*(u64 *)p : *(u32 *)p; + } + for (j = 0; j < IGC_NETDEV_STATS_LEN; j++, i++) { + p = (char *)net_stats + igc_gstrings_net_stats[j].stat_offset; + data[i] = (igc_gstrings_net_stats[j].sizeof_stat == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + for (j = 0; j < adapter->num_tx_queues; j++) { + u64 restart2; + + ring = adapter->tx_ring[j]; + do { + start = u64_stats_fetch_begin_irq(&ring->tx_syncp); + data[i] = ring->tx_stats.packets; + data[i + 1] = ring->tx_stats.bytes; + data[i + 2] = ring->tx_stats.restart_queue; + } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); + do { + start = u64_stats_fetch_begin_irq(&ring->tx_syncp2); + restart2 = ring->tx_stats.restart_queue2; + } while (u64_stats_fetch_retry_irq(&ring->tx_syncp2, start)); + data[i + 2] += restart2; + + i += IGC_TX_QUEUE_STATS_LEN; + } + for (j = 0; j < adapter->num_rx_queues; j++) { + ring = adapter->rx_ring[j]; + do { + start = u64_stats_fetch_begin_irq(&ring->rx_syncp); + data[i] = ring->rx_stats.packets; + data[i + 1] = ring->rx_stats.bytes; + data[i + 2] = ring->rx_stats.drops; + data[i + 3] = ring->rx_stats.csum_err; + data[i + 4] = ring->rx_stats.alloc_failed; + } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); + i += IGC_RX_QUEUE_STATS_LEN; + } + spin_unlock(&adapter->stats64_lock); +} + static int igc_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) { @@ -643,6 +874,605 @@ static int igc_set_coalesce(struct net_device *netdev, return 0; } +#define ETHER_TYPE_FULL_MASK ((__force __be16)~0) +static int igc_get_ethtool_nfc_entry(struct igc_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = &cmd->fs; + struct igc_nfc_filter *rule = NULL; + + /* report total rule count */ + cmd->data = IGC_MAX_RXNFC_FILTERS; + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) { + if (fsp->location <= rule->sw_idx) + break; + } + + if (!rule || fsp->location != rule->sw_idx) + return -EINVAL; + + if (rule->filter.match_flags) { 
+ fsp->flow_type = ETHER_FLOW; + fsp->ring_cookie = rule->action; + if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { + fsp->h_u.ether_spec.h_proto = rule->filter.etype; + fsp->m_u.ether_spec.h_proto = ETHER_TYPE_FULL_MASK; + } + if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { + fsp->flow_type |= FLOW_EXT; + fsp->h_ext.vlan_tci = rule->filter.vlan_tci; + fsp->m_ext.vlan_tci = htons(VLAN_PRIO_MASK); + } + if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) { + ether_addr_copy(fsp->h_u.ether_spec.h_dest, + rule->filter.dst_addr); + /* As we only support matching by the full + * mask, return the mask to userspace + */ + eth_broadcast_addr(fsp->m_u.ether_spec.h_dest); + } + if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) { + ether_addr_copy(fsp->h_u.ether_spec.h_source, + rule->filter.src_addr); + /* As we only support matching by the full + * mask, return the mask to userspace + */ + eth_broadcast_addr(fsp->m_u.ether_spec.h_source); + } + + return 0; + } + return -EINVAL; +} + +static int igc_get_ethtool_nfc_all(struct igc_adapter *adapter, + struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + struct igc_nfc_filter *rule; + int cnt = 0; + + /* report total rule count */ + cmd->data = IGC_MAX_RXNFC_FILTERS; + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) { + if (cnt == cmd->rule_cnt) + return -EMSGSIZE; + rule_locs[cnt] = rule->sw_idx; + cnt++; + } + + cmd->rule_cnt = cnt; + + return 0; +} + +static int igc_get_rss_hash_opts(struct igc_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + cmd->data = 0; + + /* Report default options for RSS on igc */ + switch (cmd->flow_type) { + case TCP_V4_FLOW: + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + /* Fall through */ + case UDP_V4_FLOW: + if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV4_UDP) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + /* Fall through */ + case SCTP_V4_FLOW: + /* Fall through */ + case AH_ESP_V4_FLOW: + /* Fall through */ + case AH_V4_FLOW: + /* Fall 
through */ + case ESP_V4_FLOW: + /* Fall through */ + case IPV4_FLOW: + cmd->data |= RXH_IP_SRC | RXH_IP_DST; + break; + case TCP_V6_FLOW: + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + /* Fall through */ + case UDP_V6_FLOW: + if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV6_UDP) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + /* Fall through */ + case SCTP_V6_FLOW: + /* Fall through */ + case AH_ESP_V6_FLOW: + /* Fall through */ + case AH_V6_FLOW: + /* Fall through */ + case ESP_V6_FLOW: + /* Fall through */ + case IPV6_FLOW: + cmd->data |= RXH_IP_SRC | RXH_IP_DST; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int igc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + struct igc_adapter *adapter = netdev_priv(dev); + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = adapter->num_rx_queues; + ret = 0; + break; + case ETHTOOL_GRXCLSRLCNT: + cmd->rule_cnt = adapter->nfc_filter_count; + ret = 0; + break; + case ETHTOOL_GRXCLSRULE: + ret = igc_get_ethtool_nfc_entry(adapter, cmd); + break; + case ETHTOOL_GRXCLSRLALL: + ret = igc_get_ethtool_nfc_all(adapter, cmd, rule_locs); + break; + case ETHTOOL_GRXFH: + ret = igc_get_rss_hash_opts(adapter, cmd); + break; + default: + break; + } + + return ret; +} + +#define UDP_RSS_FLAGS (IGC_FLAG_RSS_FIELD_IPV4_UDP | \ + IGC_FLAG_RSS_FIELD_IPV6_UDP) +static int igc_set_rss_hash_opt(struct igc_adapter *adapter, + struct ethtool_rxnfc *nfc) +{ + u32 flags = adapter->flags; + + /* RSS does not support anything other than hashing + * to queues on src and dst IPs and ports + */ + if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) + return -EINVAL; + + switch (nfc->flow_type) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST) || + !(nfc->data & RXH_L4_B_0_1) || + !(nfc->data & RXH_L4_B_2_3)) + return -EINVAL; + break; + case UDP_V4_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + 
!(nfc->data & RXH_IP_DST)) + return -EINVAL; + switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + flags &= ~IGC_FLAG_RSS_FIELD_IPV4_UDP; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + flags |= IGC_FLAG_RSS_FIELD_IPV4_UDP; + break; + default: + return -EINVAL; + } + break; + case UDP_V6_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST)) + return -EINVAL; + switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + flags &= ~IGC_FLAG_RSS_FIELD_IPV6_UDP; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + flags |= IGC_FLAG_RSS_FIELD_IPV6_UDP; + break; + default: + return -EINVAL; + } + break; + case AH_ESP_V4_FLOW: + case AH_V4_FLOW: + case ESP_V4_FLOW: + case SCTP_V4_FLOW: + case AH_ESP_V6_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case SCTP_V6_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST) || + (nfc->data & RXH_L4_B_0_1) || + (nfc->data & RXH_L4_B_2_3)) + return -EINVAL; + break; + default: + return -EINVAL; + } + + /* if we changed something we need to update flags */ + if (flags != adapter->flags) { + struct igc_hw *hw = &adapter->hw; + u32 mrqc = rd32(IGC_MRQC); + + if ((flags & UDP_RSS_FLAGS) && + !(adapter->flags & UDP_RSS_FLAGS)) + dev_err(&adapter->pdev->dev, + "enabling UDP RSS: fragmented packets may arrive out of order to the stack above\n"); + + adapter->flags = flags; + + /* Perform hash on these packet types */ + mrqc |= IGC_MRQC_RSS_FIELD_IPV4 | + IGC_MRQC_RSS_FIELD_IPV4_TCP | + IGC_MRQC_RSS_FIELD_IPV6 | + IGC_MRQC_RSS_FIELD_IPV6_TCP; + + mrqc &= ~(IGC_MRQC_RSS_FIELD_IPV4_UDP | + IGC_MRQC_RSS_FIELD_IPV6_UDP); + + if (flags & IGC_FLAG_RSS_FIELD_IPV4_UDP) + mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP; + + if (flags & IGC_FLAG_RSS_FIELD_IPV6_UDP) + mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP; + + wr32(IGC_MRQC, mrqc); + } + + return 0; +} + +static int igc_rxnfc_write_etype_filter(struct igc_adapter *adapter, + struct igc_nfc_filter *input) +{ + struct igc_hw *hw = &adapter->hw; + u8 i; + u32 etqf; + u16 
etype; + + /* find an empty etype filter register */ + for (i = 0; i < MAX_ETYPE_FILTER; ++i) { + if (!adapter->etype_bitmap[i]) + break; + } + if (i == MAX_ETYPE_FILTER) { + dev_err(&adapter->pdev->dev, "ethtool -N: etype filters are all used.\n"); + return -EINVAL; + } + + adapter->etype_bitmap[i] = true; + + etqf = rd32(IGC_ETQF(i)); + etype = ntohs(input->filter.etype & ETHER_TYPE_FULL_MASK); + + etqf |= IGC_ETQF_FILTER_ENABLE; + etqf &= ~IGC_ETQF_ETYPE_MASK; + etqf |= (etype & IGC_ETQF_ETYPE_MASK); + + etqf &= ~IGC_ETQF_QUEUE_MASK; + etqf |= ((input->action << IGC_ETQF_QUEUE_SHIFT) + & IGC_ETQF_QUEUE_MASK); + etqf |= IGC_ETQF_QUEUE_ENABLE; + + wr32(IGC_ETQF(i), etqf); + + input->etype_reg_index = i; + + return 0; +} + +static int igc_rxnfc_write_vlan_prio_filter(struct igc_adapter *adapter, + struct igc_nfc_filter *input) +{ + struct igc_hw *hw = &adapter->hw; + u8 vlan_priority; + u16 queue_index; + u32 vlapqf; + + vlapqf = rd32(IGC_VLAPQF); + vlan_priority = (ntohs(input->filter.vlan_tci) & VLAN_PRIO_MASK) + >> VLAN_PRIO_SHIFT; + queue_index = (vlapqf >> (vlan_priority * 4)) & IGC_VLAPQF_QUEUE_MASK; + + /* check whether this vlan prio is already set */ + if (vlapqf & IGC_VLAPQF_P_VALID(vlan_priority) && + queue_index != input->action) { + dev_err(&adapter->pdev->dev, "ethtool rxnfc set vlan prio filter failed.\n"); + return -EEXIST; + } + + vlapqf |= IGC_VLAPQF_P_VALID(vlan_priority); + vlapqf |= IGC_VLAPQF_QUEUE_SEL(vlan_priority, input->action); + + wr32(IGC_VLAPQF, vlapqf); + + return 0; +} + +int igc_add_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input) +{ + struct igc_hw *hw = &adapter->hw; + int err = -EINVAL; + + if (hw->mac.type == igc_i225 && + !(input->filter.match_flags & ~IGC_FILTER_FLAG_SRC_MAC_ADDR)) { + dev_err(&adapter->pdev->dev, + "i225 doesn't support flow classification rules specifying only source addresses.\n"); + return -EOPNOTSUPP; + } + + if (input->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { + err = 
igc_rxnfc_write_etype_filter(adapter, input); + if (err) + return err; + } + + if (input->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) { + err = igc_add_mac_steering_filter(adapter, + input->filter.dst_addr, + input->action, 0); + err = min_t(int, err, 0); + if (err) + return err; + } + + if (input->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) { + err = igc_add_mac_steering_filter(adapter, + input->filter.src_addr, + input->action, + IGC_MAC_STATE_SRC_ADDR); + err = min_t(int, err, 0); + if (err) + return err; + } + + if (input->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) + err = igc_rxnfc_write_vlan_prio_filter(adapter, input); + + return err; +} + +static void igc_clear_etype_filter_regs(struct igc_adapter *adapter, + u16 reg_index) +{ + struct igc_hw *hw = &adapter->hw; + u32 etqf = rd32(IGC_ETQF(reg_index)); + + etqf &= ~IGC_ETQF_QUEUE_ENABLE; + etqf &= ~IGC_ETQF_QUEUE_MASK; + etqf &= ~IGC_ETQF_FILTER_ENABLE; + + wr32(IGC_ETQF(reg_index), etqf); + + adapter->etype_bitmap[reg_index] = false; +} + +static void igc_clear_vlan_prio_filter(struct igc_adapter *adapter, + u16 vlan_tci) +{ + struct igc_hw *hw = &adapter->hw; + u8 vlan_priority; + u32 vlapqf; + + vlan_priority = (vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + + vlapqf = rd32(IGC_VLAPQF); + vlapqf &= ~IGC_VLAPQF_P_VALID(vlan_priority); + vlapqf &= ~IGC_VLAPQF_QUEUE_SEL(vlan_priority, + IGC_VLAPQF_QUEUE_MASK); + + wr32(IGC_VLAPQF, vlapqf); +} + +int igc_erase_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input) +{ + if (input->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) + igc_clear_etype_filter_regs(adapter, + input->etype_reg_index); + + if (input->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) + igc_clear_vlan_prio_filter(adapter, + ntohs(input->filter.vlan_tci)); + + if (input->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) + igc_del_mac_steering_filter(adapter, input->filter.src_addr, + input->action, + IGC_MAC_STATE_SRC_ADDR); + + if (input->filter.match_flags & 
IGC_FILTER_FLAG_DST_MAC_ADDR) + igc_del_mac_steering_filter(adapter, input->filter.dst_addr, + input->action, 0); + + return 0; +} + +static int igc_update_ethtool_nfc_entry(struct igc_adapter *adapter, + struct igc_nfc_filter *input, + u16 sw_idx) +{ + struct igc_nfc_filter *rule, *parent; + int err = -EINVAL; + + parent = NULL; + rule = NULL; + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) { + /* hash found, or no matching entry */ + if (rule->sw_idx >= sw_idx) + break; + parent = rule; + } + + /* if there is an old rule occupying our place remove it */ + if (rule && rule->sw_idx == sw_idx) { + if (!input) + err = igc_erase_filter(adapter, rule); + + hlist_del(&rule->nfc_node); + kfree(rule); + adapter->nfc_filter_count--; + } + + /* If no input this was a delete, err should be 0 if a rule was + * successfully found and removed from the list else -EINVAL + */ + if (!input) + return err; + + /* initialize node */ + INIT_HLIST_NODE(&input->nfc_node); + + /* add filter to the list */ + if (parent) + hlist_add_behind(&input->nfc_node, &parent->nfc_node); + else + hlist_add_head(&input->nfc_node, &adapter->nfc_filter_list); + + /* update counts */ + adapter->nfc_filter_count++; + + return 0; +} + +static int igc_add_ethtool_nfc_entry(struct igc_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + struct net_device *netdev = adapter->netdev; + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + struct igc_nfc_filter *input, *rule; + int err = 0; + + if (!(netdev->hw_features & NETIF_F_NTUPLE)) + return -EOPNOTSUPP; + + /* Don't allow programming if the action is a queue greater than + * the number of online Rx queues. 
+ */ + if (fsp->ring_cookie == RX_CLS_FLOW_DISC || + fsp->ring_cookie >= adapter->num_rx_queues) { + dev_err(&adapter->pdev->dev, "ethtool -N: The specified action is invalid\n"); + return -EINVAL; + } + + /* Don't allow indexes to exist outside of available space */ + if (fsp->location >= IGC_MAX_RXNFC_FILTERS) { + dev_err(&adapter->pdev->dev, "Location out of range\n"); + return -EINVAL; + } + + if ((fsp->flow_type & ~FLOW_EXT) != ETHER_FLOW) + return -EINVAL; + + input = kzalloc(sizeof(*input), GFP_KERNEL); + if (!input) + return -ENOMEM; + + if (fsp->m_u.ether_spec.h_proto == ETHER_TYPE_FULL_MASK) { + input->filter.etype = fsp->h_u.ether_spec.h_proto; + input->filter.match_flags = IGC_FILTER_FLAG_ETHER_TYPE; + } + + /* Only support matching addresses by the full mask */ + if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_source)) { + input->filter.match_flags |= IGC_FILTER_FLAG_SRC_MAC_ADDR; + ether_addr_copy(input->filter.src_addr, + fsp->h_u.ether_spec.h_source); + } + + /* Only support matching addresses by the full mask */ + if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_dest)) { + input->filter.match_flags |= IGC_FILTER_FLAG_DST_MAC_ADDR; + ether_addr_copy(input->filter.dst_addr, + fsp->h_u.ether_spec.h_dest); + } + + if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) { + if (fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) { + err = -EINVAL; + goto err_out; + } + input->filter.vlan_tci = fsp->h_ext.vlan_tci; + input->filter.match_flags |= IGC_FILTER_FLAG_VLAN_TCI; + } + + input->action = fsp->ring_cookie; + input->sw_idx = fsp->location; + + spin_lock(&adapter->nfc_lock); + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) { + if (!memcmp(&input->filter, &rule->filter, + sizeof(input->filter))) { + err = -EEXIST; + dev_err(&adapter->pdev->dev, + "ethtool: this filter is already set\n"); + goto err_out_w_lock; + } + } + + err = igc_add_filter(adapter, input); + if (err) + goto err_out_w_lock; + + igc_update_ethtool_nfc_entry(adapter, 
input, input->sw_idx); + + spin_unlock(&adapter->nfc_lock); + return 0; + +err_out_w_lock: + spin_unlock(&adapter->nfc_lock); +err_out: + kfree(input); + return err; +} + +static int igc_del_ethtool_nfc_entry(struct igc_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + int err; + + spin_lock(&adapter->nfc_lock); + err = igc_update_ethtool_nfc_entry(adapter, NULL, fsp->location); + spin_unlock(&adapter->nfc_lock); + + return err; +} + +static int igc_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + struct igc_adapter *adapter = netdev_priv(dev); + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_SRXFH: + ret = igc_set_rss_hash_opt(adapter, cmd); + break; + case ETHTOOL_SRXCLSRLINS: + ret = igc_add_ethtool_nfc_entry(adapter, cmd); + break; + case ETHTOOL_SRXCLSRLDEL: + ret = igc_del_ethtool_nfc_entry(adapter, cmd); + default: + break; + } + + return ret; +} + void igc_write_rss_indir_tbl(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; @@ -885,17 +1715,13 @@ static int igc_get_link_ksettings(struct net_device *netdev, if (hw->mac.type == igc_i225 && (status & IGC_STATUS_SPEED_2500)) { speed = SPEED_2500; - hw_dbg("2500 Mbs, "); } else { speed = SPEED_1000; - hw_dbg("1000 Mbs, "); } } else if (status & IGC_STATUS_SPEED_100) { speed = SPEED_100; - hw_dbg("100 Mbs, "); } else { speed = SPEED_10; - hw_dbg("10 Mbs, "); } if ((status & IGC_STATUS_FD) || hw->phy.media_type != igc_media_type_copper) @@ -1011,8 +1837,13 @@ static const struct ethtool_ops igc_ethtool_ops = { .set_ringparam = igc_set_ringparam, .get_pauseparam = igc_get_pauseparam, .set_pauseparam = igc_set_pauseparam, + .get_strings = igc_get_strings, + .get_sset_count = igc_get_sset_count, + .get_ethtool_stats = igc_get_ethtool_stats, .get_coalesce = igc_get_coalesce, .set_coalesce = igc_set_coalesce, + .get_rxnfc = igc_get_rxnfc, + .set_rxnfc = igc_set_rxnfc, .get_rxfh_indir_size = 
igc_get_rxfh_indir_size, .get_rxfh = igc_get_rxfh, .set_rxfh = igc_set_rxfh, diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 87a11879bf2d..a883b3f357e7 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -620,6 +620,55 @@ static void igc_configure_tx(struct igc_adapter *adapter) */ static void igc_setup_mrqc(struct igc_adapter *adapter) { + struct igc_hw *hw = &adapter->hw; + u32 j, num_rx_queues; + u32 mrqc, rxcsum; + u32 rss_key[10]; + + netdev_rss_key_fill(rss_key, sizeof(rss_key)); + for (j = 0; j < 10; j++) + wr32(IGC_RSSRK(j), rss_key[j]); + + num_rx_queues = adapter->rss_queues; + + if (adapter->rss_indir_tbl_init != num_rx_queues) { + for (j = 0; j < IGC_RETA_SIZE; j++) + adapter->rss_indir_tbl[j] = + (j * num_rx_queues) / IGC_RETA_SIZE; + adapter->rss_indir_tbl_init = num_rx_queues; + } + igc_write_rss_indir_tbl(adapter); + + /* Disable raw packet checksumming so that RSS hash is placed in + * descriptor on writeback. 
No need to enable TCP/UDP/IP checksum + * offloads as they are enabled by default + */ + rxcsum = rd32(IGC_RXCSUM); + rxcsum |= IGC_RXCSUM_PCSD; + + /* Enable Receive Checksum Offload for SCTP */ + rxcsum |= IGC_RXCSUM_CRCOFL; + + /* Don't need to set TUOFL or IPOFL, they default to 1 */ + wr32(IGC_RXCSUM, rxcsum); + + /* Generate RSS hash based on packet types, TCP/UDP + * port numbers and/or IPv4/v6 src and dst addresses + */ + mrqc = IGC_MRQC_RSS_FIELD_IPV4 | + IGC_MRQC_RSS_FIELD_IPV4_TCP | + IGC_MRQC_RSS_FIELD_IPV6 | + IGC_MRQC_RSS_FIELD_IPV6_TCP | + IGC_MRQC_RSS_FIELD_IPV6_TCP_EX; + + if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV4_UDP) + mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP; + if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV6_UDP) + mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP; + + mrqc |= IGC_MRQC_ENABLE_RSS_MQ; + + wr32(IGC_MRQC, mrqc); } /** @@ -1738,12 +1787,200 @@ void igc_up(struct igc_adapter *adapter) * igc_update_stats - Update the board statistics counters * @adapter: board private structure */ -static void igc_update_stats(struct igc_adapter *adapter) +void igc_update_stats(struct igc_adapter *adapter) { + struct rtnl_link_stats64 *net_stats = &adapter->stats64; + struct pci_dev *pdev = adapter->pdev; + struct igc_hw *hw = &adapter->hw; + u64 _bytes, _packets; + u64 bytes, packets; + unsigned int start; + u32 mpc; + int i; + + /* Prevent stats update while adapter is being reset, or if the pci + * connection is down. 
+ */ + if (adapter->link_speed == 0) + return; + if (pci_channel_offline(pdev)) + return; + + packets = 0; + bytes = 0; + + rcu_read_lock(); + for (i = 0; i < adapter->num_rx_queues; i++) { + struct igc_ring *ring = adapter->rx_ring[i]; + u32 rqdpc = rd32(IGC_RQDPC(i)); + + if (hw->mac.type >= igc_i225) + wr32(IGC_RQDPC(i), 0); + + if (rqdpc) { + ring->rx_stats.drops += rqdpc; + net_stats->rx_fifo_errors += rqdpc; + } + + do { + start = u64_stats_fetch_begin_irq(&ring->rx_syncp); + _bytes = ring->rx_stats.bytes; + _packets = ring->rx_stats.packets; + } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); + bytes += _bytes; + packets += _packets; + } + + net_stats->rx_bytes = bytes; + net_stats->rx_packets = packets; + + packets = 0; + bytes = 0; + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *ring = adapter->tx_ring[i]; + + do { + start = u64_stats_fetch_begin_irq(&ring->tx_syncp); + _bytes = ring->tx_stats.bytes; + _packets = ring->tx_stats.packets; + } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); + bytes += _bytes; + packets += _packets; + } + net_stats->tx_bytes = bytes; + net_stats->tx_packets = packets; + rcu_read_unlock(); + + /* read stats registers */ + adapter->stats.crcerrs += rd32(IGC_CRCERRS); + adapter->stats.gprc += rd32(IGC_GPRC); + adapter->stats.gorc += rd32(IGC_GORCL); + rd32(IGC_GORCH); /* clear GORCL */ + adapter->stats.bprc += rd32(IGC_BPRC); + adapter->stats.mprc += rd32(IGC_MPRC); + adapter->stats.roc += rd32(IGC_ROC); + + adapter->stats.prc64 += rd32(IGC_PRC64); + adapter->stats.prc127 += rd32(IGC_PRC127); + adapter->stats.prc255 += rd32(IGC_PRC255); + adapter->stats.prc511 += rd32(IGC_PRC511); + adapter->stats.prc1023 += rd32(IGC_PRC1023); + adapter->stats.prc1522 += rd32(IGC_PRC1522); + adapter->stats.symerrs += rd32(IGC_SYMERRS); + adapter->stats.sec += rd32(IGC_SEC); + + mpc = rd32(IGC_MPC); + adapter->stats.mpc += mpc; + net_stats->rx_fifo_errors += mpc; + adapter->stats.scc += rd32(IGC_SCC); + 
adapter->stats.ecol += rd32(IGC_ECOL); + adapter->stats.mcc += rd32(IGC_MCC); + adapter->stats.latecol += rd32(IGC_LATECOL); + adapter->stats.dc += rd32(IGC_DC); + adapter->stats.rlec += rd32(IGC_RLEC); + adapter->stats.xonrxc += rd32(IGC_XONRXC); + adapter->stats.xontxc += rd32(IGC_XONTXC); + adapter->stats.xoffrxc += rd32(IGC_XOFFRXC); + adapter->stats.xofftxc += rd32(IGC_XOFFTXC); + adapter->stats.fcruc += rd32(IGC_FCRUC); + adapter->stats.gptc += rd32(IGC_GPTC); + adapter->stats.gotc += rd32(IGC_GOTCL); + rd32(IGC_GOTCH); /* clear GOTCL */ + adapter->stats.rnbc += rd32(IGC_RNBC); + adapter->stats.ruc += rd32(IGC_RUC); + adapter->stats.rfc += rd32(IGC_RFC); + adapter->stats.rjc += rd32(IGC_RJC); + adapter->stats.tor += rd32(IGC_TORH); + adapter->stats.tot += rd32(IGC_TOTH); + adapter->stats.tpr += rd32(IGC_TPR); + + adapter->stats.ptc64 += rd32(IGC_PTC64); + adapter->stats.ptc127 += rd32(IGC_PTC127); + adapter->stats.ptc255 += rd32(IGC_PTC255); + adapter->stats.ptc511 += rd32(IGC_PTC511); + adapter->stats.ptc1023 += rd32(IGC_PTC1023); + adapter->stats.ptc1522 += rd32(IGC_PTC1522); + + adapter->stats.mptc += rd32(IGC_MPTC); + adapter->stats.bptc += rd32(IGC_BPTC); + + adapter->stats.tpt += rd32(IGC_TPT); + adapter->stats.colc += rd32(IGC_COLC); + + adapter->stats.algnerrc += rd32(IGC_ALGNERRC); + + adapter->stats.tsctc += rd32(IGC_TSCTC); + adapter->stats.tsctfc += rd32(IGC_TSCTFC); + + adapter->stats.iac += rd32(IGC_IAC); + adapter->stats.icrxoc += rd32(IGC_ICRXOC); + adapter->stats.icrxptc += rd32(IGC_ICRXPTC); + adapter->stats.icrxatc += rd32(IGC_ICRXATC); + adapter->stats.ictxptc += rd32(IGC_ICTXPTC); + adapter->stats.ictxatc += rd32(IGC_ICTXATC); + adapter->stats.ictxqec += rd32(IGC_ICTXQEC); + adapter->stats.ictxqmtc += rd32(IGC_ICTXQMTC); + adapter->stats.icrxdmtc += rd32(IGC_ICRXDMTC); + + /* Fill out the OS statistics structure */ + net_stats->multicast = adapter->stats.mprc; + net_stats->collisions = adapter->stats.colc; + + /* Rx Errors */ + + /* RLEC 
on some newer hardware can be incorrect so build + * our own version based on RUC and ROC + */ + net_stats->rx_errors = adapter->stats.rxerrc + + adapter->stats.crcerrs + adapter->stats.algnerrc + + adapter->stats.ruc + adapter->stats.roc + + adapter->stats.cexterr; + net_stats->rx_length_errors = adapter->stats.ruc + + adapter->stats.roc; + net_stats->rx_crc_errors = adapter->stats.crcerrs; + net_stats->rx_frame_errors = adapter->stats.algnerrc; + net_stats->rx_missed_errors = adapter->stats.mpc; + + /* Tx Errors */ + net_stats->tx_errors = adapter->stats.ecol + + adapter->stats.latecol; + net_stats->tx_aborted_errors = adapter->stats.ecol; + net_stats->tx_window_errors = adapter->stats.latecol; + net_stats->tx_carrier_errors = adapter->stats.tncrs; + + /* Tx Dropped needs to be maintained elsewhere */ + + /* Management Stats */ + adapter->stats.mgptc += rd32(IGC_MGTPTC); + adapter->stats.mgprc += rd32(IGC_MGTPRC); + adapter->stats.mgpdc += rd32(IGC_MGTPDC); } static void igc_nfc_filter_exit(struct igc_adapter *adapter) { + struct igc_nfc_filter *rule; + + spin_lock(&adapter->nfc_lock); + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) + igc_erase_filter(adapter, rule); + + hlist_for_each_entry(rule, &adapter->cls_flower_list, nfc_node) + igc_erase_filter(adapter, rule); + + spin_unlock(&adapter->nfc_lock); +} + +static void igc_nfc_filter_restore(struct igc_adapter *adapter) +{ + struct igc_nfc_filter *rule; + + spin_lock(&adapter->nfc_lock); + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) + igc_add_filter(adapter, rule); + + spin_unlock(&adapter->nfc_lock); } /** @@ -1890,6 +2127,86 @@ static struct net_device_stats *igc_get_stats(struct net_device *netdev) return &netdev->stats; } +static netdev_features_t igc_fix_features(struct net_device *netdev, + netdev_features_t features) +{ + /* Since there is no support for separate Rx/Tx vlan accel + * enable/disable make sure Tx flag is always in same state as Rx. 
+ */ + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; + else + features &= ~NETIF_F_HW_VLAN_CTAG_TX; + + return features; +} + +static int igc_set_features(struct net_device *netdev, + netdev_features_t features) +{ + netdev_features_t changed = netdev->features ^ features; + struct igc_adapter *adapter = netdev_priv(netdev); + + /* Add VLAN support */ + if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE))) + return 0; + + if (!(features & NETIF_F_NTUPLE)) { + struct hlist_node *node2; + struct igc_nfc_filter *rule; + + spin_lock(&adapter->nfc_lock); + hlist_for_each_entry_safe(rule, node2, + &adapter->nfc_filter_list, nfc_node) { + igc_erase_filter(adapter, rule); + hlist_del(&rule->nfc_node); + kfree(rule); + } + spin_unlock(&adapter->nfc_lock); + adapter->nfc_filter_count = 0; + } + + netdev->features = features; + + if (netif_running(netdev)) + igc_reinit_locked(adapter); + else + igc_reset(adapter); + + return 1; +} + +static netdev_features_t +igc_features_check(struct sk_buff *skb, struct net_device *dev, + netdev_features_t features) +{ + unsigned int network_hdr_len, mac_hdr_len; + + /* Make certain the headers can be described by a context descriptor */ + mac_hdr_len = skb_network_header(skb) - skb->data; + if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN)) + return features & ~(NETIF_F_HW_CSUM | + NETIF_F_SCTP_CRC | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_TSO | + NETIF_F_TSO6); + + network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb); + if (unlikely(network_hdr_len > IGC_MAX_NETWORK_HDR_LEN)) + return features & ~(NETIF_F_HW_CSUM | + NETIF_F_SCTP_CRC | + NETIF_F_TSO | + NETIF_F_TSO6); + + /* We can only support IPv4 TSO in tunnels if we can mangle the + * inner IP ID field, so strip TSO if MANGLEID is not supported. 
+ */ + if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) + features &= ~NETIF_F_TSO; + + return features; +} + /** * igc_configure - configure the hardware for RX and TX * @adapter: private board structure @@ -1906,6 +2223,7 @@ static void igc_configure(struct igc_adapter *adapter) igc_setup_mrqc(adapter); igc_setup_rctl(adapter); + igc_nfc_filter_restore(adapter); igc_configure_tx(adapter); igc_configure_rx(adapter); @@ -1967,6 +2285,127 @@ static void igc_set_default_mac_filter(struct igc_adapter *adapter) igc_rar_set_index(adapter, 0); } +/* If the filter to be added and an already existing filter express + * the same address and address type, it should be possible to only + * override the other configurations, for example the queue to steer + * traffic. + */ +static bool igc_mac_entry_can_be_used(const struct igc_mac_addr *entry, + const u8 *addr, const u8 flags) +{ + if (!(entry->state & IGC_MAC_STATE_IN_USE)) + return true; + + if ((entry->state & IGC_MAC_STATE_SRC_ADDR) != + (flags & IGC_MAC_STATE_SRC_ADDR)) + return false; + + if (!ether_addr_equal(addr, entry->addr)) + return false; + + return true; +} + +/* Add a MAC filter for 'addr' directing matching traffic to 'queue', + * 'flags' is used to indicate what kind of match is made, match is by + * default for the destination address, if matching by source address + * is desired the flag IGC_MAC_STATE_SRC_ADDR can be used. + */ +static int igc_add_mac_filter_flags(struct igc_adapter *adapter, + const u8 *addr, const u8 queue, + const u8 flags) +{ + struct igc_hw *hw = &adapter->hw; + int rar_entries = hw->mac.rar_entry_count; + int i; + + if (is_zero_ether_addr(addr)) + return -EINVAL; + + /* Search for the first empty entry in the MAC table. + * Do not touch entries at the end of the table reserved for the VF MAC + * addresses. 
+ */ + for (i = 0; i < rar_entries; i++) { + if (!igc_mac_entry_can_be_used(&adapter->mac_table[i], + addr, flags)) + continue; + + ether_addr_copy(adapter->mac_table[i].addr, addr); + adapter->mac_table[i].queue = queue; + adapter->mac_table[i].state |= IGC_MAC_STATE_IN_USE | flags; + + igc_rar_set_index(adapter, i); + return i; + } + + return -ENOSPC; +} + +int igc_add_mac_steering_filter(struct igc_adapter *adapter, + const u8 *addr, u8 queue, u8 flags) +{ + return igc_add_mac_filter_flags(adapter, addr, queue, + IGC_MAC_STATE_QUEUE_STEERING | flags); +} + +/* Remove a MAC filter for 'addr' directing matching traffic to + * 'queue', 'flags' is used to indicate what kind of match need to be + * removed, match is by default for the destination address, if + * matching by source address is to be removed the flag + * IGC_MAC_STATE_SRC_ADDR can be used. + */ +static int igc_del_mac_filter_flags(struct igc_adapter *adapter, + const u8 *addr, const u8 queue, + const u8 flags) +{ + struct igc_hw *hw = &adapter->hw; + int rar_entries = hw->mac.rar_entry_count; + int i; + + if (is_zero_ether_addr(addr)) + return -EINVAL; + + /* Search for matching entry in the MAC table based on given address + * and queue. Do not touch entries at the end of the table reserved + * for the VF MAC addresses. 
+ */ + for (i = 0; i < rar_entries; i++) { + if (!(adapter->mac_table[i].state & IGC_MAC_STATE_IN_USE)) + continue; + if ((adapter->mac_table[i].state & flags) != flags) + continue; + if (adapter->mac_table[i].queue != queue) + continue; + if (!ether_addr_equal(adapter->mac_table[i].addr, addr)) + continue; + + /* When a filter for the default address is "deleted", + * we return it to its initial configuration + */ + if (adapter->mac_table[i].state & IGC_MAC_STATE_DEFAULT) { + adapter->mac_table[i].state = + IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE; + } else { + adapter->mac_table[i].state = 0; + adapter->mac_table[i].queue = 0; + memset(adapter->mac_table[i].addr, 0, ETH_ALEN); + } + + igc_rar_set_index(adapter, i); + return 0; + } + + return -ENOENT; +} + +int igc_del_mac_steering_filter(struct igc_adapter *adapter, + const u8 *addr, u8 queue, u8 flags) +{ + return igc_del_mac_filter_flags(adapter, addr, queue, + IGC_MAC_STATE_QUEUE_STEERING | flags); +} + /** * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set * @netdev: network interface device structure @@ -3434,6 +3873,9 @@ static const struct net_device_ops igc_netdev_ops = { .ndo_set_mac_address = igc_set_mac, .ndo_change_mtu = igc_change_mtu, .ndo_get_stats = igc_get_stats, + .ndo_fix_features = igc_fix_features, + .ndo_set_features = igc_set_features, + .ndo_features_check = igc_features_check, }; /* PCIe configuration access */ @@ -3663,6 +4105,9 @@ static int igc_probe(struct pci_dev *pdev, if (err) goto err_sw_init; + /* copy netdev features into list of user selectable features */ + netdev->hw_features |= NETIF_F_NTUPLE; + /* MTU range: 68 - 9216 */ netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE; diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index 5afe7a8d3faf..50d7c04dccf5 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -80,8 +80,23 @@ /* 
MSI-X Table Register Descriptions */ #define IGC_PBACL 0x05B68 /* MSIx PBA Clear - R/W 1 to clear */ +/* RSS registers */ +#define IGC_MRQC 0x05818 /* Multiple Receive Control - RW */ + +/* Filtering Registers */ +#define IGC_ETQF(_n) (0x05CB0 + (4 * (_n))) /* EType Queue Fltr */ + +/* ETQF register bit definitions */ +#define IGC_ETQF_FILTER_ENABLE BIT(26) +#define IGC_ETQF_QUEUE_ENABLE BIT(31) +#define IGC_ETQF_QUEUE_SHIFT 16 +#define IGC_ETQF_QUEUE_MASK 0x00070000 +#define IGC_ETQF_ETYPE_MASK 0x0000FFFF + /* Redirection Table - RW Array */ #define IGC_RETA(_i) (0x05C00 + ((_i) * 4)) +/* RSS Random Key - RW Array */ +#define IGC_RSSRK(_i) (0x05C80 + ((_i) * 4)) /* Receive Register Descriptions */ #define IGC_RCTL 0x00100 /* Rx Control - RW */ @@ -101,6 +116,7 @@ #define IGC_UTA 0x0A000 /* Unicast Table Array - RW */ #define IGC_RAL(_n) (0x05400 + ((_n) * 0x08)) #define IGC_RAH(_n) (0x05404 + ((_n) * 0x08)) +#define IGC_VLAPQF 0x055B0 /* VLAN Priority Queue Filter VLAPQF */ /* Transmit Register Descriptions */ #define IGC_TCTL 0x00400 /* Tx Control - RW */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index e100054a3765..16c728984164 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8483,8 +8483,7 @@ static void ixgbe_atr(struct ixgbe_ring *ring, #ifdef IXGBE_FCOE static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct ixgbe_adapter *adapter; struct ixgbe_ring_feature *f; @@ -8514,7 +8513,7 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb, break; /* fall through */ default: - return fallback(dev, skb, sb_dev); + return netdev_pick_tx(dev, skb, sb_dev); } f = &adapter->ring_feature[RING_F_FCOE]; @@ -9796,7 +9795,7 @@ static int ixgbe_set_features(struct net_device *netdev, 
NETIF_F_HW_VLAN_CTAG_FILTER)) ixgbe_set_rx_mode(netdev); - return 0; + return 1; } /** diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h index ff0f4c503f53..67cce2736806 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -101,6 +101,7 @@ #define MVPP2_CLS_FLOW_TBL1_REG 0x1828 #define MVPP2_CLS_FLOW_TBL1_N_FIELDS_MASK 0x7 #define MVPP2_CLS_FLOW_TBL1_N_FIELDS(x) (x) +#define MVPP2_CLS_FLOW_TBL1_LU_TYPE(lu) (((lu) & 0x3f) << 3) #define MVPP2_CLS_FLOW_TBL1_PRIO_MASK 0x3f #define MVPP2_CLS_FLOW_TBL1_PRIO(x) ((x) << 9) #define MVPP2_CLS_FLOW_TBL1_SEQ_MASK 0x7 @@ -123,7 +124,10 @@ #define MVPP22_CLS_C2_TCAM_DATA2 0x1b18 #define MVPP22_CLS_C2_TCAM_DATA3 0x1b1c #define MVPP22_CLS_C2_TCAM_DATA4 0x1b20 +#define MVPP22_CLS_C2_LU_TYPE(lu) ((lu) & 0x3f) #define MVPP22_CLS_C2_PORT_ID(port) ((port) << 8) +#define MVPP22_CLS_C2_TCAM_INV 0x1b24 +#define MVPP22_CLS_C2_TCAM_INV_BIT BIT(31) #define MVPP22_CLS_C2_HIT_CTR 0x1b50 #define MVPP22_CLS_C2_ACT 0x1b60 #define MVPP22_CLS_C2_ACT_RSS_EN(act) (((act) & 0x3) << 19) @@ -610,6 +614,8 @@ #define MVPP2_BIT_TO_WORD(bit) ((bit) / 32) #define MVPP2_BIT_IN_WORD(bit) ((bit) % 32) +#define MVPP2_N_PRS_FLOWS 52 + /* RSS constants */ #define MVPP22_RSS_TABLE_ENTRIES 32 @@ -710,6 +716,7 @@ enum mvpp2_prs_l3_cast { #define MVPP2_DESC_DMA_MASK DMA_BIT_MASK(40) /* Definitions */ +struct mvpp2_dbgfs_entries; /* Shared Packet Processor resources */ struct mvpp2 { @@ -771,6 +778,9 @@ struct mvpp2 { /* Debugfs root entry */ struct dentry *dbgfs_dir; + + /* Debugfs entries private data */ + struct mvpp2_dbgfs_entries *dbgfs_entries; }; struct mvpp2_pcpu_stats { diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c index efdb7a656835..1087974d3b98 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c @@ -22,7 +22,7 @@ } \ } -static 
struct mvpp2_cls_flow cls_flows[MVPP2_N_FLOWS] = { +static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = { /* TCP over IPv4 flows, Not fragmented, no vlan tag */ MVPP2_DEF_FLOW(TCP_V4_FLOW, MVPP2_FL_IP4_TCP_NF_UNTAG, MVPP22_CLS_HEK_IP4_5T, @@ -429,12 +429,6 @@ static void mvpp2_cls_flow_port_id_sel(struct mvpp2_cls_flow_entry *fe, fe->data[0] &= ~MVPP2_CLS_FLOW_TBL0_PORT_ID_SEL; } -static void mvpp2_cls_flow_seq_set(struct mvpp2_cls_flow_entry *fe, u32 seq) -{ - fe->data[1] &= ~MVPP2_CLS_FLOW_TBL1_SEQ(MVPP2_CLS_FLOW_TBL1_SEQ_MASK); - fe->data[1] |= MVPP2_CLS_FLOW_TBL1_SEQ(seq); -} - static void mvpp2_cls_flow_last_set(struct mvpp2_cls_flow_entry *fe, bool is_last) { @@ -454,9 +448,16 @@ static void mvpp2_cls_flow_port_add(struct mvpp2_cls_flow_entry *fe, fe->data[0] |= MVPP2_CLS_FLOW_TBL0_PORT_ID(port); } +static void mvpp2_cls_flow_lu_type_set(struct mvpp2_cls_flow_entry *fe, + u8 lu_type) +{ + fe->data[1] &= ~MVPP2_CLS_FLOW_TBL1_LU_TYPE(MVPP2_CLS_LU_TYPE_MASK); + fe->data[1] |= MVPP2_CLS_FLOW_TBL1_LU_TYPE(lu_type); +} + /* Initialize the parser entry for the given flow */ static void mvpp2_cls_flow_prs_init(struct mvpp2 *priv, - struct mvpp2_cls_flow *flow) + const struct mvpp2_cls_flow *flow) { mvpp2_prs_add_flow(priv, flow->flow_id, flow->prs_ri.ri, flow->prs_ri.ri_mask); @@ -464,7 +465,7 @@ static void mvpp2_cls_flow_prs_init(struct mvpp2 *priv, /* Initialize the Lookup Id table entry for the given flow */ static void mvpp2_cls_flow_lkp_init(struct mvpp2 *priv, - struct mvpp2_cls_flow *flow) + const struct mvpp2_cls_flow *flow) { struct mvpp2_cls_lookup_entry le; @@ -477,7 +478,7 @@ static void mvpp2_cls_flow_lkp_init(struct mvpp2 *priv, /* We point on the first lookup in the sequence for the flow, that is * the C2 lookup. 
*/ - le.data |= MVPP2_CLS_LKP_FLOW_PTR(MVPP2_FLOW_C2_ENTRY(flow->flow_id)); + le.data |= MVPP2_CLS_LKP_FLOW_PTR(MVPP2_CLS_FLT_FIRST(flow->flow_id)); /* CLS is always enabled, RSS is enabled/disabled in C2 lookup */ le.data |= MVPP2_CLS_LKP_TBL_LOOKUP_EN_MASK; @@ -485,21 +486,86 @@ static void mvpp2_cls_flow_lkp_init(struct mvpp2 *priv, mvpp2_cls_lookup_write(priv, &le); } +static void mvpp2_cls_c2_write(struct mvpp2 *priv, + struct mvpp2_cls_c2_entry *c2) +{ + u32 val; + mvpp2_write(priv, MVPP22_CLS_C2_TCAM_IDX, c2->index); + + val = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_INV); + if (c2->valid) + val &= ~MVPP22_CLS_C2_TCAM_INV_BIT; + else + val |= MVPP22_CLS_C2_TCAM_INV_BIT; + mvpp2_write(priv, MVPP22_CLS_C2_TCAM_INV, val); + + mvpp2_write(priv, MVPP22_CLS_C2_ACT, c2->act); + + mvpp2_write(priv, MVPP22_CLS_C2_ATTR0, c2->attr[0]); + mvpp2_write(priv, MVPP22_CLS_C2_ATTR1, c2->attr[1]); + mvpp2_write(priv, MVPP22_CLS_C2_ATTR2, c2->attr[2]); + mvpp2_write(priv, MVPP22_CLS_C2_ATTR3, c2->attr[3]); + + mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA0, c2->tcam[0]); + mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA1, c2->tcam[1]); + mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA2, c2->tcam[2]); + mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA3, c2->tcam[3]); + /* Writing TCAM_DATA4 flushes writes to TCAM_DATA0-4 and INV to HW */ + mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA4, c2->tcam[4]); +} + +void mvpp2_cls_c2_read(struct mvpp2 *priv, int index, + struct mvpp2_cls_c2_entry *c2) +{ + u32 val; + mvpp2_write(priv, MVPP22_CLS_C2_TCAM_IDX, index); + + c2->index = index; + + c2->tcam[0] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA0); + c2->tcam[1] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA1); + c2->tcam[2] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA2); + c2->tcam[3] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA3); + c2->tcam[4] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA4); + + c2->act = mvpp2_read(priv, MVPP22_CLS_C2_ACT); + + c2->attr[0] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR0); + c2->attr[1] = 
mvpp2_read(priv, MVPP22_CLS_C2_ATTR1); + c2->attr[2] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR2); + c2->attr[3] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR3); + + val = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_INV); + c2->valid = !(val & MVPP22_CLS_C2_TCAM_INV_BIT); +} + /* Initialize the flow table entries for the given flow */ -static void mvpp2_cls_flow_init(struct mvpp2 *priv, struct mvpp2_cls_flow *flow) +static void mvpp2_cls_flow_init(struct mvpp2 *priv, + const struct mvpp2_cls_flow *flow) { struct mvpp2_cls_flow_entry fe; - int i; + int i, pri = 0; + + /* Assign default values to all entries in the flow */ + for (i = MVPP2_CLS_FLT_FIRST(flow->flow_id); + i <= MVPP2_CLS_FLT_LAST(flow->flow_id); i++) { + memset(&fe, 0, sizeof(fe)); + fe.index = i; + mvpp2_cls_flow_pri_set(&fe, pri++); - /* C2 lookup */ - memset(&fe, 0, sizeof(fe)); - fe.index = MVPP2_FLOW_C2_ENTRY(flow->flow_id); + if (i == MVPP2_CLS_FLT_LAST(flow->flow_id)) + mvpp2_cls_flow_last_set(&fe, 1); + + mvpp2_cls_flow_write(priv, &fe); + } + + /* RSS config C2 lookup */ + mvpp2_cls_flow_read(priv, MVPP2_CLS_FLT_C2_RSS_ENTRY(flow->flow_id), + &fe); mvpp2_cls_flow_eng_set(&fe, MVPP22_CLS_ENGINE_C2); mvpp2_cls_flow_port_id_sel(&fe, true); - mvpp2_cls_flow_last_set(&fe, 0); - mvpp2_cls_flow_pri_set(&fe, 0); - mvpp2_cls_flow_seq_set(&fe, MVPP2_CLS_FLOW_SEQ_FIRST1); + mvpp2_cls_flow_lu_type_set(&fe, MVPP2_CLS_LU_ALL); /* Add all ports */ for (i = 0; i < MVPP2_MAX_PORTS; i++) @@ -509,22 +575,19 @@ static void mvpp2_cls_flow_init(struct mvpp2 *priv, struct mvpp2_cls_flow *flow) /* C3Hx lookups */ for (i = 0; i < MVPP2_MAX_PORTS; i++) { - memset(&fe, 0, sizeof(fe)); - fe.index = MVPP2_PORT_FLOW_HASH_ENTRY(i, flow->flow_id); + mvpp2_cls_flow_read(priv, + MVPP2_CLS_FLT_HASH_ENTRY(i, flow->flow_id), + &fe); + /* Set a default engine. 
Will be overwritten when setting the + * real HEK parameters + */ + mvpp2_cls_flow_eng_set(&fe, MVPP22_CLS_ENGINE_C3HA); mvpp2_cls_flow_port_id_sel(&fe, true); - mvpp2_cls_flow_pri_set(&fe, i + 1); - mvpp2_cls_flow_seq_set(&fe, MVPP2_CLS_FLOW_SEQ_MIDDLE); mvpp2_cls_flow_port_add(&fe, BIT(i)); mvpp2_cls_flow_write(priv, &fe); } - - /* Update the last entry */ - mvpp2_cls_flow_last_set(&fe, 1); - mvpp2_cls_flow_seq_set(&fe, MVPP2_CLS_FLOW_SEQ_LAST); - - mvpp2_cls_flow_write(priv, &fe); } /* Adds a field to the Header Extracted Key generation parameters*/ @@ -555,6 +618,9 @@ static int mvpp2_flow_set_hek_fields(struct mvpp2_cls_flow_entry *fe, for_each_set_bit(i, &hash_opts, MVPP22_CLS_HEK_N_FIELDS) { switch (BIT(i)) { + case MVPP22_CLS_HEK_OPT_MAC_DA: + field_id = MVPP22_CLS_FIELD_MAC_DA; + break; case MVPP22_CLS_HEK_OPT_VLAN: field_id = MVPP22_CLS_FIELD_VLAN; break; @@ -586,9 +652,9 @@ static int mvpp2_flow_set_hek_fields(struct mvpp2_cls_flow_entry *fe, return 0; } -struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow) +const struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow) { - if (flow >= MVPP2_N_FLOWS) + if (flow >= MVPP2_N_PRS_FLOWS) return NULL; return &cls_flows[flow]; @@ -608,21 +674,17 @@ struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow) static int mvpp2_port_rss_hash_opts_set(struct mvpp2_port *port, int flow_type, u16 requested_opts) { + const struct mvpp2_cls_flow *flow; struct mvpp2_cls_flow_entry fe; - struct mvpp2_cls_flow *flow; int i, engine, flow_index; u16 hash_opts; - for (i = 0; i < MVPP2_N_FLOWS; i++) { + for_each_cls_flow_id_with_type(i, flow_type) { flow = mvpp2_cls_flow_get(i); if (!flow) return -EINVAL; - if (flow->flow_type != flow_type) - continue; - - flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(port->id, - flow->flow_id); + flow_index = MVPP2_CLS_FLT_HASH_ENTRY(port->id, flow->flow_id); mvpp2_cls_flow_read(port->priv, flow_index, &fe); @@ -697,21 +759,17 @@ u16 mvpp2_flow_get_hek_fields(struct mvpp2_cls_flow_entry *fe) */ static u16 
mvpp2_port_rss_hash_opts_get(struct mvpp2_port *port, int flow_type) { + const struct mvpp2_cls_flow *flow; struct mvpp2_cls_flow_entry fe; - struct mvpp2_cls_flow *flow; int i, flow_index; u16 hash_opts = 0; - for (i = 0; i < MVPP2_N_FLOWS; i++) { + for_each_cls_flow_id_with_type(i, flow_type) { flow = mvpp2_cls_flow_get(i); if (!flow) return 0; - if (flow->flow_type != flow_type) - continue; - - flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(port->id, - flow->flow_id); + flow_index = MVPP2_CLS_FLT_HASH_ENTRY(port->id, flow->flow_id); mvpp2_cls_flow_read(port->priv, flow_index, &fe); @@ -723,10 +781,10 @@ static u16 mvpp2_port_rss_hash_opts_get(struct mvpp2_port *port, int flow_type) static void mvpp2_cls_port_init_flows(struct mvpp2 *priv) { - struct mvpp2_cls_flow *flow; + const struct mvpp2_cls_flow *flow; int i; - for (i = 0; i < MVPP2_N_FLOWS; i++) { + for (i = 0; i < MVPP2_N_PRS_FLOWS; i++) { flow = mvpp2_cls_flow_get(i); if (!flow) break; @@ -737,47 +795,6 @@ static void mvpp2_cls_port_init_flows(struct mvpp2 *priv) } } -static void mvpp2_cls_c2_write(struct mvpp2 *priv, - struct mvpp2_cls_c2_entry *c2) -{ - mvpp2_write(priv, MVPP22_CLS_C2_TCAM_IDX, c2->index); - - /* Write TCAM */ - mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA0, c2->tcam[0]); - mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA1, c2->tcam[1]); - mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA2, c2->tcam[2]); - mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA3, c2->tcam[3]); - mvpp2_write(priv, MVPP22_CLS_C2_TCAM_DATA4, c2->tcam[4]); - - mvpp2_write(priv, MVPP22_CLS_C2_ACT, c2->act); - - mvpp2_write(priv, MVPP22_CLS_C2_ATTR0, c2->attr[0]); - mvpp2_write(priv, MVPP22_CLS_C2_ATTR1, c2->attr[1]); - mvpp2_write(priv, MVPP22_CLS_C2_ATTR2, c2->attr[2]); - mvpp2_write(priv, MVPP22_CLS_C2_ATTR3, c2->attr[3]); -} - -void mvpp2_cls_c2_read(struct mvpp2 *priv, int index, - struct mvpp2_cls_c2_entry *c2) -{ - mvpp2_write(priv, MVPP22_CLS_C2_TCAM_IDX, index); - - c2->index = index; - - c2->tcam[0] = mvpp2_read(priv, 
MVPP22_CLS_C2_TCAM_DATA0); - c2->tcam[1] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA1); - c2->tcam[2] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA2); - c2->tcam[3] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA3); - c2->tcam[4] = mvpp2_read(priv, MVPP22_CLS_C2_TCAM_DATA4); - - c2->act = mvpp2_read(priv, MVPP22_CLS_C2_ACT); - - c2->attr[0] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR0); - c2->attr[1] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR1); - c2->attr[2] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR2); - c2->attr[3] = mvpp2_read(priv, MVPP22_CLS_C2_ATTR3); -} - static void mvpp2_port_c2_cls_init(struct mvpp2_port *port) { struct mvpp2_cls_c2_entry c2; @@ -791,6 +808,10 @@ static void mvpp2_port_c2_cls_init(struct mvpp2_port *port) c2.tcam[4] = MVPP22_CLS_C2_PORT_ID(pmap); c2.tcam[4] |= MVPP22_CLS_C2_TCAM_EN(MVPP22_CLS_C2_PORT_ID(pmap)); + /* Match on Lookup Type */ + c2.tcam[4] |= MVPP22_CLS_C2_TCAM_EN(MVPP22_CLS_C2_LU_TYPE(MVPP2_CLS_LU_TYPE_MASK)); + c2.tcam[4] |= MVPP22_CLS_C2_LU_TYPE(MVPP2_CLS_LU_ALL); + /* Update RSS status after matching this entry */ c2.act = MVPP22_CLS_C2_ACT_RSS_EN(MVPP22_C2_UPD_LOCK); @@ -809,6 +830,8 @@ static void mvpp2_port_c2_cls_init(struct mvpp2_port *port) c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) | MVPP22_CLS_C2_ATTR0_QLOW(ql); + c2.valid = true; + mvpp2_cls_c2_write(port->priv, &c2); } @@ -817,6 +840,7 @@ void mvpp2_cls_init(struct mvpp2 *priv) { struct mvpp2_cls_lookup_entry le; struct mvpp2_cls_flow_entry fe; + struct mvpp2_cls_c2_entry c2; int index; /* Enable classifier */ @@ -840,6 +864,14 @@ void mvpp2_cls_init(struct mvpp2 *priv) mvpp2_cls_lookup_write(priv, &le); } + /* Clear C2 TCAM engine table */ + memset(&c2, 0, sizeof(c2)); + c2.valid = false; + for (index = 0; index < MVPP22_CLS_C2_N_ENTRIES; index++) { + c2.index = index; + mvpp2_cls_c2_write(priv, &c2); + } + mvpp2_cls_port_init_flows(priv); } @@ -902,12 +934,12 @@ static void mvpp2_rss_port_c2_disable(struct mvpp2_port *port) mvpp2_cls_c2_write(port->priv, &c2); } -void 
mvpp22_rss_enable(struct mvpp2_port *port) +void mvpp22_port_rss_enable(struct mvpp2_port *port) { mvpp2_rss_port_c2_enable(port); } -void mvpp22_rss_disable(struct mvpp2_port *port) +void mvpp22_port_rss_disable(struct mvpp2_port *port) { mvpp2_rss_port_c2_disable(port); } @@ -1037,7 +1069,7 @@ int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info) return 0; } -void mvpp22_rss_port_init(struct mvpp2_port *port) +void mvpp22_port_rss_init(struct mvpp2_port *port) { struct mvpp2 *priv = port->priv; int i; diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h index 089f05f29891..96304ffc5d49 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h @@ -71,14 +71,6 @@ enum mvpp2_cls_field_id { MVPP22_CLS_FIELD_L4DIP = 0x1e, }; -enum mvpp2_cls_flow_seq { - MVPP2_CLS_FLOW_SEQ_NORMAL = 0, - MVPP2_CLS_FLOW_SEQ_FIRST1, - MVPP2_CLS_FLOW_SEQ_FIRST2, - MVPP2_CLS_FLOW_SEQ_LAST, - MVPP2_CLS_FLOW_SEQ_MIDDLE -}; - /* Classifier C2 engine constants */ #define MVPP22_CLS_C2_TCAM_EN(data) ((data) << 16) @@ -105,34 +97,25 @@ enum mvpp22_cls_c2_fwd_action { struct mvpp2_cls_c2_entry { u32 index; + /* TCAM lookup key */ u32 tcam[MVPP2_CLS_C2_TCAM_WORDS]; + /* Actions to perform upon TCAM match */ u32 act; + /* Attributes relative to the actions to perform */ u32 attr[MVPP2_CLS_C2_ATTR_WORDS]; + /* Entry validity */ + u8 valid; }; /* Classifier C2 engine entries */ -#define MVPP22_CLS_C2_RSS_ENTRY(port) (port) -#define MVPP22_CLS_C2_N_ENTRIES MVPP2_MAX_PORTS +#define MVPP22_CLS_C2_N_ENTRIES 256 -/* RSS flow entries in the flow table. We have 2 entries per port for RSS. - * - * The first performs a lookup using the C2 TCAM engine, to tag the - * packet for software forwarding (needed for RSS), enable or disable RSS, and - * assign the default rx queue. 
- * - * The second configures the hash generation, by specifying which fields of the - * packet header are used to generate the hash, and specifies the relevant hash - * engine to use. - */ -#define MVPP22_RSS_FLOW_C2_OFFS 0 -#define MVPP22_RSS_FLOW_HASH_OFFS 1 -#define MVPP22_RSS_FLOW_SIZE (MVPP22_RSS_FLOW_HASH_OFFS + 1) +/* Number of per-port dedicated entries in the C2 TCAM */ +#define MVPP22_CLS_C2_PORT_RANGE 8 -#define MVPP22_RSS_FLOW_C2(port) ((port) * MVPP22_RSS_FLOW_SIZE + \ - MVPP22_RSS_FLOW_C2_OFFS) -#define MVPP22_RSS_FLOW_HASH(port) ((port) * MVPP22_RSS_FLOW_SIZE + \ - MVPP22_RSS_FLOW_HASH_OFFS) -#define MVPP22_RSS_FLOW_FIRST(port) MVPP22_RSS_FLOW_C2(port) +#define MVPP22_CLS_C2_PORT_FIRST(p) (MVPP22_CLS_C2_N_ENTRIES - \ + ((p) * MVPP22_CLS_C2_PORT_RANGE)) +#define MVPP22_CLS_C2_RSS_ENTRY(p) (MVPP22_CLS_C2_PORT_FIRST(p) - 1) /* Packet flow ID */ enum mvpp2_prs_flow { @@ -162,6 +145,15 @@ enum mvpp2_prs_flow { MVPP2_FL_LAST, }; +enum mvpp2_cls_lu_type { + MVPP2_CLS_LU_ALL = 0, +}; + +/* LU Type defined for all engines, and specified in the flow table */ +#define MVPP2_CLS_LU_TYPE_MASK 0x3f + +#define MVPP2_N_FLOWS (MVPP2_FL_LAST - MVPP2_FL_START) + struct mvpp2_cls_flow { /* The L2-L4 traffic flow type */ int flow_type; @@ -176,12 +168,37 @@ struct mvpp2_cls_flow { struct mvpp2_prs_result_info prs_ri; }; -#define MVPP2_N_FLOWS 52 +#define MVPP2_CLS_FLT_ENTRIES_PER_FLOW (MVPP2_MAX_PORTS + 1) +#define MVPP2_CLS_FLT_FIRST(id) (((id) - MVPP2_FL_START) * \ + MVPP2_CLS_FLT_ENTRIES_PER_FLOW) +#define MVPP2_CLS_FLT_C2_RSS_ENTRY(id) (MVPP2_CLS_FLT_FIRST(id)) +#define MVPP2_CLS_FLT_HASH_ENTRY(port, id) (MVPP2_CLS_FLT_C2_RSS_ENTRY(id) + (port) + 1) +#define MVPP2_CLS_FLT_LAST(id) (MVPP2_CLS_FLT_FIRST(id) + \ + MVPP2_CLS_FLT_ENTRIES_PER_FLOW - 1) + +/* Iterate on each classifier flow id. Sets 'i' to be the index of the first + * entry in the cls_flows table for each different flow_id. 
+ * This relies on entries having the same flow_id in the cls_flows table being + * contiguous. + */ +#define for_each_cls_flow_id(i) \ + for ((i) = 0; (i) < MVPP2_N_PRS_FLOWS; (i)++) \ + if ((i) > 0 && \ + cls_flows[(i)].flow_id == cls_flows[(i) - 1].flow_id) \ + continue; \ + else + +/* Iterate on each classifier flow that has a given flow_type. Sets 'i' to be + * the index of the first entry in the cls_flow table for each different flow_id + * that has the given flow_type. This allows to operate on all flows that + * matches a given ethtool flow type. + */ +#define for_each_cls_flow_id_with_type(i, type) \ + for_each_cls_flow_id((i)) \ + if (cls_flows[(i)].flow_type != (type)) \ + continue; \ + else -#define MVPP2_ENTRIES_PER_FLOW (MVPP2_MAX_PORTS + 1) -#define MVPP2_FLOW_C2_ENTRY(id) ((id) * MVPP2_ENTRIES_PER_FLOW) -#define MVPP2_PORT_FLOW_HASH_ENTRY(port, id) ((id) * MVPP2_ENTRIES_PER_FLOW + \ - (port) + 1) struct mvpp2_cls_flow_entry { u32 index; u32 data[MVPP2_CLS_FLOWS_TBL_DATA_WORDS]; @@ -194,11 +211,10 @@ struct mvpp2_cls_lookup_entry { }; void mvpp22_rss_fill_table(struct mvpp2_port *port, u32 table); +void mvpp22_port_rss_init(struct mvpp2_port *port); -void mvpp22_rss_port_init(struct mvpp2_port *port); - -void mvpp22_rss_enable(struct mvpp2_port *port); -void mvpp22_rss_disable(struct mvpp2_port *port); +void mvpp22_port_rss_enable(struct mvpp2_port *port); +void mvpp22_port_rss_disable(struct mvpp2_port *port); int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info); int mvpp2_ethtool_rxfh_set(struct mvpp2_port *port, struct ethtool_rxnfc *info); @@ -213,7 +229,7 @@ int mvpp2_cls_flow_eng_get(struct mvpp2_cls_flow_entry *fe); u16 mvpp2_flow_get_hek_fields(struct mvpp2_cls_flow_entry *fe); -struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow); +const struct mvpp2_cls_flow *mvpp2_cls_flow_get(int flow); u32 mvpp2_cls_flow_hits(struct mvpp2 *priv, int index); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c 
b/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c index f9744a61e5dd..0ee39ea47b6b 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c @@ -18,22 +18,48 @@ struct mvpp2_dbgfs_prs_entry { struct mvpp2 *priv; }; +struct mvpp2_dbgfs_c2_entry { + int id; + struct mvpp2 *priv; +}; + struct mvpp2_dbgfs_flow_entry { int flow; struct mvpp2 *priv; }; +struct mvpp2_dbgfs_flow_tbl_entry { + int id; + struct mvpp2 *priv; +}; + struct mvpp2_dbgfs_port_flow_entry { struct mvpp2_port *port; struct mvpp2_dbgfs_flow_entry *dbg_fe; }; +struct mvpp2_dbgfs_entries { + /* Entries for Header Parser debug info */ + struct mvpp2_dbgfs_prs_entry prs_entries[MVPP2_PRS_TCAM_SRAM_SIZE]; + + /* Entries for Classifier C2 engine debug info */ + struct mvpp2_dbgfs_c2_entry c2_entries[MVPP22_CLS_C2_N_ENTRIES]; + + /* Entries for Classifier Flow Table debug info */ + struct mvpp2_dbgfs_flow_tbl_entry flt_entries[MVPP2_CLS_FLOWS_TBL_SIZE]; + + /* Entries for Classifier flows debug info */ + struct mvpp2_dbgfs_flow_entry flow_entries[MVPP2_N_PRS_FLOWS]; + + /* Entries for per-port flows debug info */ + struct mvpp2_dbgfs_port_flow_entry port_flow_entries[MVPP2_MAX_PORTS]; +}; + static int mvpp2_dbgfs_flow_flt_hits_show(struct seq_file *s, void *unused) { - struct mvpp2_dbgfs_flow_entry *entry = s->private; - int id = MVPP2_FLOW_C2_ENTRY(entry->flow); + struct mvpp2_dbgfs_flow_tbl_entry *entry = s->private; - u32 hits = mvpp2_cls_flow_hits(entry->priv, id); + u32 hits = mvpp2_cls_flow_hits(entry->priv, entry->id); seq_printf(s, "%u\n", hits); @@ -58,7 +84,7 @@ DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_flow_dec_hits); static int mvpp2_dbgfs_flow_type_show(struct seq_file *s, void *unused) { struct mvpp2_dbgfs_flow_entry *entry = s->private; - struct mvpp2_cls_flow *f; + const struct mvpp2_cls_flow *f; const char *flow_name; f = mvpp2_cls_flow_get(entry->flow); @@ -93,30 +119,12 @@ static int mvpp2_dbgfs_flow_type_show(struct seq_file *s, 
void *unused) return 0; } -static int mvpp2_dbgfs_flow_type_open(struct inode *inode, struct file *file) -{ - return single_open(file, mvpp2_dbgfs_flow_type_show, inode->i_private); -} - -static int mvpp2_dbgfs_flow_type_release(struct inode *inode, struct file *file) -{ - struct seq_file *seq = file->private_data; - struct mvpp2_dbgfs_flow_entry *flow_entry = seq->private; - - kfree(flow_entry); - return single_release(inode, file); -} - -static const struct file_operations mvpp2_dbgfs_flow_type_fops = { - .open = mvpp2_dbgfs_flow_type_open, - .read = seq_read, - .release = mvpp2_dbgfs_flow_type_release, -}; +DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_flow_type); static int mvpp2_dbgfs_flow_id_show(struct seq_file *s, void *unused) { - struct mvpp2_dbgfs_flow_entry *entry = s->private; - struct mvpp2_cls_flow *f; + const struct mvpp2_dbgfs_flow_entry *entry = s->private; + const struct mvpp2_cls_flow *f; f = mvpp2_cls_flow_get(entry->flow); if (!f) @@ -134,7 +142,7 @@ static int mvpp2_dbgfs_port_flow_hash_opt_show(struct seq_file *s, void *unused) struct mvpp2_dbgfs_port_flow_entry *entry = s->private; struct mvpp2_port *port = entry->port; struct mvpp2_cls_flow_entry fe; - struct mvpp2_cls_flow *f; + const struct mvpp2_cls_flow *f; int flow_index; u16 hash_opts; @@ -142,7 +150,7 @@ static int mvpp2_dbgfs_port_flow_hash_opt_show(struct seq_file *s, void *unused) if (!f) return -EINVAL; - flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(entry->port->id, f->flow_id); + flow_index = MVPP2_CLS_FLT_HASH_ENTRY(entry->port->id, f->flow_id); mvpp2_cls_flow_read(port->priv, flow_index, &fe); @@ -153,42 +161,21 @@ static int mvpp2_dbgfs_port_flow_hash_opt_show(struct seq_file *s, void *unused) return 0; } -static int mvpp2_dbgfs_port_flow_hash_opt_open(struct inode *inode, - struct file *file) -{ - return single_open(file, mvpp2_dbgfs_port_flow_hash_opt_show, - inode->i_private); -} - -static int mvpp2_dbgfs_port_flow_hash_opt_release(struct inode *inode, - struct file *file) -{ - struct 
seq_file *seq = file->private_data; - struct mvpp2_dbgfs_port_flow_entry *flow_entry = seq->private; - - kfree(flow_entry); - return single_release(inode, file); -} - -static const struct file_operations mvpp2_dbgfs_port_flow_hash_opt_fops = { - .open = mvpp2_dbgfs_port_flow_hash_opt_open, - .read = seq_read, - .release = mvpp2_dbgfs_port_flow_hash_opt_release, -}; +DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_port_flow_hash_opt); static int mvpp2_dbgfs_port_flow_engine_show(struct seq_file *s, void *unused) { struct mvpp2_dbgfs_port_flow_entry *entry = s->private; struct mvpp2_port *port = entry->port; struct mvpp2_cls_flow_entry fe; - struct mvpp2_cls_flow *f; + const struct mvpp2_cls_flow *f; int flow_index, engine; f = mvpp2_cls_flow_get(entry->dbg_fe->flow); if (!f) return -EINVAL; - flow_index = MVPP2_PORT_FLOW_HASH_ENTRY(entry->port->id, f->flow_id); + flow_index = MVPP2_CLS_FLT_HASH_ENTRY(entry->port->id, f->flow_id); mvpp2_cls_flow_read(port->priv, flow_index, &fe); @@ -203,11 +190,10 @@ DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_port_flow_engine); static int mvpp2_dbgfs_flow_c2_hits_show(struct seq_file *s, void *unused) { - struct mvpp2_port *port = s->private; + struct mvpp2_dbgfs_c2_entry *entry = s->private; u32 hits; - hits = mvpp2_cls_c2_hit_count(port->priv, - MVPP22_CLS_C2_RSS_ENTRY(port->id)); + hits = mvpp2_cls_c2_hit_count(entry->priv, entry->id); seq_printf(s, "%u\n", hits); @@ -218,11 +204,11 @@ DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_flow_c2_hits); static int mvpp2_dbgfs_flow_c2_rxq_show(struct seq_file *s, void *unused) { - struct mvpp2_port *port = s->private; + struct mvpp2_dbgfs_c2_entry *entry = s->private; struct mvpp2_cls_c2_entry c2; u8 qh, ql; - mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2); + mvpp2_cls_c2_read(entry->priv, entry->id, &c2); qh = (c2.attr[0] >> MVPP22_CLS_C2_ATTR0_QHIGH_OFFS) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK; @@ -239,11 +225,11 @@ DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_flow_c2_rxq); static int 
mvpp2_dbgfs_flow_c2_enable_show(struct seq_file *s, void *unused) { - struct mvpp2_port *port = s->private; + struct mvpp2_dbgfs_c2_entry *entry = s->private; struct mvpp2_cls_c2_entry c2; int enabled; - mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2); + mvpp2_cls_c2_read(entry->priv, entry->id, &c2); enabled = !!(c2.attr[2] & MVPP22_CLS_C2_ATTR2_RSS_EN); @@ -456,25 +442,7 @@ static int mvpp2_dbgfs_prs_valid_show(struct seq_file *s, void *unused) return 0; } -static int mvpp2_dbgfs_prs_valid_open(struct inode *inode, struct file *file) -{ - return single_open(file, mvpp2_dbgfs_prs_valid_show, inode->i_private); -} - -static int mvpp2_dbgfs_prs_valid_release(struct inode *inode, struct file *file) -{ - struct seq_file *seq = file->private_data; - struct mvpp2_dbgfs_prs_entry *entry = seq->private; - - kfree(entry); - return single_release(inode, file); -} - -static const struct file_operations mvpp2_dbgfs_prs_valid_fops = { - .open = mvpp2_dbgfs_prs_valid_open, - .read = seq_read, - .release = mvpp2_dbgfs_prs_valid_release, -}; +DEFINE_SHOW_ATTRIBUTE(mvpp2_dbgfs_prs_valid); static int mvpp2_dbgfs_flow_port_init(struct dentry *parent, struct mvpp2_port *port, @@ -487,10 +455,7 @@ static int mvpp2_dbgfs_flow_port_init(struct dentry *parent, if (IS_ERR(port_dir)) return PTR_ERR(port_dir); - /* This will be freed by 'hash_opts' release op */ - port_entry = kmalloc(sizeof(*port_entry), GFP_KERNEL); - if (!port_entry) - return -ENOMEM; + port_entry = &port->priv->dbgfs_entries->port_flow_entries[port->id]; port_entry->port = port; port_entry->dbg_fe = entry; @@ -518,17 +483,11 @@ static int mvpp2_dbgfs_flow_entry_init(struct dentry *parent, if (!flow_entry_dir) return -ENOMEM; - /* This will be freed by 'type' release op */ - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return -ENOMEM; + entry = &priv->dbgfs_entries->flow_entries[flow]; entry->flow = flow; entry->priv = priv; - debugfs_create_file("flow_hits", 0444, flow_entry_dir, 
entry, - &mvpp2_dbgfs_flow_flt_hits_fops); - debugfs_create_file("dec_hits", 0444, flow_entry_dir, entry, &mvpp2_dbgfs_flow_dec_hits_fops); @@ -545,6 +504,7 @@ static int mvpp2_dbgfs_flow_entry_init(struct dentry *parent, if (ret) return ret; } + return 0; } @@ -557,7 +517,7 @@ static int mvpp2_dbgfs_flow_init(struct dentry *parent, struct mvpp2 *priv) if (!flow_dir) return -ENOMEM; - for (i = 0; i < MVPP2_N_FLOWS; i++) { + for (i = 0; i < MVPP2_N_PRS_FLOWS; i++) { ret = mvpp2_dbgfs_flow_entry_init(flow_dir, priv, i); if (ret) return ret; @@ -582,10 +542,7 @@ static int mvpp2_dbgfs_prs_entry_init(struct dentry *parent, if (!prs_entry_dir) return -ENOMEM; - /* The 'valid' entry's ops will free that */ - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return -ENOMEM; + entry = &priv->dbgfs_entries->prs_entries[tid]; entry->tid = tid; entry->priv = priv; @@ -630,6 +587,98 @@ static int mvpp2_dbgfs_prs_init(struct dentry *parent, struct mvpp2 *priv) return 0; } +static int mvpp2_dbgfs_c2_entry_init(struct dentry *parent, + struct mvpp2 *priv, int id) +{ + struct mvpp2_dbgfs_c2_entry *entry; + struct dentry *c2_entry_dir; + char c2_entry_name[10]; + + if (id >= MVPP22_CLS_C2_N_ENTRIES) + return -EINVAL; + + sprintf(c2_entry_name, "%03d", id); + + c2_entry_dir = debugfs_create_dir(c2_entry_name, parent); + if (!c2_entry_dir) + return -ENOMEM; + + entry = &priv->dbgfs_entries->c2_entries[id]; + + entry->id = id; + entry->priv = priv; + + debugfs_create_file("hits", 0444, c2_entry_dir, entry, + &mvpp2_dbgfs_flow_c2_hits_fops); + + debugfs_create_file("default_rxq", 0444, c2_entry_dir, entry, + &mvpp2_dbgfs_flow_c2_rxq_fops); + + debugfs_create_file("rss_enable", 0444, c2_entry_dir, entry, + &mvpp2_dbgfs_flow_c2_enable_fops); + + return 0; +} + +static int mvpp2_dbgfs_flow_tbl_entry_init(struct dentry *parent, + struct mvpp2 *priv, int id) +{ + struct mvpp2_dbgfs_flow_tbl_entry *entry; + struct dentry *flow_tbl_entry_dir; + char flow_tbl_entry_name[10]; + + if 
(id >= MVPP2_CLS_FLOWS_TBL_SIZE) + return -EINVAL; + + sprintf(flow_tbl_entry_name, "%03d", id); + + flow_tbl_entry_dir = debugfs_create_dir(flow_tbl_entry_name, parent); + if (!flow_tbl_entry_dir) + return -ENOMEM; + + entry = &priv->dbgfs_entries->flt_entries[id]; + + entry->id = id; + entry->priv = priv; + + debugfs_create_file("hits", 0444, flow_tbl_entry_dir, entry, + &mvpp2_dbgfs_flow_flt_hits_fops); + + return 0; +} + +static int mvpp2_dbgfs_cls_init(struct dentry *parent, struct mvpp2 *priv) +{ + struct dentry *cls_dir, *c2_dir, *flow_tbl_dir; + int i, ret; + + cls_dir = debugfs_create_dir("classifier", parent); + if (!cls_dir) + return -ENOMEM; + + c2_dir = debugfs_create_dir("c2", cls_dir); + if (!c2_dir) + return -ENOMEM; + + for (i = 0; i < MVPP22_CLS_C2_N_ENTRIES; i++) { + ret = mvpp2_dbgfs_c2_entry_init(c2_dir, priv, i); + if (ret) + return ret; + } + + flow_tbl_dir = debugfs_create_dir("flow_table", cls_dir); + if (!flow_tbl_dir) + return -ENOMEM; + + for (i = 0; i < MVPP2_CLS_FLOWS_TBL_SIZE; i++) { + ret = mvpp2_dbgfs_flow_tbl_entry_init(flow_tbl_dir, priv, i); + if (ret) + return ret; + } + + return 0; +} + static int mvpp2_dbgfs_port_init(struct dentry *parent, struct mvpp2_port *port) { @@ -648,21 +697,14 @@ static int mvpp2_dbgfs_port_init(struct dentry *parent, debugfs_create_file("vid_filter", 0444, port_dir, port, &mvpp2_dbgfs_port_vid_fops); - debugfs_create_file("c2_hits", 0444, port_dir, port, - &mvpp2_dbgfs_flow_c2_hits_fops); - - debugfs_create_file("default_rxq", 0444, port_dir, port, - &mvpp2_dbgfs_flow_c2_rxq_fops); - - debugfs_create_file("rss_enable", 0444, port_dir, port, - &mvpp2_dbgfs_flow_c2_enable_fops); - return 0; } void mvpp2_dbgfs_cleanup(struct mvpp2 *priv) { debugfs_remove_recursive(priv->dbgfs_dir); + + kfree(priv->dbgfs_entries); } void mvpp2_dbgfs_init(struct mvpp2 *priv, const char *name) @@ -682,11 +724,18 @@ void mvpp2_dbgfs_init(struct mvpp2 *priv, const char *name) return; priv->dbgfs_dir = mvpp2_dir; + 
priv->dbgfs_entries = kzalloc(sizeof(*priv->dbgfs_entries), GFP_KERNEL); + if (!priv->dbgfs_entries) + goto err; ret = mvpp2_dbgfs_prs_init(mvpp2_dir, priv); if (ret) goto err; + ret = mvpp2_dbgfs_cls_init(mvpp2_dir, priv); + if (ret) + goto err; + for (i = 0; i < priv->port_count; i++) { ret = mvpp2_dbgfs_port_init(mvpp2_dir, priv->port_list[i]); if (ret) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 25fbed2b8d94..f128ea22b339 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -3741,9 +3741,9 @@ static int mvpp2_set_features(struct net_device *dev, if (changed & NETIF_F_RXHASH) { if (features & NETIF_F_RXHASH) - mvpp22_rss_enable(port); + mvpp22_port_rss_enable(port); else - mvpp22_rss_disable(port); + mvpp22_port_rss_disable(port); } return 0; @@ -4301,7 +4301,7 @@ static int mvpp2_port_init(struct mvpp2_port *port) mvpp2_cls_port_config(port); if (mvpp22_rss_is_supported()) - mvpp22_rss_port_init(port); + mvpp22_port_rss_init(port); /* Provide an initial Rx packet size */ port->pkt_size = MVPP2_RX_PKT_SIZE(port->dev->mtu); @@ -4848,6 +4848,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, struct mvpp2_port *port; struct mvpp2_port_pcpu *port_pcpu; struct device_node *port_node = to_of_node(port_fwnode); + netdev_features_t features; struct net_device *dev; struct resource *res; struct phylink *phylink; @@ -4856,7 +4857,6 @@ static int mvpp2_port_probe(struct platform_device *pdev, unsigned long flags = 0; bool has_tx_irqs; u32 id; - int features; int phy_mode; int err, i; diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig index ff8057ed97ee..8491db57b0b0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig @@ -26,6 +26,7 @@ config MLX4_EN_DCB config MLX4_CORE tristate depends on PCI + select NET_DEVLINK default n 
config MLX4_DEBUG diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 2cbd2bd7c67c..fba54fb06e18 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -685,16 +685,15 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, } u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct mlx4_en_priv *priv = netdev_priv(dev); u16 rings_p_up = priv->num_tx_rings_p_up; if (netdev_get_num_tc(dev)) - return fallback(dev, skb, NULL); + return netdev_pick_tx(dev, skb, NULL); - return fallback(dev, skb, NULL) % rings_p_up; + return netdev_pick_tx(dev, skb, NULL) % rings_p_up; } static void mlx4_bf_copy(void __iomem *dst, const void *src, diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 8137454e2534..630f15977f09 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -698,8 +698,7 @@ void mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); void mlx4_en_tx_irq(struct mlx4_cq *mcq); u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback); + struct net_device *sb_dev); netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, struct mlx4_en_rx_alloc *frame, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 6debffb8336b..9aca8086ee01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -5,6 +5,7 @@ config MLX5_CORE tristate "Mellanox 5th generation network adapters (ConnectX series) core driver" depends on PCI + select NET_DEVLINK imply 
PTP_1588_CLOCK imply VXLAN default n diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 71c65cc17904..9e71cf03369c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -769,8 +769,7 @@ struct mlx5e_profile { void mlx5e_build_ptys2ethtool_map(void); u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback); + struct net_device *sb_dev); netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_tx_wqe *wqe, u16 pi); @@ -885,6 +884,53 @@ static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version)); } +static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_ETH(mdev, swp) && + MLX5_CAP_ETH(mdev, swp_csum) && MLX5_CAP_ETH(mdev, swp_lso); +} + +struct mlx5e_swp_spec { + __be16 l3_proto; + u8 l4_proto; + u8 is_tun; + __be16 tun_l3_proto; + u8 tun_l4_proto; +}; + +static inline void +mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, + struct mlx5e_swp_spec *swp_spec) +{ + /* SWP offsets are in 2-bytes words */ + eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2; + if (swp_spec->l3_proto == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6; + if (swp_spec->l4_proto) { + eseg->swp_outer_l4_offset = skb_transport_offset(skb) / 2; + if (swp_spec->l4_proto == IPPROTO_UDP) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP; + } + + if (swp_spec->is_tun) { + eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; + if (swp_spec->tun_l3_proto == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + } else { /* typically for ipsec when xfrm mode != XFRM_MODE_TUNNEL */ + eseg->swp_inner_l3_offset = 
skb_network_offset(skb) / 2; + if (swp_spec->l3_proto == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + } + switch (swp_spec->tun_l4_proto) { + case IPPROTO_UDP: + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; + /* fall through */ + case IPPROTO_TCP: + eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; + break; + } +} + static inline void mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe **wqe, u16 *pi) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index eac245a93f91..b0ce68feb0f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -165,23 +165,23 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, } /** - * update_buffer_lossy() - * mtu: device's MTU - * pfc_en: <input> current pfc configuration - * buffer: <input> current prio to buffer mapping - * xoff: <input> xoff value - * port_buffer: <output> port receive buffer configuration - * change: <output> + * update_buffer_lossy - Update buffer configuration based on pfc + * @mtu: device's MTU + * @pfc_en: <input> current pfc configuration + * @buffer: <input> current prio to buffer mapping + * @xoff: <input> xoff value + * @port_buffer: <output> port receive buffer configuration + * @change: <output> * - * Update buffer configuration based on pfc configuraiton and priority - * to buffer mapping. - * Buffer's lossy bit is changed to: - * lossless if there is at least one PFC enabled priority mapped to this buffer - * lossy if all priorities mapped to this buffer are PFC disabled + * Update buffer configuration based on pfc configuraiton and + * priority to buffer mapping. 
+ * Buffer's lossy bit is changed to: + * lossless if there is at least one PFC enabled priority + * mapped to this buffer lossy if all priorities mapped to + * this buffer are PFC disabled * - * Return: - * Return 0 if no error. - * Set change to true if buffer configuration is modified. + * @return: 0 if no error, + * sets change to true if buffer configuration was modified. */ static int update_buffer_lossy(unsigned int mtu, u8 pfc_en, u8 *buffer, u32 xoff, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index 1dd225380a66..6da7c88742dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -40,6 +40,57 @@ #include "en_accel/tls_rxtx.h" #include "en.h" +#if IS_ENABLED(CONFIG_GENEVE) +static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev) +{ + return mlx5_tx_swp_supported(mdev); +} + +static inline void +mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) +{ + struct mlx5e_swp_spec swp_spec = {}; + unsigned int offset = 0; + __be16 l3_proto; + u8 l4_proto; + + l3_proto = vlan_get_protocol(skb); + switch (l3_proto) { + case htons(ETH_P_IP): + l4_proto = ip_hdr(skb)->protocol; + break; + case htons(ETH_P_IPV6): + l4_proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL); + break; + default: + return; + } + + if (l4_proto != IPPROTO_UDP || + udp_hdr(skb)->dest != cpu_to_be16(GENEVE_UDP_PORT)) + return; + swp_spec.l3_proto = l3_proto; + swp_spec.l4_proto = l4_proto; + swp_spec.is_tun = true; + if (inner_ip_hdr(skb)->version == 6) { + swp_spec.tun_l3_proto = htons(ETH_P_IPV6); + swp_spec.tun_l4_proto = inner_ipv6_hdr(skb)->nexthdr; + } else { + swp_spec.tun_l3_proto = htons(ETH_P_IP); + swp_spec.tun_l4_proto = inner_ip_hdr(skb)->protocol; + } + + mlx5e_set_eseg_swp(skb, eseg, &swp_spec); +} + +#else +static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev 
*mdev) +{ + return false; +} + +#endif /* CONFIG_GENEVE */ + static inline void mlx5e_udp_gso_handle_tx_skb(struct sk_buff *skb) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index 53608afd39b6..0dd17514caae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -136,7 +136,7 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, u8 mode, struct xfrm_offload *xo) { - u8 proto; + struct mlx5e_swp_spec swp_spec = {}; /* Tunnel Mode: * SWP: OutL3 InL3 InL4 @@ -146,35 +146,23 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb, * SWP: OutL3 InL4 * InL3 * Pkt: MAC IP ESP L4 - * - * Offsets are in 2-byte words, counting from start of frame */ - eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2; - if (skb->protocol == htons(ETH_P_IPV6)) - eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6; - - if (mode == XFRM_MODE_TUNNEL) { - eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; + swp_spec.l3_proto = skb->protocol; + swp_spec.is_tun = mode == XFRM_MODE_TUNNEL; + if (swp_spec.is_tun) { if (xo->proto == IPPROTO_IPV6) { - eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; - proto = inner_ipv6_hdr(skb)->nexthdr; + swp_spec.tun_l3_proto = htons(ETH_P_IPV6); + swp_spec.tun_l4_proto = inner_ipv6_hdr(skb)->nexthdr; } else { - proto = inner_ip_hdr(skb)->protocol; + swp_spec.tun_l3_proto = htons(ETH_P_IP); + swp_spec.tun_l4_proto = inner_ip_hdr(skb)->protocol; } } else { - eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2; - if (skb->protocol == htons(ETH_P_IPV6)) - eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; - proto = xo->proto; - } - switch (proto) { - case IPPROTO_UDP: - eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; - /* Fall through */ - case IPPROTO_TCP: - eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; - break; 
+ swp_spec.tun_l3_proto = skb->protocol; + swp_spec.tun_l4_proto = xo->proto; } + + mlx5e_set_eseg_swp(skb, eseg, &swp_spec); } void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b5fdbd3190d9..e08a1eb04e22 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -34,6 +34,7 @@ #include <net/pkt_cls.h> #include <linux/mlx5/fs.h> #include <net/vxlan.h> +#include <net/geneve.h> #include <linux/bpf.h> #include <linux/if_bridge.h> #include <net/page_pool.h> @@ -43,6 +44,7 @@ #include "en_rep.h" #include "en_accel/ipsec.h" #include "en_accel/ipsec_rxtx.h" +#include "en_accel/en_accel.h" #include "en_accel/tls.h" #include "accel/ipsec.h" #include "accel/tls.h" @@ -2173,10 +2175,13 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv, { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + bool allow_swp; + allow_swp = mlx5_geneve_tx_allowed(priv->mdev) || + !!MLX5_IPSEC_DEV(priv->mdev); mlx5e_build_sq_param_common(priv, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); - MLX5_SET(sqc, sqc, allow_swp, !!MLX5_IPSEC_DEV(priv->mdev)); + MLX5_SET(sqc, sqc, allow_swp, allow_swp); } static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, @@ -4103,6 +4108,12 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, /* Verify if UDP port is being offloaded by HW */ if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port)) return features; + +#if IS_ENABLED(CONFIG_GENEVE) + /* Support Geneve offload for default UDP port */ + if (port == GENEVE_UDP_PORT && mlx5_geneve_tx_allowed(priv->mdev)) + return features; +#endif } out: @@ -4674,7 +4685,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; - if 
(mlx5_vxlan_allowed(mdev->vxlan) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { + if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev) || + MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { netdev->hw_enc_features |= NETIF_F_IP_CSUM; netdev->hw_enc_features |= NETIF_F_IPV6_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; @@ -4682,7 +4694,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL; } - if (mlx5_vxlan_allowed(mdev->vxlan)) { + if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) { netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM; netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index b4967a0ff8c7..c68edcc84af8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1827,6 +1827,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct pedit_headers { struct ethhdr eth; + struct vlan_hdr vlan; struct iphdr ip4; struct ipv6hdr ip6; struct tcphdr tcp; @@ -1884,6 +1885,7 @@ static struct mlx5_fields fields[] = { OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0), OFFLOAD(SMAC_15_0, 2, eth.h_source[4], 0), OFFLOAD(ETHERTYPE, 2, eth.h_proto, 0), + OFFLOAD(FIRST_VID, 2, vlan.h_vlan_TCI, 0), OFFLOAD(IP_TTL, 1, ip4.ttl, 0), OFFLOAD(SIPV4, 4, ip4.saddr, 0), @@ -2247,6 +2249,35 @@ static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) return (fsystem_guid == psystem_guid); } +static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace, + const struct flow_action_entry *act, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct pedit_headers_action *hdrs, + u32 *action, struct netlink_ext_ack *extack) +{ + u16 mask16 = VLAN_VID_MASK; + u16 val16 = act->vlan.vid & VLAN_VID_MASK; + const struct flow_action_entry pedit_act = { + .id = 
FLOW_ACTION_MANGLE, + .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH, + .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI), + .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16), + .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16), + }; + int err; + + if (act->vlan.prio) { + NL_SET_ERR_MSG_MOD(extack, "Setting VLAN prio is not supported"); + return -EOPNOTSUPP; + } + + err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr, + hdrs, NULL); + *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + return err; +} + static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow_parse_attr *parse_attr, @@ -2282,6 +2313,15 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; break; + case FLOW_ACTION_VLAN_MANGLE: + err = add_vlan_rewrite_action(priv, + MLX5_FLOW_NAMESPACE_KERNEL, + act, parse_attr, hdrs, + &action, extack); + if (err) + return err; + + break; case FLOW_ACTION_CSUM: if (csum_offload_supported(priv, action, act->csum_flags, @@ -2490,8 +2530,7 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv, } break; default: - /* action is FLOW_ACT_VLAN_MANGLE */ - return -EOPNOTSUPP; + return -EINVAL; } attr->total_vlan = vlan_idx + 1; @@ -2625,7 +2664,27 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, break; case FLOW_ACTION_VLAN_PUSH: case FLOW_ACTION_VLAN_POP: - err = parse_tc_vlan_action(priv, act, attr, &action); + if (act->id == FLOW_ACTION_VLAN_PUSH && + (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) { + /* Replace vlan pop+push with vlan modify */ + action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + err = add_vlan_rewrite_action(priv, + MLX5_FLOW_NAMESPACE_FDB, + act, parse_attr, hdrs, + &action, extack); + } else { + err = parse_tc_vlan_action(priv, act, attr, &action); + } + if (err) + return err; + + attr->split_count = attr->out_count; + break; + case FLOW_ACTION_VLAN_MANGLE: + err = 
add_vlan_rewrite_action(priv, + MLX5_FLOW_NAMESPACE_FDB, + act, parse_attr, hdrs, + &action, extack); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 25a8f8260c14..41e2a01d3713 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -32,6 +32,7 @@ #include <linux/tcp.h> #include <linux/if_vlan.h> +#include <net/geneve.h> #include <net/dsfield.h> #include "en.h" #include "ipoib/ipoib.h" @@ -110,11 +111,10 @@ static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb #endif u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { + int channel_ix = netdev_pick_tx(dev, skb, NULL); struct mlx5e_priv *priv = netdev_priv(dev); - int channel_ix = fallback(dev, skb, NULL); u16 num_channels; int up = 0; @@ -392,6 +392,10 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, eseg = &wqe->eth; dseg = wqe->data; +#if IS_ENABLED(CONFIG_GENEVE) + if (skb->encapsulation) + mlx5e_tx_tunnel_accel(skb, eseg); +#endif mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); eseg->mss = mss; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index bb6e5b5d9681..46a747f7c162 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -900,14 +900,12 @@ mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) } EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); +#ifdef CONFIG_RFS_ACCEL struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) { -#ifdef CONFIG_RFS_ACCEL return dev->priv.eq_table->rmap; -#else - return NULL; -#endif } +#endif struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 3f3cd32ae60a..e0ba59b5296f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -431,6 +431,9 @@ static inline int mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw, return index; } +/* TODO: This mlx5e_tc function shouldn't be called by eswitch */ +void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index f2260391be5b..6c72f33f6d09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1287,13 +1287,13 @@ void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw) int esw_offloads_init_reps(struct mlx5_eswitch *esw) { - int total_vfs = MLX5_TOTAL_VPORTS(esw->dev); + int total_vports = MLX5_TOTAL_VPORTS(esw->dev); struct mlx5_core_dev *dev = esw->dev; struct mlx5_eswitch_rep *rep; u8 hw_id[ETH_ALEN], rep_type; int vport; - esw->offloads.vport_reps = kcalloc(total_vfs, + esw->offloads.vport_reps = kcalloc(total_vports, sizeof(struct mlx5_eswitch_rep), GFP_KERNEL); if (!esw->offloads.vport_reps) @@ -1523,8 +1523,6 @@ static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, return 0; } -void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); - static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) { mlx5e_tc_clean_fdb_peer_flows(esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c index 40f4a19b1ce1..be69c1d7941a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c @@ -80,10 +80,8 @@ void 
mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy, mlx5_query_port_tun_entropy(mdev, &entropy_flags); tun_entropy->num_enabling_entries = 0; tun_entropy->num_disabling_entries = 0; - tun_entropy->enabled = entropy_flags.calc_enabled; - tun_entropy->enabled = - (entropy_flags.calc_supported) ? - entropy_flags.calc_enabled : true; + tun_entropy->enabled = entropy_flags.calc_supported ? + entropy_flags.calc_enabled : true; } static int mlx5_set_entropy(struct mlx5_tun_entropy *tun_entropy, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c index 9a8fd762167b..b9d4f4e19ff9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c @@ -33,6 +33,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/mlx5/driver.h> +#include <net/vxlan.h> #include "mlx5_core.h" #include "vxlan.h" @@ -204,8 +205,8 @@ struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev) spin_lock_init(&vxlan->lock); hash_init(vxlan->htable); - /* Hardware adds 4789 by default */ - mlx5_vxlan_add_port(vxlan, 4789); + /* Hardware adds 4789 (IANA_VXLAN_UDP_PORT) by default */ + mlx5_vxlan_add_port(vxlan, IANA_VXLAN_UDP_PORT); return vxlan; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 7b331674622c..6fb99be60584 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -111,7 +111,6 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev); int mlx5_sriov_attach(struct mlx5_core_dev *dev); void mlx5_sriov_detach(struct mlx5_core_dev *dev); int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); -bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); int 
mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, @@ -176,6 +175,11 @@ int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw); void mlx5e_init(void); void mlx5e_cleanup(void); +static inline bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) +{ + return pci_num_vf(dev->pdev) ? true : false; +} + static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) { /* LACP owner conditions: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 7b23fa8d2d60..a249b3c3843d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -36,13 +36,6 @@ #include "mlx5_core.h" #include "eswitch.h" -bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) -{ - struct mlx5_core_sriov *sriov = &dev->priv.sriov; - - return !!sriov->num_vfs; -} - static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; @@ -151,33 +144,10 @@ out: mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); } -static int mlx5_pci_enable_sriov(struct pci_dev *pdev, int num_vfs) -{ - struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - int err = 0; - - if (pci_num_vf(pdev)) { - mlx5_core_warn(dev, "Unable to enable pci sriov, already enabled\n"); - return -EBUSY; - } - - err = pci_enable_sriov(pdev, num_vfs); - if (err) - mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err); - - return err; -} - -static void mlx5_pci_disable_sriov(struct pci_dev *pdev) -{ - pci_disable_sriov(pdev); -} - static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - struct mlx5_core_sriov *sriov = &dev->priv.sriov; - int err = 0; + int err; err = mlx5_device_enable_sriov(dev, num_vfs); if (err) { @@ -185,42 +155,37 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) return err; } - err = mlx5_pci_enable_sriov(pdev, 
num_vfs); + err = pci_enable_sriov(pdev, num_vfs); if (err) { - mlx5_core_warn(dev, "mlx5_pci_enable_sriov failed : %d\n", err); + mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err); mlx5_device_disable_sriov(dev); - return err; } - - sriov->num_vfs = num_vfs; - - return 0; + return err; } static void mlx5_sriov_disable(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - struct mlx5_core_sriov *sriov = &dev->priv.sriov; - mlx5_pci_disable_sriov(pdev); + pci_disable_sriov(pdev); mlx5_device_disable_sriov(dev); - sriov->num_vfs = 0; } int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; int err = 0; mlx5_core_dbg(dev, "requested num_vfs %d\n", num_vfs); - if (!mlx5_core_is_pf(dev)) - return -EPERM; if (num_vfs) err = mlx5_sriov_enable(pdev, num_vfs); else mlx5_sriov_disable(pdev); + if (!err) + sriov->num_vfs = num_vfs; return err ? err : num_vfs; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index 9c195dfed031..b6b3ff0fe17f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -4,6 +4,7 @@ config MLXSW_CORE tristate "Mellanox Technologies Switch ASICs support" + select NET_DEVLINK ---help--- This driver supports Mellanox Technologies Switch ASICs family. 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index d23d53c0e284..e70bb673eeec 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1718,7 +1718,9 @@ u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_res_get); -int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port) +int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port, + u32 port_number, bool split, + u32 split_port_subnumber) { struct devlink *devlink = priv_to_devlink(mlxsw_core); struct mlxsw_core_port *mlxsw_core_port = @@ -1727,6 +1729,8 @@ int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port) int err; mlxsw_core_port->local_port = local_port; + devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, + port_number, split, split_port_subnumber); err = devlink_port_register(devlink, devlink_port, local_port); if (err) memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port)); @@ -1746,17 +1750,13 @@ void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port) EXPORT_SYMBOL(mlxsw_core_port_fini); void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u8 local_port, - void *port_driver_priv, struct net_device *dev, - u32 port_number, bool split, - u32 split_port_subnumber) + void *port_driver_priv, struct net_device *dev) { struct mlxsw_core_port *mlxsw_core_port = &mlxsw_core->ports[local_port]; struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port; mlxsw_core_port->port_driver_priv = port_driver_priv; - devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, - port_number, split, split_port_subnumber); devlink_port_type_eth_set(devlink_port, dev); } EXPORT_SYMBOL(mlxsw_core_port_eth_set); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 8ec53f027575..74e95e943b24 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h 
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -164,12 +164,12 @@ void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, u16 lag_id, u8 local_port); void *mlxsw_core_port_driver_priv(struct mlxsw_core_port *mlxsw_core_port); -int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port); +int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port, + u32 port_number, bool split, + u32 split_port_subnumber); void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port); void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u8 local_port, - void *port_driver_priv, struct net_device *dev, - u32 port_number, bool split, - u32 split_port_subnumber); + void *port_driver_priv, struct net_device *dev); void mlxsw_core_port_ib_set(struct mlxsw_core *mlxsw_core, u8 local_port, void *port_driver_priv); void mlxsw_core_port_clear(struct mlxsw_core *mlxsw_core, u8 local_port, diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index 00c390024350..0ee1656609f5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -150,7 +150,8 @@ mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u8 local_port, u8 module) struct net_device *dev; int err; - err = mlxsw_core_port_init(mlxsw_m->core, local_port); + err = mlxsw_core_port_init(mlxsw_m->core, local_port, + module + 1, false, 0); if (err) { dev_err(mlxsw_m->bus_info->dev, "Port %d: Failed to init core port\n", local_port); @@ -190,7 +191,7 @@ mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u8 local_port, u8 module) } mlxsw_core_port_eth_set(mlxsw_m->core, mlxsw_m_port->local_port, - mlxsw_m_port, dev, module + 1, false, 0); + mlxsw_m_port, dev); return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 9eb63300c1d3..eaf86c4c2f6c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3391,7 +3391,8 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, struct net_device *dev; int err; - err = mlxsw_core_port_init(mlxsw_sp->core, local_port); + err = mlxsw_core_port_init(mlxsw_sp->core, local_port, + module + 1, split, lane / width); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to init core port\n", local_port); @@ -3573,8 +3574,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, } mlxsw_core_port_eth_set(mlxsw_sp->core, mlxsw_sp_port->local_port, - mlxsw_sp_port, dev, module + 1, - mlxsw_sp_port->split, lane / width); + mlxsw_sp_port, dev); mlxsw_core_schedule_dw(&mlxsw_sp_port->periodic_hw_stats.update_dw, 0); return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchib.c b/drivers/net/ethernet/mellanox/mlxsw/switchib.c index bcf2e79a21c8..e1e7e0dd808d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchib.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchib.c @@ -267,7 +267,8 @@ static int mlxsw_sib_port_create(struct mlxsw_sib *mlxsw_sib, u8 local_port, { int err; - err = mlxsw_core_port_init(mlxsw_sib->core, local_port); + err = mlxsw_core_port_init(mlxsw_sib->core, local_port, + module + 1, false, 0); if (err) { dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to init core port\n", local_port); diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 533fe6235b7c..568883fc40df 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -1102,7 +1102,7 @@ static int __mlxsw_sx_port_eth_create(struct mlxsw_sx *mlxsw_sx, u8 local_port, } mlxsw_core_port_eth_set(mlxsw_sx->core, mlxsw_sx_port->local_port, - mlxsw_sx_port, dev, module + 1, false, 0); + mlxsw_sx_port, dev); mlxsw_sx->ports[local_port] = mlxsw_sx_port; return 0; @@ -1127,7 +1127,8 @@ static int mlxsw_sx_port_eth_create(struct mlxsw_sx 
*mlxsw_sx, u8 local_port, { int err; - err = mlxsw_core_port_init(mlxsw_sx->core, local_port); + err = mlxsw_core_port_init(mlxsw_sx->core, local_port, + module + 1, false, 0); if (err) { dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to init core port\n", local_port); diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c index 8f72587b5a2c..0567e4f387a5 100644 --- a/drivers/net/ethernet/microchip/enc28j60.c +++ b/drivers/net/ethernet/microchip/enc28j60.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Microchip ENC28J60 ethernet driver (MAC + PHY) * @@ -5,11 +6,6 @@ * Author: Claudio Lanconelli <lanconelli.claudio@eptar.com> * based on enc28j60.c written by David Anders for 2.4 kernel version * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * * $Id: enc28j60.c,v 1.22 2007/12/20 10:47:01 claudio Exp $ */ @@ -18,9 +14,9 @@ #include <linux/types.h> #include <linux/fcntl.h> #include <linux/interrupt.h> +#include <linux/property.h> #include <linux/string.h> #include <linux/errno.h> -#include <linux/init.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> @@ -28,7 +24,6 @@ #include <linux/skbuff.h> #include <linux/delay.h> #include <linux/spi/spi.h> -#include <linux/of_net.h> #include "enc28j60_hw.h" @@ -41,10 +36,11 @@ (NETIF_MSG_PROBE | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN | NETIF_MSG_LINK) /* Buffer size required for the largest SPI transfer (i.e., reading a - * frame). */ + * frame). 
+ */ #define SPI_TRANSFER_BUF_LEN (4 + MAX_FRAMELEN) -#define TX_TIMEOUT (4 * HZ) +#define TX_TIMEOUT (4 * HZ) /* Max TX retries in case of collision as suggested by errata datasheet */ #define MAX_TX_RETRYCOUNT 16 @@ -83,11 +79,12 @@ static struct { /* * SPI read buffer - * wait for the SPI transfer and copy received data to destination + * Wait for the SPI transfer and copy received data to destination. */ static int spi_read_buf(struct enc28j60_net *priv, int len, u8 *data) { + struct device *dev = &priv->spi->dev; u8 *rx_buf = priv->spi_transfer_buf + 4; u8 *tx_buf = priv->spi_transfer_buf; struct spi_transfer tx = { @@ -113,8 +110,8 @@ spi_read_buf(struct enc28j60_net *priv, int len, u8 *data) ret = msg.status; } if (ret && netif_msg_drv(priv)) - printk(KERN_DEBUG DRV_NAME ": %s() failed: ret = %d\n", - __func__, ret); + dev_printk(KERN_DEBUG, dev, "%s() failed: ret = %d\n", + __func__, ret); return ret; } @@ -122,9 +119,9 @@ spi_read_buf(struct enc28j60_net *priv, int len, u8 *data) /* * SPI write buffer */ -static int spi_write_buf(struct enc28j60_net *priv, int len, - const u8 *data) +static int spi_write_buf(struct enc28j60_net *priv, int len, const u8 *data) { + struct device *dev = &priv->spi->dev; int ret; if (len > SPI_TRANSFER_BUF_LEN - 1 || len <= 0) @@ -134,8 +131,8 @@ static int spi_write_buf(struct enc28j60_net *priv, int len, memcpy(&priv->spi_transfer_buf[1], data, len); ret = spi_write(priv->spi, priv->spi_transfer_buf, len + 1); if (ret && netif_msg_drv(priv)) - printk(KERN_DEBUG DRV_NAME ": %s() failed: ret = %d\n", - __func__, ret); + dev_printk(KERN_DEBUG, dev, "%s() failed: ret = %d\n", + __func__, ret); } return ret; } @@ -143,9 +140,9 @@ static int spi_write_buf(struct enc28j60_net *priv, int len, /* * basic SPI read operation */ -static u8 spi_read_op(struct enc28j60_net *priv, u8 op, - u8 addr) +static u8 spi_read_op(struct enc28j60_net *priv, u8 op, u8 addr) { + struct device *dev = &priv->spi->dev; u8 tx_buf[2]; u8 rx_buf[4]; u8 val 
= 0; @@ -159,8 +156,8 @@ static u8 spi_read_op(struct enc28j60_net *priv, u8 op, tx_buf[0] = op | (addr & ADDR_MASK); ret = spi_write_then_read(priv->spi, tx_buf, 1, rx_buf, slen); if (ret) - printk(KERN_DEBUG DRV_NAME ": %s() failed: ret = %d\n", - __func__, ret); + dev_printk(KERN_DEBUG, dev, "%s() failed: ret = %d\n", + __func__, ret); else val = rx_buf[slen - 1]; @@ -170,28 +167,25 @@ static u8 spi_read_op(struct enc28j60_net *priv, u8 op, /* * basic SPI write operation */ -static int spi_write_op(struct enc28j60_net *priv, u8 op, - u8 addr, u8 val) +static int spi_write_op(struct enc28j60_net *priv, u8 op, u8 addr, u8 val) { + struct device *dev = &priv->spi->dev; int ret; priv->spi_transfer_buf[0] = op | (addr & ADDR_MASK); priv->spi_transfer_buf[1] = val; ret = spi_write(priv->spi, priv->spi_transfer_buf, 2); if (ret && netif_msg_drv(priv)) - printk(KERN_DEBUG DRV_NAME ": %s() failed: ret = %d\n", - __func__, ret); + dev_printk(KERN_DEBUG, dev, "%s() failed: ret = %d\n", + __func__, ret); return ret; } static void enc28j60_soft_reset(struct enc28j60_net *priv) { - if (netif_msg_hw(priv)) - printk(KERN_DEBUG DRV_NAME ": %s() enter\n", __func__); - spi_write_op(priv, ENC28J60_SOFT_RESET, 0, ENC28J60_SOFT_RESET); /* Errata workaround #1, CLKRDY check is unreliable, - * delay at least 1 mS instead */ + * delay at least 1 ms instead */ udelay(2000); } @@ -203,7 +197,7 @@ static void enc28j60_set_bank(struct enc28j60_net *priv, u8 addr) u8 b = (addr & BANK_MASK) >> 5; /* These registers (EIE, EIR, ESTAT, ECON2, ECON1) - * are present in all banks, no need to switch bank + * are present in all banks, no need to switch bank. 
*/ if (addr >= EIE && addr <= ECON1) return; @@ -242,15 +236,13 @@ static void enc28j60_set_bank(struct enc28j60_net *priv, u8 addr) /* * Register bit field Set */ -static void nolock_reg_bfset(struct enc28j60_net *priv, - u8 addr, u8 mask) +static void nolock_reg_bfset(struct enc28j60_net *priv, u8 addr, u8 mask) { enc28j60_set_bank(priv, addr); spi_write_op(priv, ENC28J60_BIT_FIELD_SET, addr, mask); } -static void locked_reg_bfset(struct enc28j60_net *priv, - u8 addr, u8 mask) +static void locked_reg_bfset(struct enc28j60_net *priv, u8 addr, u8 mask) { mutex_lock(&priv->lock); nolock_reg_bfset(priv, addr, mask); @@ -260,15 +252,13 @@ static void locked_reg_bfset(struct enc28j60_net *priv, /* * Register bit field Clear */ -static void nolock_reg_bfclr(struct enc28j60_net *priv, - u8 addr, u8 mask) +static void nolock_reg_bfclr(struct enc28j60_net *priv, u8 addr, u8 mask) { enc28j60_set_bank(priv, addr); spi_write_op(priv, ENC28J60_BIT_FIELD_CLR, addr, mask); } -static void locked_reg_bfclr(struct enc28j60_net *priv, - u8 addr, u8 mask) +static void locked_reg_bfclr(struct enc28j60_net *priv, u8 addr, u8 mask) { mutex_lock(&priv->lock); nolock_reg_bfclr(priv, addr, mask); @@ -278,15 +268,13 @@ static void locked_reg_bfclr(struct enc28j60_net *priv, /* * Register byte read */ -static int nolock_regb_read(struct enc28j60_net *priv, - u8 address) +static int nolock_regb_read(struct enc28j60_net *priv, u8 address) { enc28j60_set_bank(priv, address); return spi_read_op(priv, ENC28J60_READ_CTRL_REG, address); } -static int locked_regb_read(struct enc28j60_net *priv, - u8 address) +static int locked_regb_read(struct enc28j60_net *priv, u8 address) { int ret; @@ -300,8 +288,7 @@ static int locked_regb_read(struct enc28j60_net *priv, /* * Register word read */ -static int nolock_regw_read(struct enc28j60_net *priv, - u8 address) +static int nolock_regw_read(struct enc28j60_net *priv, u8 address) { int rl, rh; @@ -312,8 +299,7 @@ static int nolock_regw_read(struct 
enc28j60_net *priv, return (rh << 8) | rl; } -static int locked_regw_read(struct enc28j60_net *priv, - u8 address) +static int locked_regw_read(struct enc28j60_net *priv, u8 address) { int ret; @@ -327,15 +313,13 @@ static int locked_regw_read(struct enc28j60_net *priv, /* * Register byte write */ -static void nolock_regb_write(struct enc28j60_net *priv, - u8 address, u8 data) +static void nolock_regb_write(struct enc28j60_net *priv, u8 address, u8 data) { enc28j60_set_bank(priv, address); spi_write_op(priv, ENC28J60_WRITE_CTRL_REG, address, data); } -static void locked_regb_write(struct enc28j60_net *priv, - u8 address, u8 data) +static void locked_regb_write(struct enc28j60_net *priv, u8 address, u8 data) { mutex_lock(&priv->lock); nolock_regb_write(priv, address, data); @@ -345,8 +329,7 @@ static void locked_regb_write(struct enc28j60_net *priv, /* * Register word write */ -static void nolock_regw_write(struct enc28j60_net *priv, - u8 address, u16 data) +static void nolock_regw_write(struct enc28j60_net *priv, u8 address, u16 data) { enc28j60_set_bank(priv, address); spi_write_op(priv, ENC28J60_WRITE_CTRL_REG, address, (u8) data); @@ -354,8 +337,7 @@ static void nolock_regw_write(struct enc28j60_net *priv, (u8) (data >> 8)); } -static void locked_regw_write(struct enc28j60_net *priv, - u8 address, u16 data) +static void locked_regw_write(struct enc28j60_net *priv, u8 address, u16 data) { mutex_lock(&priv->lock); nolock_regw_write(priv, address, data); @@ -364,20 +346,23 @@ static void locked_regw_write(struct enc28j60_net *priv, /* * Buffer memory read - * Select the starting address and execute a SPI buffer read + * Select the starting address and execute a SPI buffer read. 
*/ -static void enc28j60_mem_read(struct enc28j60_net *priv, - u16 addr, int len, u8 *data) +static void enc28j60_mem_read(struct enc28j60_net *priv, u16 addr, int len, + u8 *data) { mutex_lock(&priv->lock); nolock_regw_write(priv, ERDPTL, addr); #ifdef CONFIG_ENC28J60_WRITEVERIFY if (netif_msg_drv(priv)) { + struct device *dev = &priv->spi->dev; u16 reg; + reg = nolock_regw_read(priv, ERDPTL); if (reg != addr) - printk(KERN_DEBUG DRV_NAME ": %s() error writing ERDPT " - "(0x%04x - 0x%04x)\n", __func__, reg, addr); + dev_printk(KERN_DEBUG, dev, + "%s() error writing ERDPT (0x%04x - 0x%04x)\n", + __func__, reg, addr); } #endif spi_read_buf(priv, len, data); @@ -390,6 +375,8 @@ static void enc28j60_mem_read(struct enc28j60_net *priv, static void enc28j60_packet_write(struct enc28j60_net *priv, int len, const u8 *data) { + struct device *dev = &priv->spi->dev; + mutex_lock(&priv->lock); /* Set the write pointer to start of transmit buffer area */ nolock_regw_write(priv, EWRPTL, TXSTART_INIT); @@ -398,9 +385,9 @@ enc28j60_packet_write(struct enc28j60_net *priv, int len, const u8 *data) u16 reg; reg = nolock_regw_read(priv, EWRPTL); if (reg != TXSTART_INIT) - printk(KERN_DEBUG DRV_NAME - ": %s() ERWPT:0x%04x != 0x%04x\n", - __func__, reg, TXSTART_INIT); + dev_printk(KERN_DEBUG, dev, + "%s() ERWPT:0x%04x != 0x%04x\n", + __func__, reg, TXSTART_INIT); } #endif /* Set the TXND pointer to correspond to the packet size given */ @@ -408,30 +395,28 @@ enc28j60_packet_write(struct enc28j60_net *priv, int len, const u8 *data) /* write per-packet control byte */ spi_write_op(priv, ENC28J60_WRITE_BUF_MEM, 0, 0x00); if (netif_msg_hw(priv)) - printk(KERN_DEBUG DRV_NAME - ": %s() after control byte ERWPT:0x%04x\n", - __func__, nolock_regw_read(priv, EWRPTL)); + dev_printk(KERN_DEBUG, dev, + "%s() after control byte ERWPT:0x%04x\n", + __func__, nolock_regw_read(priv, EWRPTL)); /* copy the packet into the transmit buffer */ spi_write_buf(priv, len, data); if (netif_msg_hw(priv)) - 
printk(KERN_DEBUG DRV_NAME - ": %s() after write packet ERWPT:0x%04x, len=%d\n", - __func__, nolock_regw_read(priv, EWRPTL), len); + dev_printk(KERN_DEBUG, dev, + "%s() after write packet ERWPT:0x%04x, len=%d\n", + __func__, nolock_regw_read(priv, EWRPTL), len); mutex_unlock(&priv->lock); } -static unsigned long msec20_to_jiffies; - static int poll_ready(struct enc28j60_net *priv, u8 reg, u8 mask, u8 val) { - unsigned long timeout = jiffies + msec20_to_jiffies; + struct device *dev = &priv->spi->dev; + unsigned long timeout = jiffies + msecs_to_jiffies(20); /* 20 msec timeout read */ while ((nolock_regb_read(priv, reg) & mask) != val) { if (time_after(jiffies, timeout)) { if (netif_msg_drv(priv)) - dev_dbg(&priv->spi->dev, - "reg %02x ready timeout!\n", reg); + dev_dbg(dev, "reg %02x ready timeout!\n", reg); return -ETIMEDOUT; } cpu_relax(); @@ -449,7 +434,7 @@ static int wait_phy_ready(struct enc28j60_net *priv) /* * PHY register read - * PHY registers are not accessed directly, but through the MII + * PHY registers are not accessed directly, but through the MII. 
*/ static u16 enc28j60_phy_read(struct enc28j60_net *priv, u8 address) { @@ -465,7 +450,7 @@ static u16 enc28j60_phy_read(struct enc28j60_net *priv, u8 address) /* quit reading */ nolock_regb_write(priv, MICMD, 0x00); /* return the data */ - ret = nolock_regw_read(priv, MIRDL); + ret = nolock_regw_read(priv, MIRDL); mutex_unlock(&priv->lock); return ret; @@ -494,13 +479,13 @@ static int enc28j60_set_hw_macaddr(struct net_device *ndev) { int ret; struct enc28j60_net *priv = netdev_priv(ndev); + struct device *dev = &priv->spi->dev; mutex_lock(&priv->lock); if (!priv->hw_enable) { if (netif_msg_drv(priv)) - printk(KERN_INFO DRV_NAME - ": %s: Setting MAC address to %pM\n", - ndev->name, ndev->dev_addr); + dev_info(dev, "%s: Setting MAC address to %pM\n", + ndev->name, ndev->dev_addr); /* NOTE: MAC address in ENC28J60 is byte-backward */ nolock_regb_write(priv, MAADR5, ndev->dev_addr[0]); nolock_regb_write(priv, MAADR4, ndev->dev_addr[1]); @@ -511,9 +496,9 @@ static int enc28j60_set_hw_macaddr(struct net_device *ndev) ret = 0; } else { if (netif_msg_drv(priv)) - printk(KERN_DEBUG DRV_NAME - ": %s() Hardware must be disabled to set " - "Mac address\n", __func__); + dev_printk(KERN_DEBUG, dev, + "%s() Hardware must be disabled to set Mac address\n", + __func__); ret = -EBUSY; } mutex_unlock(&priv->lock); @@ -532,7 +517,7 @@ static int enc28j60_set_mac_address(struct net_device *dev, void *addr) if (!is_valid_ether_addr(address->sa_data)) return -EADDRNOTAVAIL; - memcpy(dev->dev_addr, address->sa_data, dev->addr_len); + ether_addr_copy(dev->dev_addr, address->sa_data); return enc28j60_set_hw_macaddr(dev); } @@ -541,33 +526,36 @@ static int enc28j60_set_mac_address(struct net_device *dev, void *addr) */ static void enc28j60_dump_regs(struct enc28j60_net *priv, const char *msg) { + struct device *dev = &priv->spi->dev; + mutex_lock(&priv->lock); - printk(KERN_DEBUG DRV_NAME " %s\n" - "HwRevID: 0x%02x\n" - "Cntrl: ECON1 ECON2 ESTAT EIR EIE\n" - " 0x%02x 0x%02x 0x%02x 0x%02x 
0x%02x\n" - "MAC : MACON1 MACON3 MACON4\n" - " 0x%02x 0x%02x 0x%02x\n" - "Rx : ERXST ERXND ERXWRPT ERXRDPT ERXFCON EPKTCNT MAMXFL\n" - " 0x%04x 0x%04x 0x%04x 0x%04x " - "0x%02x 0x%02x 0x%04x\n" - "Tx : ETXST ETXND MACLCON1 MACLCON2 MAPHSUP\n" - " 0x%04x 0x%04x 0x%02x 0x%02x 0x%02x\n", - msg, nolock_regb_read(priv, EREVID), - nolock_regb_read(priv, ECON1), nolock_regb_read(priv, ECON2), - nolock_regb_read(priv, ESTAT), nolock_regb_read(priv, EIR), - nolock_regb_read(priv, EIE), nolock_regb_read(priv, MACON1), - nolock_regb_read(priv, MACON3), nolock_regb_read(priv, MACON4), - nolock_regw_read(priv, ERXSTL), nolock_regw_read(priv, ERXNDL), - nolock_regw_read(priv, ERXWRPTL), - nolock_regw_read(priv, ERXRDPTL), - nolock_regb_read(priv, ERXFCON), - nolock_regb_read(priv, EPKTCNT), - nolock_regw_read(priv, MAMXFLL), nolock_regw_read(priv, ETXSTL), - nolock_regw_read(priv, ETXNDL), - nolock_regb_read(priv, MACLCON1), - nolock_regb_read(priv, MACLCON2), - nolock_regb_read(priv, MAPHSUP)); + dev_printk(KERN_DEBUG, dev, + " %s\n" + "HwRevID: 0x%02x\n" + "Cntrl: ECON1 ECON2 ESTAT EIR EIE\n" + " 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x\n" + "MAC : MACON1 MACON3 MACON4\n" + " 0x%02x 0x%02x 0x%02x\n" + "Rx : ERXST ERXND ERXWRPT ERXRDPT ERXFCON EPKTCNT MAMXFL\n" + " 0x%04x 0x%04x 0x%04x 0x%04x " + "0x%02x 0x%02x 0x%04x\n" + "Tx : ETXST ETXND MACLCON1 MACLCON2 MAPHSUP\n" + " 0x%04x 0x%04x 0x%02x 0x%02x 0x%02x\n", + msg, nolock_regb_read(priv, EREVID), + nolock_regb_read(priv, ECON1), nolock_regb_read(priv, ECON2), + nolock_regb_read(priv, ESTAT), nolock_regb_read(priv, EIR), + nolock_regb_read(priv, EIE), nolock_regb_read(priv, MACON1), + nolock_regb_read(priv, MACON3), nolock_regb_read(priv, MACON4), + nolock_regw_read(priv, ERXSTL), nolock_regw_read(priv, ERXNDL), + nolock_regw_read(priv, ERXWRPTL), + nolock_regw_read(priv, ERXRDPTL), + nolock_regb_read(priv, ERXFCON), + nolock_regb_read(priv, EPKTCNT), + nolock_regw_read(priv, MAMXFLL), nolock_regw_read(priv, ETXSTL), + 
nolock_regw_read(priv, ETXNDL), + nolock_regb_read(priv, MACLCON1), + nolock_regb_read(priv, MACLCON2), + nolock_regb_read(priv, MAPHSUP)); mutex_unlock(&priv->lock); } @@ -599,12 +587,13 @@ static u16 rx_packet_start(u16 ptr) static void nolock_rxfifo_init(struct enc28j60_net *priv, u16 start, u16 end) { + struct device *dev = &priv->spi->dev; u16 erxrdpt; if (start > 0x1FFF || end > 0x1FFF || start > end) { if (netif_msg_drv(priv)) - printk(KERN_ERR DRV_NAME ": %s(%d, %d) RXFIFO " - "bad parameters!\n", __func__, start, end); + dev_err(dev, "%s(%d, %d) RXFIFO bad parameters!\n", + __func__, start, end); return; } /* set receive buffer start + end */ @@ -617,10 +606,12 @@ static void nolock_rxfifo_init(struct enc28j60_net *priv, u16 start, u16 end) static void nolock_txfifo_init(struct enc28j60_net *priv, u16 start, u16 end) { + struct device *dev = &priv->spi->dev; + if (start > 0x1FFF || end > 0x1FFF || start > end) { if (netif_msg_drv(priv)) - printk(KERN_ERR DRV_NAME ": %s(%d, %d) TXFIFO " - "bad parameters!\n", __func__, start, end); + dev_err(dev, "%s(%d, %d) TXFIFO bad parameters!\n", + __func__, start, end); return; } /* set transmit buffer start + end */ @@ -630,14 +621,15 @@ static void nolock_txfifo_init(struct enc28j60_net *priv, u16 start, u16 end) /* * Low power mode shrinks power consumption about 100x, so we'd like - * the chip to be in that mode whenever it's inactive. (However, we - * can't stay in lowpower mode during suspend with WOL active.) + * the chip to be in that mode whenever it's inactive. (However, we + * can't stay in low power mode during suspend with WOL active.) */ static void enc28j60_lowpower(struct enc28j60_net *priv, bool is_low) { + struct device *dev = &priv->spi->dev; + if (netif_msg_drv(priv)) - dev_dbg(&priv->spi->dev, "%s power...\n", - is_low ? "low" : "high"); + dev_dbg(dev, "%s power...\n", is_low ? 
"low" : "high"); mutex_lock(&priv->lock); if (is_low) { @@ -656,11 +648,12 @@ static void enc28j60_lowpower(struct enc28j60_net *priv, bool is_low) static int enc28j60_hw_init(struct enc28j60_net *priv) { + struct device *dev = &priv->spi->dev; u8 reg; if (netif_msg_drv(priv)) - printk(KERN_DEBUG DRV_NAME ": %s() - %s\n", __func__, - priv->full_duplex ? "FullDuplex" : "HalfDuplex"); + dev_printk(KERN_DEBUG, dev, "%s() - %s\n", __func__, + priv->full_duplex ? "FullDuplex" : "HalfDuplex"); mutex_lock(&priv->lock); /* first reset the chip */ @@ -682,15 +675,15 @@ static int enc28j60_hw_init(struct enc28j60_net *priv) /* * Check the RevID. * If it's 0x00 or 0xFF probably the enc28j60 is not mounted or - * damaged + * damaged. */ reg = locked_regb_read(priv, EREVID); if (netif_msg_drv(priv)) - printk(KERN_INFO DRV_NAME ": chip RevID: 0x%02x\n", reg); + dev_info(dev, "chip RevID: 0x%02x\n", reg); if (reg == 0x00 || reg == 0xff) { if (netif_msg_drv(priv)) - printk(KERN_DEBUG DRV_NAME ": %s() Invalid RevId %d\n", - __func__, reg); + dev_printk(KERN_DEBUG, dev, "%s() Invalid RevId %d\n", + __func__, reg); return 0; } @@ -723,7 +716,7 @@ static int enc28j60_hw_init(struct enc28j60_net *priv) /* * MACLCON1 (default) * MACLCON2 (default) - * Set the maximum packet size which the controller will accept + * Set the maximum packet size which the controller will accept. 
*/ locked_regw_write(priv, MAMXFLL, MAX_FRAMELEN); @@ -750,10 +743,12 @@ static int enc28j60_hw_init(struct enc28j60_net *priv) static void enc28j60_hw_enable(struct enc28j60_net *priv) { + struct device *dev = &priv->spi->dev; + /* enable interrupts */ if (netif_msg_hw(priv)) - printk(KERN_DEBUG DRV_NAME ": %s() enabling interrupts.\n", - __func__); + dev_printk(KERN_DEBUG, dev, "%s() enabling interrupts.\n", + __func__); enc28j60_phy_write(priv, PHIE, PHIE_PGEIE | PHIE_PLNKIE); @@ -772,7 +767,7 @@ static void enc28j60_hw_enable(struct enc28j60_net *priv) static void enc28j60_hw_disable(struct enc28j60_net *priv) { mutex_lock(&priv->lock); - /* disable interrutps and packet reception */ + /* disable interrupts and packet reception */ nolock_regb_write(priv, EIE, 0x00); nolock_reg_bfclr(priv, ECON1, ECON1_RXEN); priv->hw_enable = false; @@ -793,14 +788,12 @@ enc28j60_setlink(struct net_device *ndev, u8 autoneg, u16 speed, u8 duplex) priv->full_duplex = (duplex == DUPLEX_FULL); else { if (netif_msg_link(priv)) - dev_warn(&ndev->dev, - "unsupported link setting\n"); + netdev_warn(ndev, "unsupported link setting\n"); ret = -EOPNOTSUPP; } } else { if (netif_msg_link(priv)) - dev_warn(&ndev->dev, "Warning: hw must be disabled " - "to set link mode\n"); + netdev_warn(ndev, "Warning: hw must be disabled to set link mode\n"); ret = -EBUSY; } return ret; @@ -811,21 +804,23 @@ enc28j60_setlink(struct net_device *ndev, u8 autoneg, u16 speed, u8 duplex) */ static void enc28j60_read_tsv(struct enc28j60_net *priv, u8 tsv[TSV_SIZE]) { + struct device *dev = &priv->spi->dev; int endptr; endptr = locked_regw_read(priv, ETXNDL); if (netif_msg_hw(priv)) - printk(KERN_DEBUG DRV_NAME ": reading TSV at addr:0x%04x\n", - endptr + 1); + dev_printk(KERN_DEBUG, dev, "reading TSV at addr:0x%04x\n", + endptr + 1); enc28j60_mem_read(priv, endptr + 1, TSV_SIZE, tsv); } static void enc28j60_dump_tsv(struct enc28j60_net *priv, const char *msg, - u8 tsv[TSV_SIZE]) + u8 tsv[TSV_SIZE]) { + struct 
device *dev = &priv->spi->dev; u16 tmp1, tmp2; - printk(KERN_DEBUG DRV_NAME ": %s - TSV:\n", msg); + dev_printk(KERN_DEBUG, dev, "%s - TSV:\n", msg); tmp1 = tsv[1]; tmp1 <<= 8; tmp1 |= tsv[0]; @@ -834,30 +829,32 @@ static void enc28j60_dump_tsv(struct enc28j60_net *priv, const char *msg, tmp2 <<= 8; tmp2 |= tsv[4]; - printk(KERN_DEBUG DRV_NAME ": ByteCount: %d, CollisionCount: %d," - " TotByteOnWire: %d\n", tmp1, tsv[2] & 0x0f, tmp2); - printk(KERN_DEBUG DRV_NAME ": TxDone: %d, CRCErr:%d, LenChkErr: %d," - " LenOutOfRange: %d\n", TSV_GETBIT(tsv, TSV_TXDONE), - TSV_GETBIT(tsv, TSV_TXCRCERROR), - TSV_GETBIT(tsv, TSV_TXLENCHKERROR), - TSV_GETBIT(tsv, TSV_TXLENOUTOFRANGE)); - printk(KERN_DEBUG DRV_NAME ": Multicast: %d, Broadcast: %d, " - "PacketDefer: %d, ExDefer: %d\n", - TSV_GETBIT(tsv, TSV_TXMULTICAST), - TSV_GETBIT(tsv, TSV_TXBROADCAST), - TSV_GETBIT(tsv, TSV_TXPACKETDEFER), - TSV_GETBIT(tsv, TSV_TXEXDEFER)); - printk(KERN_DEBUG DRV_NAME ": ExCollision: %d, LateCollision: %d, " - "Giant: %d, Underrun: %d\n", - TSV_GETBIT(tsv, TSV_TXEXCOLLISION), - TSV_GETBIT(tsv, TSV_TXLATECOLLISION), - TSV_GETBIT(tsv, TSV_TXGIANT), TSV_GETBIT(tsv, TSV_TXUNDERRUN)); - printk(KERN_DEBUG DRV_NAME ": ControlFrame: %d, PauseFrame: %d, " - "BackPressApp: %d, VLanTagFrame: %d\n", - TSV_GETBIT(tsv, TSV_TXCONTROLFRAME), - TSV_GETBIT(tsv, TSV_TXPAUSEFRAME), - TSV_GETBIT(tsv, TSV_BACKPRESSUREAPP), - TSV_GETBIT(tsv, TSV_TXVLANTAGFRAME)); + dev_printk(KERN_DEBUG, dev, + "ByteCount: %d, CollisionCount: %d, TotByteOnWire: %d\n", + tmp1, tsv[2] & 0x0f, tmp2); + dev_printk(KERN_DEBUG, dev, + "TxDone: %d, CRCErr:%d, LenChkErr: %d, LenOutOfRange: %d\n", + TSV_GETBIT(tsv, TSV_TXDONE), + TSV_GETBIT(tsv, TSV_TXCRCERROR), + TSV_GETBIT(tsv, TSV_TXLENCHKERROR), + TSV_GETBIT(tsv, TSV_TXLENOUTOFRANGE)); + dev_printk(KERN_DEBUG, dev, + "Multicast: %d, Broadcast: %d, PacketDefer: %d, ExDefer: %d\n", + TSV_GETBIT(tsv, TSV_TXMULTICAST), + TSV_GETBIT(tsv, TSV_TXBROADCAST), + TSV_GETBIT(tsv, TSV_TXPACKETDEFER), 
+ TSV_GETBIT(tsv, TSV_TXEXDEFER)); + dev_printk(KERN_DEBUG, dev, + "ExCollision: %d, LateCollision: %d, Giant: %d, Underrun: %d\n", + TSV_GETBIT(tsv, TSV_TXEXCOLLISION), + TSV_GETBIT(tsv, TSV_TXLATECOLLISION), + TSV_GETBIT(tsv, TSV_TXGIANT), TSV_GETBIT(tsv, TSV_TXUNDERRUN)); + dev_printk(KERN_DEBUG, dev, + "ControlFrame: %d, PauseFrame: %d, BackPressApp: %d, VLanTagFrame: %d\n", + TSV_GETBIT(tsv, TSV_TXCONTROLFRAME), + TSV_GETBIT(tsv, TSV_TXPAUSEFRAME), + TSV_GETBIT(tsv, TSV_BACKPRESSUREAPP), + TSV_GETBIT(tsv, TSV_TXVLANTAGFRAME)); } /* @@ -866,27 +863,29 @@ static void enc28j60_dump_tsv(struct enc28j60_net *priv, const char *msg, static void enc28j60_dump_rsv(struct enc28j60_net *priv, const char *msg, u16 pk_ptr, int len, u16 sts) { - printk(KERN_DEBUG DRV_NAME ": %s - NextPk: 0x%04x - RSV:\n", - msg, pk_ptr); - printk(KERN_DEBUG DRV_NAME ": ByteCount: %d, DribbleNibble: %d\n", len, - RSV_GETBIT(sts, RSV_DRIBBLENIBBLE)); - printk(KERN_DEBUG DRV_NAME ": RxOK: %d, CRCErr:%d, LenChkErr: %d," - " LenOutOfRange: %d\n", RSV_GETBIT(sts, RSV_RXOK), - RSV_GETBIT(sts, RSV_CRCERROR), - RSV_GETBIT(sts, RSV_LENCHECKERR), - RSV_GETBIT(sts, RSV_LENOUTOFRANGE)); - printk(KERN_DEBUG DRV_NAME ": Multicast: %d, Broadcast: %d, " - "LongDropEvent: %d, CarrierEvent: %d\n", - RSV_GETBIT(sts, RSV_RXMULTICAST), - RSV_GETBIT(sts, RSV_RXBROADCAST), - RSV_GETBIT(sts, RSV_RXLONGEVDROPEV), - RSV_GETBIT(sts, RSV_CARRIEREV)); - printk(KERN_DEBUG DRV_NAME ": ControlFrame: %d, PauseFrame: %d," - " UnknownOp: %d, VLanTagFrame: %d\n", - RSV_GETBIT(sts, RSV_RXCONTROLFRAME), - RSV_GETBIT(sts, RSV_RXPAUSEFRAME), - RSV_GETBIT(sts, RSV_RXUNKNOWNOPCODE), - RSV_GETBIT(sts, RSV_RXTYPEVLAN)); + struct device *dev = &priv->spi->dev; + + dev_printk(KERN_DEBUG, dev, "%s - NextPk: 0x%04x - RSV:\n", msg, pk_ptr); + dev_printk(KERN_DEBUG, dev, "ByteCount: %d, DribbleNibble: %d\n", + len, RSV_GETBIT(sts, RSV_DRIBBLENIBBLE)); + dev_printk(KERN_DEBUG, dev, + "RxOK: %d, CRCErr:%d, LenChkErr: %d, LenOutOfRange: %d\n", 
+ RSV_GETBIT(sts, RSV_RXOK), + RSV_GETBIT(sts, RSV_CRCERROR), + RSV_GETBIT(sts, RSV_LENCHECKERR), + RSV_GETBIT(sts, RSV_LENOUTOFRANGE)); + dev_printk(KERN_DEBUG, dev, + "Multicast: %d, Broadcast: %d, LongDropEvent: %d, CarrierEvent: %d\n", + RSV_GETBIT(sts, RSV_RXMULTICAST), + RSV_GETBIT(sts, RSV_RXBROADCAST), + RSV_GETBIT(sts, RSV_RXLONGEVDROPEV), + RSV_GETBIT(sts, RSV_CARRIEREV)); + dev_printk(KERN_DEBUG, dev, + "ControlFrame: %d, PauseFrame: %d, UnknownOp: %d, VLanTagFrame: %d\n", + RSV_GETBIT(sts, RSV_RXCONTROLFRAME), + RSV_GETBIT(sts, RSV_RXPAUSEFRAME), + RSV_GETBIT(sts, RSV_RXUNKNOWNOPCODE), + RSV_GETBIT(sts, RSV_RXTYPEVLAN)); } static void dump_packet(const char *msg, int len, const char *data) @@ -904,20 +903,20 @@ static void dump_packet(const char *msg, int len, const char *data) static void enc28j60_hw_rx(struct net_device *ndev) { struct enc28j60_net *priv = netdev_priv(ndev); + struct device *dev = &priv->spi->dev; struct sk_buff *skb = NULL; u16 erxrdpt, next_packet, rxstat; u8 rsv[RSV_SIZE]; int len; if (netif_msg_rx_status(priv)) - printk(KERN_DEBUG DRV_NAME ": RX pk_addr:0x%04x\n", - priv->next_pk_ptr); + netdev_printk(KERN_DEBUG, ndev, "RX pk_addr:0x%04x\n", + priv->next_pk_ptr); if (unlikely(priv->next_pk_ptr > RXEND_INIT)) { if (netif_msg_rx_err(priv)) - dev_err(&ndev->dev, - "%s() Invalid packet address!! 0x%04x\n", - __func__, priv->next_pk_ptr); + netdev_err(ndev, "%s() Invalid packet address!! 
0x%04x\n", + __func__, priv->next_pk_ptr); /* packet address corrupted: reset RX logic */ mutex_lock(&priv->lock); nolock_reg_bfclr(priv, ECON1, ECON1_RXEN); @@ -950,7 +949,7 @@ static void enc28j60_hw_rx(struct net_device *ndev) if (!RSV_GETBIT(rxstat, RSV_RXOK) || len > MAX_FRAMELEN) { if (netif_msg_rx_err(priv)) - dev_err(&ndev->dev, "Rx Error (%04x)\n", rxstat); + netdev_err(ndev, "Rx Error (%04x)\n", rxstat); ndev->stats.rx_errors++; if (RSV_GETBIT(rxstat, RSV_CRCERROR)) ndev->stats.rx_crc_errors++; @@ -962,8 +961,7 @@ static void enc28j60_hw_rx(struct net_device *ndev) skb = netdev_alloc_skb(ndev, len + NET_IP_ALIGN); if (!skb) { if (netif_msg_rx_err(priv)) - dev_err(&ndev->dev, - "out of memory for Rx'd frame\n"); + netdev_err(ndev, "out of memory for Rx'd frame\n"); ndev->stats.rx_dropped++; } else { skb_reserve(skb, NET_IP_ALIGN); @@ -983,12 +981,12 @@ static void enc28j60_hw_rx(struct net_device *ndev) /* * Move the RX read pointer to the start of the next * received packet. - * This frees the memory we just read out + * This frees the memory we just read out. 
*/ erxrdpt = erxrdpt_workaround(next_packet, RXSTART_INIT, RXEND_INIT); if (netif_msg_hw(priv)) - printk(KERN_DEBUG DRV_NAME ": %s() ERXRDPT:0x%04x\n", - __func__, erxrdpt); + dev_printk(KERN_DEBUG, dev, "%s() ERXRDPT:0x%04x\n", + __func__, erxrdpt); mutex_lock(&priv->lock); nolock_regw_write(priv, ERXRDPTL, erxrdpt); @@ -997,9 +995,9 @@ static void enc28j60_hw_rx(struct net_device *ndev) u16 reg; reg = nolock_regw_read(priv, ERXRDPTL); if (reg != erxrdpt) - printk(KERN_DEBUG DRV_NAME ": %s() ERXRDPT verify " - "error (0x%04x - 0x%04x)\n", __func__, - reg, erxrdpt); + dev_printk(KERN_DEBUG, dev, + "%s() ERXRDPT verify error (0x%04x - 0x%04x)\n", + __func__, reg, erxrdpt); } #endif priv->next_pk_ptr = next_packet; @@ -1013,6 +1011,7 @@ static void enc28j60_hw_rx(struct net_device *ndev) */ static int enc28j60_get_free_rxfifo(struct enc28j60_net *priv) { + struct net_device *ndev = priv->netdev; int epkcnt, erxst, erxnd, erxwr, erxrd; int free_space; @@ -1035,8 +1034,8 @@ static int enc28j60_get_free_rxfifo(struct enc28j60_net *priv) } mutex_unlock(&priv->lock); if (netif_msg_rx_status(priv)) - printk(KERN_DEBUG DRV_NAME ": %s() free_space = %d\n", - __func__, free_space); + netdev_printk(KERN_DEBUG, ndev, "%s() free_space = %d\n", + __func__, free_space); return free_space; } @@ -1046,24 +1045,25 @@ static int enc28j60_get_free_rxfifo(struct enc28j60_net *priv) static void enc28j60_check_link_status(struct net_device *ndev) { struct enc28j60_net *priv = netdev_priv(ndev); + struct device *dev = &priv->spi->dev; u16 reg; int duplex; reg = enc28j60_phy_read(priv, PHSTAT2); if (netif_msg_hw(priv)) - printk(KERN_DEBUG DRV_NAME ": %s() PHSTAT1: %04x, " - "PHSTAT2: %04x\n", __func__, - enc28j60_phy_read(priv, PHSTAT1), reg); + dev_printk(KERN_DEBUG, dev, + "%s() PHSTAT1: %04x, PHSTAT2: %04x\n", __func__, + enc28j60_phy_read(priv, PHSTAT1), reg); duplex = reg & PHSTAT2_DPXSTAT; if (reg & PHSTAT2_LSTAT) { netif_carrier_on(ndev); if (netif_msg_ifup(priv)) - 
dev_info(&ndev->dev, "link up - %s\n", - duplex ? "Full duplex" : "Half duplex"); + netdev_info(ndev, "link up - %s\n", + duplex ? "Full duplex" : "Half duplex"); } else { if (netif_msg_ifdown(priv)) - dev_info(&ndev->dev, "link down\n"); + netdev_info(ndev, "link down\n"); netif_carrier_off(ndev); } } @@ -1089,8 +1089,8 @@ static void enc28j60_tx_clear(struct net_device *ndev, bool err) /* * RX handler - * ignore PKTIF because is unreliable! (look at the errata datasheet) - * check EPKTCNT is the suggested workaround. + * Ignore PKTIF because is unreliable! (Look at the errata datasheet) + * Check EPKTCNT is the suggested workaround. * We don't need to clear interrupt flag, automatically done when * enc28j60_hw_rx() decrements the packet counter. * Returns how many packet processed. @@ -1102,13 +1102,14 @@ static int enc28j60_rx_interrupt(struct net_device *ndev) pk_counter = locked_regb_read(priv, EPKTCNT); if (pk_counter && netif_msg_intr(priv)) - printk(KERN_DEBUG DRV_NAME ": intRX, pk_cnt: %d\n", pk_counter); + netdev_printk(KERN_DEBUG, ndev, "intRX, pk_cnt: %d\n", + pk_counter); if (pk_counter > priv->max_pk_counter) { /* update statistics */ priv->max_pk_counter = pk_counter; if (netif_msg_rx_status(priv) && priv->max_pk_counter > 1) - printk(KERN_DEBUG DRV_NAME ": RX max_pk_cnt: %d\n", - priv->max_pk_counter); + netdev_printk(KERN_DEBUG, ndev, "RX max_pk_cnt: %d\n", + priv->max_pk_counter); } ret = pk_counter; while (pk_counter-- > 0) @@ -1124,8 +1125,6 @@ static void enc28j60_irq_work_handler(struct work_struct *work) struct net_device *ndev = priv->netdev; int intflags, loop; - if (netif_msg_intr(priv)) - printk(KERN_DEBUG DRV_NAME ": %s() enter\n", __func__); /* disable further interrupts */ locked_reg_bfclr(priv, EIE, EIE_INTIE); @@ -1136,16 +1135,16 @@ static void enc28j60_irq_work_handler(struct work_struct *work) if ((intflags & EIR_DMAIF) != 0) { loop++; if (netif_msg_intr(priv)) - printk(KERN_DEBUG DRV_NAME - ": intDMA(%d)\n", loop); + 
netdev_printk(KERN_DEBUG, ndev, "intDMA(%d)\n", + loop); locked_reg_bfclr(priv, EIR, EIR_DMAIF); } /* LINK changed handler */ if ((intflags & EIR_LINKIF) != 0) { loop++; if (netif_msg_intr(priv)) - printk(KERN_DEBUG DRV_NAME - ": intLINK(%d)\n", loop); + netdev_printk(KERN_DEBUG, ndev, "intLINK(%d)\n", + loop); enc28j60_check_link_status(ndev); /* read PHIR to clear the flag */ enc28j60_phy_read(priv, PHIR); @@ -1156,13 +1155,12 @@ static void enc28j60_irq_work_handler(struct work_struct *work) bool err = false; loop++; if (netif_msg_intr(priv)) - printk(KERN_DEBUG DRV_NAME - ": intTX(%d)\n", loop); + netdev_printk(KERN_DEBUG, ndev, "intTX(%d)\n", + loop); priv->tx_retry_count = 0; if (locked_regb_read(priv, ESTAT) & ESTAT_TXABRT) { if (netif_msg_tx_err(priv)) - dev_err(&ndev->dev, - "Tx Error (aborted)\n"); + netdev_err(ndev, "Tx Error (aborted)\n"); err = true; } if (netif_msg_tx_done(priv)) { @@ -1179,8 +1177,8 @@ static void enc28j60_irq_work_handler(struct work_struct *work) loop++; if (netif_msg_intr(priv)) - printk(KERN_DEBUG DRV_NAME - ": intTXErr(%d)\n", loop); + netdev_printk(KERN_DEBUG, ndev, "intTXErr(%d)\n", + loop); locked_reg_bfclr(priv, ECON1, ECON1_TXRTS); enc28j60_read_tsv(priv, tsv); if (netif_msg_tx_err(priv)) @@ -1194,9 +1192,9 @@ static void enc28j60_irq_work_handler(struct work_struct *work) /* Transmit Late collision check for retransmit */ if (TSV_GETBIT(tsv, TSV_TXLATECOLLISION)) { if (netif_msg_tx_err(priv)) - printk(KERN_DEBUG DRV_NAME - ": LateCollision TXErr (%d)\n", - priv->tx_retry_count); + netdev_printk(KERN_DEBUG, ndev, + "LateCollision TXErr (%d)\n", + priv->tx_retry_count); if (priv->tx_retry_count++ < MAX_TX_RETRYCOUNT) locked_reg_bfset(priv, ECON1, ECON1_TXRTS); @@ -1210,13 +1208,12 @@ static void enc28j60_irq_work_handler(struct work_struct *work) if ((intflags & EIR_RXERIF) != 0) { loop++; if (netif_msg_intr(priv)) - printk(KERN_DEBUG DRV_NAME - ": intRXErr(%d)\n", loop); + netdev_printk(KERN_DEBUG, ndev, "intRXErr(%d)\n", + 
loop); /* Check free FIFO space to flag RX overrun */ if (enc28j60_get_free_rxfifo(priv) <= 0) { if (netif_msg_rx_err(priv)) - printk(KERN_DEBUG DRV_NAME - ": RX Overrun\n"); + netdev_printk(KERN_DEBUG, ndev, "RX Overrun\n"); ndev->stats.rx_dropped++; } locked_reg_bfclr(priv, EIR, EIR_RXERIF); @@ -1228,8 +1225,6 @@ static void enc28j60_irq_work_handler(struct work_struct *work) /* re-enable interrupts */ locked_reg_bfset(priv, EIE, EIE_INTIE); - if (netif_msg_intr(priv)) - printk(KERN_DEBUG DRV_NAME ": %s() exit\n", __func__); } /* @@ -1239,11 +1234,13 @@ static void enc28j60_irq_work_handler(struct work_struct *work) */ static void enc28j60_hw_tx(struct enc28j60_net *priv) { + struct net_device *ndev = priv->netdev; + BUG_ON(!priv->tx_skb); if (netif_msg_tx_queued(priv)) - printk(KERN_DEBUG DRV_NAME - ": Tx Packet Len:%d\n", priv->tx_skb->len); + netdev_printk(KERN_DEBUG, ndev, "Tx Packet Len:%d\n", + priv->tx_skb->len); if (netif_msg_pktdata(priv)) dump_packet(__func__, @@ -1253,6 +1250,7 @@ static void enc28j60_hw_tx(struct enc28j60_net *priv) #ifdef CONFIG_ENC28J60_WRITEVERIFY /* readback and verify written data */ if (netif_msg_drv(priv)) { + struct device *dev = &priv->spi->dev; int test_len, k; u8 test_buf[64]; /* limit the test to the first 64 bytes */ int okflag; @@ -1266,16 +1264,14 @@ static void enc28j60_hw_tx(struct enc28j60_net *priv) okflag = 1; for (k = 0; k < test_len; k++) { if (priv->tx_skb->data[k] != test_buf[k]) { - printk(KERN_DEBUG DRV_NAME - ": Error, %d location differ: " - "0x%02x-0x%02x\n", k, - priv->tx_skb->data[k], test_buf[k]); + dev_printk(KERN_DEBUG, dev, + "Error, %d location differ: 0x%02x-0x%02x\n", + k, priv->tx_skb->data[k], test_buf[k]); okflag = 0; } } if (!okflag) - printk(KERN_DEBUG DRV_NAME ": Tx write buffer, " - "verify ERROR!\n"); + dev_printk(KERN_DEBUG, dev, "Tx write buffer, verify ERROR!\n"); } #endif /* set TX request flag */ @@ -1287,14 +1283,11 @@ static netdev_tx_t enc28j60_send_packet(struct sk_buff *skb, { 
struct enc28j60_net *priv = netdev_priv(dev); - if (netif_msg_tx_queued(priv)) - printk(KERN_DEBUG DRV_NAME ": %s() enter\n", __func__); - /* If some error occurs while trying to transmit this * packet, you should return '1' from this function. * In such a case you _may not_ do anything to the * SKB, it is still owned by the network queueing - * layer when an error is returned. This means you + * layer when an error is returned. This means you * may not modify any SKB fields, you may not free * the SKB, etc. */ @@ -1337,7 +1330,7 @@ static void enc28j60_tx_timeout(struct net_device *ndev) struct enc28j60_net *priv = netdev_priv(ndev); if (netif_msg_timer(priv)) - dev_err(&ndev->dev, DRV_NAME " tx timeout\n"); + netdev_err(ndev, "tx timeout\n"); ndev->stats.tx_errors++; /* can't restart safely under softirq */ @@ -1356,13 +1349,9 @@ static int enc28j60_net_open(struct net_device *dev) { struct enc28j60_net *priv = netdev_priv(dev); - if (netif_msg_drv(priv)) - printk(KERN_DEBUG DRV_NAME ": %s() enter\n", __func__); - if (!is_valid_ether_addr(dev->dev_addr)) { if (netif_msg_ifup(priv)) - dev_err(&dev->dev, "invalid MAC address %pM\n", - dev->dev_addr); + netdev_err(dev, "invalid MAC address %pM\n", dev->dev_addr); return -EADDRNOTAVAIL; } /* Reset the hardware here (and take it out of low power mode) */ @@ -1370,7 +1359,7 @@ static int enc28j60_net_open(struct net_device *dev) enc28j60_hw_disable(priv); if (!enc28j60_hw_init(priv)) { if (netif_msg_ifup(priv)) - dev_err(&dev->dev, "hw_reset() failed\n"); + netdev_err(dev, "hw_reset() failed\n"); return -EINVAL; } /* Update the MAC address (in case user has changed it) */ @@ -1392,9 +1381,6 @@ static int enc28j60_net_close(struct net_device *dev) { struct enc28j60_net *priv = netdev_priv(dev); - if (netif_msg_drv(priv)) - printk(KERN_DEBUG DRV_NAME ": %s() enter\n", __func__); - enc28j60_hw_disable(priv); enc28j60_lowpower(priv, true); netif_stop_queue(dev); @@ -1415,16 +1401,16 @@ static void 
enc28j60_set_multicast_list(struct net_device *dev) if (dev->flags & IFF_PROMISC) { if (netif_msg_link(priv)) - dev_info(&dev->dev, "promiscuous mode\n"); + netdev_info(dev, "promiscuous mode\n"); priv->rxfilter = RXFILTER_PROMISC; } else if ((dev->flags & IFF_ALLMULTI) || !netdev_mc_empty(dev)) { if (netif_msg_link(priv)) - dev_info(&dev->dev, "%smulticast mode\n", - (dev->flags & IFF_ALLMULTI) ? "all-" : ""); + netdev_info(dev, "%smulticast mode\n", + (dev->flags & IFF_ALLMULTI) ? "all-" : ""); priv->rxfilter = RXFILTER_MULTI; } else { if (netif_msg_link(priv)) - dev_info(&dev->dev, "normal mode\n"); + netdev_info(dev, "normal mode\n"); priv->rxfilter = RXFILTER_NORMAL; } @@ -1436,20 +1422,21 @@ static void enc28j60_setrx_work_handler(struct work_struct *work) { struct enc28j60_net *priv = container_of(work, struct enc28j60_net, setrx_work); + struct device *dev = &priv->spi->dev; if (priv->rxfilter == RXFILTER_PROMISC) { if (netif_msg_drv(priv)) - printk(KERN_DEBUG DRV_NAME ": promiscuous mode\n"); + dev_printk(KERN_DEBUG, dev, "promiscuous mode\n"); locked_regb_write(priv, ERXFCON, 0x00); } else if (priv->rxfilter == RXFILTER_MULTI) { if (netif_msg_drv(priv)) - printk(KERN_DEBUG DRV_NAME ": multicast mode\n"); + dev_printk(KERN_DEBUG, dev, "multicast mode\n"); locked_regb_write(priv, ERXFCON, ERXFCON_UCEN | ERXFCON_CRCEN | ERXFCON_BCEN | ERXFCON_MCEN); } else { if (netif_msg_drv(priv)) - printk(KERN_DEBUG DRV_NAME ": normal mode\n"); + dev_printk(KERN_DEBUG, dev, "normal mode\n"); locked_regb_write(priv, ERXFCON, ERXFCON_UCEN | ERXFCON_CRCEN | ERXFCON_BCEN); @@ -1468,7 +1455,7 @@ static void enc28j60_restart_work_handler(struct work_struct *work) enc28j60_net_close(ndev); ret = enc28j60_net_open(ndev); if (unlikely(ret)) { - dev_info(&ndev->dev, " could not restart %d\n", ret); + netdev_info(ndev, "could not restart %d\n", ret); dev_close(ndev); } } @@ -1552,14 +1539,13 @@ static const struct net_device_ops enc28j60_netdev_ops = { static int 
enc28j60_probe(struct spi_device *spi) { + unsigned char macaddr[ETH_ALEN]; struct net_device *dev; struct enc28j60_net *priv; - const void *macaddr; int ret = 0; if (netif_msg_drv(&debug)) - dev_info(&spi->dev, DRV_NAME " Ethernet driver %s loaded\n", - DRV_VERSION); + dev_info(&spi->dev, "Ethernet driver %s loaded\n", DRV_VERSION); dev = alloc_etherdev(sizeof(struct enc28j60_net)); if (!dev) { @@ -1570,8 +1556,7 @@ static int enc28j60_probe(struct spi_device *spi) priv->netdev = dev; /* priv to netdev reference */ priv->spi = spi; /* priv to spi reference */ - priv->msg_enable = netif_msg_init(debug.msg_enable, - ENC28J60_MSG_DEFAULT); + priv->msg_enable = netif_msg_init(debug.msg_enable, ENC28J60_MSG_DEFAULT); mutex_init(&priv->lock); INIT_WORK(&priv->tx_work, enc28j60_tx_work_handler); INIT_WORK(&priv->setrx_work, enc28j60_setrx_work_handler); @@ -1582,13 +1567,12 @@ static int enc28j60_probe(struct spi_device *spi) if (!enc28j60_chipset_init(dev)) { if (netif_msg_probe(priv)) - dev_info(&spi->dev, DRV_NAME " chip not found\n"); + dev_info(&spi->dev, "chip not found\n"); ret = -EIO; goto error_irq; } - macaddr = of_get_mac_address(spi->dev.of_node); - if (macaddr) + if (device_get_mac_address(&spi->dev, macaddr, sizeof(macaddr))) ether_addr_copy(dev->dev_addr, macaddr); else eth_hw_addr_random(dev); @@ -1600,8 +1584,8 @@ static int enc28j60_probe(struct spi_device *spi) ret = request_irq(spi->irq, enc28j60_irq, 0, DRV_NAME, priv); if (ret < 0) { if (netif_msg_probe(priv)) - dev_err(&spi->dev, DRV_NAME ": request irq %d failed " - "(ret = %d)\n", spi->irq, ret); + dev_err(&spi->dev, "request irq %d failed (ret = %d)\n", + spi->irq, ret); goto error_irq; } @@ -1616,11 +1600,10 @@ static int enc28j60_probe(struct spi_device *spi) ret = register_netdev(dev); if (ret) { if (netif_msg_probe(priv)) - dev_err(&spi->dev, "register netdev " DRV_NAME - " failed (ret = %d)\n", ret); + dev_err(&spi->dev, "register netdev failed (ret = %d)\n", + ret); goto error_register; } 
- dev_info(&dev->dev, DRV_NAME " driver registered\n"); return 0; @@ -1636,9 +1619,6 @@ static int enc28j60_remove(struct spi_device *spi) { struct enc28j60_net *priv = spi_get_drvdata(spi); - if (netif_msg_drv(priv)) - printk(KERN_DEBUG DRV_NAME ": remove\n"); - unregister_netdev(priv->netdev); free_irq(spi->irq, priv); free_netdev(priv->netdev); @@ -1660,22 +1640,7 @@ static struct spi_driver enc28j60_driver = { .probe = enc28j60_probe, .remove = enc28j60_remove, }; - -static int __init enc28j60_init(void) -{ - msec20_to_jiffies = msecs_to_jiffies(20); - - return spi_register_driver(&enc28j60_driver); -} - -module_init(enc28j60_init); - -static void __exit enc28j60_exit(void) -{ - spi_unregister_driver(&enc28j60_driver); -} - -module_exit(enc28j60_exit); +module_spi_driver(enc28j60_driver); MODULE_DESCRIPTION(DRV_NAME " ethernet driver"); MODULE_AUTHOR("Claudio Lanconelli <lanconelli.claudio@eptar.com>"); diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig index 549898d5d450..f0d0e09f60e2 100644 --- a/drivers/net/ethernet/netronome/Kconfig +++ b/drivers/net/ethernet/netronome/Kconfig @@ -19,6 +19,7 @@ config NFP tristate "Netronome(R) NFP4000/NFP6000 NIC driver" depends on PCI && PCI_MSI depends on VXLAN || VXLAN=n + select NET_DEVLINK ---help--- This driver supports the Netronome(R) NFP4000/NFP6000 based cards working as a advanced Ethernet NIC. 
It works with both diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index eeda4ed98333..ce54b6c2a9ad 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -161,9 +161,9 @@ nfp_fl_get_tun_from_act_l4_port(struct nfp_app *app, struct nfp_flower_priv *priv = app->priv; switch (tun->key.tp_dst) { - case htons(NFP_FL_VXLAN_PORT): + case htons(IANA_VXLAN_UDP_PORT): return NFP_FL_TUNNEL_VXLAN; - case htons(NFP_FL_GENEVE_PORT): + case htons(GENEVE_UDP_PORT): if (priv->flower_ext_feats & NFP_FL_FEATS_GENEVE) return NFP_FL_TUNNEL_GENEVE; /* FALLTHROUGH */ diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index c0945a5fd1a4..f6ca8dc9cc92 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -34,9 +34,6 @@ struct nfp_app; #define NFP_FL_MASK_REUSE_TIME_NS 40000 #define NFP_FL_MASK_ID_LOCATION 1 -#define NFP_FL_VXLAN_PORT 4789 -#define NFP_FL_GENEVE_PORT 6081 - /* Extra features bitmap. 
*/ #define NFP_FL_FEATS_GENEVE BIT(0) #define NFP_FL_NBI_MTU_SETTING BIT(1) diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 450d7296fd57..9f16920da81d 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -195,7 +195,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, flow_rule_match_enc_opts(rule, &enc_op); switch (enc_ports.key->dst) { - case htons(NFP_FL_VXLAN_PORT): + case htons(IANA_VXLAN_UDP_PORT): *tun_type = NFP_FL_TUNNEL_VXLAN; key_layer |= NFP_FLOWER_LAYER_VXLAN; key_size += sizeof(struct nfp_flower_ipv4_udp_tun); @@ -203,7 +203,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, if (enc_op.key) return -EOPNOTSUPP; break; - case htons(NFP_FL_GENEVE_PORT): + case htons(GENEVE_UDP_PORT): if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE)) return -EOPNOTSUPP; *tun_type = NFP_FL_TUNNEL_GENEVE; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index e9eca99cf493..cb59a18ec6a6 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -362,7 +362,6 @@ int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port) if (ret) return ret; - devlink_port_type_eth_set(&port->dl_port, port->netdev); devlink_port_attrs_set(&port->dl_port, DEVLINK_PORT_FLAVOUR_PHYSICAL, eth_port.label_port, eth_port.is_split, eth_port.label_subport); @@ -377,6 +376,16 @@ void nfp_devlink_port_unregister(struct nfp_port *port) devlink_port_unregister(&port->dl_port); } +void nfp_devlink_port_type_eth_set(struct nfp_port *port) +{ + devlink_port_type_eth_set(&port->dl_port, port->netdev); +} + +void nfp_devlink_port_type_clear(struct nfp_port *port) +{ + devlink_port_type_clear(&port->dl_port); +} + struct devlink *nfp_devlink_get_devlink(struct net_device *netdev) { struct nfp_app *app; diff --git 
a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 6d1b8816552e..ad2f133bd545 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -3548,7 +3548,7 @@ void nfp_net_info(struct nfp_net *nn) nn->fw_ver.resv, nn->fw_ver.class, nn->fw_ver.major, nn->fw_ver.minor, nn->max_mtu); - nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", nn->cap, nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "", nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "", @@ -3564,7 +3564,6 @@ void nfp_net_info(struct nfp_net *nn) nn->cap & NFP_NET_CFG_CTRL_RSS ? "RSS1 " : "", nn->cap & NFP_NET_CFG_CTRL_RSS2 ? "RSS2 " : "", nn->cap & NFP_NET_CFG_CTRL_CTAG_FILTER ? "CTAG_FILTER " : "", - nn->cap & NFP_NET_CFG_CTRL_L2SWITCH ? "L2SWITCH " : "", nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "", nn->cap & NFP_NET_CFG_CTRL_IRQMOD ? "IRQMOD " : "", nn->cap & NFP_NET_CFG_CTRL_VXLAN ? 
"VXLAN " : "", diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 372adea10e14..f5d564bbb55a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -104,8 +104,6 @@ #define NFP_NET_CFG_CTRL_RINGPRIO (0x1 << 19) /* Ring priorities */ #define NFP_NET_CFG_CTRL_MSIXAUTO (0x1 << 20) /* MSI-X auto-masking */ #define NFP_NET_CFG_CTRL_TXRWB (0x1 << 21) /* Write-back of TX ring*/ -#define NFP_NET_CFG_CTRL_L2SWITCH (0x1 << 22) /* L2 Switch */ -#define NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */ #define NFP_NET_CFG_CTRL_VXLAN (0x1 << 24) /* VXLAN tunnel support */ #define NFP_NET_CFG_CTRL_NVGRE (0x1 << 25) /* NVGRE tunnel support */ #define NFP_NET_CFG_CTRL_BPF (0x1 << 27) /* BPF offload capable */ @@ -130,7 +128,6 @@ #define NFP_NET_CFG_UPDATE_TXRPRIO (0x1 << 3) /* TX Ring prio change */ #define NFP_NET_CFG_UPDATE_RXRPRIO (0x1 << 4) /* RX Ring prio change */ #define NFP_NET_CFG_UPDATE_MSIX (0x1 << 5) /* MSI-X change */ -#define NFP_NET_CFG_UPDATE_L2SWITCH (0x1 << 6) /* Switch changes */ #define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */ #define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */ #define NFP_NET_CFG_UPDATE_VXLAN (0x1 << 9) /* VXLAN port change */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index 08f5fdbd8e41..f35278062476 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -160,6 +160,7 @@ nfp_net_pf_init_vnic(struct nfp_pf *pf, struct nfp_net *nn, unsigned int id) err = nfp_devlink_port_register(pf->app, nn->port); if (err) goto err_dfs_clean; + nfp_devlink_port_type_eth_set(nn->port); } nfp_net_info(nn); @@ -173,8 +174,10 @@ nfp_net_pf_init_vnic(struct nfp_pf *pf, struct nfp_net *nn, unsigned int id) return 0; err_devlink_port_clean: - if 
(nn->port) + if (nn->port) { + nfp_devlink_port_type_clear(nn->port); nfp_devlink_port_unregister(nn->port); + } err_dfs_clean: nfp_net_debugfs_dir_clean(&nn->debugfs_dir); nfp_net_clean(nn); @@ -220,8 +223,10 @@ static void nfp_net_pf_clean_vnic(struct nfp_pf *pf, struct nfp_net *nn) { if (nfp_net_is_data_vnic(nn)) nfp_app_vnic_clean(pf->app, nn); - if (nn->port) + if (nn->port) { + nfp_devlink_port_type_clear(nn->port); nfp_devlink_port_unregister(nn->port); + } nfp_net_debugfs_dir_clean(&nn->debugfs_dir); nfp_net_clean(nn); } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h index 90ae053f5c07..d7fd203bb180 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_port.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h @@ -131,6 +131,8 @@ int nfp_net_refresh_port_table_sync(struct nfp_pf *pf); int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port); void nfp_devlink_port_unregister(struct nfp_port *port); +void nfp_devlink_port_type_eth_set(struct nfp_port *port); +void nfp_devlink_port_type_clear(struct nfp_port *port); /** * Mac stats (0x0000 - 0x0200) diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index a5bf46310f60..55d686ed8cdf 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -1839,7 +1839,7 @@ static void __exit pasemi_mac_cleanup_module(void) pci_unregister_driver(&pasemi_mac_driver); } -int pasemi_mac_init_module(void) +static int pasemi_mac_init_module(void) { int err; diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 43a57ec296fd..512186adab00 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -492,6 +492,9 @@ enum qed_mf_mode_bit { /* Allow DSCP to TC mapping */ QED_MF_DSCP_TO_TC_MAP, + + /* Do not insert a vlan tag with id 0 */ + QED_MF_DONT_ADD_VLAN0_TAG, }; enum qed_ufp_mode { diff 
--git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index 69966dfc6e3d..5c6a276f69ac 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -204,9 +204,7 @@ qed_dcbx_set_params(struct qed_dcbx_results *p_data, else p_data->arr[type].update = DONT_UPDATE_DCB_DSCP; - /* Do not add vlan tag 0 when DCB is enabled and port in UFP/OV mode */ - if ((test_bit(QED_MF_8021Q_TAGGING, &p_hwfn->cdev->mf_bits) || - test_bit(QED_MF_8021AD_TAGGING, &p_hwfn->cdev->mf_bits))) + if (test_bit(QED_MF_DONT_ADD_VLAN0_TAG, &p_hwfn->cdev->mf_bits)) p_data->arr[type].dont_add_vlan0 = true; /* QM reconf data */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 9df8c4b3b54e..195573793352 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -3157,12 +3157,14 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) cdev->mf_bits = BIT(QED_MF_OVLAN_CLSS) | BIT(QED_MF_LLH_PROTO_CLSS) | BIT(QED_MF_UFP_SPECIFIC) | - BIT(QED_MF_8021Q_TAGGING); + BIT(QED_MF_8021Q_TAGGING) | + BIT(QED_MF_DONT_ADD_VLAN0_TAG); break; case NVM_CFG1_GLOB_MF_MODE_BD: cdev->mf_bits = BIT(QED_MF_OVLAN_CLSS) | BIT(QED_MF_LLH_PROTO_CLSS) | - BIT(QED_MF_8021AD_TAGGING); + BIT(QED_MF_8021AD_TAGGING) | + BIT(QED_MF_DONT_ADD_VLAN0_TAG); break; case NVM_CFG1_GLOB_MF_MODE_NPAR1_0: cdev->mf_bits = BIT(QED_MF_LLH_MAC_CLSS) | diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 63a78162cfaf..92fe226980fd 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -498,8 +498,7 @@ struct qede_reload_args { /* Datapath functions definition */ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev); u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - 
select_queue_fallback_t fallback); + struct net_device *sb_dev); netdev_features_t qede_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index b4c8949933f1..f0a2ca23f63a 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -652,9 +652,9 @@ static void qede_get_drvinfo(struct net_device *ndev, { char mfw[ETHTOOL_FWVERS_LEN], storm[ETHTOOL_FWVERS_LEN]; struct qede_dev *edev = netdev_priv(ndev); + char mbi[ETHTOOL_FWVERS_LEN]; strlcpy(info->driver, "qede", sizeof(info->driver)); - strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version)); snprintf(storm, ETHTOOL_FWVERS_LEN, "%d.%d.%d.%d", edev->dev_info.common.fw_major, @@ -668,13 +668,27 @@ static void qede_get_drvinfo(struct net_device *ndev, (edev->dev_info.common.mfw_rev >> 8) & 0xFF, edev->dev_info.common.mfw_rev & 0xFF); - if ((strlen(storm) + strlen(mfw) + strlen("mfw storm ")) < - sizeof(info->fw_version)) { + if ((strlen(storm) + strlen(DRV_MODULE_VERSION) + strlen("[storm] ")) < + sizeof(info->version)) + snprintf(info->version, sizeof(info->version), + "%s [storm %s]", DRV_MODULE_VERSION, storm); + else + snprintf(info->version, sizeof(info->version), + "%s %s", DRV_MODULE_VERSION, storm); + + if (edev->dev_info.common.mbi_version) { + snprintf(mbi, ETHTOOL_FWVERS_LEN, "%d.%d.%d", + (edev->dev_info.common.mbi_version & + QED_MBI_VERSION_2_MASK) >> QED_MBI_VERSION_2_OFFSET, + (edev->dev_info.common.mbi_version & + QED_MBI_VERSION_1_MASK) >> QED_MBI_VERSION_1_OFFSET, + (edev->dev_info.common.mbi_version & + QED_MBI_VERSION_0_MASK) >> QED_MBI_VERSION_0_OFFSET); snprintf(info->fw_version, sizeof(info->fw_version), - "mfw %s storm %s", mfw, storm); + "mbi %s [mfw %s]", mbi, mfw); } else { snprintf(info->fw_version, sizeof(info->fw_version), - "%s %s", mfw, storm); + "mfw %s", mfw); } 
strlcpy(info->bus_info, pci_name(edev->pdev), sizeof(info->bus_info)); diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index 31b046e24565..c342b07e3a93 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -1696,8 +1696,7 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev) } u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct qede_dev *edev = netdev_priv(dev); int total_txq; @@ -1705,7 +1704,7 @@ u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb, total_txq = QEDE_TSS_COUNT(edev) * edev->dev_info.num_tc; return QEDE_TSS_COUNT(edev) ? - fallback(dev, skb, NULL) % total_txq : 0; + netdev_pick_tx(dev, skb, NULL) % total_txq : 0; } /* 8B udp header + 8B base tunnel header + 32B option length */ diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 7562ccbbb39a..a8ca26c2ae0c 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -6267,7 +6267,7 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, */ smp_mb(); if (rtl_tx_slots_avail(tp, MAX_SKB_FRAGS)) - netif_wake_queue(dev); + netif_start_queue(dev); } return NETDEV_TX_OK; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 8154b38c08f7..4f648394e645 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1615,8 +1615,7 @@ drop: } static u16 ravb_select_queue(struct net_device *ndev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { /* If skb needs TX timestamp, it is handled in network control queue */ return (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) ? 
RAVB_NC : diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c index 644e42c181ee..01ea0d6f8819 100644 --- a/drivers/net/ethernet/sun/ldmvsw.c +++ b/drivers/net/ethernet/sun/ldmvsw.c @@ -101,8 +101,7 @@ static struct vnet_port *vsw_tx_port_find(struct sk_buff *skb, } static u16 vsw_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct vnet_port *port = netdev_priv(dev); diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 590172818b92..96b883f965f6 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -234,8 +234,7 @@ static struct vnet_port *vnet_tx_port_find(struct sk_buff *skb, } static u16 vnet_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct vnet *vp = netdev_priv(dev); struct vnet_port *port = __tx_port_find(vp, skb); diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 5583d993480d..c05b1207358d 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -22,8 +22,6 @@ #define GENEVE_NETDEV_VER "0.6" -#define GENEVE_UDP_PORT 6081 - #define GENEVE_N_VID (1u << 24) #define GENEVE_VID_MASK (GENEVE_N_VID - 1) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 7a145172d503..c06e31747288 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1271,20 +1271,17 @@ static const struct genl_ops gtp_genl_ops[] = { { .cmd = GTP_CMD_NEWPDP, .doit = gtp_genl_new_pdp, - .policy = gtp_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = GTP_CMD_DELPDP, .doit = gtp_genl_del_pdp, - .policy = gtp_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = GTP_CMD_GETPDP, .doit = gtp_genl_get_pdp, .dumpit = gtp_genl_dump_pdp, - .policy = gtp_genl_policy, .flags = GENL_ADMIN_PERM, }, }; @@ -1294,6 +1291,7 @@ static struct genl_family 
gtp_genl_family __ro_after_init = { .version = 0, .hdrsize = 0, .maxattr = GTPA_MAX, + .policy = gtp_genl_policy, .netnsok = true, .module = THIS_MODULE, .ops = gtp_genl_ops, diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index cf4897043e83..1a08679f90ce 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -308,7 +308,7 @@ static inline int netvsc_get_tx_queue(struct net_device *ndev, * If a valid queue has already been assigned, then use that. * Otherwise compute tx queue based on hash and the send table. * - * This is basically similar to default (__netdev_pick_tx) with the added step + * This is basically similar to default (netdev_pick_tx) with the added step * of using the host send_table when no other queue has been assigned. * * TODO support XPS - but get_xps_queue not exported @@ -331,8 +331,7 @@ static u16 netvsc_pick_tx(struct net_device *ndev, struct sk_buff *skb) } static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct net_device_context *ndc = netdev_priv(ndev); struct net_device *vf_netdev; @@ -344,10 +343,9 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, const struct net_device_ops *vf_ops = vf_netdev->netdev_ops; if (vf_ops->ndo_select_queue) - txq = vf_ops->ndo_select_queue(vf_netdev, skb, - sb_dev, fallback); + txq = vf_ops->ndo_select_queue(vf_netdev, skb, sb_dev); else - txq = fallback(vf_netdev, skb, NULL); + txq = netdev_pick_tx(vf_netdev, skb, NULL); /* Record the queue selected by VF so that it can be * used for common case where VF has more queues than diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index 3b88846de31b..707285953750 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -598,37 +598,31 @@ static const struct 
nla_policy hwsim_genl_policy[MAC802154_HWSIM_ATTR_MAX + 1] = static const struct genl_ops hwsim_nl_ops[] = { { .cmd = MAC802154_HWSIM_CMD_NEW_RADIO, - .policy = hwsim_genl_policy, .doit = hwsim_new_radio_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_DEL_RADIO, - .policy = hwsim_genl_policy, .doit = hwsim_del_radio_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_GET_RADIO, - .policy = hwsim_genl_policy, .doit = hwsim_get_radio_nl, .dumpit = hwsim_dump_radio_nl, }, { .cmd = MAC802154_HWSIM_CMD_NEW_EDGE, - .policy = hwsim_genl_policy, .doit = hwsim_new_edge_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_DEL_EDGE, - .policy = hwsim_genl_policy, .doit = hwsim_del_edge_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_SET_EDGE, - .policy = hwsim_genl_policy, .doit = hwsim_set_edge_lqi, .flags = GENL_UNS_ADMIN_PERM, }, @@ -638,6 +632,7 @@ static struct genl_family hwsim_genl_family __ro_after_init = { .name = "MAC802154_HWSIM", .version = 1, .maxattr = MAC802154_HWSIM_ATTR_MAX, + .policy = hwsim_genl_policy, .module = THIS_MODULE, .ops = hwsim_nl_ops, .n_ops = ARRAY_SIZE(hwsim_nl_ops), diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 64a982563d59..947c40f112d1 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2637,60 +2637,50 @@ static const struct genl_ops macsec_genl_ops[] = { { .cmd = MACSEC_CMD_GET_TXSC, .dumpit = macsec_dump_txsc, - .policy = macsec_genl_policy, }, { .cmd = MACSEC_CMD_ADD_RXSC, .doit = macsec_add_rxsc, - .policy = macsec_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_DEL_RXSC, .doit = macsec_del_rxsc, - .policy = macsec_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_UPD_RXSC, .doit = macsec_upd_rxsc, - .policy = macsec_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_ADD_TXSA, .doit = macsec_add_txsa, - .policy = macsec_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_DEL_TXSA, .doit = 
macsec_del_txsa, - .policy = macsec_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_UPD_TXSA, .doit = macsec_upd_txsa, - .policy = macsec_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_ADD_RXSA, .doit = macsec_add_rxsa, - .policy = macsec_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_DEL_RXSA, .doit = macsec_del_rxsa, - .policy = macsec_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = MACSEC_CMD_UPD_RXSA, .doit = macsec_upd_rxsa, - .policy = macsec_genl_policy, .flags = GENL_ADMIN_PERM, }, }; @@ -2700,6 +2690,7 @@ static struct genl_family macsec_fam __ro_after_init = { .hdrsize = 0, .version = MACSEC_GENL_VERSION, .maxattr = MACSEC_ATTR_MAX, + .policy = macsec_genl_policy, .netnsok = true, .module = THIS_MODULE, .ops = macsec_genl_ops, diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 0c0f105657d3..4a6be8fab884 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -24,6 +24,7 @@ #include <linux/notifier.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> +#include <linux/net_tstamp.h> #include <linux/ethtool.h> #include <linux/if_arp.h> #include <linux/if_vlan.h> @@ -34,6 +35,7 @@ #include <net/rtnetlink.h> #include <net/xfrm.h> #include <linux/netpoll.h> +#include <linux/phy.h> #define MACVLAN_HASH_BITS 8 #define MACVLAN_HASH_SIZE (1<<MACVLAN_HASH_BITS) @@ -822,6 +824,30 @@ static int macvlan_change_mtu(struct net_device *dev, int new_mtu) return 0; } +static int macvlan_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct net_device *real_dev = macvlan_dev_real_dev(dev); + const struct net_device_ops *ops = real_dev->netdev_ops; + struct ifreq ifrr; + int err = -EOPNOTSUPP; + + strncpy(ifrr.ifr_name, real_dev->name, IFNAMSIZ); + ifrr.ifr_ifru = ifr->ifr_ifru; + + switch (cmd) { + case SIOCSHWTSTAMP: + case SIOCGHWTSTAMP: + if (netif_device_present(real_dev) && ops->ndo_do_ioctl) + err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd); + break; + } + + if (!err) + 
ifr->ifr_ifru = ifrr.ifr_ifru; + + return err; +} + /* * macvlan network devices have devices nesting below it and are a special * "super class" of normal network devices; split their locks off into a @@ -1020,6 +1046,26 @@ static int macvlan_ethtool_get_link_ksettings(struct net_device *dev, return __ethtool_get_link_ksettings(vlan->lowerdev, cmd); } +static int macvlan_ethtool_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct net_device *real_dev = macvlan_dev_real_dev(dev); + const struct ethtool_ops *ops = real_dev->ethtool_ops; + struct phy_device *phydev = real_dev->phydev; + + if (phydev && phydev->drv && phydev->drv->ts_info) { + return phydev->drv->ts_info(phydev, info); + } else if (ops->get_ts_info) { + return ops->get_ts_info(real_dev, info); + } else { + info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + info->phc_index = -1; + } + + return 0; +} + static netdev_features_t macvlan_fix_features(struct net_device *dev, netdev_features_t features) { @@ -1094,6 +1140,7 @@ static const struct ethtool_ops macvlan_ethtool_ops = { .get_link = ethtool_op_get_link, .get_link_ksettings = macvlan_ethtool_get_link_ksettings, .get_drvinfo = macvlan_ethtool_get_drvinfo, + .get_ts_info = macvlan_ethtool_get_ts_info, }; static const struct net_device_ops macvlan_netdev_ops = { @@ -1103,6 +1150,7 @@ static const struct net_device_ops macvlan_netdev_ops = { .ndo_stop = macvlan_stop, .ndo_start_xmit = macvlan_start_xmit, .ndo_change_mtu = macvlan_change_mtu, + .ndo_do_ioctl = macvlan_do_ioctl, .ndo_fix_features = macvlan_fix_features, .ndo_change_rx_flags = macvlan_change_rx_flags, .ndo_set_mac_address = macvlan_set_mac_address, diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c index ed1166adaa2f..b16a1221d19b 100644 --- a/drivers/net/net_failover.c +++ b/drivers/net/net_failover.c @@ -115,8 +115,7 @@ static netdev_tx_t net_failover_start_xmit(struct sk_buff *skb, static u16 
net_failover_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct net_failover_info *nfo_info = netdev_priv(dev); struct net_device *primary_dev; @@ -127,10 +126,9 @@ static u16 net_failover_select_queue(struct net_device *dev, const struct net_device_ops *ops = primary_dev->netdev_ops; if (ops->ndo_select_queue) - txq = ops->ndo_select_queue(primary_dev, skb, - sb_dev, fallback); + txq = ops->ndo_select_queue(primary_dev, skb, sb_dev); else - txq = fallback(primary_dev, skb, NULL); + txq = netdev_pick_tx(primary_dev, skb, NULL); qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 520657945b82..1c66e92c717c 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -273,13 +273,13 @@ config BCM87XX_PHY Currently supports the BCM8706 and BCM8727 10G Ethernet PHYs. config BCM_CYGNUS_PHY - tristate "Broadcom Cygnus SoC internal PHY" - depends on ARCH_BCM_CYGNUS || COMPILE_TEST + tristate "Broadcom Cygnus/Omega SoC internal PHY" + depends on ARCH_BCM_IPROC || COMPILE_TEST depends on MDIO_BCM_IPROC select BCM_NET_PHYLIB ---help--- This PHY driver is for the 1G internal PHYs of the Broadcom - Cygnus Family SoC. + Cygnus and Omega Family SoC. 
Currently supports internal PHY's used in the BCM11300, BCM11320, BCM11350, BCM11360, BCM58300, BCM58302, diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia_main.c index 37218e5d7cc9..ae6a76d3f2fe 100644 --- a/drivers/net/phy/aquantia_main.c +++ b/drivers/net/phy/aquantia_main.c @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/delay.h> +#include <linux/bitfield.h> #include <linux/phy.h> #include "aquantia.h" @@ -22,20 +23,33 @@ #define PHY_ID_AQCS109 0x03a1b5c2 #define PHY_ID_AQR405 0x03a1b4b0 +#define MDIO_PHYXS_VEND_IF_STATUS 0xe812 +#define MDIO_PHYXS_VEND_IF_STATUS_TYPE_MASK GENMASK(7, 3) +#define MDIO_PHYXS_VEND_IF_STATUS_TYPE_KR 0 +#define MDIO_PHYXS_VEND_IF_STATUS_TYPE_XFI 2 +#define MDIO_PHYXS_VEND_IF_STATUS_TYPE_SGMII 6 +#define MDIO_PHYXS_VEND_IF_STATUS_TYPE_OCSGMII 10 + #define MDIO_AN_VEND_PROV 0xc400 #define MDIO_AN_VEND_PROV_1000BASET_FULL BIT(15) #define MDIO_AN_VEND_PROV_1000BASET_HALF BIT(14) +#define MDIO_AN_VEND_PROV_DOWNSHIFT_EN BIT(4) +#define MDIO_AN_VEND_PROV_DOWNSHIFT_MASK GENMASK(3, 0) +#define MDIO_AN_VEND_PROV_DOWNSHIFT_DFLT 4 #define MDIO_AN_TX_VEND_STATUS1 0xc800 -#define MDIO_AN_TX_VEND_STATUS1_10BASET (0x0 << 1) -#define MDIO_AN_TX_VEND_STATUS1_100BASETX (0x1 << 1) -#define MDIO_AN_TX_VEND_STATUS1_1000BASET (0x2 << 1) -#define MDIO_AN_TX_VEND_STATUS1_10GBASET (0x3 << 1) -#define MDIO_AN_TX_VEND_STATUS1_2500BASET (0x4 << 1) -#define MDIO_AN_TX_VEND_STATUS1_5000BASET (0x5 << 1) -#define MDIO_AN_TX_VEND_STATUS1_RATE_MASK (0x7 << 1) +#define MDIO_AN_TX_VEND_STATUS1_RATE_MASK GENMASK(3, 1) +#define MDIO_AN_TX_VEND_STATUS1_10BASET 0 +#define MDIO_AN_TX_VEND_STATUS1_100BASETX 1 +#define MDIO_AN_TX_VEND_STATUS1_1000BASET 2 +#define MDIO_AN_TX_VEND_STATUS1_10GBASET 3 +#define MDIO_AN_TX_VEND_STATUS1_2500BASET 4 +#define MDIO_AN_TX_VEND_STATUS1_5000BASET 5 #define MDIO_AN_TX_VEND_STATUS1_FULL_DUPLEX BIT(0) +#define MDIO_AN_TX_VEND_INT_STATUS1 0xcc00 +#define 
MDIO_AN_TX_VEND_INT_STATUS1_DOWNSHIFT BIT(1) + #define MDIO_AN_TX_VEND_INT_STATUS2 0xcc01 #define MDIO_AN_TX_VEND_INT_MASK2 0xd401 @@ -44,8 +58,30 @@ #define MDIO_AN_RX_LP_STAT1 0xe820 #define MDIO_AN_RX_LP_STAT1_1000BASET_FULL BIT(15) #define MDIO_AN_RX_LP_STAT1_1000BASET_HALF BIT(14) +#define MDIO_AN_RX_LP_STAT1_SHORT_REACH BIT(13) +#define MDIO_AN_RX_LP_STAT1_AQRATE_DOWNSHIFT BIT(12) +#define MDIO_AN_RX_LP_STAT1_AQ_PHY BIT(2) + +#define MDIO_AN_RX_LP_STAT4 0xe823 +#define MDIO_AN_RX_LP_STAT4_FW_MAJOR GENMASK(15, 8) +#define MDIO_AN_RX_LP_STAT4_FW_MINOR GENMASK(7, 0) + +#define MDIO_AN_RX_VEND_STAT3 0xe832 +#define MDIO_AN_RX_VEND_STAT3_AFR BIT(0) /* Vendor specific 1, MDIO_MMD_VEND1 */ +#define VEND1_GLOBAL_FW_ID 0x0020 +#define VEND1_GLOBAL_FW_ID_MAJOR GENMASK(15, 8) +#define VEND1_GLOBAL_FW_ID_MINOR GENMASK(7, 0) + +#define VEND1_GLOBAL_RSVD_STAT1 0xc885 +#define VEND1_GLOBAL_RSVD_STAT1_FW_BUILD_ID GENMASK(7, 4) +#define VEND1_GLOBAL_RSVD_STAT1_PROV_ID GENMASK(3, 0) + +#define VEND1_GLOBAL_RSVD_STAT9 0xc88d +#define VEND1_GLOBAL_RSVD_STAT9_MODE GENMASK(7, 0) +#define VEND1_GLOBAL_RSVD_STAT9_1000BT2 0x23 + #define VEND1_GLOBAL_INT_STD_STATUS 0xfc00 #define VEND1_GLOBAL_INT_VEND_STATUS 0xfc01 @@ -112,41 +148,22 @@ static int aqr_config_aneg(struct phy_device *phydev) static int aqr_config_intr(struct phy_device *phydev) { + bool en = phydev->interrupts == PHY_INTERRUPT_ENABLED; int err; - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { - err = phy_write_mmd(phydev, MDIO_MMD_AN, - MDIO_AN_TX_VEND_INT_MASK2, - MDIO_AN_TX_VEND_INT_MASK2_LINK); - if (err < 0) - return err; - - err = phy_write_mmd(phydev, MDIO_MMD_VEND1, - VEND1_GLOBAL_INT_STD_MASK, - VEND1_GLOBAL_INT_STD_MASK_ALL); - if (err < 0) - return err; - - err = phy_write_mmd(phydev, MDIO_MMD_VEND1, - VEND1_GLOBAL_INT_VEND_MASK, - VEND1_GLOBAL_INT_VEND_MASK_GLOBAL3 | - VEND1_GLOBAL_INT_VEND_MASK_AN); - } else { - err = phy_write_mmd(phydev, MDIO_MMD_AN, - MDIO_AN_TX_VEND_INT_MASK2, 0); - if (err < 0) - 
return err; - - err = phy_write_mmd(phydev, MDIO_MMD_VEND1, - VEND1_GLOBAL_INT_STD_MASK, 0); - if (err < 0) - return err; - - err = phy_write_mmd(phydev, MDIO_MMD_VEND1, - VEND1_GLOBAL_INT_VEND_MASK, 0); - } + err = phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_TX_VEND_INT_MASK2, + en ? MDIO_AN_TX_VEND_INT_MASK2_LINK : 0); + if (err < 0) + return err; + + err = phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_INT_STD_MASK, + en ? VEND1_GLOBAL_INT_STD_MASK_ALL : 0); + if (err < 0) + return err; - return err; + return phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_INT_VEND_MASK, + en ? VEND1_GLOBAL_INT_VEND_MASK_GLOBAL3 | + VEND1_GLOBAL_INT_VEND_MASK_AN : 0); } static int aqr_ack_interrupt(struct phy_device *phydev) @@ -178,13 +195,287 @@ static int aqr_read_status(struct phy_device *phydev) return genphy_c45_read_status(phydev); } +static int aqr107_read_downshift_event(struct phy_device *phydev) +{ + int val; + + val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_TX_VEND_INT_STATUS1); + if (val < 0) + return val; + + return !!(val & MDIO_AN_TX_VEND_INT_STATUS1_DOWNSHIFT); +} + +static int aqr107_read_rate(struct phy_device *phydev) +{ + int val; + + val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_TX_VEND_STATUS1); + if (val < 0) + return val; + + switch (FIELD_GET(MDIO_AN_TX_VEND_STATUS1_RATE_MASK, val)) { + case MDIO_AN_TX_VEND_STATUS1_10BASET: + phydev->speed = SPEED_10; + break; + case MDIO_AN_TX_VEND_STATUS1_100BASETX: + phydev->speed = SPEED_100; + break; + case MDIO_AN_TX_VEND_STATUS1_1000BASET: + phydev->speed = SPEED_1000; + break; + case MDIO_AN_TX_VEND_STATUS1_2500BASET: + phydev->speed = SPEED_2500; + break; + case MDIO_AN_TX_VEND_STATUS1_5000BASET: + phydev->speed = SPEED_5000; + break; + case MDIO_AN_TX_VEND_STATUS1_10GBASET: + phydev->speed = SPEED_10000; + break; + default: + phydev->speed = SPEED_UNKNOWN; + break; + } + + if (val & MDIO_AN_TX_VEND_STATUS1_FULL_DUPLEX) + phydev->duplex = DUPLEX_FULL; + else + phydev->duplex = DUPLEX_HALF; + + return 
0; +} + +static int aqr107_read_status(struct phy_device *phydev) +{ + int val, ret; + + ret = aqr_read_status(phydev); + if (ret) + return ret; + + if (!phydev->link || phydev->autoneg == AUTONEG_DISABLE) + return 0; + + val = phy_read_mmd(phydev, MDIO_MMD_PHYXS, MDIO_PHYXS_VEND_IF_STATUS); + if (val < 0) + return val; + + switch (FIELD_GET(MDIO_PHYXS_VEND_IF_STATUS_TYPE_MASK, val)) { + case MDIO_PHYXS_VEND_IF_STATUS_TYPE_KR: + case MDIO_PHYXS_VEND_IF_STATUS_TYPE_XFI: + phydev->interface = PHY_INTERFACE_MODE_10GKR; + break; + case MDIO_PHYXS_VEND_IF_STATUS_TYPE_SGMII: + phydev->interface = PHY_INTERFACE_MODE_SGMII; + break; + case MDIO_PHYXS_VEND_IF_STATUS_TYPE_OCSGMII: + phydev->interface = PHY_INTERFACE_MODE_2500BASEX; + break; + default: + phydev->interface = PHY_INTERFACE_MODE_NA; + break; + } + + val = aqr107_read_downshift_event(phydev); + if (val <= 0) + return val; + + phydev_warn(phydev, "Downshift occurred! Cabling may be defective.\n"); + + /* Read downshifted rate from vendor register */ + return aqr107_read_rate(phydev); +} + +static int aqr107_get_downshift(struct phy_device *phydev, u8 *data) +{ + int val, cnt, enable; + + val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_VEND_PROV); + if (val < 0) + return val; + + enable = FIELD_GET(MDIO_AN_VEND_PROV_DOWNSHIFT_EN, val); + cnt = FIELD_GET(MDIO_AN_VEND_PROV_DOWNSHIFT_MASK, val); + + *data = enable && cnt ? 
cnt : DOWNSHIFT_DEV_DISABLE; + + return 0; +} + +static int aqr107_set_downshift(struct phy_device *phydev, u8 cnt) +{ + int val = 0; + + if (!FIELD_FIT(MDIO_AN_VEND_PROV_DOWNSHIFT_MASK, cnt)) + return -E2BIG; + + if (cnt != DOWNSHIFT_DEV_DISABLE) { + val = MDIO_AN_VEND_PROV_DOWNSHIFT_EN; + val |= FIELD_PREP(MDIO_AN_VEND_PROV_DOWNSHIFT_MASK, cnt); + } + + return phy_modify_mmd(phydev, MDIO_MMD_AN, MDIO_AN_VEND_PROV, + MDIO_AN_VEND_PROV_DOWNSHIFT_EN | + MDIO_AN_VEND_PROV_DOWNSHIFT_MASK, val); +} + +static int aqr107_get_tunable(struct phy_device *phydev, + struct ethtool_tunable *tuna, void *data) +{ + switch (tuna->id) { + case ETHTOOL_PHY_DOWNSHIFT: + return aqr107_get_downshift(phydev, data); + default: + return -EOPNOTSUPP; + } +} + +static int aqr107_set_tunable(struct phy_device *phydev, + struct ethtool_tunable *tuna, const void *data) +{ + switch (tuna->id) { + case ETHTOOL_PHY_DOWNSHIFT: + return aqr107_set_downshift(phydev, *(const u8 *)data); + default: + return -EOPNOTSUPP; + } +} + +/* If we configure settings whilst firmware is still initializing the chip, + * then these settings may be overwritten. Therefore make sure chip + * initialization has completed. Use presence of the firmware ID as + * indicator for initialization having completed. + * The chip also provides a "reset completed" bit, but it's cleared after + * read. Therefore function would time out if called again. + */ +static int aqr107_wait_reset_complete(struct phy_device *phydev) +{ + int val, retries = 100; + + do { + val = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_FW_ID); + if (val < 0) + return val; + msleep(20); + } while (!val && --retries); + + return val ? 
0 : -ETIMEDOUT; +} + +static void aqr107_chip_info(struct phy_device *phydev) +{ + u8 fw_major, fw_minor, build_id, prov_id; + int val; + + val = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_FW_ID); + if (val < 0) + return; + + fw_major = FIELD_GET(VEND1_GLOBAL_FW_ID_MAJOR, val); + fw_minor = FIELD_GET(VEND1_GLOBAL_FW_ID_MINOR, val); + + val = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_RSVD_STAT1); + if (val < 0) + return; + + build_id = FIELD_GET(VEND1_GLOBAL_RSVD_STAT1_FW_BUILD_ID, val); + prov_id = FIELD_GET(VEND1_GLOBAL_RSVD_STAT1_PROV_ID, val); + + phydev_dbg(phydev, "FW %u.%u, Build %u, Provisioning %u\n", + fw_major, fw_minor, build_id, prov_id); +} + +static int aqr107_config_init(struct phy_device *phydev) +{ + int ret; + + /* Check that the PHY interface type is compatible */ + if (phydev->interface != PHY_INTERFACE_MODE_SGMII && + phydev->interface != PHY_INTERFACE_MODE_2500BASEX && + phydev->interface != PHY_INTERFACE_MODE_10GKR) + return -ENODEV; + + ret = aqr107_wait_reset_complete(phydev); + if (!ret) + aqr107_chip_info(phydev); + + /* ensure that a latched downshift event is cleared */ + aqr107_read_downshift_event(phydev); + + return aqr107_set_downshift(phydev, MDIO_AN_VEND_PROV_DOWNSHIFT_DFLT); +} + static int aqcs109_config_init(struct phy_device *phydev) { + int ret; + + /* Check that the PHY interface type is compatible */ + if (phydev->interface != PHY_INTERFACE_MODE_SGMII && + phydev->interface != PHY_INTERFACE_MODE_2500BASEX) + return -ENODEV; + + ret = aqr107_wait_reset_complete(phydev); + if (!ret) + aqr107_chip_info(phydev); + /* AQCS109 belongs to a chip family partially supporting 10G and 5G. * PMA speed ability bits are the same for all members of the family, * AQCS109 however supports speeds up to 2.5G only. 
*/ - return phy_set_max_speed(phydev, SPEED_2500); + ret = phy_set_max_speed(phydev, SPEED_2500); + if (ret) + return ret; + + /* ensure that a latched downshift event is cleared */ + aqr107_read_downshift_event(phydev); + + return aqr107_set_downshift(phydev, MDIO_AN_VEND_PROV_DOWNSHIFT_DFLT); +} + +static void aqr107_link_change_notify(struct phy_device *phydev) +{ + u8 fw_major, fw_minor; + bool downshift, short_reach, afr; + int mode, val; + + if (phydev->state != PHY_RUNNING || phydev->autoneg == AUTONEG_DISABLE) + return; + + val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_RX_LP_STAT1); + /* call failed or link partner is no Aquantia PHY */ + if (val < 0 || !(val & MDIO_AN_RX_LP_STAT1_AQ_PHY)) + return; + + short_reach = val & MDIO_AN_RX_LP_STAT1_SHORT_REACH; + downshift = val & MDIO_AN_RX_LP_STAT1_AQRATE_DOWNSHIFT; + + val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_RX_LP_STAT4); + if (val < 0) + return; + + fw_major = FIELD_GET(MDIO_AN_RX_LP_STAT4_FW_MAJOR, val); + fw_minor = FIELD_GET(MDIO_AN_RX_LP_STAT4_FW_MINOR, val); + + val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_RX_VEND_STAT3); + if (val < 0) + return; + + afr = val & MDIO_AN_RX_VEND_STAT3_AFR; + + phydev_dbg(phydev, "Link partner is Aquantia PHY, FW %u.%u%s%s%s\n", + fw_major, fw_minor, + short_reach ? ", short reach mode" : "", + downshift ? ", fast-retrain downshift advertised" : "", + afr ? 
", fast reframe advertised" : ""); + + val = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_RSVD_STAT9); + if (val < 0) + return; + + mode = FIELD_GET(VEND1_GLOBAL_RSVD_STAT9_MODE, val); + if (mode == VEND1_GLOBAL_RSVD_STAT9_1000BT2) + phydev_info(phydev, "Aquantia 1000Base-T2 mode active\n"); } static struct phy_driver aqr_driver[] = { @@ -234,10 +525,14 @@ static struct phy_driver aqr_driver[] = { .aneg_done = genphy_c45_aneg_done, .get_features = genphy_c45_pma_read_abilities, .probe = aqr_hwmon_probe, + .config_init = aqr107_config_init, .config_aneg = aqr_config_aneg, .config_intr = aqr_config_intr, .ack_interrupt = aqr_ack_interrupt, - .read_status = aqr_read_status, + .read_status = aqr107_read_status, + .get_tunable = aqr107_get_tunable, + .set_tunable = aqr107_set_tunable, + .link_change_notify = aqr107_link_change_notify, }, { PHY_ID_MATCH_MODEL(PHY_ID_AQCS109), @@ -249,7 +544,10 @@ static struct phy_driver aqr_driver[] = { .config_aneg = aqr_config_aneg, .config_intr = aqr_config_intr, .ack_interrupt = aqr_ack_interrupt, - .read_status = aqr_read_status, + .read_status = aqr107_read_status, + .get_tunable = aqr107_get_tunable, + .set_tunable = aqr107_set_tunable, + .link_change_notify = aqr107_link_change_notify, }, { PHY_ID_MATCH_MODEL(PHY_ID_AQR405), diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index f3e96191eb6f..f315ab468a0d 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -324,8 +324,6 @@ static int at803x_config_intr(struct phy_device *phydev) static void at803x_link_change_notify(struct phy_device *phydev) { - struct at803x_priv *priv = phydev->priv; - /* * Conduct a hardware reset for AT8030 every time a link loss is * signalled. This is necessary to circumvent a hardware bug that @@ -333,25 +331,19 @@ static void at803x_link_change_notify(struct phy_device *phydev) * in the FIFO. In such cases, the FIFO enters an error mode it * cannot recover from by software. 
*/ - if (phydev->state == PHY_NOLINK) { - if (phydev->mdio.reset && !priv->phy_reset) { - struct at803x_context context; + if (phydev->state == PHY_NOLINK && phydev->mdio.reset) { + struct at803x_context context; - at803x_context_save(phydev, &context); + at803x_context_save(phydev, &context); - phy_device_reset(phydev, 1); - msleep(1); - phy_device_reset(phydev, 0); - msleep(1); + phy_device_reset(phydev, 1); + msleep(1); + phy_device_reset(phydev, 0); + msleep(1); - at803x_context_restore(phydev, &context); + at803x_context_restore(phydev, &context); - phydev_dbg(phydev, "%s(): phy was reset\n", - __func__); - priv->phy_reset = true; - } - } else { - priv->phy_reset = false; + phydev_dbg(phydev, "%s(): phy was reset\n", __func__); } } diff --git a/drivers/net/phy/bcm-cygnus.c b/drivers/net/phy/bcm-cygnus.c index ab8e12922bf9..625b7cb76285 100644 --- a/drivers/net/phy/bcm-cygnus.c +++ b/drivers/net/phy/bcm-cygnus.c @@ -10,6 +10,10 @@ #include <linux/netdevice.h> #include <linux/phy.h> +struct bcm_omega_phy_priv { + u64 *stats; +}; + /* Broadcom Cygnus Phy specific registers */ #define MII_BCM_CYGNUS_AFE_VDAC_ICTRL_0 0x91E5 /* VDAL Control register */ @@ -121,6 +125,130 @@ static int bcm_cygnus_resume(struct phy_device *phydev) return genphy_config_aneg(phydev); } +static int bcm_omega_config_init(struct phy_device *phydev) +{ + u8 count, rev; + int ret = 0; + + rev = phydev->phy_id & ~phydev->drv->phy_id_mask; + + pr_info_once("%s: %s PHY revision: 0x%02x\n", + phydev_name(phydev), phydev->drv->name, rev); + + /* Dummy read to a register to workaround an issue upon reset where the + * internal inverter may not allow the first MDIO transaction to pass + * the MDIO management controller and make us return 0xffff for such + * reads. 
+ */ + phy_read(phydev, MII_BMSR); + + switch (rev) { + case 0x00: + ret = bcm_phy_28nm_a0b0_afe_config_init(phydev); + break; + default: + break; + } + + if (ret) + return ret; + + ret = bcm_phy_downshift_get(phydev, &count); + if (ret) + return ret; + + /* Only enable EEE if Wirespeed/downshift is disabled */ + ret = bcm_phy_set_eee(phydev, count == DOWNSHIFT_DEV_DISABLE); + if (ret) + return ret; + + return bcm_phy_enable_apd(phydev, true); +} + +static int bcm_omega_resume(struct phy_device *phydev) +{ + int ret; + + /* Re-apply workarounds coming out suspend/resume */ + ret = bcm_omega_config_init(phydev); + if (ret) + return ret; + + /* 28nm Gigabit PHYs come out of reset without any half-duplex + * or "hub" compliant advertised mode, fix that. This does not + * cause any problems with the PHY library since genphy_config_aneg() + * gracefully handles auto-negotiated and forced modes. + */ + return genphy_config_aneg(phydev); +} + +static int bcm_omega_get_tunable(struct phy_device *phydev, + struct ethtool_tunable *tuna, void *data) +{ + switch (tuna->id) { + case ETHTOOL_PHY_DOWNSHIFT: + return bcm_phy_downshift_get(phydev, (u8 *)data); + default: + return -EOPNOTSUPP; + } +} + +static int bcm_omega_set_tunable(struct phy_device *phydev, + struct ethtool_tunable *tuna, + const void *data) +{ + u8 count = *(u8 *)data; + int ret; + + switch (tuna->id) { + case ETHTOOL_PHY_DOWNSHIFT: + ret = bcm_phy_downshift_set(phydev, count); + break; + default: + return -EOPNOTSUPP; + } + + if (ret) + return ret; + + /* Disable EEE advertisement since this prevents the PHY + * from successfully linking up, trigger auto-negotiation restart + * to let the MAC decide what to do. 
+ */ + ret = bcm_phy_set_eee(phydev, count == DOWNSHIFT_DEV_DISABLE); + if (ret) + return ret; + + return genphy_restart_aneg(phydev); +} + +static void bcm_omega_get_phy_stats(struct phy_device *phydev, + struct ethtool_stats *stats, u64 *data) +{ + struct bcm_omega_phy_priv *priv = phydev->priv; + + bcm_phy_get_stats(phydev, priv->stats, stats, data); +} + +static int bcm_omega_probe(struct phy_device *phydev) +{ + struct bcm_omega_phy_priv *priv; + + priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + phydev->priv = priv; + + priv->stats = devm_kcalloc(&phydev->mdio.dev, + bcm_phy_get_sset_count(phydev), sizeof(u64), + GFP_KERNEL); + if (!priv->stats) + return -ENOMEM; + + return 0; +} + static struct phy_driver bcm_cygnus_phy_driver[] = { { .phy_id = PHY_ID_BCM_CYGNUS, @@ -132,10 +260,27 @@ static struct phy_driver bcm_cygnus_phy_driver[] = { .config_intr = bcm_phy_config_intr, .suspend = genphy_suspend, .resume = bcm_cygnus_resume, -} }; +}, { + .phy_id = PHY_ID_BCM_OMEGA, + .phy_id_mask = 0xfffffff0, + .name = "Broadcom Omega Combo GPHY", + .features = PHY_GBIT_FEATURES, + .flags = PHY_IS_INTERNAL, + .config_init = bcm_omega_config_init, + .suspend = genphy_suspend, + .resume = bcm_omega_resume, + .get_tunable = bcm_omega_get_tunable, + .set_tunable = bcm_omega_set_tunable, + .get_sset_count = bcm_phy_get_sset_count, + .get_strings = bcm_phy_get_strings, + .get_stats = bcm_omega_get_phy_stats, + .probe = bcm_omega_probe, +} +}; static struct mdio_device_id __maybe_unused bcm_cygnus_phy_tbl[] = { { PHY_ID_BCM_CYGNUS, 0xfffffff0, }, + { PHY_ID_BCM_OMEGA, 0xfffffff0, }, { } }; MODULE_DEVICE_TABLE(mdio, bcm_cygnus_phy_tbl); diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c index a75642051b8b..e0d3310957ff 100644 --- a/drivers/net/phy/bcm-phy-lib.c +++ b/drivers/net/phy/bcm-phy-lib.c @@ -371,6 +371,58 @@ void bcm_phy_get_stats(struct phy_device *phydev, u64 *shadow, } 
EXPORT_SYMBOL_GPL(bcm_phy_get_stats); +void bcm_phy_r_rc_cal_reset(struct phy_device *phydev) +{ + /* Reset R_CAL/RC_CAL Engine */ + bcm_phy_write_exp_sel(phydev, 0x00b0, 0x0010); + + /* Disable Reset R_AL/RC_CAL Engine */ + bcm_phy_write_exp_sel(phydev, 0x00b0, 0x0000); +} +EXPORT_SYMBOL_GPL(bcm_phy_r_rc_cal_reset); + +int bcm_phy_28nm_a0b0_afe_config_init(struct phy_device *phydev) +{ + /* Increase VCO range to prevent unlocking problem of PLL at low + * temp + */ + bcm_phy_write_misc(phydev, PLL_PLLCTRL_1, 0x0048); + + /* Change Ki to 011 */ + bcm_phy_write_misc(phydev, PLL_PLLCTRL_2, 0x021b); + + /* Disable loading of TVCO buffer to bandgap, set bandgap trim + * to 111 + */ + bcm_phy_write_misc(phydev, PLL_PLLCTRL_4, 0x0e20); + + /* Adjust bias current trim by -3 */ + bcm_phy_write_misc(phydev, DSP_TAP10, 0x690b); + + /* Switch to CORE_BASE1E */ + phy_write(phydev, MII_BRCM_CORE_BASE1E, 0xd); + + bcm_phy_r_rc_cal_reset(phydev); + + /* write AFE_RXCONFIG_0 */ + bcm_phy_write_misc(phydev, AFE_RXCONFIG_0, 0xeb19); + + /* write AFE_RXCONFIG_1 */ + bcm_phy_write_misc(phydev, AFE_RXCONFIG_1, 0x9a3f); + + /* write AFE_RX_LP_COUNTER */ + bcm_phy_write_misc(phydev, AFE_RX_LP_COUNTER, 0x7fc0); + + /* write AFE_HPF_TRIM_OTHERS */ + bcm_phy_write_misc(phydev, AFE_HPF_TRIM_OTHERS, 0x000b); + + /* write AFTE_TX_CONFIG */ + bcm_phy_write_misc(phydev, AFE_TX_CONFIG, 0x0800); + + return 0; +} +EXPORT_SYMBOL_GPL(bcm_phy_28nm_a0b0_afe_config_init); + MODULE_DESCRIPTION("Broadcom PHY Library"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Broadcom Corporation"); diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h index 17faaefcfd60..5ecacb4e64f0 100644 --- a/drivers/net/phy/bcm-phy-lib.h +++ b/drivers/net/phy/bcm-phy-lib.h @@ -9,6 +9,24 @@ #include <linux/brcmphy.h> #include <linux/phy.h> +/* 28nm only register definitions */ +#define MISC_ADDR(base, channel) base, channel + +#define DSP_TAP10 MISC_ADDR(0x0a, 0) +#define PLL_PLLCTRL_1 MISC_ADDR(0x32, 1) +#define 
PLL_PLLCTRL_2 MISC_ADDR(0x32, 2) +#define PLL_PLLCTRL_4 MISC_ADDR(0x33, 0) + +#define AFE_RXCONFIG_0 MISC_ADDR(0x38, 0) +#define AFE_RXCONFIG_1 MISC_ADDR(0x38, 1) +#define AFE_RXCONFIG_2 MISC_ADDR(0x38, 2) +#define AFE_RX_LP_COUNTER MISC_ADDR(0x38, 3) +#define AFE_TX_CONFIG MISC_ADDR(0x39, 0) +#define AFE_VDCA_ICTRL_0 MISC_ADDR(0x39, 1) +#define AFE_VDAC_OTHERS_0 MISC_ADDR(0x39, 3) +#define AFE_HPF_TRIM_OTHERS MISC_ADDR(0x3a, 0) + + int bcm_phy_write_exp(struct phy_device *phydev, u16 reg, u16 val); int bcm_phy_read_exp(struct phy_device *phydev, u16 reg); @@ -45,5 +63,7 @@ int bcm_phy_get_sset_count(struct phy_device *phydev); void bcm_phy_get_strings(struct phy_device *phydev, u8 *data); void bcm_phy_get_stats(struct phy_device *phydev, u64 *shadow, struct ethtool_stats *stats, u64 *data); +void bcm_phy_r_rc_cal_reset(struct phy_device *phydev); +int bcm_phy_28nm_a0b0_afe_config_init(struct phy_device *phydev); #endif /* _LINUX_BCM_PHY_LIB_H */ diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index b8415f8fae14..a75e1b283541 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -37,77 +37,10 @@ #define MII_BCM7XXX_SHD_3_TL4 0x23 #define MII_BCM7XXX_TL4_RST_MSK (BIT(2) | BIT(1)) -/* 28nm only register definitions */ -#define MISC_ADDR(base, channel) base, channel - -#define DSP_TAP10 MISC_ADDR(0x0a, 0) -#define PLL_PLLCTRL_1 MISC_ADDR(0x32, 1) -#define PLL_PLLCTRL_2 MISC_ADDR(0x32, 2) -#define PLL_PLLCTRL_4 MISC_ADDR(0x33, 0) - -#define AFE_RXCONFIG_0 MISC_ADDR(0x38, 0) -#define AFE_RXCONFIG_1 MISC_ADDR(0x38, 1) -#define AFE_RXCONFIG_2 MISC_ADDR(0x38, 2) -#define AFE_RX_LP_COUNTER MISC_ADDR(0x38, 3) -#define AFE_TX_CONFIG MISC_ADDR(0x39, 0) -#define AFE_VDCA_ICTRL_0 MISC_ADDR(0x39, 1) -#define AFE_VDAC_OTHERS_0 MISC_ADDR(0x39, 3) -#define AFE_HPF_TRIM_OTHERS MISC_ADDR(0x3a, 0) - struct bcm7xxx_phy_priv { u64 *stats; }; -static void r_rc_cal_reset(struct phy_device *phydev) -{ - /* Reset R_CAL/RC_CAL Engine */ - 
bcm_phy_write_exp_sel(phydev, 0x00b0, 0x0010); - - /* Disable Reset R_AL/RC_CAL Engine */ - bcm_phy_write_exp_sel(phydev, 0x00b0, 0x0000); -} - -static int bcm7xxx_28nm_b0_afe_config_init(struct phy_device *phydev) -{ - /* Increase VCO range to prevent unlocking problem of PLL at low - * temp - */ - bcm_phy_write_misc(phydev, PLL_PLLCTRL_1, 0x0048); - - /* Change Ki to 011 */ - bcm_phy_write_misc(phydev, PLL_PLLCTRL_2, 0x021b); - - /* Disable loading of TVCO buffer to bandgap, set bandgap trim - * to 111 - */ - bcm_phy_write_misc(phydev, PLL_PLLCTRL_4, 0x0e20); - - /* Adjust bias current trim by -3 */ - bcm_phy_write_misc(phydev, DSP_TAP10, 0x690b); - - /* Switch to CORE_BASE1E */ - phy_write(phydev, MII_BRCM_CORE_BASE1E, 0xd); - - r_rc_cal_reset(phydev); - - /* write AFE_RXCONFIG_0 */ - bcm_phy_write_misc(phydev, AFE_RXCONFIG_0, 0xeb19); - - /* write AFE_RXCONFIG_1 */ - bcm_phy_write_misc(phydev, AFE_RXCONFIG_1, 0x9a3f); - - /* write AFE_RX_LP_COUNTER */ - bcm_phy_write_misc(phydev, AFE_RX_LP_COUNTER, 0x7fc0); - - /* write AFE_HPF_TRIM_OTHERS */ - bcm_phy_write_misc(phydev, AFE_HPF_TRIM_OTHERS, 0x000b); - - /* write AFTE_TX_CONFIG */ - bcm_phy_write_misc(phydev, AFE_TX_CONFIG, 0x0800); - - return 0; -} - static int bcm7xxx_28nm_d0_afe_config_init(struct phy_device *phydev) { /* AFE_RXCONFIG_0 */ @@ -143,7 +76,7 @@ static int bcm7xxx_28nm_d0_afe_config_init(struct phy_device *phydev) bcm_phy_write_misc(phydev, DSP_TAP10, 0x011b); /* Reset R_CAL/RC_CAL engine */ - r_rc_cal_reset(phydev); + bcm_phy_r_rc_cal_reset(phydev); return 0; } @@ -171,7 +104,7 @@ static int bcm7xxx_28nm_e0_plus_afe_config_init(struct phy_device *phydev) bcm_phy_write_misc(phydev, DSP_TAP10, 0x011b); /* Reset R_CAL/RC_CAL engine */ - r_rc_cal_reset(phydev); + bcm_phy_r_rc_cal_reset(phydev); return 0; } @@ -196,7 +129,7 @@ static int bcm7xxx_28nm_a0_patch_afe_config_init(struct phy_device *phydev) /* Enable ffe zero detection for Vitesse interoperability */ bcm_phy_write_misc(phydev, 0x26, 0x2, 
0x0015); - r_rc_cal_reset(phydev); + bcm_phy_r_rc_cal_reset(phydev); return 0; } @@ -227,7 +160,7 @@ static int bcm7xxx_28nm_config_init(struct phy_device *phydev) switch (rev) { case 0xa0: case 0xb0: - ret = bcm7xxx_28nm_b0_afe_config_init(phydev); + ret = bcm_phy_28nm_a0b0_afe_config_init(phydev); break; case 0xd0: ret = bcm7xxx_28nm_d0_afe_config_init(phydev); @@ -657,7 +590,6 @@ static struct phy_driver bcm7xxx_driver[] = { BCM7XXX_28NM_GPHY(PHY_ID_BCM7439, "Broadcom BCM7439"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7439_2, "Broadcom BCM7439 (2)"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7445, "Broadcom BCM7445"), - BCM7XXX_28NM_GPHY(PHY_ID_BCM_OMEGA, "Broadcom Omega Combo GPHY"), BCM7XXX_40NM_EPHY(PHY_ID_BCM7346, "Broadcom BCM7346"), BCM7XXX_40NM_EPHY(PHY_ID_BCM7362, "Broadcom BCM7362"), BCM7XXX_40NM_EPHY(PHY_ID_BCM7425, "Broadcom BCM7425"), diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 3ccba37bd6dd..65350186d514 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -29,6 +29,7 @@ #include <linux/ethtool.h> #include <linux/phy.h> #include <linux/marvell_phy.h> +#include <linux/bitfield.h> #include <linux/of.h> #include <linux/io.h> @@ -91,6 +92,14 @@ #define MII_88E1510_TEMP_SENSOR 0x1b #define MII_88E1510_TEMP_SENSOR_MASK 0xff +#define MII_88E1540_COPPER_CTRL3 0x1a +#define MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_MASK GENMASK(11, 10) +#define MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_00MS 0 +#define MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_10MS 1 +#define MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_20MS 2 +#define MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_40MS 3 +#define MII_88E1540_COPPER_CTRL3_FAST_LINK_DOWN BIT(9) + #define MII_88E6390_MISC_TEST 0x1b #define MII_88E6390_MISC_TEST_SAMPLE_1S 0 #define MII_88E6390_MISC_TEST_SAMPLE_10MS BIT(14) @@ -1025,6 +1034,101 @@ static int m88e1145_config_init(struct phy_device *phydev) return 0; } +static int m88e1540_get_fld(struct phy_device *phydev, u8 *msecs) +{ + int val; + + val = 
phy_read(phydev, MII_88E1540_COPPER_CTRL3); + if (val < 0) + return val; + + if (!(val & MII_88E1540_COPPER_CTRL3_FAST_LINK_DOWN)) { + *msecs = ETHTOOL_PHY_FAST_LINK_DOWN_OFF; + return 0; + } + + val = FIELD_GET(MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_MASK, val); + + switch (val) { + case MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_00MS: + *msecs = 0; + break; + case MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_10MS: + *msecs = 10; + break; + case MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_20MS: + *msecs = 20; + break; + case MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_40MS: + *msecs = 40; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int m88e1540_set_fld(struct phy_device *phydev, const u8 *msecs) +{ + struct ethtool_eee eee; + int val, ret; + + if (*msecs == ETHTOOL_PHY_FAST_LINK_DOWN_OFF) + return phy_clear_bits(phydev, MII_88E1540_COPPER_CTRL3, + MII_88E1540_COPPER_CTRL3_FAST_LINK_DOWN); + + /* According to the Marvell data sheet EEE must be disabled for + * Fast Link Down detection to work properly + */ + ret = phy_ethtool_get_eee(phydev, &eee); + if (!ret && eee.eee_enabled) { + phydev_warn(phydev, "Fast Link Down detection requires EEE to be disabled!\n"); + return -EBUSY; + } + + if (*msecs <= 5) + val = MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_00MS; + else if (*msecs <= 15) + val = MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_10MS; + else if (*msecs <= 30) + val = MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_20MS; + else + val = MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_40MS; + + val = FIELD_PREP(MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_MASK, val); + + ret = phy_modify(phydev, MII_88E1540_COPPER_CTRL3, + MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_MASK, val); + if (ret) + return ret; + + return phy_set_bits(phydev, MII_88E1540_COPPER_CTRL3, + MII_88E1540_COPPER_CTRL3_FAST_LINK_DOWN); +} + +static int m88e1540_get_tunable(struct phy_device *phydev, + struct ethtool_tunable *tuna, void *data) +{ + switch (tuna->id) { + case ETHTOOL_PHY_FAST_LINK_DOWN: + 
return m88e1540_get_fld(phydev, data); + default: + return -EOPNOTSUPP; + } +} + +static int m88e1540_set_tunable(struct phy_device *phydev, + struct ethtool_tunable *tuna, const void *data) +{ + switch (tuna->id) { + case ETHTOOL_PHY_FAST_LINK_DOWN: + return m88e1540_set_fld(phydev, data); + default: + return -EOPNOTSUPP; + } +} + /* The VOD can be out of specification on link up. Poke an * undocumented register, in an undocumented page, with a magic value * to fix this. @@ -2247,6 +2351,8 @@ static struct phy_driver marvell_drivers[] = { .get_sset_count = marvell_get_sset_count, .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, + .get_tunable = m88e1540_get_tunable, + .set_tunable = m88e1540_set_tunable, }, { .phy_id = MARVELL_PHY_ID_88E1545, @@ -2307,6 +2413,8 @@ static struct phy_driver marvell_drivers[] = { .get_sset_count = marvell_get_sset_count, .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, + .get_tunable = m88e1540_get_tunable, + .set_tunable = m88e1540_set_tunable, }, }; diff --git a/drivers/net/phy/mdio-bcm-unimac.c b/drivers/net/phy/mdio-bcm-unimac.c index 8295bc7c8c20..4a28fb29adaa 100644 --- a/drivers/net/phy/mdio-bcm-unimac.c +++ b/drivers/net/phy/mdio-bcm-unimac.c @@ -92,10 +92,7 @@ static int unimac_mdio_poll(void *wait_func_data) usleep_range(1000, 2000); } while (--timeout); - if (!timeout) - return -ETIMEDOUT; - - return 0; + return -ETIMEDOUT; } static int unimac_mdio_read(struct mii_bus *bus, int phy_id, int reg) @@ -292,7 +289,7 @@ static int unimac_mdio_probe(struct platform_device *pdev) platform_set_drvdata(pdev, priv); - dev_info(&pdev->dev, "Broadcom UniMAC MDIO bus at 0x%p\n", priv->base); + dev_info(&pdev->dev, "Broadcom UniMAC MDIO bus\n"); return 0; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 3745220c5c98..5938c5acf3b3 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -891,9 +891,6 @@ void phy_state_machine(struct work_struct *work) old_state = 
phydev->state; - if (phydev->drv && phydev->drv->link_change_notify) - phydev->drv->link_change_notify(phydev); - switch (phydev->state) { case PHY_DOWN: case PHY_READY: @@ -940,10 +937,13 @@ void phy_state_machine(struct work_struct *work) if (err < 0) phy_error(phydev); - if (old_state != phydev->state) + if (old_state != phydev->state) { phydev_dbg(phydev, "PHY state change %s -> %s\n", phy_state_to_str(old_state), phy_state_to_str(phydev->state)); + if (phydev->drv && phydev->drv->link_change_notify) + phydev->drv->link_change_notify(phydev); + } /* Only re-schedule a PHY state machine change if we are polling the * PHY, if PHY_IGNORE_INTERRUPT is set, then we will be moving diff --git a/drivers/net/phy/rockchip.c b/drivers/net/phy/rockchip.c index 95abf7072f32..9053b1d01906 100644 --- a/drivers/net/phy/rockchip.c +++ b/drivers/net/phy/rockchip.c @@ -104,41 +104,14 @@ static int rockchip_integrated_phy_config_init(struct phy_device *phydev) static void rockchip_link_change_notify(struct phy_device *phydev) { - int speed = SPEED_10; - - if (phydev->autoneg == AUTONEG_ENABLE) { - int reg = phy_read(phydev, MII_SPECIAL_CONTROL_STATUS); - - if (reg < 0) { - phydev_err(phydev, "phy_read err: %d.\n", reg); - return; - } - - if (reg & MII_SPEED_100) - speed = SPEED_100; - else if (reg & MII_SPEED_10) - speed = SPEED_10; - } else { - int bmcr = phy_read(phydev, MII_BMCR); - - if (bmcr < 0) { - phydev_err(phydev, "phy_read err: %d.\n", bmcr); - return; - } - - if (bmcr & BMCR_SPEED100) - speed = SPEED_100; - else - speed = SPEED_10; - } - /* * If mode switch happens from 10BT to 100BT, all DSP/AFE * registers are set to default values. So any AFE/DSP * registers have to be re-initialized in this case. 
*/ - if ((phydev->speed == SPEED_10) && (speed == SPEED_100)) { + if (phydev->state == PHY_RUNNING && phydev->speed == SPEED_100) { int ret = rockchip_integrated_phy_analog_init(phydev); + if (ret) phydev_err(phydev, "rockchip_integrated_phy_analog_init err: %d.\n", ret); diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 6ed96fdfd96d..6ad74f898832 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1691,8 +1691,7 @@ static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev) } static u16 team_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { /* * This helper function exists to help dev_pick_tx get the correct @@ -2726,24 +2725,20 @@ static const struct genl_ops team_nl_ops[] = { { .cmd = TEAM_CMD_NOOP, .doit = team_nl_cmd_noop, - .policy = team_nl_policy, }, { .cmd = TEAM_CMD_OPTIONS_SET, .doit = team_nl_cmd_options_set, - .policy = team_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = TEAM_CMD_OPTIONS_GET, .doit = team_nl_cmd_options_get, - .policy = team_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = TEAM_CMD_PORT_LIST_GET, .doit = team_nl_cmd_port_list_get, - .policy = team_nl_policy, .flags = GENL_ADMIN_PERM, }, }; @@ -2756,6 +2751,7 @@ static struct genl_family team_nl_family __ro_after_init = { .name = TEAM_GENL_NAME, .version = TEAM_GENL_VERSION, .maxattr = TEAM_ATTR_MAX, + .policy = team_nl_policy, .netnsok = true, .module = THIS_MODULE, .ops = team_nl_ops, diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e9ca1c088d0b..24d0220b9ba0 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -606,8 +606,7 @@ static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb) } static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct tun_struct *tun = netdev_priv(dev); 
u16 ret; @@ -1043,7 +1042,7 @@ static int tun_net_close(struct net_device *dev) static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb) { #ifdef CONFIG_RPS - if (tun->numqueues == 1 && static_key_false(&rps_needed)) { + if (tun->numqueues == 1 && static_branch_unlikely(&rps_needed)) { /* Select queue was not called for the skbuff, so we extract the * RPS hash and save it into the flow_table here. */ @@ -2873,8 +2872,7 @@ err_free_dev: return err; } -static void tun_get_iff(struct net *net, struct tun_struct *tun, - struct ifreq *ifr) +static void tun_get_iff(struct tun_struct *tun, struct ifreq *ifr) { tun_debug(KERN_INFO, tun, "tun_get_iff\n"); @@ -3103,10 +3101,11 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, tun_debug(KERN_INFO, tun, "tun_chr_ioctl cmd %u\n", cmd); + net = dev_net(tun->dev); ret = 0; switch (cmd) { case TUNGETIFF: - tun_get_iff(current->nsproxy->net_ns, tun, &ifr); + tun_get_iff(tun, &ifr); if (tfile->detached) ifr.ifr_flags |= IFF_DETACH_QUEUE; @@ -3328,6 +3327,13 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, ret = tun_net_change_carrier(tun->dev, (bool)carrier); break; + case TUNGETDEVNETNS: + ret = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + goto unlock; + ret = open_related_ns(&net->ns, get_net_ns); + break; + default: ret = -EINVAL; break; @@ -3457,7 +3463,7 @@ static void tun_chr_show_fdinfo(struct seq_file *m, struct file *file) rtnl_lock(); tun = tun_get(tfile); if (tun) - tun_get_iff(current->nsproxy->net_ns, tun, &ifr); + tun_get_iff(tun, &ifr); rtnl_unlock(); if (tun) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7eb38ea9ba56..1b03c4b6ebff 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1925,7 +1925,7 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev, return 0; } -static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu) +static void virtnet_clean_affinity(struct virtnet_info *vi) { int 
i; @@ -1949,7 +1949,7 @@ static void virtnet_set_affinity(struct virtnet_info *vi) int stride; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { - virtnet_clean_affinity(vi, -1); + virtnet_clean_affinity(vi); return; } @@ -1999,7 +1999,7 @@ static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node) struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info, node); - virtnet_clean_affinity(vi, cpu); + virtnet_clean_affinity(vi); return 0; } @@ -2735,7 +2735,7 @@ static void virtnet_del_vqs(struct virtnet_info *vi) { struct virtio_device *vdev = vi->vdev; - virtnet_clean_affinity(vi, -1); + virtnet_clean_affinity(vi); vdev->config->del_vqs(vdev); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 0838af04d681..4cc7b222859c 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3620,35 +3620,29 @@ done: static const struct genl_ops hwsim_ops[] = { { .cmd = HWSIM_CMD_REGISTER, - .policy = hwsim_genl_policy, .doit = hwsim_register_received_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = HWSIM_CMD_FRAME, - .policy = hwsim_genl_policy, .doit = hwsim_cloned_frame_received_nl, }, { .cmd = HWSIM_CMD_TX_INFO_FRAME, - .policy = hwsim_genl_policy, .doit = hwsim_tx_info_frame_received_nl, }, { .cmd = HWSIM_CMD_NEW_RADIO, - .policy = hwsim_genl_policy, .doit = hwsim_new_radio_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = HWSIM_CMD_DEL_RADIO, - .policy = hwsim_genl_policy, .doit = hwsim_del_radio_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = HWSIM_CMD_GET_RADIO, - .policy = hwsim_genl_policy, .doit = hwsim_get_radio_nl, .dumpit = hwsim_dump_radio_nl, }, @@ -3658,6 +3652,7 @@ static struct genl_family hwsim_genl_family __ro_after_init = { .name = "MAC80211_HWSIM", .version = 1, .maxattr = HWSIM_ATTR_MAX, + .policy = hwsim_genl_policy, .netnsok = true, .module = THIS_MODULE, .ops = hwsim_ops, diff --git a/drivers/net/wireless/marvell/mwifiex/main.c 
b/drivers/net/wireless/marvell/mwifiex/main.c index 20cee5c397fb..f6da8edab7f1 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -1282,8 +1282,7 @@ static struct net_device_stats *mwifiex_get_stats(struct net_device *dev) static u16 mwifiex_netdev_select_wmm_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { skb->priority = cfg80211_classify8021d(skb, NULL); return mwifiex_1d_to_wmm_queue[skb->priority]; diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 6da12518e693..783198844dd7 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -148,8 +148,7 @@ void xenvif_wake_queue(struct xenvif_queue *queue) } static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct xenvif *vif = netdev_priv(dev); unsigned int size = vif->hash.size; @@ -162,7 +161,8 @@ static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb, return 0; if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) - return fallback(dev, skb, NULL) % dev->real_num_tx_queues; + return netdev_pick_tx(dev, skb, NULL) % + dev->real_num_tx_queues; xenvif_set_skb_hash(vif, skb); diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index c914c24f880b..80c30321de41 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -543,8 +543,7 @@ static int xennet_count_skb_slots(struct sk_buff *skb) } static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { unsigned int num_queues = dev->real_num_tx_queues; u32 hash; diff --git a/drivers/staging/rtl8188eu/os_dep/os_intfs.c 
b/drivers/staging/rtl8188eu/os_dep/os_intfs.c index 8dde5a40e253..2c088af44c8b 100644 --- a/drivers/staging/rtl8188eu/os_dep/os_intfs.c +++ b/drivers/staging/rtl8188eu/os_dep/os_intfs.c @@ -245,8 +245,7 @@ static unsigned int rtw_classify8021d(struct sk_buff *skb) } static u16 rtw_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct adapter *padapter = rtw_netdev_priv(dev); struct mlme_priv *pmlmepriv = &padapter->mlmepriv; diff --git a/drivers/staging/rtl8723bs/os_dep/os_intfs.c b/drivers/staging/rtl8723bs/os_dep/os_intfs.c index 143e3f9b31aa..0a20a4e9e19a 100644 --- a/drivers/staging/rtl8723bs/os_dep/os_intfs.c +++ b/drivers/staging/rtl8723bs/os_dep/os_intfs.c @@ -404,8 +404,7 @@ static unsigned int rtw_classify8021d(struct sk_buff *skb) static u16 rtw_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct adapter *padapter = rtw_netdev_priv(dev); struct mlme_priv *pmlmepriv = &padapter->mlmepriv; diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 9704b135a7bc..481d371c4b01 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -442,25 +442,21 @@ static const struct genl_ops tcmu_genl_ops[] = { { .cmd = TCMU_CMD_SET_FEATURES, .flags = GENL_ADMIN_PERM, - .policy = tcmu_attr_policy, .doit = tcmu_genl_set_features, }, { .cmd = TCMU_CMD_ADDED_DEVICE_DONE, .flags = GENL_ADMIN_PERM, - .policy = tcmu_attr_policy, .doit = tcmu_genl_add_dev_done, }, { .cmd = TCMU_CMD_REMOVED_DEVICE_DONE, .flags = GENL_ADMIN_PERM, - .policy = tcmu_attr_policy, .doit = tcmu_genl_rm_dev_done, }, { .cmd = TCMU_CMD_RECONFIG_DEVICE_DONE, .flags = GENL_ADMIN_PERM, - .policy = tcmu_attr_policy, .doit = tcmu_genl_reconfig_dev_done, }, }; @@ -472,6 +468,7 @@ static struct genl_family tcmu_genl_family __ro_after_init 
= { .name = "TCM-USER", .version = 2, .maxattr = TCMU_ATTR_MAX, + .policy = tcmu_attr_policy, .mcgrps = tcmu_mcgrps, .n_mcgrps = ARRAY_SIZE(tcmu_mcgrps), .netnsok = true, diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f02367faa58d..f62897198844 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -205,6 +205,7 @@ enum bpf_return_type { RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ + RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ }; /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 83f81ac53282..6cb82301d8e9 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -233,7 +233,6 @@ const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd) { \ handler \ .cmd = op_name, \ - .policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy), \ }, #define ZZZ_genl_ops CONCAT_(GENL_MAGIC_FAMILY, _genl_ops) @@ -290,7 +289,8 @@ static struct genl_family ZZZ_genl_family __ro_after_init = { #ifdef GENL_MAGIC_FAMILY_HDRSZ .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), #endif - .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1, + .maxattr = ARRAY_SIZE(CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy))-1, + .policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy), .ops = ZZZ_genl_ops, .n_ops = ARRAY_SIZE(ZZZ_genl_ops), .mcgrps = ZZZ_genl_mcgrps, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3b83288749c6..b0e17c94566c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5110,6 +5110,7 @@ enum { MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0 = 0x14, MLX5_ACTION_IN_FIELD_OUT_SIPV4 = 0x15, MLX5_ACTION_IN_FIELD_OUT_DIPV4 = 0x16, + MLX5_ACTION_IN_FIELD_OUT_FIRST_VID = 0x17, 
MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT = 0x47, }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 26f69cf763f4..166fdc0a78b4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -194,8 +194,8 @@ struct net_device_stats { #ifdef CONFIG_RPS #include <linux/static_key.h> -extern struct static_key rps_needed; -extern struct static_key rfs_needed; +extern struct static_key_false rps_needed; +extern struct static_key_false rfs_needed; #endif struct neighbour; @@ -986,8 +986,7 @@ struct devlink; * those the driver believes to be appropriate. * * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, - * struct net_device *sb_dev, - * select_queue_fallback_t fallback); + * struct net_device *sb_dev); * Called to decide which queue to use when device supports multiple * transmit queues. * @@ -1268,8 +1267,7 @@ struct net_device_ops { netdev_features_t features); u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback); + struct net_device *sb_dev); void (*ndo_change_rx_flags)(struct net_device *dev, int flags); void (*ndo_set_rx_mode)(struct net_device *dev); @@ -2152,9 +2150,11 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, &qdisc_xmit_lock_key); \ } -struct netdev_queue *netdev_pick_tx(struct net_device *dev, - struct sk_buff *skb, - struct net_device *sb_dev); +u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev); +struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, + struct sk_buff *skb, + struct net_device *sb_dev); /* returns the headroom that the master device needs to take in account * when forwarding to this dev @@ -2639,11 +2639,9 @@ void dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); u16 dev_pick_tx_zero(struct net_device 
*dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback); + struct net_device *sb_dev); u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback); + struct net_device *sb_dev); int dev_queue_xmit(struct sk_buff *skb); int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev); int dev_direct_xmit(struct sk_buff *skb, u16 queue_id); diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index ae9c0f71f311..86dfa417848d 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -63,7 +63,6 @@ struct bucket_table { unsigned int size; unsigned int nest; - unsigned int rehash; u32 hash_rnd; unsigned int locks_mask; spinlock_t *locks; @@ -307,13 +306,13 @@ static inline struct rhash_head __rcu **rht_bucket_insert( } /** - * rht_for_each_continue - continue iterating over hash chain + * rht_for_each_from - iterate over hash chain from given head * @pos: the &struct rhash_head to use as a loop cursor. - * @head: the previous &struct rhash_head to continue from + * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index */ -#define rht_for_each_continue(pos, head, tbl, hash) \ +#define rht_for_each_from(pos, head, tbl, hash) \ for (pos = rht_dereference_bucket(head, tbl, hash); \ !rht_is_a_nulls(pos); \ pos = rht_dereference_bucket((pos)->next, tbl, hash)) @@ -325,18 +324,18 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * @hash: the hash value / bucket index */ #define rht_for_each(pos, tbl, hash) \ - rht_for_each_continue(pos, *rht_bucket(tbl, hash), tbl, hash) + rht_for_each_from(pos, *rht_bucket(tbl, hash), tbl, hash) /** - * rht_for_each_entry_continue - continue iterating over hash chain + * rht_for_each_entry_from - iterate over hash chain from given head * @tpos: the type * to use as a loop cursor. 
* @pos: the &struct rhash_head to use as a loop cursor. - * @head: the previous &struct rhash_head to continue from + * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. */ -#define rht_for_each_entry_continue(tpos, pos, head, tbl, hash, member) \ +#define rht_for_each_entry_from(tpos, pos, head, tbl, hash, member) \ for (pos = rht_dereference_bucket(head, tbl, hash); \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = rht_dereference_bucket((pos)->next, tbl, hash)) @@ -350,7 +349,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * @member: name of the &struct rhash_head within the hashable struct. */ #define rht_for_each_entry(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_continue(tpos, pos, *rht_bucket(tbl, hash), \ + rht_for_each_entry_from(tpos, pos, *rht_bucket(tbl, hash), \ tbl, hash, member) /** @@ -375,9 +374,9 @@ static inline struct rhash_head __rcu **rht_bucket_insert( rht_dereference_bucket(pos->next, tbl, hash) : NULL) /** - * rht_for_each_rcu_continue - continue iterating over rcu hash chain + * rht_for_each_rcu_from - iterate over rcu hash chain from given head * @pos: the &struct rhash_head to use as a loop cursor. - * @head: the previous &struct rhash_head to continue from + * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @@ -385,7 +384,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). 
*/ -#define rht_for_each_rcu_continue(pos, head, tbl, hash) \ +#define rht_for_each_rcu_from(pos, head, tbl, hash) \ for (({barrier(); }), \ pos = rht_dereference_bucket_rcu(head, tbl, hash); \ !rht_is_a_nulls(pos); \ @@ -402,13 +401,13 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_rcu(pos, tbl, hash) \ - rht_for_each_rcu_continue(pos, *rht_bucket(tbl, hash), tbl, hash) + rht_for_each_rcu_from(pos, *rht_bucket(tbl, hash), tbl, hash) /** - * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain + * rht_for_each_entry_rcu_from - iterate over rcu hash chain from given head * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. - * @head: the previous &struct rhash_head to continue from + * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. @@ -417,7 +416,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ -#define rht_for_each_entry_rcu_continue(tpos, pos, head, tbl, hash, member) \ +#define rht_for_each_entry_rcu_from(tpos, pos, head, tbl, hash, member) \ for (({barrier(); }), \ pos = rht_dereference_bucket_rcu(head, tbl, hash); \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ @@ -436,7 +435,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * traversal is guarded by rcu_read_lock().
*/ #define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_rcu_continue(tpos, pos, *rht_bucket(tbl, hash), \ + rht_for_each_entry_rcu_from(tpos, pos, *rht_bucket(tbl, hash), \ tbl, hash, member) /** @@ -492,7 +491,7 @@ restart: hash = rht_key_hashfn(ht, tbl, key, params); head = rht_bucket(tbl, hash); do { - rht_for_each_rcu_continue(he, *head, tbl, hash) { + rht_for_each_rcu_from(he, *head, tbl, hash) { if (params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, he)) : rhashtable_compare(&arg, rht_obj(ht, he))) @@ -626,7 +625,7 @@ slow_path: if (!pprev) goto out; - rht_for_each_continue(head, *pprev, tbl, hash) { + rht_for_each_from(head, *pprev, tbl, hash) { struct rhlist_head *plist; struct rhlist_head *list; @@ -776,12 +775,6 @@ static inline int rhltable_insert( * @obj: pointer to hash head inside object * @params: hash table parameters * - * Locks down the bucket chain in both the old and new table if a resize - * is in progress to ensure that writers can't remove from the old table - * and can't insert to the new table during the atomic operation of search - * and insertion. Searches for duplicates in both the old and new table if - * a resize is in progress. - * * This lookup function may only be used for fixed key hash table (key_len * parameter set). It will BUG() if used inappropriately. * @@ -837,12 +830,6 @@ static inline void *rhashtable_lookup_get_insert_fast( * @obj: pointer to hash head inside object * @params: hash table parameters * - * Locks down the bucket chain in both the old and new table if a resize - * is in progress to ensure that writers can't remove from the old table - * and can't insert to the new table during the atomic operation of search - * and insertion. Searches for duplicates in both the old and new table if - * a resize is in progress. - * * Lookups may occur in parallel with hashtable mutations and resizing. 
* * Will trigger an automatic deferred table resizing if residency in the @@ -903,7 +890,7 @@ static inline int __rhashtable_remove_fast_one( spin_lock_bh(lock); pprev = rht_bucket_var(tbl, hash); - rht_for_each_continue(he, *pprev, tbl, hash) { + rht_for_each_from(he, *pprev, tbl, hash) { struct rhlist_head *list; list = container_of(he, struct rhlist_head, rhead); @@ -1055,7 +1042,7 @@ static inline int __rhashtable_replace_fast( spin_lock_bh(lock); pprev = rht_bucket_var(tbl, hash); - rht_for_each_continue(he, *pprev, tbl, hash) { + rht_for_each_from(he, *pprev, tbl, hash) { if (he != obj_old) { pprev = &he->next; continue; diff --git a/include/linux/siphash.h b/include/linux/siphash.h index fa7a6b9cedbf..bf21591a9e5e 100644 --- a/include/linux/siphash.h +++ b/include/linux/siphash.h @@ -21,6 +21,11 @@ typedef struct { u64 key[2]; } siphash_key_t; +static inline bool siphash_key_is_zero(const siphash_key_t *key) +{ + return !(key->key[0] | key->key[1]); +} + u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key); #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key); diff --git a/include/net/devlink.h b/include/net/devlink.h index 63de99e09f04..03fb16f4fb6c 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -16,6 +16,7 @@ #include <linux/gfp.h> #include <linux/list.h> #include <linux/netdevice.h> +#include <linux/spinlock.h> #include <net/net_namespace.h> #include <uapi/linux/devlink.h> @@ -53,6 +54,9 @@ struct devlink_port { struct devlink *devlink; unsigned index; bool registered; + spinlock_t type_lock; /* Protects type and type_dev + * pointer consistency. 
+ */ enum devlink_port_type type; enum devlink_port_type desired_type; void *type_dev; @@ -545,17 +549,13 @@ static inline struct devlink *priv_to_devlink(void *priv) static inline struct devlink *netdev_to_devlink(struct net_device *dev) { -#if IS_ENABLED(CONFIG_NET_DEVLINK) if (dev->netdev_ops->ndo_get_devlink) return dev->netdev_ops->ndo_get_devlink(dev); -#endif return NULL; } struct ib_device; -#if IS_ENABLED(CONFIG_NET_DEVLINK) - struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size); int devlink_register(struct devlink *devlink, struct device *dev); void devlink_unregister(struct devlink *devlink); @@ -724,500 +724,14 @@ void devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, enum devlink_health_reporter_state state); +#if IS_ENABLED(CONFIG_NET_DEVLINK) + void devlink_compat_running_version(struct net_device *dev, char *buf, size_t len); int devlink_compat_flash_update(struct net_device *dev, const char *file_name); #else -static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, - size_t priv_size) -{ - return kzalloc(sizeof(struct devlink) + priv_size, GFP_KERNEL); -} - -static inline int devlink_register(struct devlink *devlink, struct device *dev) -{ - return 0; -} - -static inline void devlink_unregister(struct devlink *devlink) -{ -} - -static inline void devlink_params_publish(struct devlink *devlink) -{ -} - -static inline void devlink_params_unpublish(struct devlink *devlink) -{ -} - -static inline void devlink_free(struct devlink *devlink) -{ - kfree(devlink); -} - -static inline int devlink_port_register(struct devlink *devlink, - struct devlink_port *devlink_port, - unsigned int port_index) -{ - return 0; -} - -static inline void devlink_port_unregister(struct devlink_port *devlink_port) -{ -} - -static inline void devlink_port_type_eth_set(struct devlink_port *devlink_port, - struct net_device *netdev) -{ -} - -static inline void devlink_port_type_ib_set(struct devlink_port 
*devlink_port, - struct ib_device *ibdev) -{ -} - -static inline void devlink_port_type_clear(struct devlink_port *devlink_port) -{ -} - -static inline void devlink_port_attrs_set(struct devlink_port *devlink_port, - enum devlink_port_flavour flavour, - u32 port_number, bool split, - u32 split_subport_number) -{ -} - -static inline int -devlink_port_get_phys_port_name(struct devlink_port *devlink_port, - char *name, size_t len) -{ - return -EOPNOTSUPP; -} - -static inline int devlink_sb_register(struct devlink *devlink, - unsigned int sb_index, u32 size, - u16 ingress_pools_count, - u16 egress_pools_count, - u16 ingress_tc_count, - u16 egress_tc_count) -{ - return 0; -} - -static inline void devlink_sb_unregister(struct devlink *devlink, - unsigned int sb_index) -{ -} - -static inline int -devlink_dpipe_table_register(struct devlink *devlink, - const char *table_name, - struct devlink_dpipe_table_ops *table_ops, - void *priv, bool counter_control_extern) -{ - return 0; -} - -static inline void devlink_dpipe_table_unregister(struct devlink *devlink, - const char *table_name) -{ -} - -static inline int devlink_dpipe_headers_register(struct devlink *devlink, - struct devlink_dpipe_headers * - dpipe_headers) -{ - return 0; -} - -static inline void devlink_dpipe_headers_unregister(struct devlink *devlink) -{ -} - -static inline bool devlink_dpipe_table_counter_enabled(struct devlink *devlink, - const char *table_name) -{ - return false; -} - -static inline int -devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx) -{ - return 0; -} - -static inline int -devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx, - struct devlink_dpipe_entry *entry) -{ - return 0; -} - -static inline int -devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx) -{ - return 0; -} - -static inline void -devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry) -{ -} - -static inline int -devlink_dpipe_action_put(struct sk_buff *skb, - struct 
devlink_dpipe_action *action) -{ - return 0; -} - -static inline int -devlink_dpipe_match_put(struct sk_buff *skb, - struct devlink_dpipe_match *match) -{ - return 0; -} - -static inline int -devlink_resource_register(struct devlink *devlink, - const char *resource_name, - u64 resource_size, - u64 resource_id, - u64 parent_resource_id, - const struct devlink_resource_size_params *size_params) -{ - return 0; -} - -static inline void -devlink_resources_unregister(struct devlink *devlink, - struct devlink_resource *resource) -{ -} - -static inline int -devlink_resource_size_get(struct devlink *devlink, u64 resource_id, - u64 *p_resource_size) -{ - return -EOPNOTSUPP; -} - -static inline int -devlink_dpipe_table_resource_set(struct devlink *devlink, - const char *table_name, u64 resource_id, - u64 resource_units) -{ - return -EOPNOTSUPP; -} - -static inline void -devlink_resource_occ_get_register(struct devlink *devlink, - u64 resource_id, - devlink_resource_occ_get_t *occ_get, - void *occ_get_priv) -{ -} - -static inline void -devlink_resource_occ_get_unregister(struct devlink *devlink, - u64 resource_id) -{ -} - -static inline int -devlink_params_register(struct devlink *devlink, - const struct devlink_param *params, - size_t params_count) -{ - return 0; -} - -static inline void -devlink_params_unregister(struct devlink *devlink, - const struct devlink_param *params, - size_t params_count) -{ - -} - -static inline int -devlink_port_params_register(struct devlink_port *devlink_port, - const struct devlink_param *params, - size_t params_count) -{ - return 0; -} - -static inline void -devlink_port_params_unregister(struct devlink_port *devlink_port, - const struct devlink_param *params, - size_t params_count) -{ -} - -static inline int -devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, - union devlink_param_value *init_val) -{ - return -EOPNOTSUPP; -} - -static inline int -devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, 
- union devlink_param_value init_val) -{ - return -EOPNOTSUPP; -} - -static inline int -devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port, - u32 param_id, - union devlink_param_value *init_val) -{ - return -EOPNOTSUPP; -} - -static inline int -devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port, - u32 param_id, - union devlink_param_value init_val) -{ - return -EOPNOTSUPP; -} - -static inline void -devlink_param_value_changed(struct devlink *devlink, u32 param_id) -{ -} - -static inline void -devlink_port_param_value_changed(struct devlink_port *devlink_port, - u32 param_id) -{ -} - -static inline void -devlink_param_value_str_fill(union devlink_param_value *dst_val, - const char *src) -{ -} - -static inline struct devlink_region * -devlink_region_create(struct devlink *devlink, - const char *region_name, - u32 region_max_snapshots, - u64 region_size) -{ - return NULL; -} - -static inline void -devlink_region_destroy(struct devlink_region *region) -{ -} - -static inline u32 -devlink_region_shapshot_id_get(struct devlink *devlink) -{ - return 0; -} - -static inline int -devlink_region_snapshot_create(struct devlink_region *region, u64 data_len, - u8 *data, u32 snapshot_id, - devlink_snapshot_data_dest_t *data_destructor) -{ - return 0; -} - -static inline int -devlink_info_driver_name_put(struct devlink_info_req *req, const char *name) -{ - return 0; -} - -static inline int -devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn) -{ - return 0; -} - -static inline int -devlink_info_version_fixed_put(struct devlink_info_req *req, - const char *version_name, - const char *version_value) -{ - return 0; -} - -static inline int -devlink_info_version_stored_put(struct devlink_info_req *req, - const char *version_name, - const char *version_value) -{ - return 0; -} - -static inline int -devlink_info_version_running_put(struct devlink_info_req *req, - const char *version_name, - const char *version_value) -{ 
- return 0; -} - -static inline int -devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg) -{ - return 0; -} - -static inline int -devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg) -{ - return 0; -} - -static inline int -devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name) -{ - return 0; -} - -static inline int -devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg) -{ - return 0; -} - -static inline int -devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, - const char *name) -{ - return 0; -} - -static inline int -devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg) -{ - return 0; -} - -static inline int -devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value) -{ - return 0; -} - -static inline int -devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value) -{ - return 0; -} - -static inline int -devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value) -{ - return 0; -} - -static inline int -devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value) -{ - return 0; -} - -static inline int -devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value) -{ - return 0; -} - -static inline int -devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, - u16 value_len) -{ - return 0; -} - -static inline int -devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, - bool value) -{ - return 0; -} - -static inline int -devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name, - u8 value) -{ - return 0; -} - -static inline int -devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name, - u32 value) -{ - return 0; -} - -static inline int -devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, - u64 value) -{ - return 0; -} - -static inline int -devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, - const char *value) -{ - return 0; -} - -static inline int -devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, - const 
void *value, u16 value_len) -{ - return 0; -} - -static inline struct devlink_health_reporter * -devlink_health_reporter_create(struct devlink *devlink, - const struct devlink_health_reporter_ops *ops, - u64 graceful_period, bool auto_recover, - void *priv) -{ - return NULL; -} - -static inline void -devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) -{ -} - -static inline void * -devlink_health_reporter_priv(struct devlink_health_reporter *reporter) -{ - return NULL; -} - -static inline int -devlink_health_report(struct devlink_health_reporter *reporter, - const char *msg, void *priv_ctx) -{ - return 0; -} - -static inline void -devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, - enum devlink_health_reporter_state state) -{ -} - static inline void devlink_compat_running_version(struct net_device *dev, char *buf, size_t len) { @@ -1228,6 +742,7 @@ devlink_compat_flash_update(struct net_device *dev, const char *file_name) { return -EOPNOTSUPP; } + #endif #endif /* _NET_DEVLINK_H_ */ diff --git a/include/net/dst.h b/include/net/dst.h index 6cf0870414c7..12b31c602cb0 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -19,17 +19,6 @@ #include <net/neighbour.h> #include <asm/processor.h> -#define DST_GC_MIN (HZ/10) -#define DST_GC_INC (HZ/2) -#define DST_GC_MAX (120*HZ) - -/* Each dst_entry has reference count and sits in some parent list(s). - * When it is removed from parent list, it is "freed" (dst_free). - * After this it enters dead state (dst->obsolete > 0) and if its refcnt - * is zero, it can be destroyed immediately, otherwise it is added - * to gc list and garbage collector periodically checks the refcnt. 
- */ - struct sk_buff; struct dst_entry { diff --git a/include/net/genetlink.h b/include/net/genetlink.h index aa2e5888f18d..6850c7b1a3a6 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -26,6 +26,7 @@ struct genl_info; * @name: name of family * @version: protocol version * @maxattr: maximum number of attributes supported + * @policy: netlink policy * @netnsok: set to true if the family can handle network * namespaces and should be presented in all of them * @parallel_ops: operations can be called in parallel and aren't @@ -56,6 +57,7 @@ struct genl_family { unsigned int maxattr; bool netnsok; bool parallel_ops; + const struct nla_policy *policy; int (*pre_doit)(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info); @@ -124,14 +126,12 @@ static inline int genl_err_attr(struct genl_info *info, int err, * @cmd: command identifier * @internal_flags: flags used by the family * @flags: flags - * @policy: attribute validation policy * @doit: standard command callback * @start: start callback for dumps * @dumpit: callback for dumpers * @done: completion callback for dumps */ struct genl_ops { - const struct nla_policy *policy; int (*doit)(struct sk_buff *skb, struct genl_info *info); int (*start)(struct netlink_callback *cb); diff --git a/include/net/geneve.h b/include/net/geneve.h index fc6a7e0a874a..bced0b1d9fe4 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -4,6 +4,8 @@ #include <net/udp_tunnel.h> +#define GENEVE_UDP_PORT 6081 + /* Geneve Header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |Ver| Opt Len |O|C| Rsvd. 
| Protocol Type | diff --git a/include/net/ip.h b/include/net/ip.h index be3cad9c2e4c..aa09ae5f01a5 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -38,6 +38,10 @@ #define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ #define IPV4_MIN_MTU 68 /* RFC 791 */ +extern unsigned int sysctl_fib_sync_mem; +extern unsigned int sysctl_fib_sync_mem_min; +extern unsigned int sysctl_fib_sync_mem_max; + struct sock; struct inet_skb_parm { diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 84097010237c..2acb78a762ee 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -50,7 +50,8 @@ struct fib6_config { u32 fc_protocol; u16 fc_type; /* only 8 bits are used */ u16 fc_delete_all_nh : 1, - __unused : 15; + fc_ignore_dev_down:1, + __unused : 14; struct in6_addr fc_dst; struct in6_addr fc_src; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 104a6669e344..7698460a3dd1 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -9,6 +9,7 @@ #include <linux/uidgid.h> #include <net/inet_frag.h> #include <linux/rcupdate.h> +#include <linux/siphash.h> struct tcpm_hash_bucket; struct ctl_table_header; @@ -217,5 +218,6 @@ struct netns_ipv4 { unsigned int ipmr_seq; /* protected by rtnl_mutex */ atomic_t rt_genid; + siphash_key_t ip_id_key; }; #endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index b028a1dc150d..64e29b58bb5e 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -33,6 +33,8 @@ struct netns_sysctl_ipv6 { int auto_flowlabels; int icmpv6_time; int icmpv6_echo_ignore_all; + int icmpv6_echo_ignore_multicast; + int icmpv6_echo_ignore_anycast; int anycast_src_echo_reply; int ip_nonlocal_bind; int fwmark_reflect; diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 21a5243fecd1..9dfd7960d90a 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -106,10 +106,8 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock 
*sk_listener, return req; } -static inline void reqsk_free(struct request_sock *req) +static inline void __reqsk_free(struct request_sock *req) { - WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); - req->rsk_ops->destructor(req); if (req->rsk_listener) sock_put(req->rsk_listener); @@ -117,6 +115,12 @@ static inline void reqsk_free(struct request_sock *req) kmem_cache_free(req->rsk_ops->slab, req); } +static inline void reqsk_free(struct request_sock *req) +{ + WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); + __reqsk_free(req); +} + static inline void reqsk_put(struct request_sock *req) { if (refcount_dec_and_test(&req->rsk_refcnt)) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 7d1a0483a17b..2269383c1399 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -113,6 +113,9 @@ struct Qdisc { spinlock_t busylock ____cacheline_aligned_in_smp; spinlock_t seqlock; + + /* for NOLOCK qdisc, true if there are no enqueued skbs */ + bool empty; struct rcu_head rcu; }; @@ -143,11 +146,19 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc) return (raw_read_seqcount(&qdisc->running) & 1) ? 
true : false; } +static inline bool qdisc_is_empty(const struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_NOLOCK) + return qdisc->empty; + return !qdisc->q.qlen; +} + static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) { if (!spin_trylock(&qdisc->seqlock)) return false; + qdisc->empty = false; } else if (qdisc_is_running(qdisc)) { return false; } diff --git a/include/net/sock.h b/include/net/sock.h index 8de5ee258b93..7fa223278522 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -368,6 +368,7 @@ struct sock { atomic_t sk_drops; int sk_rcvlowat; struct sk_buff_head sk_error_queue; + struct sk_buff *sk_rx_skb_cache; struct sk_buff_head sk_receive_queue; /* * The backlog queue is special, it is always used with @@ -414,6 +415,7 @@ struct sock { struct sk_buff *sk_send_head; struct rb_root tcp_rtx_queue; }; + struct sk_buff *sk_tx_skb_cache; struct sk_buff_head sk_write_queue; __s32 sk_peek_off; int sk_write_pending; @@ -966,7 +968,7 @@ static inline void sock_rps_record_flow_hash(__u32 hash) static inline void sock_rps_record_flow(const struct sock *sk) { #ifdef CONFIG_RPS - if (static_key_false(&rfs_needed)) { + if (static_branch_unlikely(&rfs_needed)) { /* Reading sk->sk_rxhash might incur an expensive cache line * miss. 
* @@ -1466,6 +1468,11 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) sock_set_flag(sk, SOCK_QUEUE_SHRUNK); sk->sk_wmem_queued -= skb->truesize; sk_mem_uncharge(sk, skb->truesize); + if (!sk->sk_tx_skb_cache) { + skb_zcopy_clear(skb, true); + sk->sk_tx_skb_cache = skb; + return; + } __kfree_skb(skb); } @@ -2433,6 +2440,15 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags) static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb) { __skb_unlink(skb, &sk->sk_receive_queue); + if ( +#ifdef CONFIG_RPS + !static_branch_unlikely(&rps_needed) && +#endif + !sk->sk_rx_skb_cache) { + sk->sk_rx_skb_cache = skb; + skb_orphan(skb); + return; + } __kfree_skb(skb); } diff --git a/include/net/tls.h b/include/net/tls.h index a5a938583295..3ce71d78414c 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -60,6 +60,17 @@ #define TLS_AAD_SPACE_SIZE 13 #define TLS_DEVICE_NAME_MAX 32 +#define MAX_IV_SIZE 16 + +/* For AES-CCM, the full 16-bytes of IV is made of '4' fields of given sizes. + * + * IV[16] = b0[1] || implicit nonce[4] || explicit nonce[8] || length[3] + * + * The field 'length' is encoded in field 'b0' as '(length width - 1)'. + * Hence b0 contains (3 - 1) = 2. + */ +#define TLS_AES_CCM_IV_B0_BYTE 2 + /* * This structure defines the routines for Inline TLS driver. 
* The following routines are optional and filled with a @@ -123,8 +134,7 @@ struct tls_rec { struct scatterlist sg_content_type; char aad_space[TLS_AAD_SPACE_SIZE]; - u8 iv_data[TLS_CIPHER_AES_GCM_128_IV_SIZE + - TLS_CIPHER_AES_GCM_128_SALT_SIZE]; + u8 iv_data[MAX_IV_SIZE]; struct aead_request aead_req; u8 aead_req_ctx[]; }; @@ -219,6 +229,7 @@ struct tls_prot_info { u16 tag_size; u16 overhead_size; u16 iv_size; + u16 salt_size; u16 rec_seq_size; u16 aad_size; u16 tail_size; diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 00254a58824b..83b5999a2587 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -8,6 +8,8 @@ #include <net/rtnetlink.h> #include <net/switchdev.h> +#define IANA_VXLAN_UDP_PORT 4789 + /* VXLAN protocol (RFC 7348) header: * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |R|R|R|R|I|R|R|R| Reserved | diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 929c8e537a14..837024512baf 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1478,13 +1478,27 @@ union bpf_attr { * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. * - * There is a single supported mode at this time: + * There are two supported modes at this time: + * + * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer + * (room space is added or removed below the layer 2 header). * * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer * (room space is added or removed below the layer 3 header). * - * All values for *flags* are reserved for future usage, and must - * be left at zero. + * The following flags are supported at this time: + * + * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size. + * Adjusting mss in this way is not allowed for datagrams. + * + * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 **: + * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 **: + * Any new space is reserved to hold a tunnel header. 
+ * Configure skb offsets and other fields accordingly. + * + * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE **: + * * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP **: + * Use with ENCAP_L3 flags to further specify the tunnel type. * * A call to this helper is susceptible to change the underlaying * packet buffer. Therefore, at load time, all checks on pointers @@ -2431,6 +2445,38 @@ union bpf_attr { * Return * A **struct bpf_sock** pointer on success, or **NULL** in * case of failure. + * + * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) + * Description + * Look for TCP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-**NULL**, released via **bpf_sk_release**\ (). + * + * This function is identical to bpf_sk_lookup_tcp, except that it + * also returns timewait or request sockets. Use bpf_sk_fullsock + * or bpf_tcp_socket to access the full structure. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. + * + * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * Description + * Check whether iph and th contain a valid SYN cookie ACK for + * the listening socket in sk. + * + * iph points to the start of the IPv4 or IPv6 header, while + * iph_len contains sizeof(struct iphdr) or sizeof(struct ip6hdr). + * + * th points to the start of the TCP header, while th_len contains + * sizeof(struct tcphdr). + * + * Return + * 0 if iph and th are a valid SYN cookie ACK, or a negative error + * otherwise. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2531,7 +2577,9 @@ union bpf_attr { FN(sk_fullsock), \ FN(tcp_sock), \ FN(skb_ecn_set_ce), \ - FN(get_listener_sock), + FN(get_listener_sock), \ + FN(skc_lookup_tcp), \ + FN(tcp_check_syncookie), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2590,9 +2638,18 @@ enum bpf_func_id { /* Current network namespace */ #define BPF_F_CURRENT_NETNS (-1L) +/* BPF_FUNC_skb_adjust_room flags. */ +#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0) + +#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1) +#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2) +#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3) +#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, + BPF_ADJ_ROOM_MAC, }; /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */ diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 3652b239dad1..50c76f4fa402 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -252,9 +252,17 @@ struct ethtool_tunable { #define DOWNSHIFT_DEV_DEFAULT_COUNT 0xff #define DOWNSHIFT_DEV_DISABLE 0 +/* Time in msecs after which link is reported as down + * 0 = lowest time supported by the PHY + * 0xff = off, link down detection according to standard + */ +#define ETHTOOL_PHY_FAST_LINK_DOWN_ON 0 +#define ETHTOOL_PHY_FAST_LINK_DOWN_OFF 0xff + enum phy_tunable_id { ETHTOOL_PHY_ID_UNSPEC, ETHTOOL_PHY_DOWNSHIFT, + ETHTOOL_PHY_FAST_LINK_DOWN, /* * Add your fresh new phy tunable attribute above and remember to update * phy_tunable_strings[] in net/core/ethtool.c diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h index f2ea833a2812..87c2c9f08803 100644 --- a/include/uapi/linux/fou.h +++ b/include/uapi/linux/fou.h @@ -16,6 +16,12 @@ enum { FOU_ATTR_IPPROTO, /* u8 */ FOU_ATTR_TYPE, /* u8 */ FOU_ATTR_REMCSUM_NOPARTIAL, /* flag */ + 
FOU_ATTR_LOCAL_V4, /* u32 */ + FOU_ATTR_LOCAL_V6, /* in6_addr */ + FOU_ATTR_PEER_V4, /* u32 */ + FOU_ATTR_PEER_V6, /* in6_addr */ + FOU_ATTR_PEER_PORT, /* u16 */ + FOU_ATTR_IFINDEX, /* s32 */ __FOU_ATTR_MAX, }; diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 23a6753b37df..454ae31b93c7 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -60,6 +60,7 @@ #define TUNSETSTEERINGEBPF _IOR('T', 224, int) #define TUNSETFILTEREBPF _IOR('T', 225, int) #define TUNSETCARRIER _IOW('T', 226, int) +#define TUNGETDEVNETNS _IO('T', 227) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001 diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index dbe0cbe4f1b7..dfabacee6903 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -798,6 +798,44 @@ struct ovs_action_push_eth { struct ovs_key_ethernet addresses; }; +/* + * enum ovs_check_pkt_len_attr - Attributes for %OVS_ACTION_ATTR_CHECK_PKT_LEN. + * + * @OVS_CHECK_PKT_LEN_ATTR_PKT_LEN: u16 Packet length to check for. + * @OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER: Nested OVS_ACTION_ATTR_* + * actions to apply if the packet length is greater than the specified + * length in the attr - OVS_CHECK_PKT_LEN_ATTR_PKT_LEN. + * @OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL: Nested OVS_ACTION_ATTR_* + * actions to apply if the packet length is less than or equal to the specified + * length in the attr - OVS_CHECK_PKT_LEN_ATTR_PKT_LEN. + */ +enum ovs_check_pkt_len_attr { + OVS_CHECK_PKT_LEN_ATTR_UNSPEC, + OVS_CHECK_PKT_LEN_ATTR_PKT_LEN, + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER, + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL, + __OVS_CHECK_PKT_LEN_ATTR_MAX, + +#ifdef __KERNEL__ + OVS_CHECK_PKT_LEN_ATTR_ARG /* struct check_pkt_len_arg */ +#endif +}; + +#define OVS_CHECK_PKT_LEN_ATTR_MAX (__OVS_CHECK_PKT_LEN_ATTR_MAX - 1) + +#ifdef __KERNEL__ +struct check_pkt_len_arg { + u16 pkt_len; /* Same value as OVS_CHECK_PKT_LEN_ATTR_PKT_LEN. 
*/ + bool exec_for_greater; /* When true, actions in IF_GREATER will + * not change flow keys. False otherwise. + */ + bool exec_for_lesser_equal; /* When true, actions in IF_LESS_EQUAL + * will not change flow keys. False + * otherwise. + */ +}; +#endif + /** * enum ovs_action_attr - Action types. * @@ -842,6 +880,9 @@ struct ovs_action_push_eth { * packet, or modify the packet (e.g., change the DSCP field). * @OVS_ACTION_ATTR_CLONE: make a copy of the packet and execute a list of * actions without affecting the original packet and key. + * @OVS_ACTION_ATTR_CHECK_PKT_LEN: Check the packet length and execute a set + * of actions if greater than the specified packet length, else execute + * another set of actions. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -876,6 +917,7 @@ enum ovs_action_attr { OVS_ACTION_ATTR_POP_NSH, /* No argument. */ OVS_ACTION_ATTR_METER, /* u32 meter ID. */ OVS_ACTION_ATTR_CLONE, /* Nested OVS_CLONE_ATTR_*. */ + OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 8bb6cc5f3235..b521464ea962 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -160,15 +160,42 @@ enum { #define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */ #define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */ +/* + * Sender's congestion state indicating normal or abnormal situations + * in the last round of packets sent. The state is driven by the ACK + * information and timer events. + */ enum tcp_ca_state { + /* + * Nothing bad has been observed recently. + * No apparent reordering, packet loss, or ECN marks. 
+ */ TCP_CA_Open = 0, #define TCPF_CA_Open (1<<TCP_CA_Open) + /* + * The sender enters disordered state when it has received DUPACKs or + * SACKs in the last round of packets sent. This could be due to packet + * loss or reordering but needs further information to confirm packets + * have been lost. + */ TCP_CA_Disorder = 1, #define TCPF_CA_Disorder (1<<TCP_CA_Disorder) + /* + * The sender enters Congestion Window Reduction (CWR) state when it + * has received ACKs with ECN-ECE marks, or has experienced congestion + * or packet discard on the sender host (e.g. qdisc). + */ TCP_CA_CWR = 2, #define TCPF_CA_CWR (1<<TCP_CA_CWR) + /* + * The sender is in fast recovery and retransmitting lost packets, + * typically triggered by ACK events. + */ TCP_CA_Recovery = 3, #define TCPF_CA_Recovery (1<<TCP_CA_Recovery) + /* + * The sender is in loss recovery triggered by retransmission timeout. + */ TCP_CA_Loss = 4 #define TCPF_CA_Loss (1<<TCP_CA_Loss) }; diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 0ebe02ef1a86..efb958fd167d 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -281,6 +281,8 @@ enum { TIPC_NLA_PROP_TOL, /* u32 */ TIPC_NLA_PROP_WIN, /* u32 */ TIPC_NLA_PROP_MTU, /* u32 */ + TIPC_NLA_PROP_BROADCAST, /* u32 */ + TIPC_NLA_PROP_BROADCAST_RATIO, /* u32 */ __TIPC_NLA_PROP_MAX, TIPC_NLA_PROP_MAX = __TIPC_NLA_PROP_MAX - 1 diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index 401d6f01de6a..5b9c26753e46 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -70,6 +70,13 @@ #define TLS_CIPHER_AES_GCM_256_TAG_SIZE 16 #define TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE 8 +#define TLS_CIPHER_AES_CCM_128 53 +#define TLS_CIPHER_AES_CCM_128_IV_SIZE 8 +#define TLS_CIPHER_AES_CCM_128_KEY_SIZE 16 +#define TLS_CIPHER_AES_CCM_128_SALT_SIZE 4 +#define TLS_CIPHER_AES_CCM_128_TAG_SIZE 16 +#define TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE 8 + #define TLS_SET_RECORD_TYPE 1 #define 
TLS_GET_RECORD_TYPE 2 @@ -94,4 +101,12 @@ struct tls12_crypto_info_aes_gcm_256 { unsigned char rec_seq[TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE]; }; +struct tls12_crypto_info_aes_ccm_128 { + struct tls_crypto_info info; + unsigned char iv[TLS_CIPHER_AES_CCM_128_IV_SIZE]; + unsigned char key[TLS_CIPHER_AES_CCM_128_KEY_SIZE]; + unsigned char salt[TLS_CIPHER_AES_CCM_128_SALT_SIZE]; + unsigned char rec_seq[TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE]; +}; + #endif /* _UAPI_LINUX_TLS_H */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index fd502c1f71eb..2fe89138309a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -377,7 +377,8 @@ static bool is_release_function(enum bpf_func_id func_id) static bool is_acquire_function(enum bpf_func_id func_id) { return func_id == BPF_FUNC_sk_lookup_tcp || - func_id == BPF_FUNC_sk_lookup_udp; + func_id == BPF_FUNC_sk_lookup_udp || + func_id == BPF_FUNC_skc_lookup_tcp; } static bool is_ptr_cast_function(enum bpf_func_id func_id) @@ -3156,19 +3157,11 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; - if (is_acquire_function(func_id)) { - int id = acquire_reference_state(env, insn_idx); - - if (id < 0) - return id; - /* For mark_ptr_or_null_reg() */ - regs[BPF_REG_0].id = id; - /* For release_reference() */ - regs[BPF_REG_0].ref_obj_id = id; - } else { - /* For mark_ptr_or_null_reg() */ - regs[BPF_REG_0].id = ++env->id_gen; - } + regs[BPF_REG_0].id = ++env->id_gen; + } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) { + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL; + regs[BPF_REG_0].id = ++env->id_gen; } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) { mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; @@ -3179,9 +3172,19 @@ static int 
check_helper_call(struct bpf_verifier_env *env, int func_id, int insn return -EINVAL; } - if (is_ptr_cast_function(func_id)) + if (is_ptr_cast_function(func_id)) { /* For release_reference() */ regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; + } else if (is_acquire_function(func_id)) { + int id = acquire_reference_state(env, insn_idx); + + if (id < 0) + return id; + /* For mark_ptr_or_null_reg() */ + regs[BPF_REG_0].id = id; + /* For release_reference() */ + regs[BPF_REG_0].ref_obj_id = id; + } do_refine_retval_range(regs, fn->ret_type, func_id, &meta); diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 4e62a4a8fa91..1b942a7caf26 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -650,16 +650,37 @@ static const struct genl_ops taskstats_ops[] = { { .cmd = TASKSTATS_CMD_GET, .doit = taskstats_user_cmd, - .policy = taskstats_cmd_get_policy, - .flags = GENL_ADMIN_PERM, + /* policy enforced later */ + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_HASPOL, }, { .cmd = CGROUPSTATS_CMD_GET, .doit = cgroupstats_user_cmd, - .policy = cgroupstats_cmd_get_policy, + /* policy enforced later */ + .flags = GENL_CMD_CAP_HASPOL, }, }; +static int taskstats_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + const struct nla_policy *policy = NULL; + + switch (ops->cmd) { + case TASKSTATS_CMD_GET: + policy = taskstats_cmd_get_policy; + break; + case CGROUPSTATS_CMD_GET: + policy = cgroupstats_cmd_get_policy; + break; + default: + return -EINVAL; + } + + return nlmsg_validate(info->nlhdr, GENL_HDRLEN, TASKSTATS_CMD_ATTR_MAX, + policy, info->extack); +} + static struct genl_family family __ro_after_init = { .name = TASKSTATS_GENL_NAME, .version = TASKSTATS_GENL_VERSION, @@ -667,6 +688,7 @@ static struct genl_family family __ro_after_init = { .module = THIS_MODULE, .ops = taskstats_ops, .n_ops = ARRAY_SIZE(taskstats_ops), + .pre_doit = taskstats_pre_doit, }; /* Needed early in initialization */ diff --git a/lib/rhashtable.c 
b/lib/rhashtable.c index 97f59abc3e92..811d51b7cb86 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -197,6 +197,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, return NULL; } + rcu_head_init(&tbl->rcu); INIT_LIST_HEAD(&tbl->walkers); tbl->hash_rnd = get_random_u32(); @@ -280,10 +281,9 @@ static int rhashtable_rehash_chain(struct rhashtable *ht, while (!(err = rhashtable_rehash_one(ht, old_hash))) ; - if (err == -ENOENT) { - old_tbl->rehash++; + if (err == -ENOENT) err = 0; - } + spin_unlock_bh(old_bucket_lock); return err; @@ -330,13 +330,16 @@ static int rhashtable_rehash_table(struct rhashtable *ht) spin_lock(&ht->lock); list_for_each_entry(walker, &old_tbl->walkers, list) walker->tbl = NULL; - spin_unlock(&ht->lock); /* Wait for readers. All new readers will see the new * table, and thus no references to the old table will * remain. + * We do this inside the locked region so that + * rhashtable_walk_stop() can use rcu_head_after_call_rcu() + * to check if it should not re-link the table. */ call_rcu(&old_tbl->rcu, bucket_table_free_rcu); + spin_unlock(&ht->lock); return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0; } @@ -491,7 +494,7 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, elasticity = RHT_ELASTICITY; pprev = rht_bucket_var(tbl, hash); - rht_for_each_continue(head, *pprev, tbl, hash) { + rht_for_each_from(head, *pprev, tbl, hash) { struct rhlist_head *list; struct rhlist_head *plist; @@ -582,46 +585,22 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, struct bucket_table *new_tbl; struct bucket_table *tbl; unsigned int hash; - spinlock_t *lock; void *data; - tbl = rcu_dereference(ht->tbl); - - /* All insertions must grab the oldest table containing - * the hashed bucket that is yet to be rehashed. 
- */ - for (;;) { - hash = rht_head_hashfn(ht, tbl, obj, ht->p); - lock = rht_bucket_lock(tbl, hash); - spin_lock_bh(lock); - - if (tbl->rehash <= hash) - break; - - spin_unlock_bh(lock); - tbl = rht_dereference_rcu(tbl->future_tbl, ht); - } - - data = rhashtable_lookup_one(ht, tbl, hash, key, obj); - new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); - if (PTR_ERR(new_tbl) != -EEXIST) - data = ERR_CAST(new_tbl); + new_tbl = rcu_dereference(ht->tbl); - while (!IS_ERR_OR_NULL(new_tbl)) { + do { tbl = new_tbl; hash = rht_head_hashfn(ht, tbl, obj, ht->p); - spin_lock_nested(rht_bucket_lock(tbl, hash), - SINGLE_DEPTH_NESTING); + spin_lock_bh(rht_bucket_lock(tbl, hash)); data = rhashtable_lookup_one(ht, tbl, hash, key, obj); new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); if (PTR_ERR(new_tbl) != -EEXIST) data = ERR_CAST(new_tbl); - spin_unlock(rht_bucket_lock(tbl, hash)); - } - - spin_unlock_bh(lock); + spin_unlock_bh(rht_bucket_lock(tbl, hash)); + } while (!IS_ERR_OR_NULL(new_tbl)); if (PTR_ERR(data) == -EAGAIN) data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?: @@ -943,10 +922,11 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter) ht = iter->ht; spin_lock(&ht->lock); - if (tbl->rehash < tbl->size) - list_add(&iter->walker.list, &tbl->walkers); - else + if (rcu_head_after_call_rcu(&tbl->rcu, bucket_table_free_rcu)) + /* This bucket table is being freed, don't re-link it. */ iter->walker.tbl = NULL; + else + list_add(&iter->walker.list, &tbl->walkers); spin_unlock(&ht->lock); out: diff --git a/net/Kconfig b/net/Kconfig index 1efe1f9ee492..3e8fdd688329 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -429,11 +429,8 @@ config NET_SOCK_MSG with the help of BPF programs. config NET_DEVLINK - bool "Network physical/parent device Netlink interface" - help - Network physical/parent device Netlink interface provides - infrastructure to support access to physical chip-wide config and - monitoring. 
+ bool + default n config PAGE_POOL bool diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 67a58da2e6a0..d3033a3d2a63 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -1345,34 +1345,29 @@ static const struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_GET_MESH, /* can be retrieved by unprivileged users */ - .policy = batadv_netlink_policy, .doit = batadv_netlink_get_mesh, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_TP_METER, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .doit = batadv_netlink_tp_meter_start, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_TP_METER_CANCEL, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .doit = batadv_netlink_tp_meter_cancel, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_GET_ROUTING_ALGOS, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_algo_dump, }, { .cmd = BATADV_CMD_GET_HARDIF, /* can be retrieved by unprivileged users */ - .policy = batadv_netlink_policy, .dumpit = batadv_netlink_dump_hardif, .doit = batadv_netlink_get_hardif, .internal_flags = BATADV_FLAG_NEED_MESH | @@ -1381,68 +1376,57 @@ static const struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_tt_local_dump, }, { .cmd = BATADV_CMD_GET_TRANSTABLE_GLOBAL, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_tt_global_dump, }, { .cmd = BATADV_CMD_GET_ORIGINATORS, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_orig_dump, }, { .cmd = BATADV_CMD_GET_NEIGHBORS, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_hardif_neigh_dump, }, { .cmd = BATADV_CMD_GET_GATEWAYS, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_gw_dump, }, { .cmd = BATADV_CMD_GET_BLA_CLAIM, .flags = GENL_ADMIN_PERM, - 
.policy = batadv_netlink_policy, .dumpit = batadv_bla_claim_dump, }, { .cmd = BATADV_CMD_GET_BLA_BACKBONE, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_bla_backbone_dump, }, { .cmd = BATADV_CMD_GET_DAT_CACHE, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_dat_cache_dump, }, { .cmd = BATADV_CMD_GET_MCAST_FLAGS, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .dumpit = batadv_mcast_flags_dump, }, { .cmd = BATADV_CMD_SET_MESH, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .doit = batadv_netlink_set_mesh, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_SET_HARDIF, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .doit = batadv_netlink_set_hardif, .internal_flags = BATADV_FLAG_NEED_MESH | BATADV_FLAG_NEED_HARDIF, @@ -1450,7 +1434,6 @@ static const struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_GET_VLAN, /* can be retrieved by unprivileged users */ - .policy = batadv_netlink_policy, .doit = batadv_netlink_get_vlan, .internal_flags = BATADV_FLAG_NEED_MESH | BATADV_FLAG_NEED_VLAN, @@ -1458,7 +1441,6 @@ static const struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_SET_VLAN, .flags = GENL_ADMIN_PERM, - .policy = batadv_netlink_policy, .doit = batadv_netlink_set_vlan, .internal_flags = BATADV_FLAG_NEED_MESH | BATADV_FLAG_NEED_VLAN, @@ -1470,6 +1452,7 @@ struct genl_family batadv_netlink_family __ro_after_init = { .name = BATADV_NL_NAME, .version = 1, .maxattr = BATADV_ATTR_MAX, + .policy = batadv_netlink_policy, .netnsok = true, .pre_doit = batadv_pre_doit, .post_doit = batadv_post_doit, diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index a0e369179f6d..b257342c0860 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -517,7 +517,7 @@ struct net_bridge_port_group *br_multicast_new_port_group( if (src) memcpy(p->eth_addr, src, ETH_ALEN); else - memset(p->eth_addr, 0xff, ETH_ALEN); + 
eth_broadcast_addr(p->eth_addr); return p; } diff --git a/net/core/datagram.c b/net/core/datagram.c index b2651bb6d2a3..0dafec5cada0 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -61,6 +61,8 @@ #include <trace/events/skb.h> #include <net/busy_poll.h> +#include "datagram.h" + /* * Is a socket 'connection oriented' ? */ @@ -408,10 +410,10 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) } EXPORT_SYMBOL(skb_kill_datagram); -int __skb_datagram_iter(const struct sk_buff *skb, int offset, - struct iov_iter *to, int len, bool fault_short, - size_t (*cb)(const void *, size_t, void *, struct iov_iter *), - void *data) +static int __skb_datagram_iter(const struct sk_buff *skb, int offset, + struct iov_iter *to, int len, bool fault_short, + size_t (*cb)(const void *, size_t, void *, + struct iov_iter *), void *data) { int start = skb_headlen(skb); int i, copy = start - offset, start_off = offset, n; diff --git a/net/core/datagram.h b/net/core/datagram.h new file mode 100644 index 000000000000..bcfb75bfa3b2 --- /dev/null +++ b/net/core/datagram.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _NET_CORE_DATAGRAM_H_ +#define _NET_CORE_DATAGRAM_H_ + +#include <linux/types.h> + +struct sock; +struct sk_buff; +struct iov_iter; + +int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, + struct iov_iter *from, size_t length); + +#endif /* _NET_CORE_DATAGRAM_H_ */ diff --git a/net/core/dev.c b/net/core/dev.c index 2b67f2aa59dd..9ca2d3abfd1a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3468,6 +3468,15 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { __qdisc_drop(skb, &to_free); rc = NET_XMIT_DROP; + } else if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty && + qdisc_run_begin(q)) { + qdisc_bstats_cpu_update(q, skb); + + if (sch_direct_xmit(skb, q, dev, txq, NULL, true)) + __qdisc_run(q); + + qdisc_run_end(q); + rc 
= NET_XMIT_SUCCESS; } else { rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; qdisc_run(q); @@ -3689,23 +3698,21 @@ get_cpus_map: } u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { return 0; } EXPORT_SYMBOL(dev_pick_tx_zero); u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { return (u16)raw_smp_processor_id() % dev->real_num_tx_queues; } EXPORT_SYMBOL(dev_pick_tx_cpu_id); -static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev) +u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev) { struct sock *sk = skb->sk; int queue_index = sk_tx_queue_get(sk); @@ -3729,10 +3736,11 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, return queue_index; } +EXPORT_SYMBOL(netdev_pick_tx); -struct netdev_queue *netdev_pick_tx(struct net_device *dev, - struct sk_buff *skb, - struct net_device *sb_dev) +struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, + struct sk_buff *skb, + struct net_device *sb_dev) { int queue_index = 0; @@ -3747,10 +3755,9 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_select_queue) - queue_index = ops->ndo_select_queue(dev, skb, sb_dev, - __netdev_pick_tx); + queue_index = ops->ndo_select_queue(dev, skb, sb_dev); else - queue_index = __netdev_pick_tx(dev, skb, sb_dev); + queue_index = netdev_pick_tx(dev, skb, sb_dev); queue_index = netdev_cap_txqueue(dev, queue_index); } @@ -3824,7 +3831,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) else skb_dst_force(skb); - txq = netdev_pick_tx(dev, skb, sb_dev); + txq = netdev_core_pick_tx(dev, skb, sb_dev); q = rcu_dereference_bh(txq->qdisc); 
trace_net_dev_queue(skb); @@ -3975,9 +3982,9 @@ EXPORT_SYMBOL(rps_sock_flow_table); u32 rps_cpu_mask __read_mostly; EXPORT_SYMBOL(rps_cpu_mask); -struct static_key rps_needed __read_mostly; +struct static_key_false rps_needed __read_mostly; EXPORT_SYMBOL(rps_needed); -struct static_key rfs_needed __read_mostly; +struct static_key_false rfs_needed __read_mostly; EXPORT_SYMBOL(rfs_needed); static struct rps_dev_flow * @@ -4429,7 +4436,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) bool free_skb = true; int cpu, rc; - txq = netdev_pick_tx(dev, skb, NULL); + txq = netdev_core_pick_tx(dev, skb, NULL); cpu = smp_processor_id(); HARD_TX_LOCK(dev, txq, cpu); if (!netif_xmit_stopped(txq)) { @@ -4503,7 +4510,7 @@ static int netif_rx_internal(struct sk_buff *skb) } #ifdef CONFIG_RPS - if (static_key_false(&rps_needed)) { + if (static_branch_unlikely(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu; @@ -5172,7 +5179,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb) rcu_read_lock(); #ifdef CONFIG_RPS - if (static_key_false(&rps_needed)) { + if (static_branch_unlikely(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu = get_rps_cpu(skb->dev, skb, &rflow); @@ -5220,7 +5227,7 @@ static void netif_receive_skb_list_internal(struct list_head *head) rcu_read_lock(); #ifdef CONFIG_RPS - if (static_key_false(&rps_needed)) { + if (static_branch_unlikely(&rps_needed)) { list_for_each_entry_safe(skb, next, head, list) { struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu = get_rps_cpu(skb->dev, skb, &rflow); diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 31380fd5a4e2..5163d900bb4f 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -366,7 +366,8 @@ EXPORT_SYMBOL(dev_load); * dev_ioctl - network device ioctl * @net: the applicable net namespace * @cmd: command to issue - * @arg: pointer to a struct ifreq in user space + * @ifr: pointer to a struct ifreq in user space + * 
@need_copyout: whether or not copy_to_user() should be called * * Issue ioctl functions to devices. This is normally called by the * user space syscall interfaces but can sometimes be useful for diff --git a/net/core/devlink.c b/net/core/devlink.c index da0a29f30885..37d01c39071e 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -19,6 +19,7 @@ #include <linux/device.h> #include <linux/list.h> #include <linux/netdevice.h> +#include <linux/spinlock.h> #include <rdma/ib_verbs.h> #include <net/netlink.h> #include <net/genetlink.h> @@ -543,12 +544,14 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, goto nla_put_failure; if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) goto nla_put_failure; + + spin_lock(&devlink_port->type_lock); if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type)) - goto nla_put_failure; + goto nla_put_failure_type_locked; if (devlink_port->desired_type != DEVLINK_PORT_TYPE_NOTSET && nla_put_u16(msg, DEVLINK_ATTR_PORT_DESIRED_TYPE, devlink_port->desired_type)) - goto nla_put_failure; + goto nla_put_failure_type_locked; if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) { struct net_device *netdev = devlink_port->type_dev; @@ -557,7 +560,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, netdev->ifindex) || nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME, netdev->name))) - goto nla_put_failure; + goto nla_put_failure_type_locked; } if (devlink_port->type == DEVLINK_PORT_TYPE_IB) { struct ib_device *ibdev = devlink_port->type_dev; @@ -565,14 +568,17 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, if (ibdev && nla_put_string(msg, DEVLINK_ATTR_PORT_IBDEV_NAME, ibdev->name)) - goto nla_put_failure; + goto nla_put_failure_type_locked; } + spin_unlock(&devlink_port->type_lock); if (devlink_nl_port_attrs_put(msg, devlink_port)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; +nla_put_failure_type_locked: + 
spin_unlock(&devlink_port->type_lock); nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; @@ -3640,7 +3646,6 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { u64 ret_offset, start_offset, end_offset = 0; - const struct genl_ops *ops = cb->data; struct devlink_region *region; struct nlattr *chunks_attr; const char *region_name; @@ -3657,7 +3662,8 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, return -ENOMEM; err = nlmsg_parse(cb->nlh, GENL_HDRLEN + devlink_nl_family.hdrsize, - attrs, DEVLINK_ATTR_MAX, ops->policy, cb->extack); + attrs, DEVLINK_ATTR_MAX, devlink_nl_family.policy, + cb->extack); if (err) goto out_free; @@ -4491,6 +4497,7 @@ devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) { mutex_lock(&reporter->devlink->lock); list_del(&reporter->list); + mutex_destroy(&reporter->dump_lock); mutex_unlock(&reporter->devlink->lock); if (reporter->dump_fmsg) devlink_fmsg_free(reporter->dump_fmsg); @@ -4928,7 +4935,6 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_GET, .doit = devlink_nl_cmd_get_doit, .dumpit = devlink_nl_cmd_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, @@ -4936,21 +4942,18 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_PORT_GET, .doit = devlink_nl_cmd_port_get_doit, .dumpit = devlink_nl_cmd_port_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_PORT_SET, .doit = devlink_nl_cmd_port_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, { .cmd = DEVLINK_CMD_PORT_SPLIT, .doit = devlink_nl_cmd_port_split_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NO_LOCK, @@ 
-4958,7 +4961,6 @@ static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_PORT_UNSPLIT, .doit = devlink_nl_cmd_port_unsplit_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NO_LOCK, @@ -4967,7 +4969,6 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_SB_GET, .doit = devlink_nl_cmd_sb_get_doit, .dumpit = devlink_nl_cmd_sb_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NEED_SB, /* can be retrieved by unprivileged users */ @@ -4976,7 +4977,6 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_SB_POOL_GET, .doit = devlink_nl_cmd_sb_pool_get_doit, .dumpit = devlink_nl_cmd_sb_pool_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NEED_SB, /* can be retrieved by unprivileged users */ @@ -4984,7 +4984,6 @@ static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_SB_POOL_SET, .doit = devlink_nl_cmd_sb_pool_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NEED_SB, @@ -4993,7 +4992,6 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_SB_PORT_POOL_GET, .doit = devlink_nl_cmd_sb_port_pool_get_doit, .dumpit = devlink_nl_cmd_sb_port_pool_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | DEVLINK_NL_FLAG_NEED_SB, /* can be retrieved by unprivileged users */ @@ -5001,7 +4999,6 @@ static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_SB_PORT_POOL_SET, .doit = devlink_nl_cmd_sb_port_pool_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | DEVLINK_NL_FLAG_NEED_SB, @@ -5010,7 +5007,6 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET, .doit = 
devlink_nl_cmd_sb_tc_pool_bind_get_doit, .dumpit = devlink_nl_cmd_sb_tc_pool_bind_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | DEVLINK_NL_FLAG_NEED_SB, /* can be retrieved by unprivileged users */ @@ -5018,7 +5014,6 @@ static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET, .doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT | DEVLINK_NL_FLAG_NEED_SB, @@ -5026,7 +5021,6 @@ static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT, .doit = devlink_nl_cmd_sb_occ_snapshot_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NEED_SB, @@ -5034,7 +5028,6 @@ static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR, .doit = devlink_nl_cmd_sb_occ_max_clear_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NEED_SB, @@ -5042,14 +5035,12 @@ static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_ESWITCH_GET, .doit = devlink_nl_cmd_eswitch_get_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_ESWITCH_SET, .doit = devlink_nl_cmd_eswitch_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NO_LOCK, @@ -5057,49 +5048,42 @@ static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_DPIPE_TABLE_GET, .doit = devlink_nl_cmd_dpipe_table_get, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET, .doit = devlink_nl_cmd_dpipe_entries_get, - .policy = devlink_nl_policy, .internal_flags = 
DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET, .doit = devlink_nl_cmd_dpipe_headers_get, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, .doit = devlink_nl_cmd_dpipe_table_counters_set, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_RESOURCE_SET, .doit = devlink_nl_cmd_resource_set, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_RESOURCE_DUMP, .doit = devlink_nl_cmd_resource_dump, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_RELOAD, .doit = devlink_nl_cmd_reload, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NO_LOCK, @@ -5108,14 +5092,12 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_PARAM_GET, .doit = devlink_nl_cmd_param_get_doit, .dumpit = devlink_nl_cmd_param_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_PARAM_SET, .doit = devlink_nl_cmd_param_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, @@ -5123,14 +5105,12 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_PORT_PARAM_GET, .doit = devlink_nl_cmd_port_param_get_doit, .dumpit = devlink_nl_cmd_port_param_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_PORT_PARAM_SET, .doit = devlink_nl_cmd_port_param_set_doit, - .policy = devlink_nl_policy, .flags 
= GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, @@ -5138,21 +5118,18 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_REGION_GET, .doit = devlink_nl_cmd_region_get_doit, .dumpit = devlink_nl_cmd_region_get_dumpit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_REGION_DEL, .doit = devlink_nl_cmd_region_del, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_REGION_READ, .dumpit = devlink_nl_cmd_region_read_dumpit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, @@ -5160,7 +5137,6 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_INFO_GET, .doit = devlink_nl_cmd_info_get_doit, .dumpit = devlink_nl_cmd_info_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, @@ -5168,35 +5144,30 @@ static const struct genl_ops devlink_nl_ops[] = { .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET, .doit = devlink_nl_cmd_health_reporter_get_doit, .dumpit = devlink_nl_cmd_health_reporter_get_dumpit, - .policy = devlink_nl_policy, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, /* can be retrieved by unprivileged users */ }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET, .doit = devlink_nl_cmd_health_reporter_set_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER, .doit = devlink_nl_cmd_health_reporter_recover_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, .doit = devlink_nl_cmd_health_reporter_diagnose_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { .cmd 
= DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, .doit = devlink_nl_cmd_health_reporter_dump_get_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NO_LOCK, @@ -5204,7 +5175,6 @@ static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, .doit = devlink_nl_cmd_health_reporter_dump_clear_doit, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | DEVLINK_NL_FLAG_NO_LOCK, @@ -5212,7 +5182,6 @@ static const struct genl_ops devlink_nl_ops[] = { { .cmd = DEVLINK_CMD_FLASH_UPDATE, .doit = devlink_nl_cmd_flash_update, - .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, @@ -5222,6 +5191,7 @@ static struct genl_family devlink_nl_family __ro_after_init = { .name = DEVLINK_GENL_NAME, .version = DEVLINK_GENL_VERSION, .maxattr = DEVLINK_ATTR_MAX, + .policy = devlink_nl_policy, .netnsok = true, .pre_doit = devlink_nl_pre_doit, .post_doit = devlink_nl_post_doit, @@ -5303,6 +5273,7 @@ EXPORT_SYMBOL_GPL(devlink_unregister); */ void devlink_free(struct devlink *devlink) { + mutex_destroy(&devlink->lock); WARN_ON(!list_empty(&devlink->reporter_list)); WARN_ON(!list_empty(&devlink->region_list)); WARN_ON(!list_empty(&devlink->param_list)); @@ -5340,6 +5311,7 @@ int devlink_port_register(struct devlink *devlink, devlink_port->devlink = devlink; devlink_port->index = port_index; devlink_port->registered = true; + spin_lock_init(&devlink_port->type_lock); list_add_tail(&devlink_port->list, &devlink->port_list); INIT_LIST_HEAD(&devlink_port->param_list); mutex_unlock(&devlink->lock); @@ -5368,8 +5340,12 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port, enum devlink_port_type type, void *type_dev) { + if (WARN_ON(!devlink_port->registered)) + return; + spin_lock(&devlink_port->type_lock); devlink_port->type = type; devlink_port->type_dev = type_dev; + 
spin_unlock(&devlink_port->type_lock); devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); } @@ -5382,8 +5358,7 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port, void devlink_port_type_eth_set(struct devlink_port *devlink_port, struct net_device *netdev) { - return __devlink_port_type_set(devlink_port, - DEVLINK_PORT_TYPE_ETH, netdev); + __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, netdev); } EXPORT_SYMBOL_GPL(devlink_port_type_eth_set); @@ -5396,8 +5371,7 @@ EXPORT_SYMBOL_GPL(devlink_port_type_eth_set); void devlink_port_type_ib_set(struct devlink_port *devlink_port, struct ib_device *ibdev) { - return __devlink_port_type_set(devlink_port, - DEVLINK_PORT_TYPE_IB, ibdev); + __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_IB, ibdev); } EXPORT_SYMBOL_GPL(devlink_port_type_ib_set); @@ -5408,8 +5382,7 @@ EXPORT_SYMBOL_GPL(devlink_port_type_ib_set); */ void devlink_port_type_clear(struct devlink_port *devlink_port) { - return __devlink_port_type_set(devlink_port, - DEVLINK_PORT_TYPE_NOTSET, NULL); + __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL); } EXPORT_SYMBOL_GPL(devlink_port_type_clear); @@ -5431,12 +5404,13 @@ void devlink_port_attrs_set(struct devlink_port *devlink_port, { struct devlink_port_attrs *attrs = &devlink_port->attrs; + if (WARN_ON(devlink_port->registered)) + return; attrs->set = true; attrs->flavour = flavour; attrs->port_number = port_number; attrs->split = split; attrs->split_subport_number = split_subport_number; - devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); } EXPORT_SYMBOL_GPL(devlink_port_attrs_set); @@ -6447,17 +6421,15 @@ void devlink_compat_running_version(struct net_device *dev, dev_hold(dev); rtnl_unlock(); - mutex_lock(&devlink_mutex); devlink = netdev_to_devlink(dev); if (!devlink || !devlink->ops->info_get) - goto unlock_list; + goto out; mutex_lock(&devlink->lock); __devlink_compat_running_version(devlink, buf, len); mutex_unlock(&devlink->lock); 
-unlock_list: - mutex_unlock(&devlink_mutex); +out: rtnl_lock(); dev_put(dev); } @@ -6465,22 +6437,22 @@ unlock_list: int devlink_compat_flash_update(struct net_device *dev, const char *file_name) { struct devlink *devlink; - int ret = -EOPNOTSUPP; + int ret; dev_hold(dev); rtnl_unlock(); - mutex_lock(&devlink_mutex); devlink = netdev_to_devlink(dev); - if (!devlink || !devlink->ops->flash_update) - goto unlock_list; + if (!devlink || !devlink->ops->flash_update) { + ret = -EOPNOTSUPP; + goto out; + } mutex_lock(&devlink->lock); ret = devlink->ops->flash_update(devlink, file_name, NULL, NULL); mutex_unlock(&devlink->lock); -unlock_list: - mutex_unlock(&devlink_mutex); +out: rtnl_lock(); dev_put(dev); diff --git a/net/core/dst.c b/net/core/dst.c index a263309df115..1f13d90cd0e4 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -26,23 +26,6 @@ #include <net/dst.h> #include <net/dst_metadata.h> -/* - * Theory of operations: - * 1) We use a list, protected by a spinlock, to add - * new entries from both BH and non-BH context. - * 2) In order to keep spinlock held for a small delay, - * we use a second list where are stored long lived - * entries, that are handled by the garbage collect thread - * fired by a workqueue. - * 3) This list is guarded by a mutex, - * so that the gc_task and dst_dev_event() can be synchronized. - */ - -/* - * We want to keep lock & list close together - * to dirty as few cache lines as possible in __dst_free(). - * As this is not a very strong hint, we dont force an alignment on SMP. 
- */ int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) { kfree_skb(skb); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index b1eb32419732..387d67eb75ab 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -136,6 +136,7 @@ static const char phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = { [ETHTOOL_ID_UNSPEC] = "Unspec", [ETHTOOL_PHY_DOWNSHIFT] = "phy-downshift", + [ETHTOOL_PHY_FAST_LINK_DOWN] = "phy-fast-link-down", }; static int ethtool_get_features(struct net_device *dev, void __user *useraddr) @@ -2432,6 +2433,7 @@ static int ethtool_phy_tunable_valid(const struct ethtool_tunable *tuna) { switch (tuna->id) { case ETHTOOL_PHY_DOWNSHIFT: + case ETHTOOL_PHY_FAST_LINK_DOWN: if (tuna->len != sizeof(u8) || tuna->type_id != ETHTOOL_TUNABLE_U8) return -EINVAL; diff --git a/net/core/filter.c b/net/core/filter.c index 647c63a7b25b..22eb2edf5573 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2963,42 +2963,113 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb) } } -static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff) +#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \ + BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) + +#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \ + BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \ + BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \ + BPF_F_ADJ_ROOM_ENCAP_L4_UDP) + +static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, + u64 flags) { - u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); + bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK; + u16 mac_len = 0, inner_net = 0, inner_trans = 0; + unsigned int gso_type = SKB_GSO_DODGY; int ret; - if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) - return -ENOTSUPP; + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) { + /* udp gso_size delineates datagrams, only allow if fixed */ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) || + !(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) + return -ENOTSUPP; + } - ret = 
skb_cow(skb, len_diff); + ret = skb_cow_head(skb, len_diff); if (unlikely(ret < 0)) return ret; + if (encap) { + if (skb->protocol != htons(ETH_P_IP) && + skb->protocol != htons(ETH_P_IPV6)) + return -ENOTSUPP; + + if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 && + flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) + return -EINVAL; + + if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE && + flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) + return -EINVAL; + + if (skb->encapsulation) + return -EALREADY; + + mac_len = skb->network_header - skb->mac_header; + inner_net = skb->network_header; + inner_trans = skb->transport_header; + } + ret = bpf_skb_net_hdr_push(skb, off, len_diff); if (unlikely(ret < 0)) return ret; + if (encap) { + /* inner mac == inner_net on l3 encap */ + skb->inner_mac_header = inner_net; + skb->inner_network_header = inner_net; + skb->inner_transport_header = inner_trans; + skb_set_inner_protocol(skb, skb->protocol); + + skb->encapsulation = 1; + skb_set_network_header(skb, mac_len); + + if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) + gso_type |= SKB_GSO_UDP_TUNNEL; + else if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE) + gso_type |= SKB_GSO_GRE; + else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) + gso_type |= SKB_GSO_IPXIP6; + else + gso_type |= SKB_GSO_IPXIP4; + + if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE || + flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) { + int nh_len = flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 ? + sizeof(struct ipv6hdr) : + sizeof(struct iphdr); + + skb_set_transport_header(skb, mac_len + nh_len); + } + } + if (skb_is_gso(skb)) { struct skb_shared_info *shinfo = skb_shinfo(skb); /* Due to header grow, MSS needs to be downgraded. */ - skb_decrease_gso_size(shinfo, len_diff); + if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) + skb_decrease_gso_size(shinfo, len_diff); + /* Header must be checked, and gso_segs recomputed. 
*/ - shinfo->gso_type |= SKB_GSO_DODGY; + shinfo->gso_type |= gso_type; shinfo->gso_segs = 0; } return 0; } -static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) +static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff, + u64 flags) { - u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; - if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) - return -ENOTSUPP; + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) { + /* udp gso_size delineates datagrams, only allow if fixed */ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) || + !(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) + return -ENOTSUPP; + } ret = skb_unclone(skb, GFP_ATOMIC); if (unlikely(ret < 0)) @@ -3012,7 +3083,9 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) struct skb_shared_info *shinfo = skb_shinfo(skb); /* Due to header shrink, MSS can be upgraded. */ - skb_increase_gso_size(shinfo, len_diff); + if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) + skb_increase_gso_size(shinfo, len_diff); + /* Header must be checked, and gso_segs recomputed. 
*/ shinfo->gso_type |= SKB_GSO_DODGY; shinfo->gso_segs = 0; @@ -3027,49 +3100,50 @@ static u32 __bpf_skb_max_len(const struct sk_buff *skb) SKB_MAX_ALLOC; } -static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff) +BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, + u32, mode, u64, flags) { - bool trans_same = skb->transport_header == skb->network_header; u32 len_cur, len_diff_abs = abs(len_diff); u32 len_min = bpf_skb_net_base_len(skb); u32 len_max = __bpf_skb_max_len(skb); __be16 proto = skb->protocol; bool shrink = len_diff < 0; + u32 off; int ret; + if (unlikely(flags & ~BPF_F_ADJ_ROOM_MASK)) + return -EINVAL; if (unlikely(len_diff_abs > 0xfffU)) return -EFAULT; if (unlikely(proto != htons(ETH_P_IP) && proto != htons(ETH_P_IPV6))) return -ENOTSUPP; + off = skb_mac_header_len(skb); + switch (mode) { + case BPF_ADJ_ROOM_NET: + off += bpf_skb_net_base_len(skb); + break; + case BPF_ADJ_ROOM_MAC: + break; + default: + return -ENOTSUPP; + } + len_cur = skb->len - skb_network_offset(skb); - if (skb_transport_header_was_set(skb) && !trans_same) - len_cur = skb_network_header_len(skb); if ((shrink && (len_diff_abs >= len_cur || len_cur - len_diff_abs < len_min)) || (!shrink && (skb->len + len_diff_abs > len_max && !skb_is_gso(skb)))) return -ENOTSUPP; - ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) : - bpf_skb_net_grow(skb, len_diff_abs); + ret = shrink ? 
bpf_skb_net_shrink(skb, off, len_diff_abs, flags) : + bpf_skb_net_grow(skb, off, len_diff_abs, flags); bpf_compute_data_pointers(skb); return ret; } -BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, - u32, mode, u64, flags) -{ - if (unlikely(flags)) - return -EINVAL; - if (likely(mode == BPF_ADJ_ROOM_NET)) - return bpf_skb_adjust_net(skb, len_diff); - - return -ENOTSUPP; -} - static const struct bpf_func_proto bpf_skb_adjust_room_proto = { .func = bpf_skb_adjust_room, .gpl_only = false, @@ -5156,15 +5230,15 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, return sk; } -/* bpf_sk_lookup performs the core lookup for different types of sockets, +/* bpf_skc_lookup performs the core lookup for different types of sockets, * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE. * Returns the socket as an 'unsigned long' to simplify the casting in the * callers to satisfy BPF_CALL declarations. */ -static unsigned long -__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, - struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, - u64 flags) +static struct sock * +__bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, + struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, + u64 flags) { struct sock *sk = NULL; u8 family = AF_UNSPEC; @@ -5192,15 +5266,27 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, put_net(net); } +out: + return sk; +} + +static struct sock * +__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, + struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, + u64 flags) +{ + struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net, + ifindex, proto, netns_id, flags); + if (sk) sk = sk_to_full_sk(sk); -out: - return (unsigned long) sk; + + return sk; } -static unsigned long -bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, - u8 proto, u64 
netns_id, u64 flags) +static struct sock * +bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, + u8 proto, u64 netns_id, u64 flags) { struct net *caller_net; int ifindex; @@ -5213,14 +5299,47 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, ifindex = 0; } - return __bpf_sk_lookup(skb, tuple, len, caller_net, ifindex, - proto, netns_id, flags); + return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto, + netns_id, flags); +} + +static struct sock * +bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, + u8 proto, u64 netns_id, u64 flags) +{ + struct sock *sk = bpf_skc_lookup(skb, tuple, len, proto, netns_id, + flags); + + if (sk) + sk = sk_to_full_sk(sk); + + return sk; +} + +BPF_CALL_5(bpf_skc_lookup_tcp, struct sk_buff *, skb, + struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) +{ + return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP, + netns_id, flags); } +static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = { + .func = bpf_skc_lookup_tcp, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { - return bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP, netns_id, flags); + return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP, + netns_id, flags); } static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = { @@ -5238,7 +5357,8 @@ static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = { BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { - return bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP, netns_id, flags); + return (unsigned long)bpf_sk_lookup(skb, 
tuple, len, IPPROTO_UDP, + netns_id, flags); } static const struct bpf_func_proto bpf_sk_lookup_udp_proto = { @@ -5273,8 +5393,9 @@ BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx, struct net *caller_net = dev_net(ctx->rxq->dev); int ifindex = ctx->rxq->dev->ifindex; - return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex, - IPPROTO_UDP, netns_id, flags); + return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net, + ifindex, IPPROTO_UDP, netns_id, + flags); } static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = { @@ -5289,14 +5410,38 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = { .arg5_type = ARG_ANYTHING, }; +BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx, + struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) +{ + struct net *caller_net = dev_net(ctx->rxq->dev); + int ifindex = ctx->rxq->dev->ifindex; + + return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net, + ifindex, IPPROTO_TCP, netns_id, + flags); +} + +static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = { + .func = bpf_xdp_skc_lookup_tcp, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) { struct net *caller_net = dev_net(ctx->rxq->dev); int ifindex = ctx->rxq->dev->ifindex; - return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex, - IPPROTO_TCP, netns_id, flags); + return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net, + ifindex, IPPROTO_TCP, netns_id, + flags); } static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = { @@ -5311,11 +5456,31 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = { .arg5_type = ARG_ANYTHING, }; 
+BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx, + struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) +{ + return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, + sock_net(ctx->sk), 0, + IPPROTO_TCP, netns_id, flags); +} + +static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = { + .func = bpf_sock_addr_skc_lookup_tcp, + .gpl_only = false, + .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { - return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0, - IPPROTO_TCP, netns_id, flags); + return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, + sock_net(ctx->sk), 0, IPPROTO_TCP, + netns_id, flags); } static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = { @@ -5332,8 +5497,9 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = { BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { - return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0, - IPPROTO_UDP, netns_id, flags); + return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, + sock_net(ctx->sk), 0, IPPROTO_UDP, + netns_id, flags); } static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = { @@ -5461,6 +5627,74 @@ static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, }; + +BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len, + struct tcphdr *, th, u32, th_len) +{ +#ifdef CONFIG_SYN_COOKIES + u32 cookie; + int ret; + + if (unlikely(th_len < sizeof(*th))) + return -EINVAL; + + /* sk_listener() allows TCP_NEW_SYN_RECV, 
which makes no sense here. */ + if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) + return -EINVAL; + + if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) + return -EINVAL; + + if (!th->ack || th->rst || th->syn) + return -ENOENT; + + if (tcp_synq_no_recent_overflow(sk)) + return -ENOENT; + + cookie = ntohl(th->ack_seq) - 1; + + switch (sk->sk_family) { + case AF_INET: + if (unlikely(iph_len < sizeof(struct iphdr))) + return -EINVAL; + + ret = __cookie_v4_check((struct iphdr *)iph, th, cookie); + break; + +#if IS_BUILTIN(CONFIG_IPV6) + case AF_INET6: + if (unlikely(iph_len < sizeof(struct ipv6hdr))) + return -EINVAL; + + ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie); + break; +#endif /* CONFIG_IPV6 */ + + default: + return -EPROTONOSUPPORT; + } + + if (ret > 0) + return 0; + + return -ENOENT; +#else + return -ENOTSUPP; +#endif +} + +static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = { + .func = bpf_tcp_check_syncookie, + .gpl_only = true, + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + #endif /* CONFIG_INET */ bool bpf_helper_changes_pkt_data(void *func) @@ -5586,6 +5820,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sock_addr_sk_lookup_udp_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; + case BPF_FUNC_skc_lookup_tcp: + return &bpf_sock_addr_skc_lookup_tcp_proto; #endif /* CONFIG_INET */ default: return bpf_base_func_proto(func_id); @@ -5719,6 +5955,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_tcp_sock_proto; case BPF_FUNC_get_listener_sock: return &bpf_get_listener_sock_proto; + case BPF_FUNC_skc_lookup_tcp: + return &bpf_skc_lookup_tcp_proto; + case BPF_FUNC_tcp_check_syncookie: + return &bpf_tcp_check_syncookie_proto; + case 
BPF_FUNC_skb_ecn_set_ce: + return &bpf_skb_ecn_set_ce_proto; #endif default: return bpf_base_func_proto(func_id); @@ -5754,6 +5996,10 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_xdp_sk_lookup_tcp_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; + case BPF_FUNC_skc_lookup_tcp: + return &bpf_xdp_skc_lookup_tcp_proto; + case BPF_FUNC_tcp_check_syncookie: + return &bpf_tcp_check_syncookie_proto; #endif default: return bpf_base_func_proto(func_id); @@ -5846,6 +6092,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_lookup_udp_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; + case BPF_FUNC_skc_lookup_tcp: + return &bpf_skc_lookup_tcp_proto; #endif default: return bpf_base_func_proto(func_id); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index bb1a54747d64..b4d581134ef2 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -732,6 +732,8 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog, * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb) * @hlen: packet header length, if @data is NULL use skb_headlen(skb) + * @flags: flags that control the dissection process, e.g. + * FLOW_DISSECTOR_F_STOP_AT_L3. 
* * The function will try to retrieve individual keys into target specified * by flow_dissector from either the skbuff or a raw buffer specified by the diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f8f94303a1f5..c14f0dc0157c 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -754,9 +754,9 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue, rcu_assign_pointer(queue->rps_map, map); if (map) - static_key_slow_inc(&rps_needed); + static_branch_inc(&rps_needed); if (old_map) - static_key_slow_dec(&rps_needed); + static_branch_dec(&rps_needed); mutex_unlock(&rps_map_mutex); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 361aabffb8c0..e365e8fb1c40 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -323,7 +323,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { struct netdev_queue *txq; - txq = netdev_pick_tx(dev, skb, NULL); + txq = netdev_core_pick_tx(dev, skb, NULL); /* try until next clock tick */ for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a51cab95ba64..f9b964fd4e4d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3569,7 +3569,7 @@ errout: rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } -/** +/* * ndo_dflt_fdb_add - default netdevice operation to add an FDB entry */ int ndo_dflt_fdb_add(struct ndmsg *ndm, @@ -3708,7 +3708,7 @@ out: return err; } -/** +/* * ndo_dflt_fdb_del - default netdevice operation to delete an FDB entry */ int ndo_dflt_fdb_del(struct ndmsg *ndm, @@ -3847,8 +3847,11 @@ skip: /** * ndo_dflt_fdb_dump - default netdevice operation to dump an FDB table. 
- * @nlh: netlink message header + * @skb: socket buffer to store message in + * @cb: netlink callback * @dev: netdevice + * @filter_dev: ignored + * @idx: the number of FDB table entries dumped is added to *@idx * * Default netdevice operation to dump the existing unicast address list. * Returns number of addresses from list put in skb. diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2415d9cb9b89..4782f9354dd1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -77,6 +77,8 @@ #include <linux/capability.h> #include <linux/user_namespace.h> +#include "datagram.h" + struct kmem_cache *skbuff_head_cache __ro_after_init; static struct kmem_cache *skbuff_fclone_cache __ro_after_init; #ifdef CONFIG_SKB_EXTENSIONS @@ -1105,9 +1107,6 @@ void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref) } EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort); -extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, - struct iov_iter *from, size_t length); - int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len) { return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len); diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index d8fe3e549373..dc4aefdf2a08 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -144,6 +144,8 @@ static void reuseport_free_rcu(struct rcu_head *head) * reuseport_add_sock - Add a socket to the reuseport group of another. * @sk: New socket to add to the group. * @sk2: Socket belonging to the existing reuseport group. + * @bind_inany: Whether or not the group is bound to a local INANY address. + * * May return ENOMEM and not add socket to group under memory pressure. 
*/ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 84bf2861f45f..1a2685694abd 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -95,12 +95,12 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write, if (sock_table != orig_sock_table) { rcu_assign_pointer(rps_sock_flow_table, sock_table); if (sock_table) { - static_key_slow_inc(&rps_needed); - static_key_slow_inc(&rfs_needed); + static_branch_inc(&rps_needed); + static_branch_inc(&rfs_needed); } if (orig_sock_table) { - static_key_slow_dec(&rps_needed); - static_key_slow_dec(&rfs_needed); + static_branch_dec(&rps_needed); + static_branch_dec(&rfs_needed); synchronize_rcu(); vfree(orig_sock_table); } diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index fab49132345f..b695170795c2 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -10,6 +10,7 @@ config NET_DSA depends on BRIDGE || BRIDGE=n select NET_SWITCHDEV select PHYLINK + select NET_DEVLINK ---help--- Say Y if you want to enable support for the hardware switches supported by the Distributed Switch Architecture. 
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index c00ee464afc7..fe0a6197db9c 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -18,6 +18,7 @@ #include <linux/rtnetlink.h> #include <linux/of.h> #include <linux/of_net.h> +#include <net/devlink.h> #include "dsa_priv.h" @@ -257,14 +258,36 @@ static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst) static int dsa_port_setup(struct dsa_port *dp) { + enum devlink_port_flavour flavour; struct dsa_switch *ds = dp->ds; - int err = 0; + int err; + + if (dp->type == DSA_PORT_TYPE_UNUSED) + return 0; memset(&dp->devlink_port, 0, sizeof(dp->devlink_port)); - if (dp->type != DSA_PORT_TYPE_UNUSED) - err = devlink_port_register(ds->devlink, &dp->devlink_port, - dp->index); + switch (dp->type) { + case DSA_PORT_TYPE_CPU: + flavour = DEVLINK_PORT_FLAVOUR_CPU; + break; + case DSA_PORT_TYPE_DSA: + flavour = DEVLINK_PORT_FLAVOUR_DSA; + break; + case DSA_PORT_TYPE_USER: /* fall-through */ + default: + flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + break; + } + + /* dp->index is used now as port_number. However + * CPU and DSA ports should have separate numbering + * independent from front panel port numbers. + */ + devlink_port_attrs_set(&dp->devlink_port, flavour, + dp->index, false, 0); + err = devlink_port_register(ds->devlink, &dp->devlink_port, + dp->index); if (err) return err; @@ -272,13 +295,6 @@ static int dsa_port_setup(struct dsa_port *dp) case DSA_PORT_TYPE_UNUSED: break; case DSA_PORT_TYPE_CPU: - /* dp->index is used now as port_number. However - * CPU ports should have separate numbering - * independent from front panel port numbers. - */ - devlink_port_attrs_set(&dp->devlink_port, - DEVLINK_PORT_FLAVOUR_CPU, - dp->index, false, 0); err = dsa_port_link_register_of(dp); if (err) { dev_err(ds->dev, "failed to setup link for port %d.%d\n", @@ -287,13 +303,6 @@ static int dsa_port_setup(struct dsa_port *dp) } break; case DSA_PORT_TYPE_DSA: - /* dp->index is used now as port_number. 
However - * DSA ports should have separate numbering - * independent from front panel port numbers. - */ - devlink_port_attrs_set(&dp->devlink_port, - DEVLINK_PORT_FLAVOUR_DSA, - dp->index, false, 0); err = dsa_port_link_register_of(dp); if (err) { dev_err(ds->dev, "failed to setup link for port %d.%d\n", @@ -302,9 +311,6 @@ static int dsa_port_setup(struct dsa_port *dp) } break; case DSA_PORT_TYPE_USER: - devlink_port_attrs_set(&dp->devlink_port, - DEVLINK_PORT_FLAVOUR_PHYSICAL, - dp->index, false, 0); err = dsa_slave_create(dp); if (err) dev_err(ds->dev, "failed to create slave for port %d.%d\n", diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 093eef6f2599..6a8418dfa64f 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1283,9 +1283,9 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) phy_flags = ds->ops->get_phy_flags(ds, dp->index); ret = phylink_of_phy_connect(dp->pl, port_dn, phy_flags); - if (ret == -ENODEV) { - /* We could not connect to a designated PHY or SFP, so use the - * switch internal MDIO bus instead + if (ret == -ENODEV && ds->slave_mii_bus) { + /* We could not connect to a designated PHY or SFP, so try to + * use the switch internal MDIO bus instead */ ret = dsa_slave_phy_connect(slave_dev, dp->index); if (ret) { @@ -1297,7 +1297,7 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) } } - return 0; + return ret; } static struct lock_class_key dsa_slave_netdev_xmit_lock_key; diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index b9cce0fd5696..bcc04d3e724f 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -449,14 +449,12 @@ static const struct genl_ops hsr_ops[] = { { .cmd = HSR_C_GET_NODE_STATUS, .flags = 0, - .policy = hsr_genl_policy, .doit = hsr_get_node_status, .dumpit = NULL, }, { .cmd = HSR_C_GET_NODE_LIST, .flags = 0, - .policy = hsr_genl_policy, .doit = hsr_get_node_list, .dumpit = NULL, }, @@ -467,6 +465,7 @@ static struct genl_family hsr_genl_family __ro_after_init = { .name = 
"HSR", .version = 1, .maxattr = HSR_A_MAX, + .policy = hsr_genl_policy, .module = THIS_MODULE, .ops = hsr_ops, .n_ops = ARRAY_SIZE(hsr_ops), diff --git a/net/ieee802154/ieee802154.h b/net/ieee802154/ieee802154.h index a5d7515b7f62..bc147bc8e36a 100644 --- a/net/ieee802154/ieee802154.h +++ b/net/ieee802154/ieee802154.h @@ -20,7 +20,6 @@ void ieee802154_nl_exit(void); #define IEEE802154_OP(_cmd, _func) \ { \ .cmd = _cmd, \ - .policy = ieee802154_policy, \ .doit = _func, \ .dumpit = NULL, \ .flags = GENL_ADMIN_PERM, \ @@ -29,7 +28,6 @@ void ieee802154_nl_exit(void); #define IEEE802154_DUMP(_cmd, _func, _dump) \ { \ .cmd = _cmd, \ - .policy = ieee802154_policy, \ .doit = _func, \ .dumpit = _dump, \ } diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 96636e3b7aa9..098d67439b6d 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -136,6 +136,7 @@ struct genl_family nl802154_family __ro_after_init = { .name = IEEE802154_NL_NAME, .version = 1, .maxattr = IEEE802154_ATTR_MAX, + .policy = ieee802154_policy, .module = THIS_MODULE, .ops = ieee802154_ops, .n_ops = ARRAY_SIZE(ieee802154_ops), diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 99f6c254ea77..308370cfd668 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -2220,7 +2220,6 @@ static const struct genl_ops nl802154_ops[] = { .doit = nl802154_get_wpan_phy, .dumpit = nl802154_dump_wpan_phy, .done = nl802154_dump_wpan_phy_done, - .policy = nl802154_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, @@ -2229,7 +2228,6 @@ static const struct genl_ops nl802154_ops[] = { .cmd = NL802154_CMD_GET_INTERFACE, .doit = nl802154_get_interface, .dumpit = nl802154_dump_interface, - .policy = nl802154_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL802154_FLAG_NEED_WPAN_DEV | NL802154_FLAG_NEED_RTNL, @@ -2237,7 +2235,6 @@ static const struct genl_ops 
nl802154_ops[] = { { .cmd = NL802154_CMD_NEW_INTERFACE, .doit = nl802154_new_interface, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, @@ -2245,7 +2242,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_DEL_INTERFACE, .doit = nl802154_del_interface, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_DEV | NL802154_FLAG_NEED_RTNL, @@ -2253,7 +2249,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_CHANNEL, .doit = nl802154_set_channel, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, @@ -2261,7 +2256,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_CCA_MODE, .doit = nl802154_set_cca_mode, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, @@ -2269,7 +2263,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_CCA_ED_LEVEL, .doit = nl802154_set_cca_ed_level, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, @@ -2277,7 +2270,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_TX_POWER, .doit = nl802154_set_tx_power, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, @@ -2285,7 +2277,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_WPAN_PHY_NETNS, .doit = nl802154_wpan_phy_netns, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, @@ -2293,7 +2284,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_PAN_ID, .doit = nl802154_set_pan_id, - .policy = nl802154_policy, .flags = 
GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2301,7 +2291,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_SHORT_ADDR, .doit = nl802154_set_short_addr, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2309,7 +2298,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_BACKOFF_EXPONENT, .doit = nl802154_set_backoff_exponent, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2317,7 +2305,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_MAX_CSMA_BACKOFFS, .doit = nl802154_set_max_csma_backoffs, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2325,7 +2312,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_MAX_FRAME_RETRIES, .doit = nl802154_set_max_frame_retries, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2333,7 +2319,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_LBT_MODE, .doit = nl802154_set_lbt_mode, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2341,7 +2326,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_ACKREQ_DEFAULT, .doit = nl802154_set_ackreq_default, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2350,7 +2334,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_SET_SEC_PARAMS, .doit = nl802154_set_llsec_params, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | 
NL802154_FLAG_NEED_RTNL, @@ -2359,7 +2342,6 @@ static const struct genl_ops nl802154_ops[] = { .cmd = NL802154_CMD_GET_SEC_KEY, /* TODO .doit by matching key id? */ .dumpit = nl802154_dump_llsec_key, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2367,7 +2349,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_NEW_SEC_KEY, .doit = nl802154_add_llsec_key, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2375,7 +2356,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_DEL_SEC_KEY, .doit = nl802154_del_llsec_key, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2385,7 +2365,6 @@ static const struct genl_ops nl802154_ops[] = { .cmd = NL802154_CMD_GET_SEC_DEV, /* TODO .doit by matching extended_addr? */ .dumpit = nl802154_dump_llsec_dev, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2393,7 +2372,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_NEW_SEC_DEV, .doit = nl802154_add_llsec_dev, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2401,7 +2379,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_DEL_SEC_DEV, .doit = nl802154_del_llsec_dev, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2411,7 +2388,6 @@ static const struct genl_ops nl802154_ops[] = { .cmd = NL802154_CMD_GET_SEC_DEVKEY, /* TODO doit by matching ??? 
*/ .dumpit = nl802154_dump_llsec_devkey, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2419,7 +2395,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_NEW_SEC_DEVKEY, .doit = nl802154_add_llsec_devkey, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2427,7 +2402,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_DEL_SEC_DEVKEY, .doit = nl802154_del_llsec_devkey, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2436,7 +2410,6 @@ static const struct genl_ops nl802154_ops[] = { .cmd = NL802154_CMD_GET_SEC_LEVEL, /* TODO .doit by matching frame_type? */ .dumpit = nl802154_dump_llsec_seclevel, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2444,7 +2417,6 @@ static const struct genl_ops nl802154_ops[] = { { .cmd = NL802154_CMD_NEW_SEC_LEVEL, .doit = nl802154_add_llsec_seclevel, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2453,7 +2425,6 @@ static const struct genl_ops nl802154_ops[] = { .cmd = NL802154_CMD_DEL_SEC_LEVEL, /* TODO match frame_type only? 
*/ .doit = nl802154_del_llsec_seclevel, - .policy = nl802154_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, @@ -2466,6 +2437,7 @@ static struct genl_family nl802154_fam __ro_after_init = { .hdrsize = 0, /* no private header */ .version = 1, /* no particular meaning now */ .maxattr = NL802154_ATTR_MAX, + .policy = nl802154_policy, .netnsok = true, .pre_doit = nl802154_pre_doit, .post_doit = nl802154_post_doit, diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index eab3ebde981e..7f3a984ad618 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -136,6 +136,10 @@ void inet_sock_destruct(struct sock *sk) struct inet_sock *inet = inet_sk(sk); __skb_queue_purge(&sk->sk_receive_queue); + if (sk->sk_rx_skb_cache) { + __kfree_skb(sk->sk_rx_skb_cache); + sk->sk_rx_skb_cache = NULL; + } __skb_queue_purge(&sk->sk_error_queue); sk_mem_reclaim(sk); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index a573e37e0615..1704f432de1f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -183,14 +183,16 @@ struct trie { }; static struct key_vector *resize(struct trie *t, struct key_vector *tn); -static size_t tnode_free_size; +static unsigned int tnode_free_size; /* - * synchronize_rcu after call_rcu for that many pages; it should be especially - * useful before resizing the root node with PREEMPT_NONE configs; the value was - * obtained experimentally, aiming to avoid visible slowdown. + * synchronize_rcu after call_rcu for outstanding dirty memory; it should be + * especially useful before resizing the root node with PREEMPT_NONE configs; + * the value was obtained experimentally, aiming to avoid visible slowdown. 
*/ -static const int sync_pages = 128; +unsigned int sysctl_fib_sync_mem = 512 * 1024; +unsigned int sysctl_fib_sync_mem_min = 64 * 1024; +unsigned int sysctl_fib_sync_mem_max = 64 * 1024 * 1024; static struct kmem_cache *fn_alias_kmem __ro_after_init; static struct kmem_cache *trie_leaf_kmem __ro_after_init; @@ -504,7 +506,7 @@ static void tnode_free(struct key_vector *tn) tn = container_of(head, struct tnode, rcu)->kv; } - if (tnode_free_size >= PAGE_SIZE * sync_pages) { + if (tnode_free_size >= sysctl_fib_sync_mem) { tnode_free_size = 0; synchronize_rcu(); } diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 79e98e21cdd7..100e63f57ea6 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -499,15 +499,45 @@ out_unlock: return err; } -static int fou_add_to_port_list(struct net *net, struct fou *fou) +static bool fou_cfg_cmp(struct fou *fou, struct fou_cfg *cfg) +{ + struct sock *sk = fou->sock->sk; + struct udp_port_cfg *udp_cfg = &cfg->udp_config; + + if (fou->family != udp_cfg->family || + fou->port != udp_cfg->local_udp_port || + sk->sk_dport != udp_cfg->peer_udp_port || + sk->sk_bound_dev_if != udp_cfg->bind_ifindex) + return false; + + if (fou->family == AF_INET) { + if (sk->sk_rcv_saddr != udp_cfg->local_ip.s_addr || + sk->sk_daddr != udp_cfg->peer_ip.s_addr) + return false; + else + return true; +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (ipv6_addr_cmp(&sk->sk_v6_rcv_saddr, &udp_cfg->local_ip6) || + ipv6_addr_cmp(&sk->sk_v6_daddr, &udp_cfg->peer_ip6)) + return false; + else + return true; +#endif + } + + return false; +} + +static int fou_add_to_port_list(struct net *net, struct fou *fou, + struct fou_cfg *cfg) { struct fou_net *fn = net_generic(net, fou_net_id); struct fou *fout; mutex_lock(&fn->fou_lock); list_for_each_entry(fout, &fn->fou_list, list) { - if (fou->port == fout->port && - fou->family == fout->family) { + if (fou_cfg_cmp(fout, cfg)) { mutex_unlock(&fn->fou_lock); return -EALREADY; } @@ -585,7 +615,7 @@ static int fou_create(struct net *net, 
struct fou_cfg *cfg, sk->sk_allocation = GFP_ATOMIC; - err = fou_add_to_port_list(net, fou); + err = fou_add_to_port_list(net, fou, cfg); if (err) goto error; @@ -605,14 +635,12 @@ error: static int fou_destroy(struct net *net, struct fou_cfg *cfg) { struct fou_net *fn = net_generic(net, fou_net_id); - __be16 port = cfg->udp_config.local_udp_port; - u8 family = cfg->udp_config.family; int err = -EINVAL; struct fou *fou; mutex_lock(&fn->fou_lock); list_for_each_entry(fou, &fn->fou_list, list) { - if (fou->port == port && fou->family == family) { + if (fou_cfg_cmp(fou, cfg)) { fou_release(fou); err = 0; break; @@ -626,16 +654,27 @@ static int fou_destroy(struct net *net, struct fou_cfg *cfg) static struct genl_family fou_nl_family; static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = { - [FOU_ATTR_PORT] = { .type = NLA_U16, }, - [FOU_ATTR_AF] = { .type = NLA_U8, }, - [FOU_ATTR_IPPROTO] = { .type = NLA_U8, }, - [FOU_ATTR_TYPE] = { .type = NLA_U8, }, - [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, }, + [FOU_ATTR_PORT] = { .type = NLA_U16, }, + [FOU_ATTR_AF] = { .type = NLA_U8, }, + [FOU_ATTR_IPPROTO] = { .type = NLA_U8, }, + [FOU_ATTR_TYPE] = { .type = NLA_U8, }, + [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, }, + [FOU_ATTR_LOCAL_V4] = { .type = NLA_U32, }, + [FOU_ATTR_PEER_V4] = { .type = NLA_U32, }, + [FOU_ATTR_LOCAL_V6] = { .type = sizeof(struct in6_addr), }, + [FOU_ATTR_PEER_V6] = { .type = sizeof(struct in6_addr), }, + [FOU_ATTR_PEER_PORT] = { .type = NLA_U16, }, + [FOU_ATTR_IFINDEX] = { .type = NLA_S32, }, }; static int parse_nl_config(struct genl_info *info, struct fou_cfg *cfg) { + bool has_local = false, has_peer = false; + struct nlattr *attr; + int ifindex; + __be16 port; + memset(cfg, 0, sizeof(*cfg)); cfg->udp_config.family = AF_INET; @@ -657,8 +696,7 @@ static int parse_nl_config(struct genl_info *info, } if (info->attrs[FOU_ATTR_PORT]) { - __be16 port = nla_get_be16(info->attrs[FOU_ATTR_PORT]); - + port = 
nla_get_be16(info->attrs[FOU_ATTR_PORT]); cfg->udp_config.local_udp_port = port; } @@ -671,6 +709,52 @@ static int parse_nl_config(struct genl_info *info, if (info->attrs[FOU_ATTR_REMCSUM_NOPARTIAL]) cfg->flags |= FOU_F_REMCSUM_NOPARTIAL; + if (cfg->udp_config.family == AF_INET) { + if (info->attrs[FOU_ATTR_LOCAL_V4]) { + attr = info->attrs[FOU_ATTR_LOCAL_V4]; + cfg->udp_config.local_ip.s_addr = nla_get_in_addr(attr); + has_local = true; + } + + if (info->attrs[FOU_ATTR_PEER_V4]) { + attr = info->attrs[FOU_ATTR_PEER_V4]; + cfg->udp_config.peer_ip.s_addr = nla_get_in_addr(attr); + has_peer = true; + } +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (info->attrs[FOU_ATTR_LOCAL_V6]) { + attr = info->attrs[FOU_ATTR_LOCAL_V6]; + cfg->udp_config.local_ip6 = nla_get_in6_addr(attr); + has_local = true; + } + + if (info->attrs[FOU_ATTR_PEER_V6]) { + attr = info->attrs[FOU_ATTR_PEER_V6]; + cfg->udp_config.peer_ip6 = nla_get_in6_addr(attr); + has_peer = true; + } +#endif + } + + if (has_peer) { + if (info->attrs[FOU_ATTR_PEER_PORT]) { + port = nla_get_be16(info->attrs[FOU_ATTR_PEER_PORT]); + cfg->udp_config.peer_udp_port = port; + } else { + return -EINVAL; + } + } + + if (info->attrs[FOU_ATTR_IFINDEX]) { + if (!has_local) + return -EINVAL; + + ifindex = nla_get_s32(info->attrs[FOU_ATTR_IFINDEX]); + + cfg->udp_config.bind_ifindex = ifindex; + } + return 0; } @@ -702,15 +786,37 @@ static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info) static int fou_fill_info(struct fou *fou, struct sk_buff *msg) { + struct sock *sk = fou->sock->sk; + if (nla_put_u8(msg, FOU_ATTR_AF, fou->sock->sk->sk_family) || nla_put_be16(msg, FOU_ATTR_PORT, fou->port) || + nla_put_be16(msg, FOU_ATTR_PEER_PORT, sk->sk_dport) || nla_put_u8(msg, FOU_ATTR_IPPROTO, fou->protocol) || - nla_put_u8(msg, FOU_ATTR_TYPE, fou->type)) + nla_put_u8(msg, FOU_ATTR_TYPE, fou->type) || + nla_put_s32(msg, FOU_ATTR_IFINDEX, sk->sk_bound_dev_if)) return -1; if (fou->flags & FOU_F_REMCSUM_NOPARTIAL) if 
(nla_put_flag(msg, FOU_ATTR_REMCSUM_NOPARTIAL)) return -1; + + if (fou->sock->sk->sk_family == AF_INET) { + if (nla_put_in_addr(msg, FOU_ATTR_LOCAL_V4, sk->sk_rcv_saddr)) + return -1; + + if (nla_put_in_addr(msg, FOU_ATTR_PEER_V4, sk->sk_daddr)) + return -1; +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (nla_put_in6_addr(msg, FOU_ATTR_LOCAL_V6, + &sk->sk_v6_rcv_saddr)) + return -1; + + if (nla_put_in6_addr(msg, FOU_ATTR_PEER_V6, &sk->sk_v6_daddr)) + return -1; +#endif + } + return 0; } @@ -763,7 +869,7 @@ static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info) ret = -ESRCH; mutex_lock(&fn->fou_lock); list_for_each_entry(fout, &fn->fou_list, list) { - if (port == fout->port && family == fout->family) { + if (fou_cfg_cmp(fout, &cfg)) { ret = fou_dump_info(fout, info->snd_portid, info->snd_seq, 0, msg, info->genlhdr->cmd); @@ -808,20 +914,17 @@ static const struct genl_ops fou_nl_ops[] = { { .cmd = FOU_CMD_ADD, .doit = fou_nl_cmd_add_port, - .policy = fou_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = FOU_CMD_DEL, .doit = fou_nl_cmd_rm_port, - .policy = fou_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = FOU_CMD_GET, .doit = fou_nl_cmd_get_port, .dumpit = fou_nl_dump, - .policy = fou_nl_policy, }, }; @@ -830,6 +933,7 @@ static struct genl_family fou_nl_family __ro_after_init = { .name = FOU_GENL_NAME, .version = FOU_GENL_VERSION, .maxattr = FOU_ATTR_MAX, + .policy = fou_nl_policy, .netnsok = true, .module = THIS_MODULE, .ops = fou_nl_ops, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a5da63e5faa2..f2688fce39e1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -500,15 +500,17 @@ EXPORT_SYMBOL(ip_idents_reserve); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) { - static u32 ip_idents_hashrnd __read_mostly; u32 hash, id; - net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd)); + /* Note the following code is not safe, but this is okay. 
*/ + if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key))) + get_random_bytes(&net->ipv4.ip_id_key, + sizeof(net->ipv4.ip_id_key)); - hash = jhash_3words((__force u32)iph->daddr, + hash = siphash_3u32((__force u32)iph->daddr, (__force u32)iph->saddr, - iph->protocol ^ net_hash_mix(net), - ip_idents_hashrnd); + iph->protocol, + &net->ipv4.ip_id_key); id = ip_idents_reserve(hash, segs); iph->id = htons(id); } @@ -1176,7 +1178,7 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) * * When a PMTU/redirect information update invalidates a route, * this is indicated by setting obsolete to DST_OBSOLETE_KILL or - * DST_OBSOLETE_DEAD by dst_free(). + * DST_OBSOLETE_DEAD. */ if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt)) return NULL; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index e531344611a0..008545f63667 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -216,16 +216,15 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, refcount_set(&req->rsk_refcnt, 1); tcp_sk(child)->tsoffset = tsoff; sock_rps_save_rxhash(child, skb); - if (!inet_csk_reqsk_queue_add(sk, req, child)) { - bh_unlock_sock(child); - sock_put(child); - child = NULL; - reqsk_put(req); - } - } else { - reqsk_free(req); + if (inet_csk_reqsk_queue_add(sk, req, child)) + return child; + + bh_unlock_sock(child); + sock_put(child); } - return child; + __reqsk_free(req); + + return NULL; } EXPORT_SYMBOL(tcp_get_cookie_sock); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ba0fc4b18465..2316c08e9591 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -549,6 +549,15 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, + { + .procname = "fib_sync_mem", + .data = &sysctl_fib_sync_mem, + .maxlen = sizeof(sysctl_fib_sync_mem), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = &sysctl_fib_sync_mem_min, + 
.extra2 = &sysctl_fib_sync_mem_max, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6baa6dc1b13b..82bd707c0347 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -865,6 +865,16 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, { struct sk_buff *skb; + if (likely(!size)) { + skb = sk->sk_tx_skb_cache; + if (skb && !skb_cloned(skb)) { + skb->truesize -= skb->data_len; + sk->sk_tx_skb_cache = NULL; + pskb_trim(skb, 0); + INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); + return skb; + } + } /* The TCP header must be at least 32-bit aligned. */ size = ALIGN(size, 4); @@ -1098,30 +1108,6 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, } EXPORT_SYMBOL(tcp_sendpage); -/* Do not bother using a page frag for very small frames. - * But use this heuristic only for the first skb in write queue. - * - * Having no payload in skb->head allows better SACK shifting - * in tcp_shift_skb_data(), reducing sack/rack overhead, because - * write queue has less skbs. - * Each skb can hold up to MAX_SKB_FRAGS * 32Kbytes, or ~0.5 MB. - * This also speeds up tso_fragment(), since it wont fallback - * to tcp_fragment(). 
- */ -static int linear_payload_sz(bool first_skb) -{ - if (first_skb) - return SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER); - return 0; -} - -static int select_size(bool first_skb, bool zc) -{ - if (zc) - return 0; - return linear_payload_sz(first_skb); -} - void tcp_free_fastopen_req(struct tcp_sock *tp) { if (tp->fastopen_req) { @@ -1272,7 +1258,6 @@ restart: if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) { bool first_skb; - int linear; new_segment: if (!sk_stream_memory_free(sk)) @@ -1283,8 +1268,7 @@ new_segment: goto restart; } first_skb = tcp_rtx_and_write_queues_empty(sk); - linear = select_size(first_skb, zc); - skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation, + skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, first_skb); if (!skb) goto wait_for_memory; @@ -2552,6 +2536,11 @@ void tcp_write_queue_purge(struct sock *sk) sk_wmem_free_skb(sk, skb); } tcp_rtx_queue_purge(sk); + skb = sk->sk_tx_skb_cache; + if (skb) { + __kfree_skb(skb); + sk->sk_tx_skb_cache = NULL; + } INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue); sk_mem_reclaim(sk); tcp_clear_all_retrans_hints(tcp_sk(sk)); @@ -2587,6 +2576,10 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); + if (sk->sk_rx_skb_cache) { + __kfree_skb(sk->sk_rx_skb_cache); + sk->sk_rx_skb_cache = NULL; + } tp->copied_seq = tp->rcv_nxt; tp->urg_data = 0; tcp_write_queue_purge(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5def3c48870e..5dfbc333e79a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6502,8 +6502,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, reqsk_fastopen_remove(fastopen_sk, req, false); bh_unlock_sock(fastopen_sk); sock_put(fastopen_sk); - reqsk_put(req); - goto drop; + goto drop_and_free; } sk->sk_data_ready(sk); bh_unlock_sock(fastopen_sk); @@ -6527,7 +6526,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, drop_and_release: dst_release(dst); drop_and_free: - 
reqsk_free(req); + __reqsk_free(req); drop: tcp_listendrop(sk); return 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 277d71239d75..3979939804b7 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1774,6 +1774,7 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph, int tcp_v4_rcv(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); + struct sk_buff *skb_to_free; int sdif = inet_sdif(skb); const struct iphdr *iph; const struct tcphdr *th; @@ -1905,11 +1906,17 @@ process: tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { + skb_to_free = sk->sk_rx_skb_cache; + sk->sk_rx_skb_cache = NULL; ret = tcp_v4_do_rcv(sk, skb); - } else if (tcp_add_backlog(sk, skb)) { - goto discard_and_relse; + } else { + if (tcp_add_backlog(sk, skb)) + goto discard_and_relse; + skb_to_free = NULL; } bh_unlock_sock(sk); + if (skb_to_free) + __kfree_skb(skb_to_free); put_and_return: if (refcounted) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index b467a7cabf40..4ccec4c705f7 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -953,12 +953,10 @@ static const struct genl_ops tcp_metrics_nl_ops[] = { .cmd = TCP_METRICS_CMD_GET, .doit = tcp_metrics_nl_cmd_get, .dumpit = tcp_metrics_nl_dump, - .policy = tcp_metrics_nl_policy, }, { .cmd = TCP_METRICS_CMD_DEL, .doit = tcp_metrics_nl_cmd_del, - .policy = tcp_metrics_nl_policy, .flags = GENL_ADMIN_PERM, }, }; @@ -968,6 +966,7 @@ static struct genl_family tcp_metrics_nl_family __ro_after_init = { .name = TCP_METRICS_GENL_NAME, .version = TCP_METRICS_GENL_VERSION, .maxattr = TCP_METRICS_ATTR_MAX, + .policy = tcp_metrics_nl_policy, .netnsok = true, .module = THIS_MODULE, .ops = tcp_metrics_nl_ops, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4522579aaca2..e265d1aeeb66 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -52,12 +52,8 @@ void tcp_mstamp_refresh(struct tcp_sock *tp) { u64 val = tcp_clock_ns(); - if (val 
> tp->tcp_clock_cache) - tp->tcp_clock_cache = val; - - val = div_u64(val, NSEC_PER_USEC); - if (val > tp->tcp_mstamp) - tp->tcp_mstamp = val; + tp->tcp_clock_cache = val; + tp->tcp_mstamp = div_u64(val, NSEC_PER_USEC); } static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 2f45d2a3e3a3..fa6b404cbd10 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -847,6 +847,8 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.bindv6only = 0; net->ipv6.sysctl.icmpv6_time = 1*HZ; net->ipv6.sysctl.icmpv6_echo_ignore_all = 0; + net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0; + net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0; net->ipv6.sysctl.flowlabel_consistency = 1; net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS; net->ipv6.sysctl.idgen_retries = 3; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 802faa2fcc0e..cc14b9998941 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -683,12 +683,20 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct dst_entry *dst; struct ipcm6_cookie ipc6; u32 mark = IP6_REPLY_MARK(net, skb->mark); + bool acast; + + if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) && + net->ipv6.sysctl.icmpv6_echo_ignore_multicast) + return; saddr = &ipv6_hdr(skb)->daddr; + acast = ipv6_anycast_destination(skb_dst(skb), saddr); + if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast) + return; + if (!ipv6_unicast_destination(skb) && - !(net->ipv6.sysctl.anycast_src_echo_reply && - ipv6_anycast_destination(skb_dst(skb), saddr))) + !(net->ipv6.sysctl.anycast_src_echo_reply && acast)) saddr = NULL; memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr)); @@ -1115,6 +1123,20 @@ static struct ctl_table ipv6_icmp_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "echo_ignore_multicast", + .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast, + .maxlen = sizeof(int), + .mode = 0644, + 
.proc_handler = proc_dointvec, + }, + { + .procname = "echo_ignore_anycast", + .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { }, }; @@ -1129,6 +1151,8 @@ struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net) if (table) { table[0].data = &net->ipv6.sysctl.icmpv6_time; table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all; + table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast; + table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast; } return table; } diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c index 18fac76b9520..8d31a5066d0c 100644 --- a/net/ipv6/ila/ila_main.c +++ b/net/ipv6/ila/ila_main.c @@ -17,19 +17,16 @@ static const struct genl_ops ila_nl_ops[] = { { .cmd = ILA_CMD_ADD, .doit = ila_xlat_nl_cmd_add_mapping, - .policy = ila_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = ILA_CMD_DEL, .doit = ila_xlat_nl_cmd_del_mapping, - .policy = ila_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = ILA_CMD_FLUSH, .doit = ila_xlat_nl_cmd_flush, - .policy = ila_nl_policy, .flags = GENL_ADMIN_PERM, }, { @@ -38,7 +35,6 @@ static const struct genl_ops ila_nl_ops[] = { .start = ila_xlat_nl_dump_start, .dumpit = ila_xlat_nl_dump, .done = ila_xlat_nl_dump_done, - .policy = ila_nl_policy, }, }; @@ -49,6 +45,7 @@ struct genl_family ila_nl_family __ro_after_init = { .name = ILA_GENL_NAME, .version = ILA_GENL_VERSION, .maxattr = ILA_ATTR_MAX, + .policy = ila_nl_policy, .netnsok = true, .parallel_ops = true, .module = THIS_MODULE, diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 4fe7c90962dd..868ae23dbae1 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -10,15 +10,25 @@ #include <net/secure_seq.h> #include <linux/netfilter.h> -static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, +static u32 __ipv6_select_ident(struct net *net, const struct in6_addr *dst, const struct in6_addr *src) { + const struct 
{ + struct in6_addr dst; + struct in6_addr src; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .dst = *dst, + .src = *src, + }; u32 hash, id; - hash = __ipv6_addr_jhash(dst, hashrnd); - hash = __ipv6_addr_jhash(src, hash); - hash ^= net_hash_mix(net); + /* Note the following code is not safe, but this is okay. */ + if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key))) + get_random_bytes(&net->ipv4.ip_id_key, + sizeof(net->ipv4.ip_id_key)); + + hash = siphash(&combined, sizeof(combined), &net->ipv4.ip_id_key); /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve, * set the hight order instead thus minimizing possible future @@ -41,7 +51,6 @@ static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, */ __be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) { - static u32 ip6_proxy_idents_hashrnd __read_mostly; struct in6_addr buf[2]; struct in6_addr *addrs; u32 id; @@ -53,11 +62,7 @@ __be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) if (!addrs) return 0; - net_get_random_once(&ip6_proxy_idents_hashrnd, - sizeof(ip6_proxy_idents_hashrnd)); - - id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd, - &addrs[1], &addrs[0]); + id = __ipv6_select_ident(net, &addrs[1], &addrs[0]); return htonl(id); } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); @@ -66,12 +71,9 @@ __be32 ipv6_select_ident(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr) { - static u32 ip6_idents_hashrnd __read_mostly; u32 id; - net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); - - id = __ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr); + id = __ipv6_select_ident(net, daddr, saddr); return htonl(id); } EXPORT_SYMBOL(ipv6_select_ident); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0302e0eb07af..61f231f58da5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1015,14 +1015,13 @@ static struct fib6_node* fib6_backtrack(struct fib6_node *fn, } } -static bool ip6_hold_safe(struct net 
*net, struct rt6_info **prt, - bool null_fallback) +static bool ip6_hold_safe(struct net *net, struct rt6_info **prt) { struct rt6_info *rt = *prt; if (dst_hold_safe(&rt->dst)) return true; - if (null_fallback) { + if (net) { rt = net->ipv6.ip6_null_entry; dst_hold(&rt->dst); } else { @@ -1095,7 +1094,7 @@ restart: /* Search through exception table */ rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); if (rt) { - if (ip6_hold_safe(net, &rt, true)) + if (ip6_hold_safe(net, &rt)) dst_use_noref(&rt->dst, jiffies); } else if (f6i == net->ipv6.fib6_null_entry) { rt = net->ipv6.ip6_null_entry; @@ -1242,7 +1241,7 @@ static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt) pcpu_rt = *p; if (pcpu_rt) - ip6_hold_safe(NULL, &pcpu_rt, false); + ip6_hold_safe(NULL, &pcpu_rt); return pcpu_rt; } @@ -1867,7 +1866,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, /*Search through exception table */ rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); if (rt) { - if (ip6_hold_safe(net, &rt, true)) + if (ip6_hold_safe(net, &rt)) dst_use_noref(&rt->dst, jiffies); rcu_read_unlock(); @@ -2482,7 +2481,7 @@ restart: out: if (ret) - ip6_hold_safe(net, &ret, true); + ip6_hold_safe(net, &ret); else ret = ip6_create_rt_rcu(rt); @@ -2953,9 +2952,6 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; } - if (cfg->fc_metric == 0) - cfg->fc_metric = IP6_RT_PRIO_USER; - if (cfg->fc_flags & RTNH_F_ONLINK) { if (!dev) { NL_SET_ERR_MSG(extack, @@ -3084,7 +3080,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; } - if (!(dev->flags & IFF_UP)) { + if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) { NL_SET_ERR_MSG(extack, "Nexthop device is not up"); err = -ENETDOWN; goto out; @@ -3606,7 +3602,7 @@ static void rtmsg_to_fib6_config(struct net *net, .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ? 
: RT6_TABLE_MAIN, .fc_ifindex = rtmsg->rtmsg_ifindex, - .fc_metric = rtmsg->rtmsg_metric, + .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER, .fc_expires = rtmsg->rtmsg_info, .fc_dst_len = rtmsg->rtmsg_dst_len, .fc_src_len = rtmsg->rtmsg_src_len, @@ -3717,36 +3713,26 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, const struct in6_addr *addr, bool anycast, gfp_t gfp_flags) { - u32 tb_id; - struct net_device *dev = idev->dev; - struct fib6_info *f6i; - - f6i = fib6_info_alloc(gfp_flags); - if (!f6i) - return ERR_PTR(-ENOMEM); + struct fib6_config cfg = { + .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL, + .fc_ifindex = idev->dev->ifindex, + .fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP, + .fc_dst = *addr, + .fc_dst_len = 128, + .fc_protocol = RTPROT_KERNEL, + .fc_nlinfo.nl_net = net, + .fc_ignore_dev_down = true, + }; - f6i->fib6_metrics = ip_fib_metrics_init(net, NULL, 0, NULL); - f6i->dst_nocount = true; - f6i->dst_host = true; - f6i->fib6_protocol = RTPROT_KERNEL; - f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP; if (anycast) { - f6i->fib6_type = RTN_ANYCAST; - f6i->fib6_flags |= RTF_ANYCAST; + cfg.fc_type = RTN_ANYCAST; + cfg.fc_flags |= RTF_ANYCAST; } else { - f6i->fib6_type = RTN_LOCAL; - f6i->fib6_flags |= RTF_LOCAL; + cfg.fc_type = RTN_LOCAL; + cfg.fc_flags |= RTF_LOCAL; } - f6i->fib6_nh.nh_gw = *addr; - dev_hold(dev); - f6i->fib6_nh.nh_dev = dev; - f6i->fib6_dst.addr = *addr; - f6i->fib6_dst.plen = 128; - tb_id = l3mdev_fib_table(idev->dev) ? 
: RT6_TABLE_LOCAL; - f6i->fib6_table = fib6_get_table(net, tb_id); - - return f6i; + return ip6_route_info_create(&cfg, gfp_flags, NULL); } /* remove deleted ip from prefsrc entries */ @@ -4526,6 +4512,9 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) return err; + if (cfg.fc_metric == 0) + cfg.fc_metric = IP6_RT_PRIO_USER; + if (cfg.fc_mp) return ip6_route_multipath_add(&cfg, extack); else diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 9b2f272ca164..ceff773471e7 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -399,7 +399,6 @@ static const struct genl_ops seg6_genl_ops[] = { { .cmd = SEG6_CMD_SETHMAC, .doit = seg6_genl_sethmac, - .policy = seg6_genl_policy, .flags = GENL_ADMIN_PERM, }, { @@ -407,19 +406,16 @@ static const struct genl_ops seg6_genl_ops[] = { .start = seg6_genl_dumphmac_start, .dumpit = seg6_genl_dumphmac, .done = seg6_genl_dumphmac_done, - .policy = seg6_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = SEG6_CMD_SET_TUNSRC, .doit = seg6_genl_set_tunsrc, - .policy = seg6_genl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = SEG6_CMD_GET_TUNSRC, .doit = seg6_genl_get_tunsrc, - .policy = seg6_genl_policy, .flags = GENL_ADMIN_PERM, }, }; @@ -429,6 +425,7 @@ static struct genl_family seg6_genl_family __ro_after_init = { .name = SEG6_GENL_NAME, .version = SEG6_GENL_VERSION, .maxattr = SEG6_ATTR_MAX, + .policy = seg6_genl_policy, .netnsok = true, .parallel_ops = true, .ops = seg6_genl_ops, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 44d431849d39..eec814fe53b8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -90,6 +90,17 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, } #endif +/* Helper returning the inet6 address from a given tcp socket. + * It can be used in TCP stack instead of inet6_sk(sk). + * This avoids a dereference and allow compiler optimizations. 
+ */ +static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk) +{ + struct tcp6_sock *tcp6 = container_of(tcp_sk(sk), struct tcp6_sock, tcp); + + return &tcp6->inet6; +} + static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -99,7 +110,7 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) sk->sk_rx_dst = dst; inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; - inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt); + tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt); } } @@ -138,7 +149,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; struct ipv6_txoptions *opt; @@ -390,7 +401,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state == TCP_CLOSE) goto out; - if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) { + if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) { __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); goto out; } @@ -405,7 +416,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - np = inet6_sk(sk); + np = tcp_inet6_sk(sk); if (type == NDISC_REDIRECT) { if (!sock_owned_by_user(sk)) { @@ -478,7 +489,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, enum tcp_synack_type synack_type) { struct inet_request_sock *ireq = inet_rsk(req); - struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct ipv6_txoptions *opt; struct flowi6 *fl6 = &fl->u.ip6; struct sk_buff *skb; @@ -737,7 +748,7 @@ static void tcp_v6_init_req(struct request_sock *req, { bool l3_slave = 
ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); struct inet_request_sock *ireq = inet_rsk(req); - const struct ipv6_pinfo *np = inet6_sk(sk_listener); + const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; @@ -1066,9 +1077,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * { struct inet_request_sock *ireq; struct ipv6_pinfo *newnp; - const struct ipv6_pinfo *np = inet6_sk(sk); + const struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct ipv6_txoptions *opt; - struct tcp6_sock *newtcp6sk; struct inet_sock *newinet; struct tcp_sock *newtp; struct sock *newsk; @@ -1088,11 +1098,10 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * if (!newsk) return NULL; - newtcp6sk = (struct tcp6_sock *)newsk; - inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; + inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); newinet = inet_sk(newsk); - newnp = inet6_sk(newsk); + newnp = tcp_inet6_sk(newsk); newtp = tcp_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); @@ -1156,12 +1165,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * ip6_dst_store(newsk, dst, NULL, NULL); inet6_sk_rx_dst_set(newsk, skb); - newtcp6sk = (struct tcp6_sock *)newsk; - inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; + inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); newtp = tcp_sk(newsk); newinet = inet_sk(newsk); - newnp = inet6_sk(newsk); + newnp = tcp_inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); @@ -1276,9 +1284,9 @@ out: */ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) { - struct ipv6_pinfo *np = inet6_sk(sk); - struct tcp_sock *tp; + struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct sk_buff *opt_skb = NULL; + struct tcp_sock *tp; /* Imagine: socket is IPv6. IPv4 packet arrives, goes to IPv4 receive handler and backlogged. 
@@ -1428,6 +1436,7 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, static int tcp_v6_rcv(struct sk_buff *skb) { + struct sk_buff *skb_to_free; int sdif = inet6_sdif(skb); const struct tcphdr *th; const struct ipv6hdr *hdr; @@ -1524,7 +1533,7 @@ process: return 0; } } - if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) { + if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) { __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); goto discard_and_relse; } @@ -1554,12 +1563,17 @@ process: tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { + skb_to_free = sk->sk_rx_skb_cache; + sk->sk_rx_skb_cache = NULL; ret = tcp_v6_do_rcv(sk, skb); - } else if (tcp_add_backlog(sk, skb)) { - goto discard_and_relse; + } else { + if (tcp_add_backlog(sk, skb)) + goto discard_and_relse; + skb_to_free = NULL; } bh_unlock_sock(sk); - + if (skb_to_free) + __kfree_skb(skb_to_free); put_and_return: if (refcounted) sock_put(sk); @@ -1669,7 +1683,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb) struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); if (dst) - dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); + dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie); if (dst && inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) skb_dst_set_noref(skb, dst); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index edbd5d1fbcde..77595fcc9f75 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -916,57 +916,48 @@ static const struct genl_ops l2tp_nl_ops[] = { { .cmd = L2TP_CMD_NOOP, .doit = l2tp_nl_cmd_noop, - .policy = l2tp_nl_policy, /* can be retrieved by unprivileged users */ }, { .cmd = L2TP_CMD_TUNNEL_CREATE, .doit = l2tp_nl_cmd_tunnel_create, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_TUNNEL_DELETE, .doit = l2tp_nl_cmd_tunnel_delete, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_TUNNEL_MODIFY, .doit = l2tp_nl_cmd_tunnel_modify, - .policy = l2tp_nl_policy, .flags = 
GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_TUNNEL_GET, .doit = l2tp_nl_cmd_tunnel_get, .dumpit = l2tp_nl_cmd_tunnel_dump, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_CREATE, .doit = l2tp_nl_cmd_session_create, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_DELETE, .doit = l2tp_nl_cmd_session_delete, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_MODIFY, .doit = l2tp_nl_cmd_session_modify, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = L2TP_CMD_SESSION_GET, .doit = l2tp_nl_cmd_session_get, .dumpit = l2tp_nl_cmd_session_dump, - .policy = l2tp_nl_policy, .flags = GENL_ADMIN_PERM, }, }; @@ -976,6 +967,7 @@ static struct genl_family l2tp_nl_family __ro_after_init = { .version = L2TP_GENL_VERSION, .hdrsize = 0, .maxattr = L2TP_ATTR_MAX, + .policy = l2tp_nl_policy, .netnsok = true, .module = THIS_MODULE, .ops = l2tp_nl_ops, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 4a6ff1482a9f..f0d97eba250b 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1133,8 +1133,7 @@ static void ieee80211_uninit(struct net_device *dev) static u16 ieee80211_netdev_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb); } @@ -1179,8 +1178,7 @@ static const struct net_device_ops ieee80211_dataif_ops = { static u16 ieee80211_monitor_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index bad17bba8ba7..367b2f6513e0 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -723,38 +723,32 @@ 
static int ncsi_set_channel_mask_nl(struct sk_buff *msg, static const struct genl_ops ncsi_ops[] = { { .cmd = NCSI_CMD_PKG_INFO, - .policy = ncsi_genl_policy, .doit = ncsi_pkg_info_nl, .dumpit = ncsi_pkg_info_all_nl, .flags = 0, }, { .cmd = NCSI_CMD_SET_INTERFACE, - .policy = ncsi_genl_policy, .doit = ncsi_set_interface_nl, .flags = GENL_ADMIN_PERM, }, { .cmd = NCSI_CMD_CLEAR_INTERFACE, - .policy = ncsi_genl_policy, .doit = ncsi_clear_interface_nl, .flags = GENL_ADMIN_PERM, }, { .cmd = NCSI_CMD_SEND_CMD, - .policy = ncsi_genl_policy, .doit = ncsi_send_cmd_nl, .flags = GENL_ADMIN_PERM, }, { .cmd = NCSI_CMD_SET_PACKAGE_MASK, - .policy = ncsi_genl_policy, .doit = ncsi_set_package_mask_nl, .flags = GENL_ADMIN_PERM, }, { .cmd = NCSI_CMD_SET_CHANNEL_MASK, - .policy = ncsi_genl_policy, .doit = ncsi_set_channel_mask_nl, .flags = GENL_ADMIN_PERM, }, @@ -764,6 +758,7 @@ static struct genl_family ncsi_genl_family __ro_after_init = { .name = "NCSI", .version = 0, .maxattr = NCSI_ATTR_MAX, + .policy = ncsi_genl_policy, .module = THIS_MODULE, .ops = ncsi_ops, .n_ops = ARRAY_SIZE(ncsi_ops), diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 053cd96b9c76..4b933669fd83 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3775,19 +3775,16 @@ static const struct genl_ops ip_vs_genl_ops[] = { { .cmd = IPVS_CMD_NEW_SERVICE, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_SET_SERVICE, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_DEL_SERVICE, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { @@ -3795,42 +3792,35 @@ static const struct genl_ops ip_vs_genl_ops[] = { .flags = GENL_ADMIN_PERM, .doit = ip_vs_genl_get_cmd, .dumpit = ip_vs_genl_dump_services, - .policy = ip_vs_cmd_policy, }, { .cmd = IPVS_CMD_NEW_DEST, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, 
.doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_SET_DEST, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_DEL_DEST, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { .cmd = IPVS_CMD_GET_DEST, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .dumpit = ip_vs_genl_dump_dests, }, { .cmd = IPVS_CMD_NEW_DAEMON, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_daemon, }, { .cmd = IPVS_CMD_DEL_DAEMON, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_daemon, }, { @@ -3841,7 +3831,6 @@ static const struct genl_ops ip_vs_genl_ops[] = { { .cmd = IPVS_CMD_SET_CONFIG, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { @@ -3857,7 +3846,6 @@ static const struct genl_ops ip_vs_genl_ops[] = { { .cmd = IPVS_CMD_ZERO, .flags = GENL_ADMIN_PERM, - .policy = ip_vs_cmd_policy, .doit = ip_vs_genl_set_cmd, }, { @@ -3872,6 +3860,7 @@ static struct genl_family ip_vs_genl_family __ro_after_init = { .name = IPVS_GENL_NAME, .version = IPVS_GENL_VERSION, .maxattr = IPVS_CMD_ATTR_MAX, + .policy = ip_vs_cmd_policy, .netnsok = true, /* Make ipvsadm to work on netns */ .module = THIS_MODULE, .ops = ip_vs_genl_ops, diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c index 4d748975117d..80184513b2b2 100644 --- a/net/netlabel/netlabel_calipso.c +++ b/net/netlabel/netlabel_calipso.c @@ -322,28 +322,24 @@ static const struct genl_ops netlbl_calipso_ops[] = { { .cmd = NLBL_CALIPSO_C_ADD, .flags = GENL_ADMIN_PERM, - .policy = calipso_genl_policy, .doit = netlbl_calipso_add, .dumpit = NULL, }, { .cmd = NLBL_CALIPSO_C_REMOVE, .flags = GENL_ADMIN_PERM, - .policy = calipso_genl_policy, .doit = netlbl_calipso_remove, .dumpit = NULL, }, { .cmd = NLBL_CALIPSO_C_LIST, .flags = 0, - .policy = calipso_genl_policy, .doit = netlbl_calipso_list, .dumpit = NULL, }, { .cmd = 
NLBL_CALIPSO_C_LISTALL, .flags = 0, - .policy = calipso_genl_policy, .doit = NULL, .dumpit = netlbl_calipso_listall, }, @@ -354,6 +350,7 @@ static struct genl_family netlbl_calipso_gnl_family __ro_after_init = { .name = NETLBL_NLTYPE_CALIPSO_NAME, .version = NETLBL_PROTO_VERSION, .maxattr = NLBL_CALIPSO_A_MAX, + .policy = calipso_genl_policy, .module = THIS_MODULE, .ops = netlbl_calipso_ops, .n_ops = ARRAY_SIZE(netlbl_calipso_ops), diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 9aacf2da3d98..ba7800f94ccc 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -734,28 +734,24 @@ static const struct genl_ops netlbl_cipsov4_ops[] = { { .cmd = NLBL_CIPSOV4_C_ADD, .flags = GENL_ADMIN_PERM, - .policy = netlbl_cipsov4_genl_policy, .doit = netlbl_cipsov4_add, .dumpit = NULL, }, { .cmd = NLBL_CIPSOV4_C_REMOVE, .flags = GENL_ADMIN_PERM, - .policy = netlbl_cipsov4_genl_policy, .doit = netlbl_cipsov4_remove, .dumpit = NULL, }, { .cmd = NLBL_CIPSOV4_C_LIST, .flags = 0, - .policy = netlbl_cipsov4_genl_policy, .doit = netlbl_cipsov4_list, .dumpit = NULL, }, { .cmd = NLBL_CIPSOV4_C_LISTALL, .flags = 0, - .policy = netlbl_cipsov4_genl_policy, .doit = NULL, .dumpit = netlbl_cipsov4_listall, }, @@ -766,6 +762,7 @@ static struct genl_family netlbl_cipsov4_gnl_family __ro_after_init = { .name = NETLBL_NLTYPE_CIPSOV4_NAME, .version = NETLBL_PROTO_VERSION, .maxattr = NLBL_CIPSOV4_A_MAX, + .policy = netlbl_cipsov4_genl_policy, .module = THIS_MODULE, .ops = netlbl_cipsov4_ops, .n_ops = ARRAY_SIZE(netlbl_cipsov4_ops), diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 21e0095b1d14..a16eacfb2236 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -773,56 +773,48 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = { { .cmd = NLBL_MGMT_C_ADD, .flags = GENL_ADMIN_PERM, - .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_add, .dumpit = NULL, }, { .cmd = 
NLBL_MGMT_C_REMOVE, .flags = GENL_ADMIN_PERM, - .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_remove, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_LISTALL, .flags = 0, - .policy = netlbl_mgmt_genl_policy, .doit = NULL, .dumpit = netlbl_mgmt_listall, }, { .cmd = NLBL_MGMT_C_ADDDEF, .flags = GENL_ADMIN_PERM, - .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_adddef, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_REMOVEDEF, .flags = GENL_ADMIN_PERM, - .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_removedef, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_LISTDEF, .flags = 0, - .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_listdef, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_PROTOCOLS, .flags = 0, - .policy = netlbl_mgmt_genl_policy, .doit = NULL, .dumpit = netlbl_mgmt_protocols, }, { .cmd = NLBL_MGMT_C_VERSION, .flags = 0, - .policy = netlbl_mgmt_genl_policy, .doit = netlbl_mgmt_version, .dumpit = NULL, }, @@ -833,6 +825,7 @@ static struct genl_family netlbl_mgmt_gnl_family __ro_after_init = { .name = NETLBL_NLTYPE_MGMT_NAME, .version = NETLBL_PROTO_VERSION, .maxattr = NLBL_MGMT_A_MAX, + .policy = netlbl_mgmt_genl_policy, .module = THIS_MODULE, .ops = netlbl_mgmt_genl_ops, .n_ops = ARRAY_SIZE(netlbl_mgmt_genl_ops), diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index c92894c3e40a..6b1b6c2b5141 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1318,56 +1318,48 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = { { .cmd = NLBL_UNLABEL_C_STATICADD, .flags = GENL_ADMIN_PERM, - .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_staticadd, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICREMOVE, .flags = GENL_ADMIN_PERM, - .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_staticremove, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICLIST, .flags = 0, - .policy = netlbl_unlabel_genl_policy, .doit = NULL, .dumpit = netlbl_unlabel_staticlist, }, { .cmd = 
NLBL_UNLABEL_C_STATICADDDEF, .flags = GENL_ADMIN_PERM, - .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_staticadddef, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICREMOVEDEF, .flags = GENL_ADMIN_PERM, - .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_staticremovedef, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_STATICLISTDEF, .flags = 0, - .policy = netlbl_unlabel_genl_policy, .doit = NULL, .dumpit = netlbl_unlabel_staticlistdef, }, { .cmd = NLBL_UNLABEL_C_ACCEPT, .flags = GENL_ADMIN_PERM, - .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_accept, .dumpit = NULL, }, { .cmd = NLBL_UNLABEL_C_LIST, .flags = 0, - .policy = netlbl_unlabel_genl_policy, .doit = netlbl_unlabel_list, .dumpit = NULL, }, @@ -1378,6 +1370,7 @@ static struct genl_family netlbl_unlabel_gnl_family __ro_after_init = { .name = NETLBL_NLTYPE_UNLABELED_NAME, .version = NETLBL_PROTO_VERSION, .maxattr = NLBL_UNLABEL_A_MAX, + .policy = netlbl_unlabel_genl_policy, .module = THIS_MODULE, .ops = netlbl_unlabel_genl_ops, .n_ops = ARRAY_SIZE(netlbl_unlabel_genl_ops), diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index f0ec068e1d02..288456090710 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -578,7 +578,7 @@ static int genl_family_rcv_msg(const struct genl_family *family, if (attrbuf) { err = nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, - ops->policy, extack); + family->policy, extack); if (err < 0) goto out; } @@ -678,7 +678,7 @@ static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq, op_flags |= GENL_CMD_CAP_DUMP; if (ops->doit) op_flags |= GENL_CMD_CAP_DO; - if (ops->policy) + if (family->policy) op_flags |= GENL_CMD_CAP_HASPOL; nest = nla_nest_start(skb, i + 1); @@ -940,7 +940,6 @@ static const struct genl_ops genl_ctrl_ops[] = { .cmd = CTRL_CMD_GETFAMILY, .doit = ctrl_getfamily, .dumpit = ctrl_dumpfamily, - .policy = ctrl_policy, }, }; @@ -958,6 +957,7 @@ static struct genl_family genl_ctrl 
__ro_after_init = { .name = "nlctrl", .version = 0x2, .maxattr = CTRL_ATTR_MAX, + .policy = ctrl_policy, .netnsok = true, }; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 376181cc1def..4d9f3ac8d562 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1670,99 +1670,80 @@ static const struct genl_ops nfc_genl_ops[] = { .doit = nfc_genl_get_device, .dumpit = nfc_genl_dump_devices, .done = nfc_genl_dump_devices_done, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_DEV_UP, .doit = nfc_genl_dev_up, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_DEV_DOWN, .doit = nfc_genl_dev_down, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_START_POLL, .doit = nfc_genl_start_poll, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_STOP_POLL, .doit = nfc_genl_stop_poll, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_DEP_LINK_UP, .doit = nfc_genl_dep_link_up, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_DEP_LINK_DOWN, .doit = nfc_genl_dep_link_down, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_GET_TARGET, .dumpit = nfc_genl_dump_targets, .done = nfc_genl_dump_targets_done, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_LLC_GET_PARAMS, .doit = nfc_genl_llc_get_params, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_LLC_SET_PARAMS, .doit = nfc_genl_llc_set_params, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_LLC_SDREQ, .doit = nfc_genl_llc_sdreq, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_FW_DOWNLOAD, .doit = nfc_genl_fw_download, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_ENABLE_SE, .doit = nfc_genl_enable_se, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_DISABLE_SE, .doit = nfc_genl_disable_se, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_GET_SE, .dumpit = nfc_genl_dump_ses, .done = nfc_genl_dump_ses_done, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_SE_IO, .doit = nfc_genl_se_io, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_ACTIVATE_TARGET, .doit = nfc_genl_activate_target, - .policy = nfc_genl_policy, }, { .cmd = 
NFC_CMD_VENDOR, .doit = nfc_genl_vendor_cmd, - .policy = nfc_genl_policy, }, { .cmd = NFC_CMD_DEACTIVATE_TARGET, .doit = nfc_genl_deactivate_target, - .policy = nfc_genl_policy, }, }; @@ -1771,6 +1752,7 @@ static struct genl_family nfc_genl_family __ro_after_init = { .name = NFC_GENL_NAME, .version = NFC_GENL_VERSION, .maxattr = NFC_ATTR_MAX, + .policy = nfc_genl_policy, .module = THIS_MODULE, .ops = nfc_genl_ops, .n_ops = ARRAY_SIZE(nfc_genl_ops), diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index e47ebbbe71b8..2c151bb322c1 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -169,6 +169,10 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb, const struct nlattr *actions, int len, bool last, bool clone_flow_key); +static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, + struct sw_flow_key *key, + const struct nlattr *attr, int len); + static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr, __be16 ethertype) { @@ -1213,6 +1217,40 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb, return clone_execute(dp, skb, key, recirc_id, NULL, 0, last, true); } +static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb, + struct sw_flow_key *key, + const struct nlattr *attr, bool last) +{ + const struct nlattr *actions, *cpl_arg; + const struct check_pkt_len_arg *arg; + int rem = nla_len(attr); + bool clone_flow_key; + + /* The first netlink attribute in 'attr' is always + * 'OVS_CHECK_PKT_LEN_ATTR_ARG'. + */ + cpl_arg = nla_data(attr); + arg = nla_data(cpl_arg); + + if (skb->len <= arg->pkt_len) { + /* Second netlink attribute in 'attr' is always + * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'. + */ + actions = nla_next(cpl_arg, &rem); + clone_flow_key = !arg->exec_for_lesser_equal; + } else { + /* Third netlink attribute in 'attr' is always + * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER'. 
+ */ + actions = nla_next(cpl_arg, &rem); + actions = nla_next(actions, &rem); + clone_flow_key = !arg->exec_for_greater; + } + + return clone_execute(dp, skb, key, 0, nla_data(actions), + nla_len(actions), last, clone_flow_key); +} + /* Execute a list of actions against 'skb'. */ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, @@ -1374,6 +1412,16 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, break; } + + case OVS_ACTION_ATTR_CHECK_PKT_LEN: { + bool last = nla_is_last(a, rem); + + err = execute_check_pkt_len(dp, skb, key, a, last); + if (last) + return err; + + break; + } } if (unlikely(err)) { diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 1b6896896fff..845b83598e0d 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -990,6 +990,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, GFP_ATOMIC); if (err) return err; + + /* helper installed, add seqadj if NAT is required */ + if (info->nat && !nfct_seqadj(ct)) { + if (!nfct_seqadj_ext_add(ct)) + return -EINVAL; + } } /* Call the helper only if: @@ -2154,18 +2160,15 @@ static struct genl_ops ct_limit_genl_ops[] = { { .cmd = OVS_CT_LIMIT_CMD_SET, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. */ - .policy = ct_limit_policy, .doit = ovs_ct_limit_cmd_set, }, { .cmd = OVS_CT_LIMIT_CMD_DEL, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. */ - .policy = ct_limit_policy, .doit = ovs_ct_limit_cmd_del, }, { .cmd = OVS_CT_LIMIT_CMD_GET, .flags = 0, /* OK for unprivileged users. 
*/ - .policy = ct_limit_policy, .doit = ovs_ct_limit_cmd_get, }, }; @@ -2179,6 +2182,7 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = { .name = OVS_CT_LIMIT_FAMILY, .version = OVS_CT_LIMIT_VERSION, .maxattr = OVS_CT_LIMIT_ATTR_MAX, + .policy = ct_limit_policy, .netnsok = true, .parallel_ops = true, .ops = ct_limit_genl_ops, diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 9dd158ab51b3..a64d3eb1f9a9 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -639,7 +639,6 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { static const struct genl_ops dp_packet_genl_ops[] = { { .cmd = OVS_PACKET_CMD_EXECUTE, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = packet_policy, .doit = ovs_packet_cmd_execute } }; @@ -649,6 +648,7 @@ static struct genl_family dp_packet_genl_family __ro_after_init = { .name = OVS_PACKET_FAMILY, .version = OVS_PACKET_VERSION, .maxattr = OVS_PACKET_ATTR_MAX, + .policy = packet_policy, .netnsok = true, .parallel_ops = true, .ops = dp_packet_genl_ops, @@ -1424,23 +1424,19 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { static const struct genl_ops dp_flow_genl_ops[] = { { .cmd = OVS_FLOW_CMD_NEW, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = flow_policy, .doit = ovs_flow_cmd_new }, { .cmd = OVS_FLOW_CMD_DEL, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = flow_policy, .doit = ovs_flow_cmd_del }, { .cmd = OVS_FLOW_CMD_GET, .flags = 0, /* OK for unprivileged users. */ - .policy = flow_policy, .doit = ovs_flow_cmd_get, .dumpit = ovs_flow_cmd_dump }, { .cmd = OVS_FLOW_CMD_SET, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. 
*/ - .policy = flow_policy, .doit = ovs_flow_cmd_set, }, }; @@ -1450,6 +1446,7 @@ static struct genl_family dp_flow_genl_family __ro_after_init = { .name = OVS_FLOW_FAMILY, .version = OVS_FLOW_VERSION, .maxattr = OVS_FLOW_ATTR_MAX, + .policy = flow_policy, .netnsok = true, .parallel_ops = true, .ops = dp_flow_genl_ops, @@ -1817,23 +1814,19 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { static const struct genl_ops dp_datapath_genl_ops[] = { { .cmd = OVS_DP_CMD_NEW, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = datapath_policy, .doit = ovs_dp_cmd_new }, { .cmd = OVS_DP_CMD_DEL, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = datapath_policy, .doit = ovs_dp_cmd_del }, { .cmd = OVS_DP_CMD_GET, .flags = 0, /* OK for unprivileged users. */ - .policy = datapath_policy, .doit = ovs_dp_cmd_get, .dumpit = ovs_dp_cmd_dump }, { .cmd = OVS_DP_CMD_SET, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = datapath_policy, .doit = ovs_dp_cmd_set, }, }; @@ -1843,6 +1836,7 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = { .name = OVS_DATAPATH_FAMILY, .version = OVS_DATAPATH_VERSION, .maxattr = OVS_DP_ATTR_MAX, + .policy = datapath_policy, .netnsok = true, .parallel_ops = true, .ops = dp_datapath_genl_ops, @@ -2260,23 +2254,19 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { static const struct genl_ops dp_vport_genl_ops[] = { { .cmd = OVS_VPORT_CMD_NEW, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = vport_policy, .doit = ovs_vport_cmd_new }, { .cmd = OVS_VPORT_CMD_DEL, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = vport_policy, .doit = ovs_vport_cmd_del }, { .cmd = OVS_VPORT_CMD_GET, .flags = 0, /* OK for unprivileged users. 
*/ - .policy = vport_policy, .doit = ovs_vport_cmd_get, .dumpit = ovs_vport_cmd_dump }, { .cmd = OVS_VPORT_CMD_SET, .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ - .policy = vport_policy, .doit = ovs_vport_cmd_set, }, }; @@ -2286,6 +2276,7 @@ struct genl_family dp_vport_genl_family __ro_after_init = { .name = OVS_VPORT_FAMILY, .version = OVS_VPORT_VERSION, .maxattr = OVS_VPORT_ATTR_MAX, + .policy = vport_policy, .netnsok = true, .parallel_ops = true, .ops = dp_vport_genl_ops, diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 691da853bef5..b7543700db87 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -91,6 +91,7 @@ static bool actions_may_change_flow(const struct nlattr *actions) case OVS_ACTION_ATTR_SET: case OVS_ACTION_ATTR_SET_MASKED: case OVS_ACTION_ATTR_METER: + case OVS_ACTION_ATTR_CHECK_PKT_LEN: default: return true; } @@ -2838,6 +2839,87 @@ static int validate_userspace(const struct nlattr *attr) return 0; } +static const struct nla_policy cpl_policy[OVS_CHECK_PKT_LEN_ATTR_MAX + 1] = { + [OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] = {.type = NLA_U16 }, + [OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER] = {.type = NLA_NESTED }, + [OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL] = {.type = NLA_NESTED }, +}; + +static int validate_and_copy_check_pkt_len(struct net *net, + const struct nlattr *attr, + const struct sw_flow_key *key, + struct sw_flow_actions **sfa, + __be16 eth_type, __be16 vlan_tci, + bool log, bool last) +{ + const struct nlattr *acts_if_greater, *acts_if_lesser_eq; + struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1]; + struct check_pkt_len_arg arg; + int nested_acts_start; + int start, err; + + err = nla_parse_strict(a, OVS_CHECK_PKT_LEN_ATTR_MAX, nla_data(attr), + nla_len(attr), cpl_policy, NULL); + if (err) + return err; + + if (!a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] || + !nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN])) + return -EINVAL; + + acts_if_lesser_eq = 
a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL]; + acts_if_greater = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER]; + + /* Both the nested action should be present. */ + if (!acts_if_greater || !acts_if_lesser_eq) + return -EINVAL; + + /* validation done, copy the nested actions. */ + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CHECK_PKT_LEN, + log); + if (start < 0) + return start; + + arg.pkt_len = nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]); + arg.exec_for_lesser_equal = + last || !actions_may_change_flow(acts_if_lesser_eq); + arg.exec_for_greater = + last || !actions_may_change_flow(acts_if_greater); + + err = ovs_nla_add_action(sfa, OVS_CHECK_PKT_LEN_ATTR_ARG, &arg, + sizeof(arg), log); + if (err) + return err; + + nested_acts_start = add_nested_action_start(sfa, + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL, log); + if (nested_acts_start < 0) + return nested_acts_start; + + err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa, + eth_type, vlan_tci, log); + + if (err) + return err; + + add_nested_action_end(*sfa, nested_acts_start); + + nested_acts_start = add_nested_action_start(sfa, + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER, log); + if (nested_acts_start < 0) + return nested_acts_start; + + err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa, + eth_type, vlan_tci, log); + + if (err) + return err; + + add_nested_action_end(*sfa, nested_acts_start); + add_nested_action_end(*sfa, start); + return 0; +} + static int copy_action(const struct nlattr *from, struct sw_flow_actions **sfa, bool log) { @@ -2884,6 +2966,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, [OVS_ACTION_ATTR_POP_NSH] = 0, [OVS_ACTION_ATTR_METER] = sizeof(u32), [OVS_ACTION_ATTR_CLONE] = (u32)-1, + [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1, }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -3085,6 +3168,19 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, break; } + case 
OVS_ACTION_ATTR_CHECK_PKT_LEN: { + bool last = nla_is_last(a, rem); + + err = validate_and_copy_check_pkt_len(net, a, key, sfa, + eth_type, + vlan_tci, log, + last); + if (err) + return err; + skip_copy = true; + break; + } + default: OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; @@ -3183,6 +3279,75 @@ static int clone_action_to_attr(const struct nlattr *attr, return err; } +static int check_pkt_len_action_to_attr(const struct nlattr *attr, + struct sk_buff *skb) +{ + struct nlattr *start, *ac_start = NULL; + const struct check_pkt_len_arg *arg; + const struct nlattr *a, *cpl_arg; + int err = 0, rem = nla_len(attr); + + start = nla_nest_start(skb, OVS_ACTION_ATTR_CHECK_PKT_LEN); + if (!start) + return -EMSGSIZE; + + /* The first nested attribute in 'attr' is always + * 'OVS_CHECK_PKT_LEN_ATTR_ARG'. + */ + cpl_arg = nla_data(attr); + arg = nla_data(cpl_arg); + + if (nla_put_u16(skb, OVS_CHECK_PKT_LEN_ATTR_PKT_LEN, arg->pkt_len)) { + err = -EMSGSIZE; + goto out; + } + + /* Second nested attribute in 'attr' is always + * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'. + */ + a = nla_next(cpl_arg, &rem); + ac_start = nla_nest_start(skb, + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL); + if (!ac_start) { + err = -EMSGSIZE; + goto out; + } + + err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); + if (err) { + nla_nest_cancel(skb, ac_start); + goto out; + } else { + nla_nest_end(skb, ac_start); + } + + /* Third nested attribute in 'attr' is always + * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER. 
+ */ + a = nla_next(a, &rem); + ac_start = nla_nest_start(skb, + OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER); + if (!ac_start) { + err = -EMSGSIZE; + goto out; + } + + err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); + if (err) { + nla_nest_cancel(skb, ac_start); + goto out; + } else { + nla_nest_end(skb, ac_start); + } + + nla_nest_end(skb, start); + return 0; + +out: + nla_nest_cancel(skb, start); + return err; +} + static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) { const struct nlattr *ovs_key = nla_data(a); @@ -3277,6 +3442,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) return err; break; + case OVS_ACTION_ATTR_CHECK_PKT_LEN: + err = check_pkt_len_action_to_attr(a, skb); + if (err) + return err; + break; + default: if (nla_put(skb, type, nla_len(a), nla_data(a))) return -EMSGSIZE; diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 43849d752a1e..0be3d097ae01 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -527,26 +527,22 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, static struct genl_ops dp_meter_genl_ops[] = { { .cmd = OVS_METER_CMD_FEATURES, .flags = 0, /* OK for unprivileged users. */ - .policy = meter_policy, .doit = ovs_meter_cmd_features }, { .cmd = OVS_METER_CMD_SET, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. */ - .policy = meter_policy, .doit = ovs_meter_cmd_set, }, { .cmd = OVS_METER_CMD_GET, .flags = 0, /* OK for unprivileged users. */ - .policy = meter_policy, .doit = ovs_meter_cmd_get, }, { .cmd = OVS_METER_CMD_DEL, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. 
*/ - .policy = meter_policy, .doit = ovs_meter_cmd_del }, }; @@ -560,6 +556,7 @@ struct genl_family dp_meter_genl_family __ro_after_init = { .name = OVS_METER_FAMILY, .version = OVS_METER_VERSION, .maxattr = OVS_METER_ATTR_MAX, + .policy = meter_policy, .netnsok = true, .parallel_ops = true, .ops = dp_meter_genl_ops, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9419c5cf4de5..08fe8b79c0bf 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -275,24 +275,22 @@ static bool packet_use_direct_xmit(const struct packet_sock *po) return po->xmit == packet_direct_xmit; } -static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev) -{ - return dev_pick_tx_cpu_id(dev, skb, sb_dev, NULL); -} - static u16 packet_pick_tx_queue(struct sk_buff *skb) { struct net_device *dev = skb->dev; const struct net_device_ops *ops = dev->netdev_ops; + int cpu = raw_smp_processor_id(); u16 queue_index; +#ifdef CONFIG_XPS + skb->sender_cpu = cpu + 1; +#endif + skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues); if (ops->ndo_select_queue) { - queue_index = ops->ndo_select_queue(dev, skb, NULL, - __packet_pick_tx_queue); + queue_index = ops->ndo_select_queue(dev, skb, NULL); queue_index = netdev_cap_txqueue(dev, queue_index); } else { - queue_index = __packet_pick_tx_queue(dev, skb, NULL); + queue_index = netdev_pick_tx(dev, skb, NULL); } return queue_index; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index c04247b403ed..0638f17ac5ab 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/rhashtable.h> #include <linux/workqueue.h> +#include <linux/refcount.h> #include <linux/if_ether.h> #include <linux/in6.h> @@ -75,6 +76,7 @@ struct fl_flow_mask { struct list_head filters; struct rcu_work rwork; struct list_head list; + refcount_t refcnt; }; struct fl_flow_tmplt { @@ -86,6 +88,7 @@ struct fl_flow_tmplt { struct cls_fl_head 
{ struct rhashtable ht; + spinlock_t masks_lock; /* Protect masks list */ struct list_head masks; struct rcu_work rwork; struct idr handle_idr; @@ -104,6 +107,12 @@ struct cls_fl_filter { u32 in_hw_count; struct rcu_work rwork; struct net_device *hw_dev; + /* Flower classifier is unlocked, which means that its reference counter + * can be changed concurrently without any kind of external + * synchronization. Use atomic reference counter to be concurrency-safe. + */ + refcount_t refcnt; + bool deleted; }; static const struct rhashtable_params mask_ht_params = { @@ -304,6 +313,7 @@ static int fl_init(struct tcf_proto *tp) if (!head) return -ENOBUFS; + spin_lock_init(&head->masks_lock); INIT_LIST_HEAD_RCU(&head->masks); rcu_assign_pointer(tp->root, head); idr_init(&head->handle_idr); @@ -313,6 +323,7 @@ static int fl_init(struct tcf_proto *tp) static void fl_mask_free(struct fl_flow_mask *mask) { + WARN_ON(!list_empty(&mask->filters)); rhashtable_destroy(&mask->ht); kfree(mask); } @@ -328,11 +339,15 @@ static void fl_mask_free_work(struct work_struct *work) static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask, bool async) { - if (!list_empty(&mask->filters)) + if (!refcount_dec_and_test(&mask->refcnt)) return false; rhashtable_remove_fast(&head->ht, &mask->ht_node, mask_ht_params); + + spin_lock(&head->masks_lock); list_del_rcu(&mask->list); + spin_unlock(&head->masks_lock); + if (async) tcf_queue_work(&mask->rwork, fl_mask_free_work); else @@ -353,37 +368,48 @@ static void fl_destroy_filter_work(struct work_struct *work) struct cls_fl_filter *f = container_of(to_rcu_work(work), struct cls_fl_filter, rwork); - rtnl_lock(); __fl_destroy_filter(f); - rtnl_unlock(); } static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = tp->chain->block; + if (!rtnl_held) + 
rtnl_lock(); + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = TC_CLSFLOWER_DESTROY; cls_flower.cookie = (unsigned long) f; tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + spin_lock(&tp->lock); tcf_block_offload_dec(block, &f->flags); + spin_unlock(&tp->lock); + + if (!rtnl_held) + rtnl_unlock(); } static int fl_hw_replace_filter(struct tcf_proto *tp, - struct cls_fl_filter *f, + struct cls_fl_filter *f, bool rtnl_held, struct netlink_ext_ack *extack) { struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = tp->chain->block; bool skip_sw = tc_skip_sw(f->flags); - int err; + int err = 0; + + if (!rtnl_held) + rtnl_lock(); cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts)); - if (!cls_flower.rule) - return -ENOMEM; + if (!cls_flower.rule) { + err = -ENOMEM; + goto errout; + } tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = TC_CLSFLOWER_REPLACE; @@ -396,35 +422,48 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); if (err) { kfree(cls_flower.rule); - if (skip_sw) { + if (skip_sw) NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); - return err; - } - return 0; + else + err = 0; + goto errout; } err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); kfree(cls_flower.rule); if (err < 0) { - fl_hw_destroy_filter(tp, f, NULL); - return err; + fl_hw_destroy_filter(tp, f, true, NULL); + goto errout; } else if (err > 0) { f->in_hw_count = err; + err = 0; + spin_lock(&tp->lock); tcf_block_offload_inc(block, &f->flags); + spin_unlock(&tp->lock); } - if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) - return -EINVAL; + if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) { + err = -EINVAL; + goto errout; + } - return 0; +errout: + if (!rtnl_held) + rtnl_unlock(); + + return err; } -static void fl_hw_update_stats(struct tcf_proto *tp, struct 
cls_fl_filter *f) +static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f, + bool rtnl_held) { struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = tp->chain->block; + if (!rtnl_held) + rtnl_lock(); + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL); cls_flower.command = TC_CLSFLOWER_STATS; cls_flower.cookie = (unsigned long) f; @@ -435,27 +474,94 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes, cls_flower.stats.pkts, cls_flower.stats.lastused); + + if (!rtnl_held) + rtnl_unlock(); } -static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, - struct netlink_ext_ack *extack) +static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp) { - struct cls_fl_head *head = rtnl_dereference(tp->root); + /* Flower classifier only changes root pointer during init and destroy. + * Users must obtain reference to tcf_proto instance before calling its + * API, so tp->root pointer is protected from concurrent call to + * fl_destroy() by reference counting. 
+ */ + return rcu_dereference_raw(tp->root); +} + +static void __fl_put(struct cls_fl_filter *f) +{ + if (!refcount_dec_and_test(&f->refcnt)) + return; + + WARN_ON(!f->deleted); + + if (tcf_exts_get_net(&f->exts)) + tcf_queue_work(&f->rwork, fl_destroy_filter_work); + else + __fl_destroy_filter(f); +} + +static struct cls_fl_filter *__fl_get(struct cls_fl_head *head, u32 handle) +{ + struct cls_fl_filter *f; + + rcu_read_lock(); + f = idr_find(&head->handle_idr, handle); + if (f && !refcount_inc_not_zero(&f->refcnt)) + f = NULL; + rcu_read_unlock(); + + return f; +} + +static struct cls_fl_filter *fl_get_next_filter(struct tcf_proto *tp, + unsigned long *handle) +{ + struct cls_fl_head *head = fl_head_dereference(tp); + struct cls_fl_filter *f; + + rcu_read_lock(); + while ((f = idr_get_next_ul(&head->handle_idr, handle))) { + /* don't return filters that are being deleted */ + if (refcount_inc_not_zero(&f->refcnt)) + break; + ++(*handle); + } + rcu_read_unlock(); + + return f; +} + +static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, + bool *last, bool rtnl_held, + struct netlink_ext_ack *extack) +{ + struct cls_fl_head *head = fl_head_dereference(tp); bool async = tcf_exts_get_net(&f->exts); - bool last; + *last = false; + + spin_lock(&tp->lock); + if (f->deleted) { + spin_unlock(&tp->lock); + return -ENOENT; + } + + f->deleted = true; + rhashtable_remove_fast(&f->mask->ht, &f->ht_node, + f->mask->filter_ht_params); idr_remove(&head->handle_idr, f->handle); list_del_rcu(&f->list); - last = fl_mask_put(head, f->mask, async); + spin_unlock(&tp->lock); + + *last = fl_mask_put(head, f->mask, async); if (!tc_skip_hw(f->flags)) - fl_hw_destroy_filter(tp, f, extack); + fl_hw_destroy_filter(tp, f, rtnl_held, extack); tcf_unbind_filter(tp, &f->res); - if (async) - tcf_queue_work(&f->rwork, fl_destroy_filter_work); - else - __fl_destroy_filter(f); + __fl_put(f); - return last; + return 0; } static void fl_destroy_sleepable(struct work_struct *work) @@ 
-472,13 +578,15 @@ static void fl_destroy_sleepable(struct work_struct *work) static void fl_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { - struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_head *head = fl_head_dereference(tp); struct fl_flow_mask *mask, *next_mask; struct cls_fl_filter *f, *next; + bool last; list_for_each_entry_safe(mask, next_mask, &head->masks, list) { list_for_each_entry_safe(f, next, &mask->filters, list) { - if (__fl_delete(tp, f, extack)) + __fl_delete(tp, f, &last, rtnl_held, extack); + if (last) break; } } @@ -488,11 +596,18 @@ static void fl_destroy(struct tcf_proto *tp, bool rtnl_held, tcf_queue_work(&head->rwork, fl_destroy_sleepable); } +static void fl_put(struct tcf_proto *tp, void *arg) +{ + struct cls_fl_filter *f = arg; + + __fl_put(f); +} + static void *fl_get(struct tcf_proto *tp, u32 handle) { - struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_head *head = fl_head_dereference(tp); - return idr_find(&head->handle_idr, handle); + return __fl_get(head, handle); } static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { @@ -1227,12 +1342,18 @@ static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head, INIT_LIST_HEAD_RCU(&newmask->filters); - err = rhashtable_insert_fast(&head->ht, &newmask->ht_node, - mask_ht_params); + refcount_set(&newmask->refcnt, 1); + err = rhashtable_replace_fast(&head->ht, &mask->ht_node, + &newmask->ht_node, mask_ht_params); if (err) goto errout_destroy; + /* Wait until any potential concurrent users of mask are finished */ + synchronize_rcu(); + + spin_lock(&head->masks_lock); list_add_tail_rcu(&newmask->list, &head->masks); + spin_unlock(&head->masks_lock); return newmask; @@ -1250,41 +1371,77 @@ static int fl_check_assign_mask(struct cls_fl_head *head, struct fl_flow_mask *mask) { struct fl_flow_mask *newmask; + int ret = 0; + + rcu_read_lock(); - fnew->mask = rhashtable_lookup_fast(&head->ht, mask, 
mask_ht_params); + /* Insert mask as temporary node to prevent concurrent creation of mask + * with same key. Any concurrent lookups with same key will return + * -EAGAIN because mask's refcnt is zero. It is safe to insert + * stack-allocated 'mask' to masks hash table because we call + * synchronize_rcu() before returning from this function (either in case + * of error or after replacing it with heap-allocated mask in + * fl_create_new_mask()). + */ + fnew->mask = rhashtable_lookup_get_insert_fast(&head->ht, + &mask->ht_node, + mask_ht_params); if (!fnew->mask) { - if (fold) - return -EINVAL; + rcu_read_unlock(); + + if (fold) { + ret = -EINVAL; + goto errout_cleanup; + } newmask = fl_create_new_mask(head, mask); - if (IS_ERR(newmask)) - return PTR_ERR(newmask); + if (IS_ERR(newmask)) { + ret = PTR_ERR(newmask); + goto errout_cleanup; + } fnew->mask = newmask; + return 0; + } else if (IS_ERR(fnew->mask)) { + ret = PTR_ERR(fnew->mask); } else if (fold && fold->mask != fnew->mask) { - return -EINVAL; + ret = -EINVAL; + } else if (!refcount_inc_not_zero(&fnew->mask->refcnt)) { + /* Mask was deleted concurrently, try again */ + ret = -EAGAIN; } + rcu_read_unlock(); + return ret; - return 0; +errout_cleanup: + rhashtable_remove_fast(&head->ht, &mask->ht_node, + mask_ht_params); + /* Wait until any potential concurrent users of mask are finished */ + synchronize_rcu(); + return ret; } static int fl_set_parms(struct net *net, struct tcf_proto *tp, struct cls_fl_filter *f, struct fl_flow_mask *mask, unsigned long base, struct nlattr **tb, struct nlattr *est, bool ovr, - struct fl_flow_tmplt *tmplt, + struct fl_flow_tmplt *tmplt, bool rtnl_held, struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, rtnl_held, extack); if (err < 0) return err; if (tb[TCA_FLOWER_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]); + if (!rtnl_held) + rtnl_lock(); 
tcf_bind_filter(tp, &f->res, base); + if (!rtnl_held) + rtnl_unlock(); } err = fl_set_key(net, tb, &f->key, &mask->key, extack); @@ -1308,19 +1465,23 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, void **arg, bool ovr, bool rtnl_held, struct netlink_ext_ack *extack) { - struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_head *head = fl_head_dereference(tp); struct cls_fl_filter *fold = *arg; struct cls_fl_filter *fnew; struct fl_flow_mask *mask; struct nlattr **tb; int err; - if (!tca[TCA_OPTIONS]) - return -EINVAL; + if (!tca[TCA_OPTIONS]) { + err = -EINVAL; + goto errout_fold; + } mask = kzalloc(sizeof(struct fl_flow_mask), GFP_KERNEL); - if (!mask) - return -ENOBUFS; + if (!mask) { + err = -ENOBUFS; + goto errout_fold; + } tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL); if (!tb) { @@ -1343,6 +1504,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, err = -ENOBUFS; goto errout_tb; } + refcount_set(&fnew->refcnt, 1); err = tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0); if (err < 0) @@ -1358,7 +1520,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr, - tp->chain->tmplt_priv, extack); + tp->chain->tmplt_priv, rtnl_held, extack); if (err) goto errout; @@ -1366,73 +1528,111 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (err) goto errout; - if (!handle) { - handle = 1; - err = idr_alloc_u32(&head->handle_idr, fnew, &handle, - INT_MAX, GFP_KERNEL); - } else if (!fold) { - /* user specifies a handle and it doesn't exist */ - err = idr_alloc_u32(&head->handle_idr, fnew, &handle, - handle, GFP_KERNEL); - } - if (err) - goto errout_mask; - fnew->handle = handle; - - if (!fold && __fl_lookup(fnew->mask, &fnew->mkey)) { - err = -EEXIST; - goto errout_idr; - } - - err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node, - fnew->mask->filter_ht_params); - if (err) - goto errout_idr; - 
if (!tc_skip_hw(fnew->flags)) { - err = fl_hw_replace_filter(tp, fnew, extack); + err = fl_hw_replace_filter(tp, fnew, rtnl_held, extack); if (err) - goto errout_mask_ht; + goto errout_mask; } if (!tc_in_hw(fnew->flags)) fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + spin_lock(&tp->lock); + + /* tp was deleted concurrently. -EAGAIN will cause caller to lookup + * proto again or create new one, if necessary. + */ + if (tp->deleting) { + err = -EAGAIN; + goto errout_hw; + } + + refcount_inc(&fnew->refcnt); if (fold) { + /* Fold filter was deleted concurrently. Retry lookup. */ + if (fold->deleted) { + err = -EAGAIN; + goto errout_hw; + } + + fnew->handle = handle; + + err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node, + fnew->mask->filter_ht_params); + if (err) + goto errout_hw; + rhashtable_remove_fast(&fold->mask->ht, &fold->ht_node, fold->mask->filter_ht_params); - if (!tc_skip_hw(fold->flags)) - fl_hw_destroy_filter(tp, fold, NULL); - } - - *arg = fnew; - - if (fold) { idr_replace(&head->handle_idr, fnew, fnew->handle); list_replace_rcu(&fold->list, &fnew->list); + fold->deleted = true; + + spin_unlock(&tp->lock); + + fl_mask_put(head, fold->mask, true); + if (!tc_skip_hw(fold->flags)) + fl_hw_destroy_filter(tp, fold, rtnl_held, NULL); tcf_unbind_filter(tp, &fold->res); tcf_exts_get_net(&fold->exts); - tcf_queue_work(&fold->rwork, fl_destroy_filter_work); + /* Caller holds reference to fold, so refcnt is always > 0 + * after this. + */ + refcount_dec(&fold->refcnt); + __fl_put(fold); } else { + if (__fl_lookup(fnew->mask, &fnew->mkey)) { + err = -EEXIST; + goto errout_hw; + } + + if (handle) { + /* user specifies a handle and it doesn't exist */ + err = idr_alloc_u32(&head->handle_idr, fnew, &handle, + handle, GFP_ATOMIC); + + /* Filter with specified handle was concurrently + * inserted after initial check in cls_api. This is not + * necessarily an error if NLM_F_EXCL is not set in + * message flags. 
Returning EAGAIN will cause cls_api to + * try to update concurrently inserted rule. + */ + if (err == -ENOSPC) + err = -EAGAIN; + } else { + handle = 1; + err = idr_alloc_u32(&head->handle_idr, fnew, &handle, + INT_MAX, GFP_ATOMIC); + } + if (err) + goto errout_hw; + + fnew->handle = handle; + + err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node, + fnew->mask->filter_ht_params); + if (err) + goto errout_idr; + list_add_tail_rcu(&fnew->list, &fnew->mask->filters); + spin_unlock(&tp->lock); } + *arg = fnew; + kfree(tb); kfree(mask); return 0; -errout_mask_ht: - rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node, - fnew->mask->filter_ht_params); - errout_idr: - if (!fold) - idr_remove(&head->handle_idr, fnew->handle); - + idr_remove(&head->handle_idr, fnew->handle); +errout_hw: + spin_unlock(&tp->lock); + if (!tc_skip_hw(fnew->flags)) + fl_hw_destroy_filter(tp, fnew, rtnl_held, NULL); errout_mask: - fl_mask_put(head, fnew->mask, false); - + fl_mask_put(head, fnew->mask, true); errout: tcf_exts_destroy(&fnew->exts); kfree(fnew); @@ -1440,37 +1640,42 @@ errout_tb: kfree(tb); errout_mask_alloc: kfree(mask); +errout_fold: + if (fold) + __fl_put(fold); return err; } static int fl_delete(struct tcf_proto *tp, void *arg, bool *last, bool rtnl_held, struct netlink_ext_ack *extack) { - struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_head *head = fl_head_dereference(tp); struct cls_fl_filter *f = arg; + bool last_on_mask; + int err = 0; - rhashtable_remove_fast(&f->mask->ht, &f->ht_node, - f->mask->filter_ht_params); - __fl_delete(tp, f, extack); + err = __fl_delete(tp, f, &last_on_mask, rtnl_held, extack); *last = list_empty(&head->masks); - return 0; + __fl_put(f); + + return err; } static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg, bool rtnl_held) { - struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f; arg->count = arg->skip; - while ((f = idr_get_next_ul(&head->handle_idr, - &arg->cookie)) != 
NULL) { + while ((f = fl_get_next_filter(tp, &arg->cookie)) != NULL) { if (arg->fn(tp, f, arg) < 0) { + __fl_put(f); arg->stop = 1; break; } - arg->cookie = f->handle + 1; + __fl_put(f); + arg->cookie++; arg->count++; } } @@ -1478,7 +1683,7 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg, static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack) { - struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_head *head = fl_head_dereference(tp); struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = tp->chain->block; struct fl_flow_mask *mask; @@ -1526,8 +1731,10 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, continue; } + spin_lock(&tp->lock); tc_cls_offload_cnt_update(block, &f->in_hw_count, &f->flags, add); + spin_unlock(&tp->lock); } } @@ -2061,6 +2268,7 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, struct cls_fl_filter *f = fh; struct nlattr *nest; struct fl_flow_key *key, *mask; + bool skip_hw; if (!f) return skb->len; @@ -2071,21 +2279,26 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, if (!nest) goto nla_put_failure; + spin_lock(&tp->lock); + if (f->res.classid && nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid)) - goto nla_put_failure; + goto nla_put_failure_locked; key = &f->key; mask = &f->mask->key; + skip_hw = tc_skip_hw(f->flags); if (fl_dump_key(skb, net, key, mask)) - goto nla_put_failure; - - if (!tc_skip_hw(f->flags)) - fl_hw_update_stats(tp, f); + goto nla_put_failure_locked; if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags)) - goto nla_put_failure; + goto nla_put_failure_locked; + + spin_unlock(&tp->lock); + + if (!skip_hw) + fl_hw_update_stats(tp, f, rtnl_held); if (nla_put_u32(skb, TCA_FLOWER_IN_HW_COUNT, f->in_hw_count)) goto nla_put_failure; @@ -2100,6 +2313,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, return 
skb->len; +nla_put_failure_locked: + spin_unlock(&tp->lock); nla_put_failure: nla_nest_cancel(skb, nest); return -1; @@ -2144,6 +2359,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .init = fl_init, .destroy = fl_destroy, .get = fl_get, + .put = fl_put, .change = fl_change, .delete = fl_delete, .walk = fl_walk, @@ -2154,6 +2370,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .tmplt_destroy = fl_tmplt_destroy, .tmplt_dump = fl_tmplt_dump, .owner = THIS_MODULE, + .flags = TCF_PROTO_OPS_DOIT_UNLOCKED, }; static int __init cls_fl_init(void) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index a117d9260558..81356ef38d1d 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -671,6 +671,8 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) qdisc_qstats_cpu_backlog_dec(qdisc, skb); qdisc_bstats_cpu_update(qdisc, skb); qdisc_qstats_atomic_qlen_dec(qdisc); + } else { + qdisc->empty = true; } return skb; @@ -880,6 +882,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; sch->dev_queue = dev_queue; + sch->empty = true; dev_hold(dev); refcount_set(&sch->refcnt, 1); diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 8d2f6296279c..3cdf81cf97a3 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -611,7 +611,6 @@ static const struct genl_ops smc_pnet_ops[] = { { .cmd = SMC_PNETID_GET, .flags = GENL_ADMIN_PERM, - .policy = smc_pnet_policy, .doit = smc_pnet_get, .dumpit = smc_pnet_dump, .start = smc_pnet_dump_start @@ -619,19 +618,16 @@ static const struct genl_ops smc_pnet_ops[] = { { .cmd = SMC_PNETID_ADD, .flags = GENL_ADMIN_PERM, - .policy = smc_pnet_policy, .doit = smc_pnet_add }, { .cmd = SMC_PNETID_DEL, .flags = GENL_ADMIN_PERM, - .policy = smc_pnet_policy, .doit = smc_pnet_del }, { .cmd = SMC_PNETID_FLUSH, .flags = GENL_ADMIN_PERM, - .policy = smc_pnet_policy, .doit = smc_pnet_flush } }; @@ -642,6 +638,7 @@ static 
struct genl_family smc_pnet_nl_family __ro_after_init = { .name = SMCR_GENL_FAMILY_NAME, .version = SMCR_GENL_FAMILY_VERSION, .maxattr = SMC_PNETID_MAX, + .policy = smc_pnet_policy, .netnsok = true, .module = THIS_MODULE, .ops = smc_pnet_ops, diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index d8026543bf4c..76e14dc08bb9 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -54,7 +54,9 @@ const char tipc_bclink_name[] = "broadcast-link"; * @dests: array keeping number of reachable destinations per bearer * @primary_bearer: a bearer having links to all broadcast destinations, if any * @bcast_support: indicates if primary bearer, if any, supports broadcast + * @force_bcast: forces broadcast for multicast traffic * @rcast_support: indicates if all peer nodes support replicast + * @force_rcast: forces replicast for multicast traffic * @rc_ratio: dest count as percentage of cluster size where send method changes * @bc_threshold: calculated from rc_ratio; if dests > threshold use broadcast */ @@ -64,7 +66,9 @@ struct tipc_bc_base { int dests[MAX_BEARERS]; int primary_bearer; bool bcast_support; + bool force_bcast; bool rcast_support; + bool force_rcast; int rc_ratio; int bc_threshold; }; @@ -216,9 +220,24 @@ static void tipc_bcast_select_xmit_method(struct net *net, int dests, } /* Can current method be changed ? 
*/ method->expires = jiffies + TIPC_METHOD_EXPIRE; - if (method->mandatory || time_before(jiffies, exp)) + if (method->mandatory) return; + if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL) && + time_before(jiffies, exp)) + return; + + /* Configuration as force 'broadcast' method */ + if (bb->force_bcast) { + method->rcast = false; + return; + } + /* Configuration as force 'replicast' method */ + if (bb->force_rcast) { + method->rcast = true; + return; + } + /* Configuration as 'autoselect' or default method */ /* Determine method to use now */ method->rcast = dests <= bb->bc_threshold; } @@ -281,6 +300,63 @@ static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, return 0; } +/* tipc_mcast_send_sync - deliver a dummy message with SYN bit + * @net: the applicable net namespace + * @skb: socket buffer to copy + * @method: send method to be used + * @dests: destination nodes for message. + * @cong_link_cnt: returns number of encountered congested destination links + * Returns 0 if success, otherwise errno + */ +static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb, + struct tipc_mc_method *method, + struct tipc_nlist *dests, + u16 *cong_link_cnt) +{ + struct tipc_msg *hdr, *_hdr; + struct sk_buff_head tmpq; + struct sk_buff *_skb; + + /* Is a cluster supporting with new capabilities ? 
*/ + if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL)) + return 0; + + hdr = buf_msg(skb); + if (msg_user(hdr) == MSG_FRAGMENTER) + hdr = msg_get_wrapped(hdr); + if (msg_type(hdr) != TIPC_MCAST_MSG) + return 0; + + /* Allocate dummy message */ + _skb = tipc_buf_acquire(MCAST_H_SIZE, GFP_KERNEL); + if (!_skb) + return -ENOMEM; + + /* Preparing for 'synching' header */ + msg_set_syn(hdr, 1); + + /* Copy skb's header into a dummy header */ + skb_copy_to_linear_data(_skb, hdr, MCAST_H_SIZE); + skb_orphan(_skb); + + /* Reverse method for dummy message */ + _hdr = buf_msg(_skb); + msg_set_size(_hdr, MCAST_H_SIZE); + msg_set_is_rcast(_hdr, !msg_is_rcast(hdr)); + + skb_queue_head_init(&tmpq); + __skb_queue_tail(&tmpq, _skb); + if (method->rcast) + tipc_bcast_xmit(net, &tmpq, cong_link_cnt); + else + tipc_rcast_xmit(net, &tmpq, dests, cong_link_cnt); + + /* This queue should normally be empty by now */ + __skb_queue_purge(&tmpq); + + return 0; +} + /* tipc_mcast_xmit - deliver message to indicated destination nodes * and to identified node local sockets * @net: the applicable net namespace @@ -296,6 +372,9 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, u16 *cong_link_cnt) { struct sk_buff_head inputq, localq; + bool rcast = method->rcast; + struct tipc_msg *hdr; + struct sk_buff *skb; int rc = 0; skb_queue_head_init(&inputq); @@ -309,6 +388,18 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, /* Send according to determined transmit method */ if (dests->remote) { tipc_bcast_select_xmit_method(net, dests->remote, method); + + skb = skb_peek(pkts); + hdr = buf_msg(skb); + if (msg_user(hdr) == MSG_FRAGMENTER) + hdr = msg_get_wrapped(hdr); + msg_set_is_rcast(hdr, method->rcast); + + /* Switch method ? 
*/ + if (rcast != method->rcast) + tipc_mcast_send_sync(net, skb, method, + dests, cong_link_cnt); + if (method->rcast) rc = tipc_rcast_xmit(net, pkts, dests, cong_link_cnt); else @@ -485,10 +576,63 @@ static int tipc_bc_link_set_queue_limits(struct net *net, u32 limit) return 0; } +static int tipc_bc_link_set_broadcast_mode(struct net *net, u32 bc_mode) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + switch (bc_mode) { + case BCLINK_MODE_BCAST: + if (!bb->bcast_support) + return -ENOPROTOOPT; + + bb->force_bcast = true; + bb->force_rcast = false; + break; + case BCLINK_MODE_RCAST: + if (!bb->rcast_support) + return -ENOPROTOOPT; + + bb->force_bcast = false; + bb->force_rcast = true; + break; + case BCLINK_MODE_SEL: + if (!bb->bcast_support || !bb->rcast_support) + return -ENOPROTOOPT; + + bb->force_bcast = false; + bb->force_rcast = false; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int tipc_bc_link_set_broadcast_ratio(struct net *net, u32 bc_ratio) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + if (!bb->bcast_support || !bb->rcast_support) + return -ENOPROTOOPT; + + if (bc_ratio > 100 || bc_ratio <= 0) + return -EINVAL; + + bb->rc_ratio = bc_ratio; + tipc_bcast_lock(net); + tipc_bcbase_calc_bc_threshold(net); + tipc_bcast_unlock(net); + + return 0; +} + int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]) { int err; u32 win; + u32 bc_mode; + u32 bc_ratio; struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; if (!attrs[TIPC_NLA_LINK_PROP]) @@ -498,12 +642,28 @@ int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]) if (err) return err; - if (!props[TIPC_NLA_PROP_WIN]) + if (!props[TIPC_NLA_PROP_WIN] && + !props[TIPC_NLA_PROP_BROADCAST] && + !props[TIPC_NLA_PROP_BROADCAST_RATIO]) { return -EOPNOTSUPP; + } + + if (props[TIPC_NLA_PROP_BROADCAST]) { + bc_mode = nla_get_u32(props[TIPC_NLA_PROP_BROADCAST]); + err = tipc_bc_link_set_broadcast_mode(net, bc_mode); + } + + if (!err && props[TIPC_NLA_PROP_BROADCAST_RATIO]) 
{ + bc_ratio = nla_get_u32(props[TIPC_NLA_PROP_BROADCAST_RATIO]); + err = tipc_bc_link_set_broadcast_ratio(net, bc_ratio); + } - win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + if (!err && props[TIPC_NLA_PROP_WIN]) { + win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + err = tipc_bc_link_set_queue_limits(net, win); + } - return tipc_bc_link_set_queue_limits(net, win); + return err; } int tipc_bcast_init(struct net *net) @@ -529,7 +689,7 @@ int tipc_bcast_init(struct net *net) goto enomem; bb->link = l; tn->bcl = l; - bb->rc_ratio = 25; + bb->rc_ratio = 10; bb->rcast_support = true; return 0; enomem: @@ -576,3 +736,105 @@ void tipc_nlist_purge(struct tipc_nlist *nl) nl->remote = 0; nl->local = false; } + +u32 tipc_bcast_get_broadcast_mode(struct net *net) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + if (bb->force_bcast) + return BCLINK_MODE_BCAST; + + if (bb->force_rcast) + return BCLINK_MODE_RCAST; + + if (bb->bcast_support && bb->rcast_support) + return BCLINK_MODE_SEL; + + return 0; +} + +u32 tipc_bcast_get_broadcast_ratio(struct net *net) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + return bb->rc_ratio; +} + +void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, + struct sk_buff_head *inputq) +{ + struct sk_buff *skb, *_skb, *tmp; + struct tipc_msg *hdr, *_hdr; + bool match = false; + u32 node, port; + + skb = skb_peek(inputq); + hdr = buf_msg(skb); + + if (likely(!msg_is_syn(hdr) && skb_queue_empty(defq))) + return; + + node = msg_orignode(hdr); + if (node == tipc_own_addr(net)) + return; + + port = msg_origport(hdr); + + /* Has the twin SYN message already arrived ? 
*/ + skb_queue_walk(defq, _skb) { + _hdr = buf_msg(_skb); + if (msg_orignode(_hdr) != node) + continue; + if (msg_origport(_hdr) != port) + continue; + match = true; + break; + } + + if (!match) { + if (!msg_is_syn(hdr)) + return; + __skb_dequeue(inputq); + __skb_queue_tail(defq, skb); + return; + } + + /* Deliver non-SYN message from other link, otherwise queue it */ + if (!msg_is_syn(hdr)) { + if (msg_is_rcast(hdr) != msg_is_rcast(_hdr)) + return; + __skb_dequeue(inputq); + __skb_queue_tail(defq, skb); + return; + } + + /* Queue non-SYN/SYN message from same link */ + if (msg_is_rcast(hdr) == msg_is_rcast(_hdr)) { + __skb_dequeue(inputq); + __skb_queue_tail(defq, skb); + return; + } + + /* Matching SYN messages => return the one with data, if any */ + __skb_unlink(_skb, defq); + if (msg_data_sz(hdr)) { + kfree_skb(_skb); + } else { + __skb_dequeue(inputq); + kfree_skb(skb); + __skb_queue_tail(inputq, _skb); + } + + /* Deliver subsequent non-SYN messages from same peer */ + skb_queue_walk_safe(defq, _skb, tmp) { + _hdr = buf_msg(_skb); + if (msg_orignode(_hdr) != node) + continue; + if (msg_origport(_hdr) != port) + continue; + if (msg_is_syn(_hdr)) + break; + __skb_unlink(_skb, defq); + __skb_queue_tail(inputq, _skb); + } +} diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 751530ab0c49..dadad953e2be 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -48,6 +48,10 @@ extern const char tipc_bclink_name[]; #define TIPC_METHOD_EXPIRE msecs_to_jiffies(5000) +#define BCLINK_MODE_BCAST 0x1 +#define BCLINK_MODE_RCAST 0x2 +#define BCLINK_MODE_SEL 0x4 + struct tipc_nlist { struct list_head list; u32 self; @@ -63,11 +67,13 @@ void tipc_nlist_del(struct tipc_nlist *nl, u32 node); /* Cookie to be used between socket and broadcast layer * @rcast: replicast (instead of broadcast) was used at previous xmit * @mandatory: broadcast/replicast indication was set by user + * @deferredq: defer queue to make message in order * @expires: re-evaluate non-mandatory transmit 
method if we are past this */ struct tipc_mc_method { bool rcast; bool mandatory; + struct sk_buff_head deferredq; unsigned long expires; }; @@ -92,6 +98,12 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); int tipc_bclink_reset_stats(struct net *net); +u32 tipc_bcast_get_broadcast_mode(struct net *net); +u32 tipc_bcast_get_broadcast_ratio(struct net *net); + +void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, + struct sk_buff_head *inputq); + static inline void tipc_bcast_lock(struct net *net) { spin_lock_bh(&tipc_net(net)->bclock); diff --git a/net/tipc/core.c b/net/tipc/core.c index 5b38f5164281..27cccd101ef6 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -43,6 +43,7 @@ #include "net.h" #include "socket.h" #include "bcast.h" +#include "node.h" #include <linux/module.h> @@ -59,6 +60,7 @@ static int __net_init tipc_init_net(struct net *net) tn->node_addr = 0; tn->trial_addr = 0; tn->addr_trial_end = 0; + tn->capabilities = TIPC_NODE_CAPABILITIES; memset(tn->node_id, 0, sizeof(tn->node_id)); memset(tn->node_id_string, 0, sizeof(tn->node_id_string)); tn->mon_threshold = TIPC_DEF_MON_THRESHOLD; diff --git a/net/tipc/core.h b/net/tipc/core.h index 8020a6c360ff..7a68e1b6a066 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -122,6 +122,9 @@ struct tipc_net { /* Topology subscription server */ struct tipc_topsrv *topsrv; atomic_t subscription_count; + + /* Cluster capabilities */ + u16 capabilities; }; static inline struct tipc_net *tipc_net(struct net *net) diff --git a/net/tipc/link.c b/net/tipc/link.c index 341ecd796aa4..52d23b3ffaf5 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2197,6 +2197,8 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) struct nlattr *attrs; struct nlattr *prop; struct tipc_net *tn = net_generic(net, tipc_net_id); + u32 bc_mode = tipc_bcast_get_broadcast_mode(net); + u32 bc_ratio = 
tipc_bcast_get_broadcast_ratio(net); struct tipc_link *bcl = tn->bcl; if (!bcl) @@ -2233,6 +2235,12 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window)) goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST, bc_mode)) + goto prop_msg_full; + if (bc_mode & BCLINK_MODE_SEL) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST_RATIO, + bc_ratio)) + goto prop_msg_full; nla_nest_end(msg->skb, prop); err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index d7e4b8b93f9d..528ba9241acc 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -257,6 +257,16 @@ static inline void msg_set_src_droppable(struct tipc_msg *m, u32 d) msg_set_bits(m, 0, 18, 1, d); } +static inline bool msg_is_rcast(struct tipc_msg *m) +{ + return msg_bits(m, 0, 18, 0x1); +} + +static inline void msg_set_is_rcast(struct tipc_msg *m, bool d) +{ + msg_set_bits(m, 0, 18, 0x1, d); +} + static inline void msg_set_size(struct tipc_msg *m, u32 sz) { m->hdr[0] = htonl((msg_word(m, 0) & ~0x1ffff) | sz); diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 99ee419210ba..2d178df0a89f 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -110,7 +110,9 @@ const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { [TIPC_NLA_PROP_UNSPEC] = { .type = NLA_UNSPEC }, [TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 }, [TIPC_NLA_PROP_TOL] = { .type = NLA_U32 }, - [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 } + [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_BROADCAST] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_BROADCAST_RATIO] = { .type = NLA_U32 } }; const struct nla_policy tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = { @@ -142,114 +144,93 @@ static const struct genl_ops tipc_genl_v2_ops[] = { { .cmd = TIPC_NL_BEARER_DISABLE, .doit = tipc_nl_bearer_disable, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_BEARER_ENABLE, .doit = 
tipc_nl_bearer_enable, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_BEARER_GET, .doit = tipc_nl_bearer_get, .dumpit = tipc_nl_bearer_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_BEARER_ADD, .doit = tipc_nl_bearer_add, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_BEARER_SET, .doit = tipc_nl_bearer_set, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_SOCK_GET, .start = tipc_dump_start, .dumpit = tipc_nl_sk_dump, .done = tipc_dump_done, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_PUBL_GET, .dumpit = tipc_nl_publ_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_GET, .doit = tipc_nl_node_get_link, .dumpit = tipc_nl_node_dump_link, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_SET, .doit = tipc_nl_node_set_link, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_LINK_RESET_STATS, .doit = tipc_nl_node_reset_link_stats, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MEDIA_GET, .doit = tipc_nl_media_get, .dumpit = tipc_nl_media_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MEDIA_SET, .doit = tipc_nl_media_set, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_NODE_GET, .dumpit = tipc_nl_node_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_NET_GET, .dumpit = tipc_nl_net_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_NET_SET, .doit = tipc_nl_net_set, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_NAME_TABLE_GET, .dumpit = tipc_nl_name_table_dump, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MON_SET, .doit = tipc_nl_node_set_monitor, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MON_GET, .doit = tipc_nl_node_get_monitor, .dumpit = tipc_nl_node_dump_monitor, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_MON_PEER_GET, .dumpit = tipc_nl_node_dump_monitor_peer, - .policy = tipc_nl_policy, }, { .cmd = TIPC_NL_PEER_REMOVE, .doit = tipc_nl_peer_rm, - .policy = tipc_nl_policy, }, #ifdef CONFIG_TIPC_MEDIA_UDP { .cmd = TIPC_NL_UDP_GET_REMOTEIP, .dumpit = tipc_udp_nl_dump_remoteip, - .policy = tipc_nl_policy, }, #endif }; @@ 
-259,6 +240,7 @@ struct genl_family tipc_genl_family __ro_after_init = { .version = TIPC_GENL_V2_VERSION, .hdrsize = 0, .maxattr = TIPC_NLA_MAX, + .policy = tipc_nl_policy, .netnsok = true, .module = THIS_MODULE, .ops = tipc_genl_v2_ops, diff --git a/net/tipc/node.c b/net/tipc/node.c index dd3b6dc17662..3469b5d4ed32 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -383,6 +383,11 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, tipc_link_update_caps(l, capabilities); } write_unlock_bh(&n->lock); + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + tn->capabilities &= temp_node->capabilities; + } goto exit; } n = kzalloc(sizeof(*n), GFP_ATOMIC); @@ -433,6 +438,11 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, break; } list_add_tail_rcu(&n->list, &temp_node->list); + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + tn->capabilities &= temp_node->capabilities; + } trace_tipc_node_create(n, true, " "); exit: spin_unlock_bh(&tn->node_list_lock); @@ -589,6 +599,7 @@ static void tipc_node_clear_links(struct tipc_node *node) */ static bool tipc_node_cleanup(struct tipc_node *peer) { + struct tipc_node *temp_node; struct tipc_net *tn = tipc_net(peer->net); bool deleted = false; @@ -604,6 +615,13 @@ static bool tipc_node_cleanup(struct tipc_node *peer) deleted = true; } tipc_node_write_unlock(peer); + + /* Calculate cluster capabilities */ + tn->capabilities = TIPC_NODE_CAPABILITIES; + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + tn->capabilities &= temp_node->capabilities; + } + spin_unlock_bh(&tn->node_list_lock); return deleted; } diff --git a/net/tipc/node.h b/net/tipc/node.h index 4f59a30e989a..2404225c5d58 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -51,7 +51,8 @@ enum { TIPC_BLOCK_FLOWCTL = (1 << 3), 
TIPC_BCAST_RCAST = (1 << 4), TIPC_NODE_ID128 = (1 << 5), - TIPC_LINK_PROTO_SEQNO = (1 << 6) + TIPC_LINK_PROTO_SEQNO = (1 << 6), + TIPC_MCAST_RBCTL = (1 << 7) }; #define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT | \ @@ -60,7 +61,8 @@ enum { TIPC_BCAST_RCAST | \ TIPC_BLOCK_FLOWCTL | \ TIPC_NODE_ID128 | \ - TIPC_LINK_PROTO_SEQNO) + TIPC_LINK_PROTO_SEQNO | \ + TIPC_MCAST_RBCTL) #define INVALID_BEARER_ID -1 void tipc_node_stop(struct net *net); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b542f14ed444..8ac8ddf1e324 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -485,6 +485,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, tsk_set_unreturnable(tsk, true); if (sock->type == SOCK_DGRAM) tsk_set_unreliable(tsk, true); + __skb_queue_head_init(&tsk->mc_method.deferredq); } trace_tipc_sk_create(sk, NULL, TIPC_DUMP_NONE, " "); @@ -582,6 +583,7 @@ static int tipc_release(struct socket *sock) sk->sk_shutdown = SHUTDOWN_MASK; tipc_sk_leave(tsk); tipc_sk_withdraw(tsk, 0, NULL); + __skb_queue_purge(&tsk->mc_method.deferredq); sk_stop_timer(sk, &sk->sk_timer); tipc_sk_remove(tsk); @@ -2149,6 +2151,7 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, struct tipc_msg *hdr = buf_msg(skb); struct net *net = sock_net(sk); struct sk_buff_head inputq; + int mtyp = msg_type(hdr); int limit, err = TIPC_OK; trace_tipc_sk_filter_rcv(sk, skb, TIPC_DUMP_ALL, " "); @@ -2162,6 +2165,9 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, if (unlikely(grp)) tipc_group_filter_msg(grp, &inputq, xmitq); + if (unlikely(!grp) && mtyp == TIPC_MCAST_MSG) + tipc_mcast_filter_msg(net, &tsk->mc_method.deferredq, &inputq); + /* Validate and add to receive buffer if there is space */ while ((skb = __skb_dequeue(&inputq))) { hdr = buf_msg(skb); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index df921a2904b9..0e24edab2535 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -469,27 +469,32 @@ static int 
do_tls_setsockopt_conf(struct sock *sk, char __user *optval, switch (crypto_info->cipher_type) { case TLS_CIPHER_AES_GCM_128: + optsize = sizeof(struct tls12_crypto_info_aes_gcm_128); + break; case TLS_CIPHER_AES_GCM_256: { - optsize = crypto_info->cipher_type == TLS_CIPHER_AES_GCM_128 ? - sizeof(struct tls12_crypto_info_aes_gcm_128) : - sizeof(struct tls12_crypto_info_aes_gcm_256); - if (optlen != optsize) { - rc = -EINVAL; - goto err_crypto_info; - } - rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info), - optlen - sizeof(*crypto_info)); - if (rc) { - rc = -EFAULT; - goto err_crypto_info; - } + optsize = sizeof(struct tls12_crypto_info_aes_gcm_256); break; } + case TLS_CIPHER_AES_CCM_128: + optsize = sizeof(struct tls12_crypto_info_aes_ccm_128); + break; default: rc = -EINVAL; goto err_crypto_info; } + if (optlen != optsize) { + rc = -EINVAL; + goto err_crypto_info; + } + + rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info), + optlen - sizeof(*crypto_info)); + if (rc) { + rc = -EFAULT; + goto err_crypto_info; + } + if (tx) { #ifdef CONFIG_TLS_DEVICE rc = tls_set_device_offload(sk, ctx); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 425351ac2a9b..4f821edeeae6 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -42,8 +42,6 @@ #include <net/strparser.h> #include <net/tls.h> -#define MAX_IV_SIZE TLS_CIPHER_AES_GCM_128_IV_SIZE - static int __skb_nsg(struct sk_buff *skb, int offset, int len, unsigned int recursion_level) { @@ -225,7 +223,7 @@ static int tls_do_decryption(struct sock *sk, /* Using skb->sk to push sk through to crypto async callback * handler. This allows propagating errors up to the socket * if needed. It _must_ be cleared in the async handler - * before kfree_skb is called. We _know_ skb->sk is NULL + * before consume_skb is called. We _know_ skb->sk is NULL * because it is a clone from strparser. 
*/ skb->sk = sk; @@ -479,11 +477,18 @@ static int tls_do_encryption(struct sock *sk, struct tls_rec *rec = ctx->open_rec; struct sk_msg *msg_en = &rec->msg_encrypted; struct scatterlist *sge = sk_msg_elem(msg_en, start); - int rc; + int rc, iv_offset = 0; + + /* For CCM based ciphers, first byte of IV is a constant */ + if (prot->cipher_type == TLS_CIPHER_AES_CCM_128) { + rec->iv_data[0] = TLS_AES_CCM_IV_B0_BYTE; + iv_offset = 1; + } + + memcpy(&rec->iv_data[iv_offset], tls_ctx->tx.iv, + prot->iv_size + prot->salt_size); - memcpy(rec->iv_data, tls_ctx->tx.iv, sizeof(rec->iv_data)); - xor_iv_with_seq(prot->version, rec->iv_data, - tls_ctx->tx.rec_seq); + xor_iv_with_seq(prot->version, rec->iv_data, tls_ctx->tx.rec_seq); sge->offset += prot->prepend_size; sge->length -= prot->prepend_size; @@ -1344,6 +1349,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, struct scatterlist *sgout = NULL; const int data_len = rxm->full_len - prot->overhead_size + prot->tail_size; + int iv_offset = 0; if (*zc && (out_iov || out_sg)) { if (out_iov) @@ -1386,18 +1392,25 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, aad = (u8 *)(sgout + n_sgout); iv = aad + prot->aad_size; + /* For CCM based ciphers, first byte of nonce+iv is always '2' */ + if (prot->cipher_type == TLS_CIPHER_AES_CCM_128) { + iv[0] = 2; + iv_offset = 1; + } + /* Prepare IV */ err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE, - iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, + iv + iv_offset + prot->salt_size, prot->iv_size); if (err < 0) { kfree(mem); return err; } if (prot->version == TLS_1_3_VERSION) - memcpy(iv, tls_ctx->rx.iv, crypto_aead_ivsize(ctx->aead_recv)); + memcpy(iv + iv_offset, tls_ctx->rx.iv, + crypto_aead_ivsize(ctx->aead_recv)); else - memcpy(iv, tls_ctx->rx.iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->salt_size); xor_iv_with_seq(prot->version, iv, tls_ctx->rx.rec_seq); @@ -1522,7 +1535,7 @@ static bool 
tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb, rxm->full_len -= len; return false; } - kfree_skb(skb); + consume_skb(skb); } /* Finished with message */ @@ -1631,7 +1644,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, if (!is_peek) { skb_unlink(skb, &ctx->rx_list); - kfree_skb(skb); + consume_skb(skb); } skb = next_skb; @@ -2152,14 +2165,15 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) struct tls_crypto_info *crypto_info; struct tls12_crypto_info_aes_gcm_128 *gcm_128_info; struct tls12_crypto_info_aes_gcm_256 *gcm_256_info; + struct tls12_crypto_info_aes_ccm_128 *ccm_128_info; struct tls_sw_context_tx *sw_ctx_tx = NULL; struct tls_sw_context_rx *sw_ctx_rx = NULL; struct cipher_context *cctx; struct crypto_aead **aead; struct strp_callbacks cb; - u16 nonce_size, tag_size, iv_size, rec_seq_size; + u16 nonce_size, tag_size, iv_size, rec_seq_size, salt_size; struct crypto_tfm *tfm; - char *iv, *rec_seq, *key, *salt; + char *iv, *rec_seq, *key, *salt, *cipher_name; size_t keysize; int rc = 0; @@ -2224,6 +2238,8 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) keysize = TLS_CIPHER_AES_GCM_128_KEY_SIZE; key = gcm_128_info->key; salt = gcm_128_info->salt; + salt_size = TLS_CIPHER_AES_GCM_128_SALT_SIZE; + cipher_name = "gcm(aes)"; break; } case TLS_CIPHER_AES_GCM_256: { @@ -2239,6 +2255,25 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) keysize = TLS_CIPHER_AES_GCM_256_KEY_SIZE; key = gcm_256_info->key; salt = gcm_256_info->salt; + salt_size = TLS_CIPHER_AES_GCM_256_SALT_SIZE; + cipher_name = "gcm(aes)"; + break; + } + case TLS_CIPHER_AES_CCM_128: { + nonce_size = TLS_CIPHER_AES_CCM_128_IV_SIZE; + tag_size = TLS_CIPHER_AES_CCM_128_TAG_SIZE; + iv_size = TLS_CIPHER_AES_CCM_128_IV_SIZE; + iv = ((struct tls12_crypto_info_aes_ccm_128 *)crypto_info)->iv; + rec_seq_size = TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE; + rec_seq = + ((struct tls12_crypto_info_aes_ccm_128 
*)crypto_info)->rec_seq; + ccm_128_info = + (struct tls12_crypto_info_aes_ccm_128 *)crypto_info; + keysize = TLS_CIPHER_AES_CCM_128_KEY_SIZE; + key = ccm_128_info->key; + salt = ccm_128_info->salt; + salt_size = TLS_CIPHER_AES_CCM_128_SALT_SIZE; + cipher_name = "ccm(aes)"; break; } default: @@ -2268,16 +2303,16 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) prot->overhead_size = prot->prepend_size + prot->tag_size + prot->tail_size; prot->iv_size = iv_size; - cctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, - GFP_KERNEL); + prot->salt_size = salt_size; + cctx->iv = kmalloc(iv_size + salt_size, GFP_KERNEL); if (!cctx->iv) { rc = -ENOMEM; goto free_priv; } /* Note: 128 & 256 bit salt are the same size */ - memcpy(cctx->iv, salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); - memcpy(cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size); prot->rec_seq_size = rec_seq_size; + memcpy(cctx->iv, salt, salt_size); + memcpy(cctx->iv + salt_size, iv, iv_size); cctx->rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL); if (!cctx->rec_seq) { rc = -ENOMEM; @@ -2285,7 +2320,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) } if (!*aead) { - *aead = crypto_alloc_aead("gcm(aes)", 0, 0); + *aead = crypto_alloc_aead(cipher_name, 0, 0); if (IS_ERR(*aead)) { rc = PTR_ERR(*aead); *aead = NULL; diff --git a/net/wimax/stack.c b/net/wimax/stack.c index a6307813b6d5..b7f571e55448 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -420,25 +420,21 @@ static const struct genl_ops wimax_gnl_ops[] = { { .cmd = WIMAX_GNL_OP_MSG_FROM_USER, .flags = GENL_ADMIN_PERM, - .policy = wimax_gnl_policy, .doit = wimax_gnl_doit_msg_from_user, }, { .cmd = WIMAX_GNL_OP_RESET, .flags = GENL_ADMIN_PERM, - .policy = wimax_gnl_policy, .doit = wimax_gnl_doit_reset, }, { .cmd = WIMAX_GNL_OP_RFKILL, .flags = GENL_ADMIN_PERM, - .policy = wimax_gnl_policy, .doit = wimax_gnl_doit_rfkill, }, { .cmd = WIMAX_GNL_OP_STATE_GET, .flags = GENL_ADMIN_PERM, 
- .policy = wimax_gnl_policy, .doit = wimax_gnl_doit_state_get, }, }; @@ -582,6 +578,7 @@ struct genl_family wimax_gnl_family __ro_after_init = { .version = WIMAX_GNL_VERSION, .hdrsize = 0, .maxattr = WIMAX_GNL_ATTR_MAX, + .policy = wimax_gnl_policy, .module = THIS_MODULE, .ops = wimax_gnl_ops, .n_ops = ARRAY_SIZE(wimax_gnl_ops), diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 25a9e3b5c154..33408ba1d7ee 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13368,7 +13368,6 @@ static const struct genl_ops nl80211_ops[] = { .doit = nl80211_get_wiphy, .dumpit = nl80211_dump_wiphy, .done = nl80211_dump_wiphy_done, - .policy = nl80211_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, @@ -13376,7 +13375,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_WIPHY, .doit = nl80211_set_wiphy, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_RTNL, }, @@ -13384,7 +13382,6 @@ static const struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_GET_INTERFACE, .doit = nl80211_get_interface, .dumpit = nl80211_dump_interface, - .policy = nl80211_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_WDEV | NL80211_FLAG_NEED_RTNL, @@ -13392,7 +13389,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_INTERFACE, .doit = nl80211_set_interface, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -13400,7 +13396,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_NEW_INTERFACE, .doit = nl80211_new_interface, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, @@ -13408,7 +13403,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_DEL_INTERFACE, .doit = 
nl80211_del_interface, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV | NL80211_FLAG_NEED_RTNL, @@ -13416,7 +13410,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_KEY, .doit = nl80211_get_key, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13424,7 +13417,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_KEY, .doit = nl80211_set_key, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL | @@ -13433,7 +13425,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_NEW_KEY, .doit = nl80211_new_key, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL | @@ -13442,14 +13433,12 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_DEL_KEY, .doit = nl80211_del_key, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_BEACON, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_set_beacon, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13457,7 +13446,6 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_START_AP, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_start_ap, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13465,7 +13453,6 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_STOP_AP, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_stop_ap, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | @@ -13475,14 +13462,12 @@ static const struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_GET_STATION, .doit = nl80211_get_station, .dumpit = nl80211_dump_station, - .policy = 
nl80211_policy, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_STATION, .doit = nl80211_set_station, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13490,7 +13475,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_NEW_STATION, .doit = nl80211_new_station, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13498,7 +13482,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_DEL_STATION, .doit = nl80211_del_station, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13507,7 +13490,6 @@ static const struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_GET_MPATH, .doit = nl80211_get_mpath, .dumpit = nl80211_dump_mpath, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13516,7 +13498,6 @@ static const struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_GET_MPP, .doit = nl80211_get_mpp, .dumpit = nl80211_dump_mpp, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13524,7 +13505,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_MPATH, .doit = nl80211_set_mpath, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13532,7 +13512,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_NEW_MPATH, .doit = nl80211_new_mpath, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13540,7 +13519,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = 
NL80211_CMD_DEL_MPATH, .doit = nl80211_del_mpath, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13548,7 +13526,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_BSS, .doit = nl80211_set_bss, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13557,7 +13534,6 @@ static const struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_GET_REG, .doit = nl80211_get_reg_do, .dumpit = nl80211_get_reg_dump, - .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_RTNL, /* can be retrieved by unprivileged users */ }, @@ -13565,7 +13541,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_REG, .doit = nl80211_set_reg, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_RTNL, }, @@ -13573,19 +13548,16 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_REQ_SET_REG, .doit = nl80211_req_set_reg, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = NL80211_CMD_RELOAD_REGDB, .doit = nl80211_reload_regdb, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, { .cmd = NL80211_CMD_GET_MESH_CONFIG, .doit = nl80211_get_mesh_config, - .policy = nl80211_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13593,7 +13565,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_MESH_CONFIG, .doit = nl80211_update_mesh_config, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13601,7 +13572,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_TRIGGER_SCAN, .doit = nl80211_trigger_scan, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | 
NL80211_FLAG_NEED_RTNL, @@ -13609,20 +13579,17 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_ABORT_SCAN, .doit = nl80211_abort_scan, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_GET_SCAN, - .policy = nl80211_policy, .dumpit = nl80211_dump_scan, }, { .cmd = NL80211_CMD_START_SCHED_SCAN, .doit = nl80211_start_sched_scan, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13630,7 +13597,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_STOP_SCHED_SCAN, .doit = nl80211_stop_sched_scan, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13638,7 +13604,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_AUTHENTICATE, .doit = nl80211_authenticate, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL | @@ -13647,7 +13612,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_ASSOCIATE, .doit = nl80211_associate, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13655,7 +13619,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_DEAUTHENTICATE, .doit = nl80211_deauthenticate, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13663,7 +13626,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_DISASSOCIATE, .doit = nl80211_disassociate, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13671,7 +13633,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = 
NL80211_CMD_JOIN_IBSS, .doit = nl80211_join_ibss, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13679,7 +13640,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_LEAVE_IBSS, .doit = nl80211_leave_ibss, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13689,7 +13649,6 @@ static const struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_TESTMODE, .doit = nl80211_testmode_do, .dumpit = nl80211_testmode_dump, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, @@ -13698,7 +13657,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_CONNECT, .doit = nl80211_connect, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13706,7 +13664,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS, .doit = nl80211_update_connect_params, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13714,7 +13671,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_DISCONNECT, .doit = nl80211_disconnect, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13722,20 +13678,17 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_WIPHY_NETNS, .doit = nl80211_wiphy_netns, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_GET_SURVEY, - .policy = nl80211_policy, .dumpit = nl80211_dump_survey, }, { .cmd = NL80211_CMD_SET_PMKSA, .doit = nl80211_setdel_pmksa, - .policy = nl80211_policy, .flags = 
GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13743,7 +13696,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_DEL_PMKSA, .doit = nl80211_setdel_pmksa, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13751,7 +13703,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_FLUSH_PMKSA, .doit = nl80211_flush_pmksa, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13759,7 +13710,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_REMAIN_ON_CHANNEL, .doit = nl80211_remain_on_channel, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13767,7 +13717,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, .doit = nl80211_cancel_remain_on_channel, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13775,7 +13724,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_TX_BITRATE_MASK, .doit = nl80211_set_tx_bitrate_mask, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -13783,7 +13731,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_REGISTER_FRAME, .doit = nl80211_register_mgmt, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV | NL80211_FLAG_NEED_RTNL, @@ -13791,7 +13738,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_FRAME, .doit = nl80211_tx_mgmt, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13799,7 +13745,6 @@ 
static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_FRAME_WAIT_CANCEL, .doit = nl80211_tx_mgmt_cancel_wait, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13807,7 +13752,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_POWER_SAVE, .doit = nl80211_set_power_save, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -13815,7 +13759,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_POWER_SAVE, .doit = nl80211_get_power_save, - .policy = nl80211_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -13823,7 +13766,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_CQM, .doit = nl80211_set_cqm, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -13831,7 +13773,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_CHANNEL, .doit = nl80211_set_channel, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -13839,7 +13780,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_WDS_PEER, .doit = nl80211_set_wds_peer, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -13847,7 +13787,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_JOIN_MESH, .doit = nl80211_join_mesh, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13855,7 +13794,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_LEAVE_MESH, .doit = nl80211_leave_mesh, - .policy = nl80211_policy, 
.flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13863,7 +13801,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_JOIN_OCB, .doit = nl80211_join_ocb, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13871,7 +13808,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_LEAVE_OCB, .doit = nl80211_leave_ocb, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13880,7 +13816,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WOWLAN, .doit = nl80211_get_wowlan, - .policy = nl80211_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, @@ -13888,7 +13823,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_WOWLAN, .doit = nl80211_set_wowlan, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, @@ -13897,7 +13831,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_REKEY_OFFLOAD, .doit = nl80211_set_rekey_data, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL | @@ -13906,7 +13839,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_TDLS_MGMT, .doit = nl80211_tdls_mgmt, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13914,7 +13846,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_TDLS_OPER, .doit = nl80211_tdls_oper, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13922,7 +13853,6 @@ static const struct genl_ops 
nl80211_ops[] = { { .cmd = NL80211_CMD_UNEXPECTED_FRAME, .doit = nl80211_register_unexpected_frame, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -13930,7 +13860,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_PROBE_CLIENT, .doit = nl80211_probe_client, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13938,7 +13867,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_REGISTER_BEACONS, .doit = nl80211_register_beacons, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, @@ -13946,7 +13874,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_NOACK_MAP, .doit = nl80211_set_noack_map, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -13954,7 +13881,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_START_P2P_DEVICE, .doit = nl80211_start_p2p_device, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV | NL80211_FLAG_NEED_RTNL, @@ -13962,7 +13888,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_STOP_P2P_DEVICE, .doit = nl80211_stop_p2p_device, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13970,7 +13895,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_START_NAN, .doit = nl80211_start_nan, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV | NL80211_FLAG_NEED_RTNL, @@ -13978,7 +13902,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_STOP_NAN, .doit = nl80211_stop_nan, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, 
.internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13986,7 +13909,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_ADD_NAN_FUNCTION, .doit = nl80211_nan_add_func, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -13994,7 +13916,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_DEL_NAN_FUNCTION, .doit = nl80211_nan_del_func, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14002,7 +13923,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_CHANGE_NAN_CONFIG, .doit = nl80211_nan_change_config, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14010,7 +13930,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_MCAST_RATE, .doit = nl80211_set_mcast_rate, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -14018,7 +13937,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_MAC_ACL, .doit = nl80211_set_mac_acl, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -14026,7 +13944,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_RADAR_DETECT, .doit = nl80211_start_radar_detection, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14034,12 +13951,10 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_PROTOCOL_FEATURES, .doit = nl80211_get_protocol_features, - .policy = nl80211_policy, }, { .cmd = NL80211_CMD_UPDATE_FT_IES, .doit = nl80211_update_ft_ies, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags 
= NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14047,7 +13962,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_CRIT_PROTOCOL_START, .doit = nl80211_crit_protocol_start, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14055,7 +13969,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_CRIT_PROTOCOL_STOP, .doit = nl80211_crit_protocol_stop, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14063,14 +13976,12 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_COALESCE, .doit = nl80211_get_coalesce, - .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_COALESCE, .doit = nl80211_set_coalesce, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, @@ -14078,7 +13989,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_CHANNEL_SWITCH, .doit = nl80211_channel_switch, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14087,7 +13997,6 @@ static const struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_VENDOR, .doit = nl80211_vendor_cmd, .dumpit = nl80211_vendor_cmd_dump, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, @@ -14095,7 +14004,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_QOS_MAP, .doit = nl80211_set_qos_map, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14103,7 +14011,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_ADD_TX_TS, .doit = nl80211_add_tx_ts, - .policy 
= nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14111,7 +14018,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_DEL_TX_TS, .doit = nl80211_del_tx_ts, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14119,7 +14025,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_TDLS_CHANNEL_SWITCH, .doit = nl80211_tdls_channel_switch, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14127,7 +14032,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH, .doit = nl80211_tdls_cancel_channel_switch, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14135,7 +14039,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_MULTICAST_TO_UNICAST, .doit = nl80211_set_multicast_to_unicast, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, @@ -14143,21 +14046,18 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_SET_PMK, .doit = nl80211_set_pmk, - .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_DEL_PMK, .doit = nl80211_del_pmk, - .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_EXTERNAL_AUTH, .doit = nl80211_external_auth, - .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14165,7 +14065,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_CONTROL_PORT_FRAME, .doit = nl80211_tx_control_port, - .policy = nl80211_policy, 
.flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14173,14 +14072,12 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_FTM_RESPONDER_STATS, .doit = nl80211_get_ftm_responder_stats, - .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_PEER_MEASUREMENT_START, .doit = nl80211_pmsr_start, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14188,7 +14085,6 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_NOTIFY_RADAR, .doit = nl80211_notify_radar_detection, - .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, @@ -14200,6 +14096,7 @@ static struct genl_family nl80211_fam __ro_after_init = { .hdrsize = 0, /* no private header */ .version = 1, /* no particular meaning now */ .maxattr = NL80211_ATTR_MAX, + .policy = nl80211_policy, .netnsok = true, .pre_doit = nl80211_pre_doit, .post_doit = nl80211_post_doit, diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index b8736f56e7f7..2db1626557c5 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -247,7 +247,7 @@ void xfrm_dev_resume(struct sk_buff *skb) unsigned long flags; rcu_read_lock(); - txq = netdev_pick_tx(dev, skb, NULL); + txq = netdev_core_pick_tx(dev, skb, NULL); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq)) diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore index dbb817dbacfc..59e40998e249 100644 --- a/samples/bpf/.gitignore +++ b/samples/bpf/.gitignore @@ -44,5 +44,6 @@ xdp_redirect_cpu xdp_redirect_map xdp_router_ipv4 xdp_rxq_info +xdp_sample_pkts xdp_tx_iptunnel xdpsock diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 929c8e537a14..837024512baf 100644 --- 
a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1478,13 +1478,27 @@ union bpf_attr { * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. * - * There is a single supported mode at this time: + * There are two supported modes at this time: + * + * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer + * (room space is added or removed below the layer 2 header). * * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer * (room space is added or removed below the layer 3 header). * - * All values for *flags* are reserved for future usage, and must - * be left at zero. + * The following flags are supported at this time: + * + * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size. + * Adjusting mss in this way is not allowed for datagrams. + * + * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 **: + * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 **: + * Any new space is reserved to hold a tunnel header. + * Configure skb offsets and other fields accordingly. + * + * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE **: + * * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP **: + * Use with ENCAP_L3 flags to further specify the tunnel type. * * A call to this helper is susceptible to change the underlaying * packet buffer. Therefore, at load time, all checks on pointers @@ -2431,6 +2445,38 @@ union bpf_attr { * Return * A **struct bpf_sock** pointer on success, or **NULL** in * case of failure. + * + * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) + * Description + * Look for TCP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-**NULL**, released via **bpf_sk_release**\ (). + * + * This function is identical to bpf_sk_lookup_tcp, except that it + * also returns timewait or request sockets. Use bpf_sk_fullsock + * or bpf_tcp_socket to access the full structure. 
+ * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. + * + * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * Description + * Check whether iph and th contain a valid SYN cookie ACK for + * the listening socket in sk. + * + * iph points to the start of the IPv4 or IPv6 header, while + * iph_len contains sizeof(struct iphdr) or sizeof(struct ip6hdr). + * + * th points to the start of the TCP header, while th_len contains + * sizeof(struct tcphdr). + * + * Return + * 0 if iph and th are a valid SYN cookie ACK, or a negative error + * otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2531,7 +2577,9 @@ union bpf_attr { FN(sk_fullsock), \ FN(tcp_sock), \ FN(skb_ecn_set_ce), \ - FN(get_listener_sock), + FN(get_listener_sock), \ + FN(skc_lookup_tcp), \ + FN(tcp_check_syncookie), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2590,9 +2638,18 @@ enum bpf_func_id { /* Current network namespace */ #define BPF_F_CURRENT_NETNS (-1L) +/* BPF_FUNC_skb_adjust_room flags. */ +#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0) + +#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1) +#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2) +#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3) +#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, + BPF_ADJ_ROOM_MAC, }; /* Mode for BPF_FUNC_skb_load_bytes_relative helper. 
*/ diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 3b74d23fffab..41e8a689aa77 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -30,4 +30,5 @@ test_netcnt test_section_names test_tcpnotify_user test_libbpf +test_tcp_check_syncookie_user alu32 diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 2aed37ea61a4..77b73b892136 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -51,7 +51,10 @@ TEST_PROGS := test_kmod.sh \ test_skb_cgroup_id.sh \ test_flow_dissector.sh \ test_xdp_vlan.sh \ - test_lwt_ip_encap.sh + test_lwt_ip_encap.sh \ + test_tcp_check_syncookie.sh \ + test_tc_tunnel.sh \ + test_tc_edt.sh TEST_PROGS_EXTENDED := with_addr.sh \ with_tunnels.sh \ @@ -60,7 +63,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \ # Compile but not part of 'make run_tests' TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \ - flow_dissector_load test_flow_dissector + flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user include ../lib.mk @@ -69,7 +72,7 @@ TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read all: $(TEST_CUSTOM_PROGS) $(OUTPUT)/urandom_read: $(OUTPUT)/%: %.c - $(CC) -o $@ -static $< -Wl,--build-id + $(CC) -o $@ $< -Wl,--build-id BPFOBJ := $(OUTPUT)/libbpf.a diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index c81fc350f7ad..97d140961438 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -159,6 +159,11 @@ static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, int size, unsigned long long netns_id, unsigned long long flags) = (void *) BPF_FUNC_sk_lookup_tcp; +static struct bpf_sock *(*bpf_skc_lookup_tcp)(void *ctx, + struct bpf_sock_tuple *tuple, + int size, unsigned long long netns_id, + unsigned long long flags) = + (void *) 
BPF_FUNC_skc_lookup_tcp; static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, struct bpf_sock_tuple *tuple, int size, unsigned long long netns_id, @@ -184,6 +189,9 @@ static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) = (void *) BPF_FUNC_get_listener_sock; static int (*bpf_skb_ecn_set_ce)(void *ctx) = (void *) BPF_FUNC_skb_ecn_set_ce; +static int (*bpf_tcp_check_syncookie)(struct bpf_sock *sk, + void *ip, int ip_len, void *tcp, int tcp_len) = + (void *) BPF_FUNC_tcp_check_syncookie; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions @@ -274,6 +282,9 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, #elif defined(__TARGET_ARCH_s930x) #define bpf_target_s930x #define bpf_target_defined +#elif defined(__TARGET_ARCH_arm) + #define bpf_target_arm + #define bpf_target_defined #elif defined(__TARGET_ARCH_arm64) #define bpf_target_arm64 #define bpf_target_defined @@ -296,6 +307,8 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, #define bpf_target_x86 #elif defined(__s390x__) #define bpf_target_s930x +#elif defined(__arm__) + #define bpf_target_arm #elif defined(__aarch64__) #define bpf_target_arm64 #elif defined(__mips__) @@ -333,6 +346,19 @@ static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, #define PT_REGS_SP(x) ((x)->gprs[15]) #define PT_REGS_IP(x) ((x)->psw.addr) +#elif defined(bpf_target_arm) + +#define PT_REGS_PARM1(x) ((x)->uregs[0]) +#define PT_REGS_PARM2(x) ((x)->uregs[1]) +#define PT_REGS_PARM3(x) ((x)->uregs[2]) +#define PT_REGS_PARM4(x) ((x)->uregs[3]) +#define PT_REGS_PARM5(x) ((x)->uregs[4]) +#define PT_REGS_RET(x) ((x)->uregs[14]) +#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_RC(x) ((x)->uregs[0]) +#define PT_REGS_SP(x) ((x)->uregs[13]) +#define PT_REGS_IP(x) ((x)->uregs[12]) + #elif defined(bpf_target_arm64) #define PT_REGS_PARM1(x) ((x)->regs[0]) diff --git 
a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 37f947ec44ed..a42f4fc4dc11 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -23,3 +23,5 @@ CONFIG_LWTUNNEL=y CONFIG_BPF_STREAM_PARSER=y CONFIG_XDP_SOCKETS=y CONFIG_FTRACE_SYSCALLS=y +CONFIG_IPV6_TUNNEL=y +CONFIG_IPV6_GRE=y diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index 8a114bb1c379..1c1a2f75f3d8 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -1,13 +1,25 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +static __u64 read_perf_max_sample_freq(void) +{ + __u64 sample_freq = 5000; /* fallback to 5000 on error */ + FILE *f; + + f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r"); + if (f == NULL) + return sample_freq; + fscanf(f, "%llu", &sample_freq); + fclose(f); + return sample_freq; +} + void test_stacktrace_build_id_nmi(void) { int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; const char *file = "./test_stacktrace_build_id.o"; int err, pmu_fd, prog_fd; struct perf_event_attr attr = { - .sample_freq = 5000, .freq = 1, .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, @@ -20,6 +32,8 @@ void test_stacktrace_build_id_nmi(void) int build_id_matches = 0; int retry = 1; + attr.sample_freq = read_perf_max_sample_freq(); + retry: err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd); if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) diff --git a/tools/testing/selftests/bpf/progs/test_tc_edt.c b/tools/testing/selftests/bpf/progs/test_tc_edt.c new file mode 100644 index 000000000000..3af64c470d64 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_edt.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdint.h> +#include <linux/bpf.h> +#include 
<linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/pkt_cls.h> +#include <linux/tcp.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +/* the maximum delay we are willing to add (drop packets beyond that) */ +#define TIME_HORIZON_NS (2000 * 1000 * 1000) +#define NS_PER_SEC 1000000000 +#define ECN_HORIZON_NS 5000000 +#define THROTTLE_RATE_BPS (5 * 1000 * 1000) + +/* flow_key => last_tstamp timestamp used */ +struct bpf_map_def SEC("maps") flow_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(uint32_t), + .value_size = sizeof(uint64_t), + .max_entries = 1, +}; + +static inline int throttle_flow(struct __sk_buff *skb) +{ + int key = 0; + uint64_t *last_tstamp = bpf_map_lookup_elem(&flow_map, &key); + uint64_t delay_ns = ((uint64_t)skb->len) * NS_PER_SEC / + THROTTLE_RATE_BPS; + uint64_t now = bpf_ktime_get_ns(); + uint64_t tstamp, next_tstamp = 0; + + if (last_tstamp) + next_tstamp = *last_tstamp + delay_ns; + + tstamp = skb->tstamp; + if (tstamp < now) + tstamp = now; + + /* should we throttle? 
*/ + if (next_tstamp <= tstamp) { + if (bpf_map_update_elem(&flow_map, &key, &tstamp, BPF_ANY)) + return TC_ACT_SHOT; + return TC_ACT_OK; + } + + /* do not queue past the time horizon */ + if (next_tstamp - now >= TIME_HORIZON_NS) + return TC_ACT_SHOT; + + /* set ecn bit, if needed */ + if (next_tstamp - now >= ECN_HORIZON_NS) + bpf_skb_ecn_set_ce(skb); + + if (bpf_map_update_elem(&flow_map, &key, &next_tstamp, BPF_EXIST)) + return TC_ACT_SHOT; + skb->tstamp = next_tstamp; + + return TC_ACT_OK; +} + +static inline int handle_tcp(struct __sk_buff *skb, struct tcphdr *tcp) +{ + void *data_end = (void *)(long)skb->data_end; + + /* drop malformed packets */ + if ((void *)(tcp + 1) > data_end) + return TC_ACT_SHOT; + + if (tcp->dest == bpf_htons(9000)) + return throttle_flow(skb); + + return TC_ACT_OK; +} + +static inline int handle_ipv4(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct iphdr *iph; + uint32_t ihl; + + /* drop malformed packets */ + if (data + sizeof(struct ethhdr) > data_end) + return TC_ACT_SHOT; + iph = (struct iphdr *)(data + sizeof(struct ethhdr)); + if ((void *)(iph + 1) > data_end) + return TC_ACT_SHOT; + ihl = iph->ihl * 4; + if (((void *)iph) + ihl > data_end) + return TC_ACT_SHOT; + + if (iph->protocol == IPPROTO_TCP) + return handle_tcp(skb, (struct tcphdr *)(((void *)iph) + ihl)); + + return TC_ACT_OK; +} + +SEC("cls_test") int tc_prog(struct __sk_buff *skb) +{ + if (skb->protocol == bpf_htons(ETH_P_IP)) + return handle_ipv4(skb); + + return TC_ACT_OK; +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c new file mode 100644 index 000000000000..f541c2de947d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* In-place tunneling */ + +#include <stdbool.h> +#include <string.h> + 
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <linux/types.h>
+
+#include "bpf_endian.h"
+#include "bpf_helpers.h"
+
+static const int cfg_port = 8000;
+
+struct grev4hdr {
+	struct iphdr ip;
+	__be16 flags;
+	__be16 protocol;
+} __attribute__((packed));
+
+struct grev6hdr {
+	struct ipv6hdr ip;
+	__be16 flags;
+	__be16 protocol;
+} __attribute__((packed));
+
+static __always_inline void set_ipv4_csum(struct iphdr *iph)
+{
+	__u16 *iph16 = (__u16 *)iph;
+	__u32 csum;
+	int i;
+
+	iph->check = 0;
+
+#pragma clang loop unroll(full)
+	for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
+		csum += *iph16++;
+
+	iph->check = ~((csum & 0xffff) + (csum >> 16));
+}
+
+static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
+{
+	struct grev4hdr h_outer;
+	struct iphdr iph_inner;
+	struct tcphdr tcph;
+	__u64 flags;
+	int olen;
+
+	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
+			       sizeof(iph_inner)) < 0)
+		return TC_ACT_OK;
+
+	/* filter only packets we want */
+	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
+		return TC_ACT_OK;
+
+	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
+			       &tcph, sizeof(tcph)) < 0)
+		return TC_ACT_OK;
+
+	if (tcph.dest != __bpf_constant_htons(cfg_port))
+		return TC_ACT_OK;
+
+	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
+	if (with_gre) {
+		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
+		olen = sizeof(h_outer);
+	} else {
+		olen = sizeof(h_outer.ip);
+	}
+
+	/* add room between mac and network header */
+	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
+		return TC_ACT_SHOT;
+
+	/* prepare new outer network header */
+	h_outer.ip = iph_inner;
+	h_outer.ip.tot_len = bpf_htons(olen +
+				      bpf_ntohs(h_outer.ip.tot_len)); /* wire -> host before adding */
+	if (with_gre) {
+		h_outer.ip.protocol = IPPROTO_GRE;
+		h_outer.protocol = bpf_htons(ETH_P_IP);
+		h_outer.flags = 0;
+	} else {
+		h_outer.ip.protocol = IPPROTO_IPIP;
+	}
+
+	set_ipv4_csum((void *)&h_outer.ip);
+
+	/* store new outer network header */
+	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
+				BPF_F_INVALIDATE_HASH) < 0)
+		return TC_ACT_SHOT;
+
+	return TC_ACT_OK;
+}
+
+static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
+{
+	struct ipv6hdr iph_inner;
+	struct grev6hdr h_outer;
+	struct tcphdr tcph;
+	__u64 flags;
+	int olen;
+
+	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
+			       sizeof(iph_inner)) < 0)
+		return TC_ACT_OK;
+
+	/* filter only packets we want */
+	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
+			       &tcph, sizeof(tcph)) < 0)
+		return TC_ACT_OK;
+
+	if (tcph.dest != __bpf_constant_htons(cfg_port))
+		return TC_ACT_OK;
+
+	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
+	if (with_gre) {
+		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
+		olen = sizeof(h_outer);
+	} else {
+		olen = sizeof(h_outer.ip);
+	}
+
+
+	/* add room between mac and network header */
+	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
+		return TC_ACT_SHOT;
+
+	/* prepare new outer network header */
+	h_outer.ip = iph_inner;
+	h_outer.ip.payload_len = bpf_htons(olen +
+					  bpf_ntohs(h_outer.ip.payload_len));
+	if (with_gre) {
+		h_outer.ip.nexthdr = IPPROTO_GRE;
+		h_outer.protocol = bpf_htons(ETH_P_IPV6);
+		h_outer.flags = 0;
+	} else {
+		h_outer.ip.nexthdr = IPPROTO_IPV6;
+	}
+
+	/* store new outer network header */
+	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
+				BPF_F_INVALIDATE_HASH) < 0)
+		return TC_ACT_SHOT;
+
+	return TC_ACT_OK;
+}
+
+SEC("encap_ipip")
+int __encap_ipip(struct __sk_buff *skb)
+{
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+		return encap_ipv4(skb, false);
+	else
+		return TC_ACT_OK;
+}
+
+SEC("encap_gre")
+int __encap_gre(struct __sk_buff *skb)
+{
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+		return encap_ipv4(skb, true);
+	else
+		return TC_ACT_OK;
+}
+
+SEC("encap_ip6tnl") +int __encap_ip6tnl(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) + return encap_ipv6(skb, false); + else + return TC_ACT_OK; +} + +SEC("encap_ip6gre") +int __encap_ip6gre(struct __sk_buff *skb) +{ + if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) + return encap_ipv6(skb, true); + else + return TC_ACT_OK; +} + +static int decap_internal(struct __sk_buff *skb, int off, int len, char proto) +{ + char buf[sizeof(struct grev6hdr)]; + int olen; + + switch (proto) { + case IPPROTO_IPIP: + case IPPROTO_IPV6: + olen = len; + break; + case IPPROTO_GRE: + olen = len + 4 /* gre hdr */; + break; + default: + return TC_ACT_OK; + } + + if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, + BPF_F_ADJ_ROOM_FIXED_GSO)) + return TC_ACT_SHOT; + + return TC_ACT_OK; +} + +static int decap_ipv4(struct __sk_buff *skb) +{ + struct iphdr iph_outer; + + if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer, + sizeof(iph_outer)) < 0) + return TC_ACT_OK; + + if (iph_outer.ihl != 5) + return TC_ACT_OK; + + return decap_internal(skb, ETH_HLEN, sizeof(iph_outer), + iph_outer.protocol); +} + +static int decap_ipv6(struct __sk_buff *skb) +{ + struct ipv6hdr iph_outer; + + if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer, + sizeof(iph_outer)) < 0) + return TC_ACT_OK; + + return decap_internal(skb, ETH_HLEN, sizeof(iph_outer), + iph_outer.nexthdr); +} + +SEC("decap") +int decap_f(struct __sk_buff *skb) +{ + switch (skb->protocol) { + case __bpf_constant_htons(ETH_P_IP): + return decap_ipv4(skb); + case __bpf_constant_htons(ETH_P_IPV6): + return decap_ipv6(skb); + default: + /* does not match, ignore */ + return TC_ACT_OK; + } +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c new file mode 100644 index 000000000000..1ab095bcacd8 --- /dev/null +++ 
b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook +// Copyright (c) 2019 Cloudflare + +#include <string.h> + +#include <linux/bpf.h> +#include <linux/pkt_cls.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <sys/socket.h> +#include <linux/tcp.h> + +#include "bpf_helpers.h" +#include "bpf_endian.h" + +struct bpf_map_def SEC("maps") results = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64), + .max_entries = 1, +}; + +static __always_inline void check_syncookie(void *ctx, void *data, + void *data_end) +{ + struct bpf_sock_tuple tup; + struct bpf_sock *sk; + struct ethhdr *ethh; + struct iphdr *ipv4h; + struct ipv6hdr *ipv6h; + struct tcphdr *tcph; + int ret; + __u32 key = 0; + __u64 value = 1; + + ethh = data; + if (ethh + 1 > data_end) + return; + + switch (bpf_ntohs(ethh->h_proto)) { + case ETH_P_IP: + ipv4h = data + sizeof(struct ethhdr); + if (ipv4h + 1 > data_end) + return; + + if (ipv4h->ihl != 5) + return; + + tcph = data + sizeof(struct ethhdr) + sizeof(struct iphdr); + if (tcph + 1 > data_end) + return; + + tup.ipv4.saddr = ipv4h->saddr; + tup.ipv4.daddr = ipv4h->daddr; + tup.ipv4.sport = tcph->source; + tup.ipv4.dport = tcph->dest; + + sk = bpf_skc_lookup_tcp(ctx, &tup, sizeof(tup.ipv4), + BPF_F_CURRENT_NETNS, 0); + if (!sk) + return; + + if (sk->state != BPF_TCP_LISTEN) + goto release; + + ret = bpf_tcp_check_syncookie(sk, ipv4h, sizeof(*ipv4h), + tcph, sizeof(*tcph)); + break; + + case ETH_P_IPV6: + ipv6h = data + sizeof(struct ethhdr); + if (ipv6h + 1 > data_end) + return; + + if (ipv6h->nexthdr != IPPROTO_TCP) + return; + + tcph = data + sizeof(struct ethhdr) + sizeof(struct ipv6hdr); + if (tcph + 1 > data_end) + return; + + memcpy(tup.ipv6.saddr, &ipv6h->saddr, sizeof(tup.ipv6.saddr)); + memcpy(tup.ipv6.daddr, &ipv6h->daddr, 
sizeof(tup.ipv6.daddr)); + tup.ipv6.sport = tcph->source; + tup.ipv6.dport = tcph->dest; + + sk = bpf_skc_lookup_tcp(ctx, &tup, sizeof(tup.ipv6), + BPF_F_CURRENT_NETNS, 0); + if (!sk) + return; + + if (sk->state != BPF_TCP_LISTEN) + goto release; + + ret = bpf_tcp_check_syncookie(sk, ipv6h, sizeof(*ipv6h), + tcph, sizeof(*tcph)); + break; + + default: + return; + } + + if (ret == 0) + bpf_map_update_elem(&results, &key, &value, 0); + +release: + bpf_sk_release(sk); +} + +SEC("clsact/check_syncookie") +int check_syncookie_clsact(struct __sk_buff *skb) +{ + check_syncookie(skb, (void *)(long)skb->data, + (void *)(long)skb->data_end); + return TC_ACT_OK; +} + +SEC("xdp/check_syncookie") +int check_syncookie_xdp(struct xdp_md *ctx) +{ + check_syncookie(ctx, (void *)(long)ctx->data, + (void *)(long)ctx->data_end); + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tc_edt.sh b/tools/testing/selftests/bpf/test_tc_edt.sh new file mode 100755 index 000000000000..f38567ef694b --- /dev/null +++ b/tools/testing/selftests/bpf/test_tc_edt.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test installs a TC bpf program that throttles a TCP flow +# with dst port = 9000 down to 5MBps. Then it measures actual +# throughput of the flow. 
+
+if [[ $EUID -ne 0 ]]; then
+	echo "This script must be run as root"
+	echo "FAIL"
+	exit 1
+fi
+
+# check that nc, dd, and timeout are present
+command -v nc >/dev/null 2>&1 || \
+	{ echo >&2 "nc is not available"; exit 1; }
+command -v dd >/dev/null 2>&1 || \
+	{ echo >&2 "dd is not available"; exit 1; }
+command -v timeout >/dev/null 2>&1 || \
+	{ echo >&2 "timeout is not available"; exit 1; }
+
+readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
+readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
+
+readonly IP_SRC="172.16.1.100"
+readonly IP_DST="172.16.2.100"
+
+cleanup()
+{
+	ip netns del ${NS_SRC}
+	ip netns del ${NS_DST}
+}
+
+trap cleanup EXIT
+
+set -e # exit on error
+
+ip netns add "${NS_SRC}"
+ip netns add "${NS_DST}"
+ip link add veth_src type veth peer name veth_dst
+ip link set veth_src netns ${NS_SRC}
+ip link set veth_dst netns ${NS_DST}
+
+ip -netns ${NS_SRC} addr add ${IP_SRC}/24 dev veth_src
+ip -netns ${NS_DST} addr add ${IP_DST}/24 dev veth_dst
+
+ip -netns ${NS_SRC} link set dev veth_src up
+ip -netns ${NS_DST} link set dev veth_dst up
+
+ip -netns ${NS_SRC} route add ${IP_DST}/32 dev veth_src
+ip -netns ${NS_DST} route add ${IP_SRC}/32 dev veth_dst
+
+# set up TC on TX
+ip netns exec ${NS_SRC} tc qdisc add dev veth_src root fq
+ip netns exec ${NS_SRC} tc qdisc add dev veth_src clsact
+ip netns exec ${NS_SRC} tc filter add dev veth_src egress \
+	bpf da obj test_tc_edt.o sec cls_test
+
+
+# start the listener
+ip netns exec ${NS_DST} bash -c \
+	"nc -4 -l -s ${IP_DST} -p 9000 >/dev/null &"
+declare -i NC_PID=$!
+sleep 1 + +declare -ir TIMEOUT=20 +declare -ir EXPECTED_BPS=5000000 + +# run the load, capture RX bytes on DST +declare -ir RX_BYTES_START=$( ip netns exec ${NS_DST} \ + cat /sys/class/net/veth_dst/statistics/rx_bytes ) + +set +e +ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero \ + bs=1000 count=1000000 > /dev/tcp/${IP_DST}/9000 2>/dev/null" +set -e + +declare -ir RX_BYTES_END=$( ip netns exec ${NS_DST} \ + cat /sys/class/net/veth_dst/statistics/rx_bytes ) + +declare -ir ACTUAL_BPS=$(( ($RX_BYTES_END - $RX_BYTES_START) / $TIMEOUT )) + +echo $TIMEOUT $ACTUAL_BPS $EXPECTED_BPS | \ + awk '{printf "elapsed: %d sec; bps difference: %.2f%%\n", + $1, ($2-$3)*100.0/$3}' + +# Pass the test if the actual bps is within 1% of the expected bps. +# The difference is usually about 0.1% on a 20-sec test, and ==> zero +# the longer the test runs. +declare -ir RES=$( echo $ACTUAL_BPS $EXPECTED_BPS | \ + awk 'function abs(x){return ((x < 0.0) ? -x : x)} + {if (abs(($1-$2)*100.0/$2) > 1.0) { print "1" } + else { print "0"} }' ) +if [ "${RES}" == "0" ] ; then + echo "PASS" +else + echo "FAIL" + exit 1 +fi diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh new file mode 100755 index 000000000000..c805adb88f3a --- /dev/null +++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh @@ -0,0 +1,186 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# In-place tunneling + +# must match the port that the bpf program filters on +readonly port=8000 + +readonly ns_prefix="ns-$$-" +readonly ns1="${ns_prefix}1" +readonly ns2="${ns_prefix}2" + +readonly ns1_v4=192.168.1.1 +readonly ns2_v4=192.168.1.2 +readonly ns1_v6=fd::1 +readonly ns2_v6=fd::2 + +readonly infile="$(mktemp)" +readonly outfile="$(mktemp)" + +setup() { + ip netns add "${ns1}" + ip netns add "${ns2}" + + ip link add dev veth1 mtu 1500 netns "${ns1}" type veth \ + peer name veth2 mtu 1500 netns "${ns2}" + + ip netns exec "${ns1}" ethtool -K veth1 tso off + + 
ip -netns "${ns1}" link set veth1 up + ip -netns "${ns2}" link set veth2 up + + ip -netns "${ns1}" -4 addr add "${ns1_v4}/24" dev veth1 + ip -netns "${ns2}" -4 addr add "${ns2_v4}/24" dev veth2 + ip -netns "${ns1}" -6 addr add "${ns1_v6}/64" dev veth1 nodad + ip -netns "${ns2}" -6 addr add "${ns2_v6}/64" dev veth2 nodad + + # clamp route to reserve room for tunnel headers + ip -netns "${ns1}" -4 route flush table main + ip -netns "${ns1}" -6 route flush table main + ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1476 dev veth1 + ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1456 dev veth1 + + sleep 1 + + dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none +} + +cleanup() { + ip netns del "${ns2}" + ip netns del "${ns1}" + + if [[ -f "${outfile}" ]]; then + rm "${outfile}" + fi + if [[ -f "${infile}" ]]; then + rm "${infile}" + fi +} + +server_listen() { + ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" > "${outfile}" & + server_pid=$! + sleep 0.2 +} + +client_connect() { + ip netns exec "${ns1}" timeout 2 nc "${netcat_opt}" -w 1 "${addr2}" "${port}" < "${infile}" + echo $? +} + +verify_data() { + wait "${server_pid}" + # sha1sum returns two fields [sha1] [filepath] + # convert to bash array and access first elem + insum=($(sha1sum ${infile})) + outsum=($(sha1sum ${outfile})) + if [[ "${insum[0]}" != "${outsum[0]}" ]]; then + echo "data mismatch" + exit 1 + fi +} + +set -e + +# no arguments: automated test, run all +if [[ "$#" -eq "0" ]]; then + echo "ipip" + $0 ipv4 ipip 100 + + echo "ip6ip6" + $0 ipv6 ip6tnl 100 + + echo "ip gre" + $0 ipv4 gre 100 + + echo "ip6 gre" + $0 ipv6 ip6gre 100 + + echo "ip gre gso" + $0 ipv4 gre 2000 + + echo "ip6 gre gso" + $0 ipv6 ip6gre 2000 + + echo "OK. 
All tests passed" + exit 0 +fi + +if [[ "$#" -ne "3" ]]; then + echo "Usage: $0" + echo " or: $0 <ipv4|ipv6> <tuntype> <data_len>" + exit 1 +fi + +case "$1" in +"ipv4") + readonly addr1="${ns1_v4}" + readonly addr2="${ns2_v4}" + readonly netcat_opt=-4 + ;; +"ipv6") + readonly addr1="${ns1_v6}" + readonly addr2="${ns2_v6}" + readonly netcat_opt=-6 + ;; +*) + echo "unknown arg: $1" + exit 1 + ;; +esac + +readonly tuntype=$2 +readonly datalen=$3 + +echo "encap ${addr1} to ${addr2}, type ${tuntype}, len ${datalen}" + +trap cleanup EXIT + +setup + +# basic communication works +echo "test basic connectivity" +server_listen +client_connect +verify_data + +# clientside, insert bpf program to encap all TCP to port ${port} +# client can no longer connect +ip netns exec "${ns1}" tc qdisc add dev veth1 clsact +ip netns exec "${ns1}" tc filter add dev veth1 egress \ + bpf direct-action object-file ./test_tc_tunnel.o \ + section "encap_${tuntype}" +echo "test bpf encap without decap (expect failure)" +server_listen +! client_connect + +# serverside, insert decap module +# server is still running +# client can connect again +ip netns exec "${ns2}" ip link add dev testtun0 type "${tuntype}" \ + remote "${addr1}" local "${addr2}" +# Because packets are decapped by the tunnel they arrive on testtun0 from +# the IP stack perspective. Ensure reverse path filtering is disabled +# otherwise we drop the TCP SYN as arriving on testtun0 instead of the +# expected veth2 (veth2 is where 192.168.1.2 is configured). +ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 +# rp needs to be disabled for both all and testtun0 as the rp value is +# selected as the max of the "all" and device-specific values. 
+ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.testtun0.rp_filter=0 +ip netns exec "${ns2}" ip link set dev testtun0 up +echo "test bpf encap with tunnel device decap" +client_connect +verify_data + +# serverside, use BPF for decap +ip netns exec "${ns2}" ip link del dev testtun0 +ip netns exec "${ns2}" tc qdisc add dev veth2 clsact +ip netns exec "${ns2}" tc filter add dev veth2 ingress \ + bpf direct-action object-file ./test_tc_tunnel.o section decap +server_listen +echo "test bpf encap with bpf decap" +client_connect +verify_data + +echo OK diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh new file mode 100755 index 000000000000..d48e51716d19 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh @@ -0,0 +1,81 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2018 Facebook +# Copyright (c) 2019 Cloudflare + +set -eu + +wait_for_ip() +{ + local _i + printf "Wait for IP %s to become available " "$1" + for _i in $(seq ${MAX_PING_TRIES}); do + printf "." + if ns1_exec ping -c 1 -W 1 "$1" >/dev/null 2>&1; then + echo " OK" + return + fi + sleep 1 + done + echo 1>&2 "ERROR: Timeout waiting for test IP to become available." + exit 1 +} + +get_prog_id() +{ + awk '/ id / {sub(/.* id /, "", $0); print($1)}' +} + +ns1_exec() +{ + ip netns exec ns1 "$@" +} + +setup() +{ + ip netns add ns1 + ns1_exec ip link set lo up + + ns1_exec sysctl -w net.ipv4.tcp_syncookies=2 + + wait_for_ip 127.0.0.1 + wait_for_ip ::1 +} + +cleanup() +{ + ip netns del ns1 2>/dev/null || : +} + +main() +{ + trap cleanup EXIT 2 3 6 15 + setup + + printf "Testing clsact..." 
+ ns1_exec tc qdisc add dev "${TEST_IF}" clsact + ns1_exec tc filter add dev "${TEST_IF}" ingress \ + bpf obj "${BPF_PROG_OBJ}" sec "${CLSACT_SECTION}" da + + BPF_PROG_ID=$(ns1_exec tc filter show dev "${TEST_IF}" ingress | \ + get_prog_id) + ns1_exec "${PROG}" "${BPF_PROG_ID}" + ns1_exec tc qdisc del dev "${TEST_IF}" clsact + + printf "Testing XDP..." + ns1_exec ip link set "${TEST_IF}" xdp \ + object "${BPF_PROG_OBJ}" section "${XDP_SECTION}" + BPF_PROG_ID=$(ns1_exec ip link show "${TEST_IF}" | get_prog_id) + ns1_exec "${PROG}" "${BPF_PROG_ID}" +} + +DIR=$(dirname $0) +TEST_IF=lo +MAX_PING_TRIES=5 +BPF_PROG_OBJ="${DIR}/test_tcp_check_syncookie_kern.o" +CLSACT_SECTION="clsact/check_syncookie" +XDP_SECTION="xdp/check_syncookie" +BPF_PROG_ID=0 +PROG="${DIR}/test_tcp_check_syncookie_user" + +main diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c new file mode 100644 index 000000000000..87829c86c746 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook +// Copyright (c) 2019 Cloudflare + +#include <string.h> +#include <stdlib.h> +#include <unistd.h> + +#include <arpa/inet.h> +#include <netinet/in.h> +#include <sys/types.h> +#include <sys/socket.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "bpf_rlimit.h" +#include "cgroup_helpers.h" + +static int start_server(const struct sockaddr *addr, socklen_t len) +{ + int fd; + + fd = socket(addr->sa_family, SOCK_STREAM, 0); + if (fd == -1) { + log_err("Failed to create server socket"); + goto out; + } + + if (bind(fd, addr, len) == -1) { + log_err("Failed to bind server socket"); + goto close_out; + } + + if (listen(fd, 128) == -1) { + log_err("Failed to listen on server socket"); + goto close_out; + } + + goto out; + +close_out: + close(fd); + fd = -1; +out: + return fd; +} + +static int connect_to_server(int 
server_fd) +{ + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + int fd = -1; + + if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { + log_err("Failed to get server addr"); + goto out; + } + + fd = socket(addr.ss_family, SOCK_STREAM, 0); + if (fd == -1) { + log_err("Failed to create client socket"); + goto out; + } + + if (connect(fd, (const struct sockaddr *)&addr, len) == -1) { + log_err("Fail to connect to server"); + goto close_out; + } + + goto out; + +close_out: + close(fd); + fd = -1; +out: + return fd; +} + +static int get_map_fd_by_prog_id(int prog_id) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + __u32 map_ids[1]; + int prog_fd = -1; + int map_fd = -1; + + prog_fd = bpf_prog_get_fd_by_id(prog_id); + if (prog_fd < 0) { + log_err("Failed to get fd by prog id %d", prog_id); + goto err; + } + + info.nr_map_ids = 1; + info.map_ids = (__u64)(unsigned long)map_ids; + + if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) { + log_err("Failed to get info by prog fd %d", prog_fd); + goto err; + } + + if (!info.nr_map_ids) { + log_err("No maps found for prog fd %d", prog_fd); + goto err; + } + + map_fd = bpf_map_get_fd_by_id(map_ids[0]); + if (map_fd < 0) + log_err("Failed to get fd by map id %d", map_ids[0]); +err: + if (prog_fd >= 0) + close(prog_fd); + return map_fd; +} + +static int run_test(int server_fd, int results_fd) +{ + int client = -1, srv_client = -1; + int ret = 0; + __u32 key = 0; + __u64 value = 0; + + if (bpf_map_update_elem(results_fd, &key, &value, 0) < 0) { + log_err("Can't clear results"); + goto err; + } + + client = connect_to_server(server_fd); + if (client == -1) + goto err; + + srv_client = accept(server_fd, NULL, 0); + if (srv_client == -1) { + log_err("Can't accept connection"); + goto err; + } + + if (bpf_map_lookup_elem(results_fd, &key, &value) < 0) { + log_err("Can't lookup result"); + goto err; + } + + if (value != 1) { + log_err("Didn't match syncookie: %llu", value); + goto err; 
+ } + + goto out; + +err: + ret = 1; +out: + close(client); + close(srv_client); + return ret; +} + +int main(int argc, char **argv) +{ + struct sockaddr_in addr4; + struct sockaddr_in6 addr6; + int server = -1; + int server_v6 = -1; + int results = -1; + int err = 0; + + if (argc < 2) { + fprintf(stderr, "Usage: %s prog_id\n", argv[0]); + exit(1); + } + + results = get_map_fd_by_prog_id(atoi(argv[1])); + if (results < 0) { + log_err("Can't get map"); + goto err; + } + + memset(&addr4, 0, sizeof(addr4)); + addr4.sin_family = AF_INET; + addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr4.sin_port = 0; + + memset(&addr6, 0, sizeof(addr6)); + addr6.sin6_family = AF_INET6; + addr6.sin6_addr = in6addr_loopback; + addr6.sin6_port = 0; + + server = start_server((const struct sockaddr *)&addr4, sizeof(addr4)); + if (server == -1) + goto err; + + server_v6 = start_server((const struct sockaddr *)&addr6, + sizeof(addr6)); + if (server_v6 == -1) + goto err; + + if (run_test(server, results)) + goto err; + + if (run_test(server_v6, results)) + goto err; + + printf("ok\n"); + goto out; +err: + err = 1; +out: + close(server); + close(server_v6); + close(results); + return err; +} diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 477a9dcf9fff..19b5d03acc2a 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -198,7 +198,7 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self) } /* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */ -#define BPF_SK_LOOKUP \ +#define BPF_SK_LOOKUP(func) \ /* struct bpf_sock_tuple tuple = {} */ \ BPF_MOV64_IMM(BPF_REG_2, 0), \ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8), \ @@ -207,13 +207,13 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self) BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -32), \ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -40), \ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -48), \ - /* sk = 
sk_lookup_tcp(ctx, &tuple, sizeof tuple, 0, 0) */ \ + /* sk = func(ctx, &tuple, sizeof tuple, 0, 0) */ \ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), \ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48), \ BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)), \ BPF_MOV64_IMM(BPF_REG_4, 0), \ BPF_MOV64_IMM(BPF_REG_5, 0), \ - BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp) + BPF_EMIT_CALL(BPF_FUNC_ ## func) /* BPF_DIRECT_PKT_R2 contains 7 instructions, it initializes default return * value into 0 and does necessary preparation for direct packet access diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c index 9de8b7cb4e6d..db781052758d 100644 --- a/tools/testing/selftests/bpf/urandom_read.c +++ b/tools/testing/selftests/bpf/urandom_read.c @@ -7,11 +7,19 @@ #define BUF_SIZE 256 +static __attribute__((noinline)) +void urandom_read(int fd, int count) +{ + char buf[BUF_SIZE]; + int i; + + for (i = 0; i < count; ++i) + read(fd, buf, BUF_SIZE); +} + int main(int argc, char *argv[]) { int fd = open("/dev/urandom", O_RDONLY); - int i; - char buf[BUF_SIZE]; int count = 4; if (fd < 0) @@ -20,8 +28,7 @@ int main(int argc, char *argv[]) if (argc == 2) count = atoi(argv[1]); - for (i = 0; i < count; ++i) - read(fd, buf, BUF_SIZE); + urandom_read(fd, count); close(fd); return 0; diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index 923f2110072d..ebcbf154c460 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -1,7 +1,18 @@ { "reference tracking: leak potential reference", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */ + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, +}, +{ + "reference tracking: leak potential reference to sock_common", + .insns = { + 
BPF_SK_LOOKUP(skc_lookup_tcp), BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */ BPF_EXIT_INSN(), }, @@ -12,7 +23,7 @@ { "reference tracking: leak potential reference on stack", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), @@ -26,7 +37,7 @@ { "reference tracking: leak potential reference on stack 2", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), @@ -41,7 +52,18 @@ { "reference tracking: zero potential reference", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), + BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */ + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, +}, +{ + "reference tracking: zero potential reference to sock_common", + .insns = { + BPF_SK_LOOKUP(skc_lookup_tcp), BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */ BPF_EXIT_INSN(), }, @@ -52,7 +74,7 @@ { "reference tracking: copy and zero potential references", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_MOV64_IMM(BPF_REG_7, 0), /* leak reference */ @@ -65,7 +87,7 @@ { "reference tracking: release reference without check", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), /* reference in r0 may be NULL */ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_MOV64_IMM(BPF_REG_2, 0), @@ -77,9 +99,35 @@ .result = REJECT, }, { + "reference tracking: release reference to sock_common without check", + .insns = { + BPF_SK_LOOKUP(skc_lookup_tcp), + /* reference in r0 may be NULL */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = 
"type=sock_common_or_null expected=sock", + .result = REJECT, +}, +{ "reference tracking: release reference", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, +{ + "reference tracking: release reference to sock_common", + .insns = { + BPF_SK_LOOKUP(skc_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), BPF_EMIT_CALL(BPF_FUNC_sk_release), @@ -91,7 +139,7 @@ { "reference tracking: release reference 2", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), @@ -104,7 +152,7 @@ { "reference tracking: release reference twice", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), @@ -120,7 +168,7 @@ { "reference tracking: release reference twice inside branch", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), /* goto end */ @@ -147,7 +195,7 @@ BPF_EXIT_INSN(), BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2, offsetof(struct __sk_buff, mark)), - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 1), /* mark == 0? */ /* Leak reference in R0 */ BPF_EXIT_INSN(), @@ -175,7 +223,7 @@ BPF_EXIT_INSN(), BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2, offsetof(struct __sk_buff, mark)), - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4), /* mark == 0? */ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? 
*/ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), @@ -193,7 +241,7 @@ { "reference tracking in call: free reference in subprog", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), BPF_MOV64_IMM(BPF_REG_0, 0), @@ -211,7 +259,7 @@ { "reference tracking in call: free reference in subprog and outside", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), @@ -241,7 +289,7 @@ /* subprog 1 */ BPF_MOV64_REG(BPF_REG_6, BPF_REG_4), - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), /* spill unchecked sk_ptr into stack of caller */ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), @@ -262,7 +310,7 @@ BPF_EXIT_INSN(), /* subprog 1 */ - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_EXIT_INSN(), /* return sk */ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -291,7 +339,7 @@ BPF_EXIT_INSN(), /* subprog 2 */ - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -324,7 +372,7 @@ BPF_EXIT_INSN(), /* subprog 2 */ - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -334,7 +382,7 @@ "reference tracking: allow LD_ABS", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), BPF_EMIT_CALL(BPF_FUNC_sk_release), @@ -350,7 +398,7 @@ "reference tracking: forbid LD_ABS while holding reference", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_LD_ABS(BPF_B, 0), BPF_LD_ABS(BPF_H, 0), BPF_LD_ABS(BPF_W, 0), @@ -367,7 +415,7 @@ "reference tracking: allow LD_IND", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_SK_LOOKUP, + 
BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), BPF_EMIT_CALL(BPF_FUNC_sk_release), @@ -384,7 +432,7 @@ "reference tracking: forbid LD_IND while holding reference", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), BPF_MOV64_IMM(BPF_REG_7, 1), BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000), @@ -402,7 +450,7 @@ "reference tracking: check reference or tail call", .insns = { BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), /* if (sk) bpf_sk_release() */ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 7), @@ -424,7 +472,7 @@ "reference tracking: release reference then tail call", .insns = { BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), /* if (sk) bpf_sk_release() */ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), @@ -446,7 +494,7 @@ .insns = { BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), /* Look up socket and store in REG_6 */ - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), /* bpf_tail_call() */ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), BPF_MOV64_IMM(BPF_REG_3, 2), @@ -470,7 +518,7 @@ .insns = { BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), /* Look up socket and store in REG_6 */ - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* if (!sk) goto end */ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), @@ -492,7 +540,7 @@ { "reference tracking: mangle and release sock_or_null", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), @@ -506,7 +554,7 @@ { "reference tracking: mangle and release sock", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5), @@ -520,7 +568,7 @@ 
{ "reference tracking: access member", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4), @@ -534,7 +582,7 @@ { "reference tracking: write to member", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), @@ -553,7 +601,7 @@ { "reference tracking: invalid 64-bit access of member", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), @@ -568,7 +616,7 @@ { "reference tracking: access after release", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), BPF_EMIT_CALL(BPF_FUNC_sk_release), @@ -608,7 +656,7 @@ { "reference tracking: use ptr from bpf_tcp_sock() after release", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), @@ -631,7 +679,7 @@ { "reference tracking: use ptr from bpf_sk_fullsock() after release", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), @@ -654,7 +702,7 @@ { "reference tracking: use ptr from bpf_sk_fullsock(tp) after release", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), @@ -681,7 +729,7 @@ { "reference tracking: use sk after bpf_sk_release(tp)", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), @@ -703,7 +751,7 @@ { "reference tracking: use ptr from bpf_get_listener_sock() after 
bpf_sk_release(sk)", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), @@ -725,7 +773,7 @@ { "reference tracking: bpf_sk_release(listen_sk)", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), @@ -750,7 +798,7 @@ /* !bpf_sk_fullsock(sk) is checked but !bpf_tcp_sock(sk) is not checked */ "reference tracking: tp->snd_cwnd after bpf_sk_fullsock(sk) and bpf_tcp_sock(sk)", .insns = { - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c index dbaf5be947b2..91bb77c24a2e 100644 --- a/tools/testing/selftests/bpf/verifier/unpriv.c +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -242,7 +242,7 @@ .insns = { BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), /* struct bpf_sock *sock = bpf_sock_lookup(...); */ - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), /* u64 foo; */ /* void *target = &foo; */ @@ -276,7 +276,7 @@ .insns = { BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), /* struct bpf_sock *sock = bpf_sock_lookup(...); */ - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), /* u64 foo; */ /* void *target = &foo; */ @@ -307,7 +307,7 @@ .insns = { BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), /* struct bpf_sock *sock = bpf_sock_lookup(...); */ - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), /* u64 foo; */ /* void *target = &foo; */ @@ -339,7 +339,7 @@ .insns = { BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), /* struct bpf_sock *sock = bpf_sock_lookup(...); */ - BPF_SK_LOOKUP, + BPF_SK_LOOKUP(sk_lookup_tcp), BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), /* u64 foo; */ /* void *target = &foo; */ |