Diffstat (limited to 'drivers/net/ethernet/mellanox')
65 files changed, 7560 insertions, 1246 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 6b8635378f1f..fa6d2354a0e9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -81,8 +81,9 @@ void mlx4_cq_tasklet_cb(unsigned long data) static void mlx4_add_cq_to_tasklet(struct mlx4_cq *cq) { - unsigned long flags; struct mlx4_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv; + unsigned long flags; + bool kick; spin_lock_irqsave(&tasklet_ctx->lock, flags); /* When migrating CQs between EQs will be implemented, please note @@ -92,7 +93,10 @@ static void mlx4_add_cq_to_tasklet(struct mlx4_cq *cq) */ if (list_empty_careful(&cq->tasklet_ctx.list)) { atomic_inc(&cq->refcount); + kick = list_empty(&tasklet_ctx->list); list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list); + if (kick) + tasklet_schedule(&tasklet_ctx->task); } spin_unlock_irqrestore(&tasklet_ctx->lock, flags); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 504461a464c5..e7b81a305469 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -62,12 +62,13 @@ void mlx4_en_fill_hwtstamps(struct mlx4_en_dev *mdev, struct skb_shared_hwtstamps *hwts, u64 timestamp) { - unsigned long flags; + unsigned int seq; u64 nsec; - read_lock_irqsave(&mdev->clock_lock, flags); - nsec = timecounter_cyc2time(&mdev->clock, timestamp); - read_unlock_irqrestore(&mdev->clock_lock, flags); + do { + seq = read_seqbegin(&mdev->clock_lock); + nsec = timecounter_cyc2time(&mdev->clock, timestamp); + } while (read_seqretry(&mdev->clock_lock, seq)); memset(hwts, 0, sizeof(struct skb_shared_hwtstamps)); hwts->hwtstamp = ns_to_ktime(nsec); @@ -95,9 +96,9 @@ void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev) unsigned long flags; if (timeout) { - write_lock_irqsave(&mdev->clock_lock, flags); + write_seqlock_irqsave(&mdev->clock_lock, flags); timecounter_read(&mdev->clock); - write_unlock_irqrestore(&mdev->clock_lock, flags); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); mdev->last_overflow_check = jiffies; } } @@ -128,10 +129,10 @@ static int mlx4_en_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta) adj *= delta; diff = div_u64(adj, 1000000000ULL); - write_lock_irqsave(&mdev->clock_lock, flags); + write_seqlock_irqsave(&mdev->clock_lock, flags); timecounter_read(&mdev->clock); mdev->cycles.mult = neg_adj ? 
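Note on the mlx4/cq.c hunk above: it pairs with the eq.c hunk further down, which deletes the old cqn-high-bits check. Instead of having mlx4_eq_int() guess afterwards whether any CQE arrived, mlx4_add_cq_to_tasklet() now schedules the tasklet itself, exactly once per busy period, when it appends the first CQ to an empty per-EQ list. A minimal userspace sketch of that empty-to-non-empty kick pattern, with a mutex standing in for the spinlock and hypothetical names:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct work_list {
	pthread_mutex_t lock;
	int nitems;
};

static void schedule_worker(void)	/* tasklet_schedule() stand-in */
{
	puts("worker scheduled");
}

static void add_item(struct work_list *wl)
{
	bool kick;

	pthread_mutex_lock(&wl->lock);
	kick = (wl->nitems == 0);	/* list was empty: worker is idle */
	wl->nitems++;
	if (kick)
		schedule_worker();	/* one kick per empty->non-empty edge */
	pthread_mutex_unlock(&wl->lock);
}

int main(void)
{
	struct work_list wl = { PTHREAD_MUTEX_INITIALIZER, 0 };

	add_item(&wl);	/* kicks the worker */
	add_item(&wl);	/* does not */
	return 0;
}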
mult - diff : mult + diff; - write_unlock_irqrestore(&mdev->clock_lock, flags); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); return 0; } @@ -149,9 +150,9 @@ static int mlx4_en_phc_adjtime(struct ptp_clock_info *ptp, s64 delta) ptp_clock_info); unsigned long flags; - write_lock_irqsave(&mdev->clock_lock, flags); + write_seqlock_irqsave(&mdev->clock_lock, flags); timecounter_adjtime(&mdev->clock, delta); - write_unlock_irqrestore(&mdev->clock_lock, flags); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); return 0; } @@ -172,9 +173,9 @@ static int mlx4_en_phc_gettime(struct ptp_clock_info *ptp, unsigned long flags; u64 ns; - write_lock_irqsave(&mdev->clock_lock, flags); + write_seqlock_irqsave(&mdev->clock_lock, flags); ns = timecounter_read(&mdev->clock); - write_unlock_irqrestore(&mdev->clock_lock, flags); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); *ts = ns_to_timespec64(ns); @@ -198,9 +199,9 @@ static int mlx4_en_phc_settime(struct ptp_clock_info *ptp, unsigned long flags; /* reset the timecounter */ - write_lock_irqsave(&mdev->clock_lock, flags); + write_seqlock_irqsave(&mdev->clock_lock, flags); timecounter_init(&mdev->clock, &mdev->cycles, ns); - write_unlock_irqrestore(&mdev->clock_lock, flags); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); return 0; } @@ -266,7 +267,7 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) if (mdev->ptp_clock) return; - rwlock_init(&mdev->clock_lock); + seqlock_init(&mdev->clock_lock); memset(&mdev->cycles, 0, sizeof(mdev->cycles)); mdev->cycles.read = mlx4_en_read_clock; @@ -276,10 +277,10 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift); mdev->nominal_c_mult = mdev->cycles.mult; - write_lock_irqsave(&mdev->clock_lock, flags); + write_seqlock_irqsave(&mdev->clock_lock, flags); timecounter_init(&mdev->clock, &mdev->cycles, ktime_to_ns(ktime_get_real())); - write_unlock_irqrestore(&mdev->clock_lock, flags); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); /* Calculate period in seconds to call the overflow watchdog - to make * sure counter is checked at least once every wrap around. diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 9aa422691954..c4d714fcc7da 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -902,6 +902,7 @@ mlx4_en_set_link_ksettings(struct net_device *dev, struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_ptys_reg ptys_reg; __be32 proto_admin; + u8 cur_autoneg; int ret; u32 ptys_adv = ethtool2ptys_link_modes( @@ -931,10 +932,21 @@ mlx4_en_set_link_ksettings(struct net_device *dev, return 0; } - proto_admin = link_ksettings->base.autoneg == AUTONEG_ENABLE ? - cpu_to_be32(ptys_adv) : - speed_set_ptys_admin(priv, speed, - ptys_reg.eth_proto_cap); + cur_autoneg = ptys_reg.flags & MLX4_PTYS_AN_DISABLE_ADMIN ? 
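Note on the en_clock.c conversion above: clock_lock changes from an rwlock to a seqlock, so writers still take the lock with IRQs disabled, but the hot RX-timestamping reader becomes a lockless retry loop that neither contends with nor starves writers. A minimal userspace model of the seqcount protocol, assuming C11 atomics; the kernel's read_seqbegin()/read_seqretry() additionally insert the proper memory barriers around the data access:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic unsigned int seq;	/* even = stable, odd = write in progress */
static uint64_t clock_ns;		/* the protected data */

static void writer_update(uint64_t ns)
{
	atomic_fetch_add(&seq, 1);	/* make count odd */
	clock_ns = ns;
	atomic_fetch_add(&seq, 1);	/* even again */
}

static uint64_t reader_get(void)
{
	unsigned int s;
	uint64_t ns;

	do {
		s = atomic_load(&seq);
		ns = clock_ns;		/* speculative read */
	} while ((s & 1) || atomic_load(&seq) != s);	/* retry if a writer ran */
	return ns;
}

int main(void)
{
	writer_update(123456789);
	printf("%llu\n", (unsigned long long)reader_get());
	return 0;
}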
+ AUTONEG_DISABLE : AUTONEG_ENABLE; + + if (link_ksettings->base.autoneg == AUTONEG_DISABLE) { + proto_admin = speed_set_ptys_admin(priv, speed, + ptys_reg.eth_proto_cap); + if ((be32_to_cpu(proto_admin) & + (MLX4_PROT_MASK(MLX4_1000BASE_CX_SGMII) | + MLX4_PROT_MASK(MLX4_1000BASE_KX))) && + (ptys_reg.flags & MLX4_PTYS_AN_DISABLE_CAP)) + ptys_reg.flags |= MLX4_PTYS_AN_DISABLE_ADMIN; + } else { + proto_admin = cpu_to_be32(ptys_adv); + ptys_reg.flags &= ~MLX4_PTYS_AN_DISABLE_ADMIN; + } proto_admin &= ptys_reg.eth_proto_cap; if (!proto_admin) { @@ -942,7 +954,9 @@ mlx4_en_set_link_ksettings(struct net_device *dev, return -EINVAL; /* nothing to change due to bad input */ } - if (proto_admin == ptys_reg.eth_proto_admin) + if ((proto_admin == ptys_reg.eth_proto_admin) && + ((ptys_reg.flags & MLX4_PTYS_AN_DISABLE_CAP) && + (link_ksettings->base.autoneg == cur_autoneg))) return 0; /* Nothing to change */ en_dbg(DRV, priv, "mlx4_ACCESS_PTYS_REG SET: ptys_reg.eth_proto_admin = 0x%x\n", @@ -1788,7 +1802,7 @@ static int mlx4_en_set_channels(struct net_device *dev, netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]); netif_set_real_num_rx_queues(dev, priv->rx_ring_num); - if (dev->num_tc) + if (netdev_get_num_tc(dev)) mlx4_en_setup_tc(dev, MLX4_EN_NUM_UP); en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num[TX]); @@ -1980,7 +1994,7 @@ static int mlx4_en_get_module_info(struct net_device *dev, modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; break; default: - return -ENOSYS; + return -EINVAL; } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 3b4961a8e8e4..afe4444e5434 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1321,7 +1321,7 @@ static void mlx4_en_tx_timeout(struct net_device *dev) } -static struct rtnl_link_stats64 * +static void mlx4_en_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -1330,8 +1330,6 @@ mlx4_en_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) mlx4_en_fold_software_stats(dev); netdev_stats_to_stats64(stats, &dev->stats); spin_unlock_bh(&priv->stats_lock); - - return stats; } static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) @@ -1384,6 +1382,7 @@ static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv) { unsigned long period = (unsigned long) (jiffies - priv->last_moder_jiffies); + u32 pkt_rate_high, pkt_rate_low; struct mlx4_en_cq *cq; unsigned long packets; unsigned long rate; @@ -1397,37 +1396,40 @@ static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv) if (!priv->adaptive_rx_coal || period < priv->sample_interval * HZ) return; + pkt_rate_low = READ_ONCE(priv->pkt_rate_low); + pkt_rate_high = READ_ONCE(priv->pkt_rate_high); + for (ring = 0; ring < priv->rx_ring_num; ring++) { rx_packets = READ_ONCE(priv->rx_ring[ring]->packets); rx_bytes = READ_ONCE(priv->rx_ring[ring]->bytes); - rx_pkt_diff = ((unsigned long) (rx_packets - - priv->last_moder_packets[ring])); + rx_pkt_diff = rx_packets - priv->last_moder_packets[ring]; packets = rx_pkt_diff; rate = packets * HZ / period; - avg_pkt_size = packets ? ((unsigned long) (rx_bytes - - priv->last_moder_bytes[ring])) / packets : 0; + avg_pkt_size = packets ? 
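Note on the set_link_ksettings change above, reduced to its decision table: disabling autoneg keeps the user's requested speed mask and sets the AN-disable admin bit only when that mask is one of the 1G modes and the device advertises AN-disable capability; enabling autoneg programs the advertised mask and clears the bit. A standalone sketch of that logic, with hypothetical flag values standing in for the PTYS register bits:

#include <stdbool.h>
#include <stdio.h>

#define PROT_1G_CX_SGMII	(1u << 0)	/* stand-ins for MLX4_PROT_MASK(...) */
#define PROT_1G_KX		(1u << 1)
#define AN_DISABLE_CAP		(1u << 5)	/* ~MLX4_PTYS_AN_DISABLE_CAP */
#define AN_DISABLE_ADMIN	(1u << 6)	/* ~MLX4_PTYS_AN_DISABLE_ADMIN */

static unsigned int apply_autoneg(unsigned int flags, unsigned int proto_admin,
				  bool an_enable)
{
	if (an_enable)
		return flags & ~AN_DISABLE_ADMIN;	/* re-enable AN */

	if ((proto_admin & (PROT_1G_CX_SGMII | PROT_1G_KX)) &&
	    (flags & AN_DISABLE_CAP))
		flags |= AN_DISABLE_ADMIN;	/* forced 1G: turn AN off for real */
	return flags;
}

int main(void)
{
	printf("0x%x\n", apply_autoneg(AN_DISABLE_CAP, PROT_1G_KX, false));
	return 0;
}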
(rx_bytes - + priv->last_moder_bytes[ring]) / packets : 0; /* Apply auto-moderation only when packet rate * exceeds a rate that it matters */ if (rate > (MLX4_EN_RX_RATE_THRESH / priv->rx_ring_num) && avg_pkt_size > MLX4_EN_AVG_PKT_SMALL) { - if (rate < priv->pkt_rate_low) + if (rate <= pkt_rate_low) moder_time = priv->rx_usecs_low; - else if (rate > priv->pkt_rate_high) + else if (rate >= pkt_rate_high) moder_time = priv->rx_usecs_high; else - moder_time = (rate - priv->pkt_rate_low) * + moder_time = (rate - pkt_rate_low) * (priv->rx_usecs_high - priv->rx_usecs_low) / - (priv->pkt_rate_high - priv->pkt_rate_low) + + (pkt_rate_high - pkt_rate_low) + priv->rx_usecs_low; } else { moder_time = priv->rx_usecs_low; } - if (moder_time != priv->last_moder_time[ring]) { + cq = priv->rx_cq[ring]; + if (moder_time != priv->last_moder_time[ring] || + cq->moder_cnt != priv->rx_frames) { priv->last_moder_time[ring] = moder_time; - cq = priv->rx_cq[ring]; cq->moder_time = moder_time; cq->moder_cnt = priv->rx_frames; err = mlx4_en_set_cq_moder(priv, cq); @@ -1697,6 +1699,14 @@ int mlx4_en_start_port(struct net_device *dev) priv->port, err); goto tx_err; } + + err = mlx4_SET_PORT_user_mtu(mdev->dev, priv->port, dev->mtu); + if (err) { + en_err(priv, "Failed to pass user MTU(%d) to Firmware for port %d, with error %d\n", + dev->mtu, priv->port, err); + goto tx_err; + } + /* Set default qp number */ err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 0); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.h b/drivers/net/ethernet/mellanox/mlx4/en_port.h index 040da4b16b1c..930f961fee42 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.h +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.h @@ -35,7 +35,6 @@ #define _MLX4_EN_PORT_H_ -#define SET_PORT_GEN_ALL_VALID 0x7 #define SET_PORT_PROMISC_SHIFT 31 #define SET_PORT_MC_PROMISC_SHIFT 30 diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index cc003fdf0ed9..d85e6446f9d9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -33,6 +33,7 @@ #include <net/busy_poll.h> #include <linux/bpf.h> +#include <linux/bpf_trace.h> #include <linux/mlx4/cq.h> #include <linux/slab.h> #include <linux/mlx4/qp.h> @@ -709,7 +710,8 @@ static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv, do { if (mlx4_en_prepare_rx_desc(priv, ring, ring->prod & ring->size_mask, - GFP_ATOMIC | __GFP_COLD)) + GFP_ATOMIC | __GFP_COLD | + __GFP_MEMALLOC)) break; ring->prod++; } while (--missing); @@ -928,10 +930,12 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud length, cq->ring, &doorbell_pending))) goto consumed; + trace_xdp_exception(dev, xdp_prog, act); goto xdp_drop_no_cnt; /* Drop on xmit failure */ default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: + trace_xdp_exception(dev, xdp_prog, act); case XDP_DROP: ring->xdp_drop++; xdp_drop_no_cnt: diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 5886ad78058f..3ed42199d3f1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -710,7 +710,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, u16 rings_p_up = priv->num_tx_rings_p_up; u8 up = 0; - if (dev->num_tc) + if (netdev_get_num_tc(dev)) return skb_tx_hash(dev, skb); if (skb_vlan_tag_present(skb)) diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c 
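Note on the auto-moderation rework above: pkt_rate_low/pkt_rate_high are now snapshotted once with READ_ONCE() and the comparisons widened to <=/>=, so a concurrent ethtool update can no longer leave rate strictly between stale bounds and make the interpolation divide by a zero (or wrapped) range. The interpolation itself, isolated as a standalone helper:

#include <stdio.h>

static unsigned long interp_moder_time(unsigned long rate,
				       unsigned long rate_low,
				       unsigned long rate_high,
				       unsigned long usecs_low,
				       unsigned long usecs_high)
{
	if (rate <= rate_low)
		return usecs_low;
	if (rate >= rate_high)
		return usecs_high;
	/* linear map of [rate_low, rate_high] onto [usecs_low, usecs_high] */
	return (rate - rate_low) * (usecs_high - usecs_low) /
	       (rate_high - rate_low) + usecs_low;
}

int main(void)
{
	printf("%lu usecs\n", interp_moder_time(50000, 10000, 100000, 16, 128));
	return 0;
}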
b/drivers/net/ethernet/mellanox/mlx4/eq.c index 0509996957d9..39232b6a974f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -494,7 +494,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_eqe *eqe; - int cqn = -1; + int cqn; int eqes_found = 0; int set_ci = 0; int port; @@ -840,13 +840,6 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) eq_set_ci(eq, 1); - /* cqn is 24bit wide but is initialized such that its higher bits - * are ones too. Thus, if we got any event, cqn's high bits should be off - * and we need to schedule the tasklet. - */ - if (!(cqn & ~0xffffff)) - tasklet_schedule(&eq->tasklet_ctx.task); - return eqes_found; } diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 84bab9f0732e..3fe885ce1902 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -672,7 +672,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); func_cap->physical_port = field; if (func_cap->physical_port != gen_or_port) { - err = -ENOSYS; + err = -EINVAL; goto out; } @@ -1875,7 +1875,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) *((u8 *) mailbox->buf + INIT_HCA_VERSION_OFFSET) = INIT_HCA_VERSION; *((u8 *) mailbox->buf + INIT_HCA_CACHELINE_SZ_OFFSET) = - (ilog2(cache_line_size()) - 4) << 5; + ((ilog2(cache_line_size()) - 4) << 5) | (1 << 4); #if defined(__LITTLE_ENDIAN) *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1); @@ -2983,7 +2983,7 @@ static int mlx4_SET_PORT_phv_bit(struct mlx4_dev *dev, u8 port, u8 phv_bit) return PTR_ERR(mailbox); context = mailbox->buf; - context->v_ignore_fcs |= SET_PORT_GEN_PHV_VALID; + context->flags2 |= SET_PORT_GEN_PHV_VALID; if (phv_bit) context->phv_en |= SET_PORT_GEN_PHV_EN; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index bffa6f345f2f..15ef787e71ba 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -838,7 +838,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) */ if (hca_param.global_caps) { mlx4_err(dev, "Unknown hca global capabilities\n"); - return -ENOSYS; + return -EINVAL; } mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz; @@ -896,7 +896,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) PF_CONTEXT_BEHAVIOUR_MASK) { mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n", func_cap.pf_context_behaviour, PF_CONTEXT_BEHAVIOUR_MASK); - return -ENOSYS; + return -EINVAL; } dev->caps.num_ports = func_cap.num_ports; @@ -3492,7 +3492,7 @@ slave_start: mlx4_enable_msi_x(dev); if ((mlx4_is_mfunc(dev)) && !(dev->flags & MLX4_FLAG_MSI_X)) { - err = -ENOSYS; + err = -EOPNOTSUPP; mlx4_err(dev, "INTx is not supported in multi-function mode, aborting\n"); goto err_free_eq; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 086920b615af..b4f1bc56cc68 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -487,6 +487,7 @@ struct mlx4_slave_state { bool vst_qinq_supported; u8 function; dma_addr_t vhcr_dma; + u16 user_mtu[MLX4_MAX_PORTS + 1]; u16 mtu[MLX4_MAX_PORTS + 1]; __be32 ib_cap_mask[MLX4_MAX_PORTS + 1]; struct mlx4_slave_eqe eq[MLX4_MFUNC_MAX_EQES]; @@ -590,6 +591,7 @@ struct mlx4_mfunc_master_ctx 
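Note on the fw.c INIT_HCA hunk above: the encoded cache-line field keeps log2(cache_line_size()) - 4 in bits 7:5, and the patch additionally sets bit 4. The firmware meaning of bit 4 is not stated in this hunk, so treat the comments below as an assumption; the bit arithmetic in isolation:

#include <stdio.h>

static unsigned char init_hca_cacheline(unsigned int cacheline_bytes)
{
	unsigned int log = 0;

	while ((1u << log) < cacheline_bytes)	/* ilog2() for powers of two */
		log++;
	return (unsigned char)(((log - 4) << 5) | (1 << 4));
}

int main(void)
{
	printf("64B  cache line -> 0x%02x\n", init_hca_cacheline(64));	/* 0x50 */
	printf("128B cache line -> 0x%02x\n", init_hca_cacheline(128));	/* 0x70 */
	return 0;
}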
{ struct mlx4_master_qp0_state qp0_state[MLX4_MAX_PORTS + 1]; int init_port_ref[MLX4_MAX_PORTS + 1]; u16 max_mtu[MLX4_MAX_PORTS + 1]; + u16 max_user_mtu[MLX4_MAX_PORTS + 1]; u8 pptx; u8 pprx; int disable_mcast_ref[MLX4_MAX_PORTS + 1]; @@ -774,7 +776,9 @@ struct mlx4_vlan_table { int max; }; -#define SET_PORT_GEN_ALL_VALID 0x7 +#define SET_PORT_GEN_ALL_VALID (MLX4_FLAG_V_MTU_MASK | \ + MLX4_FLAG_V_PPRX_MASK | \ + MLX4_FLAG_V_PPTX_MASK) #define SET_PORT_PROMISC_SHIFT 31 #define SET_PORT_MC_PROMISC_SHIFT 30 @@ -787,7 +791,7 @@ enum { struct mlx4_set_port_general_context { u16 reserved1; - u8 v_ignore_fcs; + u8 flags2; u8 flags; union { u8 ignore_fcs; @@ -803,7 +807,8 @@ struct mlx4_set_port_general_context { u16 reserved4; u32 reserved5; u8 phv_en; - u8 reserved6[3]; + u8 reserved6[5]; + __be16 user_mtu; }; struct mlx4_set_port_rqp_calc_context { diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index cec59bc264c9..4941b692e947 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -102,7 +102,8 @@ /* Use the maximum between 16384 and a single page */ #define MLX4_EN_ALLOC_SIZE PAGE_ALIGN(16384) -#define MLX4_EN_ALLOC_PREFER_ORDER PAGE_ALLOC_COSTLY_ORDER +#define MLX4_EN_ALLOC_PREFER_ORDER min_t(int, get_order(32768), \ + PAGE_ALLOC_COSTLY_ORDER) /* Receive fragment sizes; we use at most 3 fragments (for 9600 byte MTU * and 4K allocations) */ @@ -424,9 +425,9 @@ struct mlx4_en_dev { u32 priv_pdn; spinlock_t uar_lock; u8 mac_removed[MLX4_MAX_PORTS + 1]; - rwlock_t clock_lock; u32 nominal_c_mult; struct cyclecounter cycles; + seqlock_t clock_lock; struct timecounter clock; unsigned long last_overflow_check; unsigned long overflow_period; diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index b656dd5772e5..4e36e287d605 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -50,7 +50,11 @@ #define MLX4_STATS_ERROR_COUNTERS_MASK 0x1ffc30ULL #define MLX4_STATS_PORT_COUNTERS_MASK 0x1fe00000ULL -#define MLX4_FLAG_V_IGNORE_FCS_MASK 0x2 +#define MLX4_FLAG2_V_IGNORE_FCS_MASK BIT(1) +#define MLX4_FLAG2_V_USER_MTU_MASK BIT(5) +#define MLX4_FLAG_V_MTU_MASK BIT(0) +#define MLX4_FLAG_V_PPRX_MASK BIT(1) +#define MLX4_FLAG_V_PPTX_MASK BIT(2) #define MLX4_IGNORE_FCS_MASK 0x1 #define MLX4_TC_MAX_NUMBER 8 @@ -1239,13 +1243,96 @@ void mlx4_reset_roce_gids(struct mlx4_dev *dev, int slave) return; } +static void +mlx4_en_set_port_mtu(struct mlx4_dev *dev, int slave, int port, + struct mlx4_set_port_general_context *gen_context) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_mfunc_master_ctx *master = &priv->mfunc.master; + struct mlx4_slave_state *slave_st = &master->slave_state[slave]; + u16 mtu, prev_mtu; + + /* Mtu is configured as the max USER_MTU among all + * the functions on the port. 
+ */ + mtu = be16_to_cpu(gen_context->mtu); + mtu = min_t(int, mtu, dev->caps.eth_mtu_cap[port] + + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN); + prev_mtu = slave_st->mtu[port]; + slave_st->mtu[port] = mtu; + if (mtu > master->max_mtu[port]) + master->max_mtu[port] = mtu; + if (mtu < prev_mtu && prev_mtu == master->max_mtu[port]) { + int i; + + slave_st->mtu[port] = mtu; + master->max_mtu[port] = mtu; + for (i = 0; i < dev->num_slaves; i++) + master->max_mtu[port] = + max_t(u16, master->max_mtu[port], + master->slave_state[i].mtu[port]); + } + gen_context->mtu = cpu_to_be16(master->max_mtu[port]); +} + +static void +mlx4_en_set_port_user_mtu(struct mlx4_dev *dev, int slave, int port, + struct mlx4_set_port_general_context *gen_context) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_mfunc_master_ctx *master = &priv->mfunc.master; + struct mlx4_slave_state *slave_st = &master->slave_state[slave]; + u16 user_mtu, prev_user_mtu; + + /* User Mtu is configured as the max USER_MTU among all + * the functions on the port. + */ + user_mtu = be16_to_cpu(gen_context->user_mtu); + user_mtu = min_t(int, user_mtu, dev->caps.eth_mtu_cap[port]); + prev_user_mtu = slave_st->user_mtu[port]; + slave_st->user_mtu[port] = user_mtu; + if (user_mtu > master->max_user_mtu[port]) + master->max_user_mtu[port] = user_mtu; + if (user_mtu < prev_user_mtu && + prev_user_mtu == master->max_user_mtu[port]) { + int i; + + slave_st->user_mtu[port] = user_mtu; + master->max_user_mtu[port] = user_mtu; + for (i = 0; i < dev->num_slaves; i++) + master->max_user_mtu[port] = + max_t(u16, master->max_user_mtu[port], + master->slave_state[i].user_mtu[port]); + } + gen_context->user_mtu = cpu_to_be16(master->max_user_mtu[port]); +} + +static void +mlx4_en_set_port_global_pause(struct mlx4_dev *dev, int slave, + struct mlx4_set_port_general_context *gen_context) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_mfunc_master_ctx *master = &priv->mfunc.master; + + /* Slave cannot change Global Pause configuration */ + if (slave != mlx4_master_func_num(dev) && + (gen_context->pptx != master->pptx || + gen_context->pprx != master->pprx)) { + gen_context->pptx = master->pptx; + gen_context->pprx = master->pprx; + mlx4_warn(dev, "denying Global Pause change for slave:%d\n", + slave); + } else { + master->pptx = gen_context->pptx; + master->pprx = gen_context->pprx; + } +} + static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, u8 op_mod, struct mlx4_cmd_mailbox *inbox) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_port_info *port_info; - struct mlx4_mfunc_master_ctx *master = &priv->mfunc.master; - struct mlx4_slave_state *slave_st = &master->slave_state[slave]; struct mlx4_set_port_rqp_calc_context *qpn_context; struct mlx4_set_port_general_context *gen_context; struct mlx4_roce_gid_entry *gid_entry_tbl, *gid_entry_mbox, *gid_entry_mb1; @@ -1256,7 +1343,6 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, int base; u32 in_modifier; u32 promisc; - u16 mtu, prev_mtu; int err; int i, j; int offset; @@ -1269,7 +1355,9 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, is_eth = op_mod; port_info = &priv->port[port]; - /* Slaves cannot perform SET_PORT operations except changing MTU */ + /* Slaves cannot perform SET_PORT operations, + * except for changing MTU and USER_MTU. 
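Note on mlx4_en_set_port_mtu() and mlx4_en_set_port_user_mtu() above: both factor out the same bookkeeping idiom, where a per-slave value feeds a cached per-port maximum that is updated in O(1) when a value grows and rescanned over all functions only when the previous maximum shrinks. The idiom reduced to a standalone sketch with hypothetical names:

#include <stdio.h>

#define NSLAVES 4

static unsigned short slave_mtu[NSLAVES];
static unsigned short max_mtu;

static void set_slave_mtu(int slave, unsigned short mtu)
{
	unsigned short prev = slave_mtu[slave];
	int i;

	slave_mtu[slave] = mtu;
	if (mtu > max_mtu) {
		max_mtu = mtu;			/* growing: cheap update */
	} else if (mtu < prev && prev == max_mtu) {
		max_mtu = mtu;			/* old max shrank: rescan all */
		for (i = 0; i < NSLAVES; i++)
			if (slave_mtu[i] > max_mtu)
				max_mtu = slave_mtu[i];
	}
}

int main(void)
{
	set_slave_mtu(0, 9000);
	set_slave_mtu(1, 1500);
	set_slave_mtu(0, 1500);			/* exercises the rescan path */
	printf("port max mtu: %u\n", max_mtu);	/* 1500 */
	return 0;
}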
+ */ if (is_eth) { if (slave != dev->caps.function && in_modifier != MLX4_SET_PORT_GENERAL && @@ -1297,40 +1385,20 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, break; case MLX4_SET_PORT_GENERAL: gen_context = inbox->buf; - /* Mtu is configured as the max MTU among all the - * the functions on the port. */ - mtu = be16_to_cpu(gen_context->mtu); - mtu = min_t(int, mtu, dev->caps.eth_mtu_cap[port] + - ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN); - prev_mtu = slave_st->mtu[port]; - slave_st->mtu[port] = mtu; - if (mtu > master->max_mtu[port]) - master->max_mtu[port] = mtu; - if (mtu < prev_mtu && prev_mtu == - master->max_mtu[port]) { - slave_st->mtu[port] = mtu; - master->max_mtu[port] = mtu; - for (i = 0; i < dev->num_slaves; i++) { - master->max_mtu[port] = - max(master->max_mtu[port], - master->slave_state[i].mtu[port]); - } - } - gen_context->mtu = cpu_to_be16(master->max_mtu[port]); - /* Slave cannot change Global Pause configuration */ - if (slave != mlx4_master_func_num(dev) && - ((gen_context->pptx != master->pptx) || - (gen_context->pprx != master->pprx))) { - gen_context->pptx = master->pptx; - gen_context->pprx = master->pprx; - mlx4_warn(dev, - "denying Global Pause change for slave:%d\n", - slave); - } else { - master->pptx = gen_context->pptx; - master->pprx = gen_context->pprx; - } + if (gen_context->flags & MLX4_FLAG_V_MTU_MASK) + mlx4_en_set_port_mtu(dev, slave, port, + gen_context); + + if (gen_context->flags2 & MLX4_FLAG2_V_USER_MTU_MASK) + mlx4_en_set_port_user_mtu(dev, slave, port, + gen_context); + + if (gen_context->flags & + (MLX4_FLAG_V_PPRX_MASK | MLX4_FLAG_V_PPTX_MASK)) + mlx4_en_set_port_global_pause(dev, slave, + gen_context); + break; case MLX4_SET_PORT_GID_TABLE: /* change to MULTIPLE entries: number of guest's gids @@ -1608,6 +1676,30 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, } EXPORT_SYMBOL(mlx4_SET_PORT_qpn_calc); +int mlx4_SET_PORT_user_mtu(struct mlx4_dev *dev, u8 port, u16 user_mtu) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_general_context *context; + u32 in_mod; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + context = mailbox->buf; + context->flags2 |= MLX4_FLAG2_V_USER_MTU_MASK; + context->user_mtu = cpu_to_be16(user_mtu); + + in_mod = MLX4_SET_PORT_GENERAL << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_PORT_user_mtu); + int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port, u8 ignore_fcs_value) { struct mlx4_cmd_mailbox *mailbox; @@ -1619,7 +1711,7 @@ int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port, u8 ignore_fcs_value) if (IS_ERR(mailbox)) return PTR_ERR(mailbox); context = mailbox->buf; - context->v_ignore_fcs |= MLX4_FLAG_V_IGNORE_FCS_MASK; + context->flags2 |= MLX4_FLAG2_V_IGNORE_FCS_MASK; if (ignore_fcs_value) context->ignore_fcs |= MLX4_IGNORE_FCS_MASK; else diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 1822382212ee..6fe9f76ae656 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -77,6 +77,7 @@ struct res_common { int from_state; int to_state; int removing; + const char *func_name; }; enum { @@ -236,8 +237,8 @@ static void *res_tracker_lookup(struct rb_root 
*root, u64 res_id) struct rb_node *node = root->rb_node; while (node) { - struct res_common *res = container_of(node, struct res_common, - node); + struct res_common *res = rb_entry(node, struct res_common, + node); if (res_id < res->res_id) node = node->rb_left; @@ -255,8 +256,8 @@ static int res_tracker_insert(struct rb_root *root, struct res_common *res) /* Figure out where to put new node */ while (*new) { - struct res_common *this = container_of(*new, struct res_common, - node); + struct res_common *this = rb_entry(*new, struct res_common, + node); parent = *new; if (res->res_id < this->res_id) @@ -837,6 +838,36 @@ static int mpt_mask(struct mlx4_dev *dev) return dev->caps.num_mpts - 1; } +static const char *mlx4_resource_type_to_str(enum mlx4_resource t) +{ + switch (t) { + case RES_QP: + return "QP"; + case RES_CQ: + return "CQ"; + case RES_SRQ: + return "SRQ"; + case RES_XRCD: + return "XRCD"; + case RES_MPT: + return "MPT"; + case RES_MTT: + return "MTT"; + case RES_MAC: + return "MAC"; + case RES_VLAN: + return "VLAN"; + case RES_COUNTER: + return "COUNTER"; + case RES_FS_RULE: + return "FS_RULE"; + case RES_EQ: + return "EQ"; + default: + return "INVALID RESOURCE"; + } +} + static void *find_res(struct mlx4_dev *dev, u64 res_id, enum mlx4_resource type) { @@ -846,9 +877,9 @@ static void *find_res(struct mlx4_dev *dev, u64 res_id, res_id); } -static int get_res(struct mlx4_dev *dev, int slave, u64 res_id, - enum mlx4_resource type, - void *res) +static int _get_res(struct mlx4_dev *dev, int slave, u64 res_id, + enum mlx4_resource type, + void *res, const char *func_name) { struct res_common *r; int err = 0; @@ -861,6 +892,10 @@ static int get_res(struct mlx4_dev *dev, int slave, u64 res_id, } if (r->state == RES_ANY_BUSY) { + mlx4_warn(dev, + "%s(%d) trying to get resource %llx of type %s, but it's already taken by %s\n", + func_name, slave, res_id, mlx4_resource_type_to_str(type), + r->func_name); err = -EBUSY; goto exit; } @@ -872,6 +907,7 @@ static int get_res(struct mlx4_dev *dev, int slave, u64 res_id, r->from_state = r->state; r->state = RES_ANY_BUSY; + r->func_name = func_name; if (res) *((struct res_common **)res) = r; @@ -881,6 +917,9 @@ exit: return err; } +#define get_res(dev, slave, res_id, type, res) \ + _get_res((dev), (slave), (res_id), (type), (res), __func__) + int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev, enum mlx4_resource type, u64 res_id, int *slave) @@ -911,8 +950,10 @@ static void put_res(struct mlx4_dev *dev, int slave, u64 res_id, spin_lock_irq(mlx4_tlock(dev)); r = find_res(dev, res_id, type); - if (r) + if (r) { r->state = r->from_state; + r->func_name = ""; + } spin_unlock_irq(mlx4_tlock(dev)); } @@ -1396,7 +1437,7 @@ static int remove_ok(struct res_common *res, enum mlx4_resource type, int extra) case RES_MTT: return remove_mtt_ok((struct res_mtt *)res, extra); case RES_MAC: - return -ENOSYS; + return -EOPNOTSUPP; case RES_EQ: return remove_eq_ok((struct res_eq *)res); case RES_COUNTER: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 32d4af9b594d..336d4738b807 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -179,6 +179,8 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, mlx5_core_dbg(dev, "failed adding CP 0x%x to debug file system\n", cq->cqn); + cq->uar = dev->priv.uar; + return 0; err_cmd: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c 
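Note on the resource-tracker hunks above: get_res() becomes a macro so every caller's __func__ is recorded alongside the busy resource, turning a bare -EBUSY into a log line that names the current holder. The pattern in miniature, userspace, with hypothetical names:

#include <stdio.h>

struct resource {
	int busy;
	const char *func_name;	/* current holder, diagnostics only */
};

static int _get_res(struct resource *r, const char *func_name)
{
	if (r->busy) {
		fprintf(stderr, "%s: resource already taken by %s\n",
			func_name, r->func_name);
		return -1;	/* -EBUSY in the kernel */
	}
	r->busy = 1;
	r->func_name = func_name;
	return 0;
}

#define get_res(r) _get_res((r), __func__)

static void put_res(struct resource *r)
{
	r->busy = 0;
	r->func_name = "";
}

int main(void)
{
	struct resource r = { 0, "" };

	get_res(&r);
	get_res(&r);	/* logs: "main: resource already taken by main" */
	put_res(&r);
	return 0;
}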
b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index a9dbc28f6b97..a62f4b6a21a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -71,6 +71,16 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) if (dev_ctx->context) { spin_lock_irq(&priv->ctx_lock); list_add_tail(&dev_ctx->list, &priv->ctx_list); +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (dev_ctx->intf->pfault) { + if (priv->pfault) { + mlx5_core_err(dev, "multiple page fault handlers not supported"); + } else { + priv->pfault_ctx = dev_ctx->context; + priv->pfault = dev_ctx->intf->pfault; + } + } +#endif spin_unlock_irq(&priv->ctx_lock); } else { kfree(dev_ctx); @@ -97,6 +107,15 @@ void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) if (!dev_ctx) return; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + spin_lock_irq(&priv->ctx_lock); + if (priv->pfault == dev_ctx->intf->pfault) + priv->pfault = NULL; + spin_unlock_irq(&priv->ctx_lock); + + synchronize_srcu(&priv->pfault_srcu); +#endif + spin_lock_irq(&priv->ctx_lock); list_del(&dev_ctx->list); spin_unlock_irq(&priv->ctx_lock); @@ -329,6 +348,20 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, spin_unlock_irqrestore(&priv->ctx_lock, flags); } +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +void mlx5_core_page_fault(struct mlx5_core_dev *dev, + struct mlx5_pagefault *pfault) +{ + struct mlx5_priv *priv = &dev->priv; + int srcu_idx; + + srcu_idx = srcu_read_lock(&priv->pfault_srcu); + if (priv->pfault) + priv->pfault(dev, priv->pfault_ctx, pfault); + srcu_read_unlock(&priv->pfault_srcu, srcu_idx); +} +#endif + void mlx5_dev_list_lock(void) { mutex_lock(&mlx5_intf_mutex); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index d5ecb8f53fd4..95ca03c0d9f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -51,6 +51,9 @@ #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) +#define MLX5E_HW2SW_MTU(hwmtu) ((hwmtu) - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) +#define MLX5E_SW2HW_MTU(swmtu) ((swmtu) + (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) + #define MLX5E_MAX_NUM_TC 8 #define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x6 @@ -67,8 +70,13 @@ #define MLX5_RX_HEADROOM NET_SKB_PAD -#define MLX5_MPWRQ_LOG_STRIDE_SIZE 6 /* >= 6, HW restriction */ -#define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS 8 /* >= 6, HW restriction */ +#define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \ + (6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */ +#define MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, req) \ + max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req) +#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6) +#define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8) + #define MLX5_MPWRQ_LOG_WQE_SZ 18 #define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? 
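Note on the dev.c hunks above: a single ODP page-fault handler is registered per device and dispatched under SRCU, which lets mlx5_remove_device() clear the pointer and then synchronize_srcu() to wait out in-flight faults before the interface goes away. A userspace analogue of that publish/dispatch/retire protocol, with a rwlock standing in for SRCU and hypothetical names:

#include <pthread.h>
#include <stddef.h>

typedef void (*pfault_fn)(void *ctx, int fault);

static pthread_rwlock_t pf_lock = PTHREAD_RWLOCK_INITIALIZER;
static pfault_fn pf_cb;
static void *pf_ctx;

static void pfault_register(pfault_fn fn, void *ctx)
{
	pthread_rwlock_wrlock(&pf_lock);
	pf_cb = fn;
	pf_ctx = ctx;
	pthread_rwlock_unlock(&pf_lock);
}

static void pfault_unregister(void)
{
	/* write lock waits for readers to drain, like synchronize_srcu() */
	pthread_rwlock_wrlock(&pf_lock);
	pf_cb = NULL;
	pf_ctx = NULL;
	pthread_rwlock_unlock(&pf_lock);
}

static void pfault_dispatch(int fault)
{
	pthread_rwlock_rdlock(&pf_lock);	/* ~srcu_read_lock() */
	if (pf_cb)
		pf_cb(pf_ctx, fault);
	pthread_rwlock_unlock(&pf_lock);	/* ~srcu_read_unlock() */
}

static void handler(void *ctx, int fault) { (void)ctx; (void)fault; }

int main(void)
{
	pfault_register(handler, NULL);
	pfault_dispatch(42);
	pfault_unregister();
	return 0;
}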
\ MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0) @@ -98,6 +106,7 @@ #define MLX5E_LOG_INDIR_RQT_SIZE 0x7 #define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE) +#define MLX5E_MIN_NUM_CHANNELS 0x1 #define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1) #define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 @@ -111,8 +120,7 @@ #define MLX5E_XDP_IHS_DS_COUNT \ DIV_ROUND_UP(MLX5E_XDP_MIN_INLINE - 2, MLX5_SEND_WQE_DS) #define MLX5E_XDP_TX_DS_COUNT \ - (MLX5E_XDP_IHS_DS_COUNT + \ - (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */) + ((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */) #define MLX5E_XDP_TX_WQEBBS \ DIV_ROUND_UP(MLX5E_XDP_TX_DS_COUNT, MLX5_SEND_WQEBB_NUM_DS) @@ -259,6 +267,7 @@ struct mlx5e_tstamp { struct mlx5_core_dev *mdev; struct ptp_clock *ptp; struct ptp_clock_info ptp_info; + u8 *pps_pin_caps; }; enum { @@ -369,6 +378,7 @@ struct mlx5e_rq { unsigned long state; int ix; + u16 rx_headroom; struct mlx5e_rx_am am; /* Adaptive Moderation */ struct bpf_prog *xdp_prog; @@ -479,7 +489,7 @@ struct mlx5e_sq { /* control path */ struct mlx5_wq_ctrl wq_ctrl; - struct mlx5_uar uar; + struct mlx5_sq_bfreg bfreg; struct mlx5e_channel *channel; int tc; u32 rate_limit; @@ -568,8 +578,9 @@ struct mlx5e_vlan_table { unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; struct mlx5_flow_handle *active_vlans_rule[VLAN_N_VID]; struct mlx5_flow_handle *untagged_rule; - struct mlx5_flow_handle *any_vlan_rule; - bool filter_disabled; + struct mlx5_flow_handle *any_cvlan_rule; + struct mlx5_flow_handle *any_svlan_rule; + bool filter_disabled; }; struct mlx5e_l2_table { @@ -777,9 +788,11 @@ void mlx5e_fill_hwstamp(struct mlx5e_tstamp *clock, u64 timestamp, struct skb_shared_hwtstamps *hwts); void mlx5e_timestamp_init(struct mlx5e_priv *priv); void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv); +void mlx5e_pps_event_handler(struct mlx5e_priv *priv, + struct ptp_clock_event *event); int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr); int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr); -void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val); +void mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val); int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); @@ -807,7 +820,7 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, struct mlx5_wqe_ctrl_seg *ctrl, int bf_sz) { - u16 ofst = MLX5_BF_OFFSET + sq->bf_offset; + u16 ofst = sq->bf_offset; /* ensure wqe is visible to device before updating doorbell record */ dma_wmb(); @@ -833,7 +846,7 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) struct mlx5_core_cq *mcq; mcq = &cq->mcq; - mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, NULL, cq->wq.cc); + mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc); } static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix) @@ -841,12 +854,6 @@ static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix) return wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); } -static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) -{ - return min_t(int, mdev->priv.eq_table.num_comp_vectors, - MLX5E_MAX_NUM_CHANNELS); -} - extern const struct ethtool_ops mlx5e_ethtool_ops; #ifdef CONFIG_MLX5_CORE_EN_DCB extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index 746a92c13644..37e66eef6fb5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -37,6 +37,22 @@ enum { MLX5E_CYCLES_SHIFT = 23 }; +enum { + MLX5E_PIN_MODE_IN = 0x0, + MLX5E_PIN_MODE_OUT = 0x1, +}; + +enum { + MLX5E_OUT_PATTERN_PULSE = 0x0, + MLX5E_OUT_PATTERN_PERIODIC = 0x1, +}; + +enum { + MLX5E_EVENT_MODE_DISABLE = 0x0, + MLX5E_EVENT_MODE_REPETETIVE = 0x1, + MLX5E_EVENT_MODE_ONCE_TILL_ARM = 0x2, +}; + void mlx5e_fill_hwstamp(struct mlx5e_tstamp *tstamp, u64 timestamp, struct skb_shared_hwtstamps *hwts) { @@ -90,11 +106,12 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) return -ERANGE; } + mutex_lock(&priv->state_lock); /* RX HW timestamp */ switch (config.rx_filter) { case HWTSTAMP_FILTER_NONE: /* Reset CQE compression to Admin default */ - mlx5e_modify_rx_cqe_compression(priv, priv->params.rx_cqe_compress_def); + mlx5e_modify_rx_cqe_compression_locked(priv, priv->params.rx_cqe_compress_def); break; case HWTSTAMP_FILTER_ALL: case HWTSTAMP_FILTER_SOME: @@ -112,14 +129,16 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: /* Disable CQE compression */ netdev_warn(dev, "Disabling cqe compression"); - mlx5e_modify_rx_cqe_compression(priv, false); + mlx5e_modify_rx_cqe_compression_locked(priv, false); config.rx_filter = HWTSTAMP_FILTER_ALL; break; default: + mutex_unlock(&priv->state_lock); return -ERANGE; } memcpy(&priv->tstamp.hwtstamp_config, &config, sizeof(config)); + mutex_unlock(&priv->state_lock); return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? -EFAULT : 0; @@ -189,6 +208,18 @@ static int mlx5e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) int neg_adj = 0; struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, ptp_info); + struct mlx5e_priv *priv = + container_of(tstamp, struct mlx5e_priv, tstamp); + + if (MLX5_CAP_GEN(priv->mdev, pps_modify)) { + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + + /* For future use need to add a loop for finding all 1PPS out pins */ + MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_OUT); + MLX5_SET(mtpps_reg, in, out_periodic_adjustment, delta & 0xFFFF); + + mlx5_set_mtpps(priv->mdev, in, sizeof(in)); + } if (delta < 0) { neg_adj = 1; @@ -208,6 +239,124 @@ static int mlx5e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) return 0; } +static int mlx5e_extts_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct mlx5e_tstamp *tstamp = + container_of(ptp, struct mlx5e_tstamp, ptp_info); + struct mlx5e_priv *priv = + container_of(tstamp, struct mlx5e_priv, tstamp); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + u8 pattern = 0; + int pin = -1; + int err = 0; + + if (!MLX5_CAP_GEN(priv->mdev, pps) || + !MLX5_CAP_GEN(priv->mdev, pps_modify)) + return -EOPNOTSUPP; + + if (rq->extts.index >= tstamp->ptp_info.n_pins) + return -EINVAL; + + if (on) { + pin = ptp_find_pin(tstamp->ptp, PTP_PF_EXTTS, rq->extts.index); + if (pin < 0) + return -EBUSY; + } + + if (rq->extts.flags & PTP_FALLING_EDGE) + pattern = 1; + + MLX5_SET(mtpps_reg, in, pin, pin); + MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_IN); + MLX5_SET(mtpps_reg, in, pattern, pattern); + MLX5_SET(mtpps_reg, in, enable, on); + + err = mlx5_set_mtpps(priv->mdev, in, sizeof(in)); + if (err) + return err; + + return mlx5_set_mtppse(priv->mdev, pin, 0, + MLX5E_EVENT_MODE_REPETETIVE & on); +} + +static int mlx5e_perout_configure(struct 
ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct mlx5e_tstamp *tstamp = + container_of(ptp, struct mlx5e_tstamp, ptp_info); + struct mlx5e_priv *priv = + container_of(tstamp, struct mlx5e_priv, tstamp); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + u64 nsec_now, nsec_delta, time_stamp; + u64 cycles_now, cycles_delta; + struct timespec64 ts; + unsigned long flags; + int pin = -1; + s64 ns; + + if (!MLX5_CAP_GEN(priv->mdev, pps_modify)) + return -EOPNOTSUPP; + + if (rq->perout.index >= tstamp->ptp_info.n_pins) + return -EINVAL; + + if (on) { + pin = ptp_find_pin(tstamp->ptp, PTP_PF_PEROUT, + rq->perout.index); + if (pin < 0) + return -EBUSY; + } + + ts.tv_sec = rq->perout.period.sec; + ts.tv_nsec = rq->perout.period.nsec; + ns = timespec64_to_ns(&ts); + if (on) + if ((ns >> 1) != 500000000LL) + return -EINVAL; + ts.tv_sec = rq->perout.start.sec; + ts.tv_nsec = rq->perout.start.nsec; + ns = timespec64_to_ns(&ts); + cycles_now = mlx5_read_internal_timer(tstamp->mdev); + write_lock_irqsave(&tstamp->lock, flags); + nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now); + nsec_delta = ns - nsec_now; + cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift, + tstamp->cycles.mult); + write_unlock_irqrestore(&tstamp->lock, flags); + time_stamp = cycles_now + cycles_delta; + MLX5_SET(mtpps_reg, in, pin, pin); + MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_OUT); + MLX5_SET(mtpps_reg, in, pattern, MLX5E_OUT_PATTERN_PERIODIC); + MLX5_SET(mtpps_reg, in, enable, on); + MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp); + + return mlx5_set_mtpps(priv->mdev, in, sizeof(in)); +} + +static int mlx5e_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + return mlx5e_extts_configure(ptp, rq, on); + case PTP_CLK_REQ_PEROUT: + return mlx5e_perout_configure(ptp, rq, on); + default: + return -EOPNOTSUPP; + } + return 0; +} + +static int mlx5e_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + return (func == PTP_PF_PHYSYNC) ? 
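Note on mlx5e_perout_configure() above: it accepts only a 1 s period (the (ns >> 1) != 500000000LL check) and must hand the hardware a start time expressed in device cycles. Given the timecounter relation ns = (cycles * mult) >> shift, a future nanosecond delta converts back as (delta << shift) / mult. The conversion as a standalone sketch (for large deltas the shift can overflow 64 bits, which the short horizons here avoid):

#include <stdint.h>
#include <stdio.h>

static uint64_t ns_to_cycles(uint64_t nsec_delta, uint32_t mult, uint32_t shift)
{
	return (nsec_delta << shift) / mult;	/* div64_u64() in the kernel */
}

int main(void)
{
	/* example scale: with mult = 1 << shift, one cycle is one ns */
	uint32_t shift = 23, mult = 1u << 23;

	printf("%llu cycles until a start 1s away\n",
	       (unsigned long long)ns_to_cycles(1000000000ULL, mult, shift));
	return 0;
}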
-EOPNOTSUPP : 0; +} + static const struct ptp_clock_info mlx5e_ptp_clock_info = { .owner = THIS_MODULE, .max_adj = 100000000, @@ -221,6 +370,7 @@ static const struct ptp_clock_info mlx5e_ptp_clock_info = { .gettime64 = mlx5e_ptp_gettime, .settime64 = mlx5e_ptp_settime, .enable = NULL, + .verify = NULL, }; static void mlx5e_timestamp_init_config(struct mlx5e_tstamp *tstamp) @@ -229,6 +379,62 @@ static void mlx5e_timestamp_init_config(struct mlx5e_tstamp *tstamp) tstamp->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; } +static int mlx5e_init_pin_config(struct mlx5e_tstamp *tstamp) +{ + int i; + + tstamp->ptp_info.pin_config = + kzalloc(sizeof(*tstamp->ptp_info.pin_config) * + tstamp->ptp_info.n_pins, GFP_KERNEL); + if (!tstamp->ptp_info.pin_config) + return -ENOMEM; + tstamp->ptp_info.enable = mlx5e_ptp_enable; + tstamp->ptp_info.verify = mlx5e_ptp_verify; + + for (i = 0; i < tstamp->ptp_info.n_pins; i++) { + snprintf(tstamp->ptp_info.pin_config[i].name, + sizeof(tstamp->ptp_info.pin_config[i].name), + "mlx5_pps%d", i); + tstamp->ptp_info.pin_config[i].index = i; + tstamp->ptp_info.pin_config[i].func = PTP_PF_NONE; + tstamp->ptp_info.pin_config[i].chan = i; + } + + return 0; +} + +static void mlx5e_get_pps_caps(struct mlx5e_priv *priv, + struct mlx5e_tstamp *tstamp) +{ + u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + + mlx5_query_mtpps(priv->mdev, out, sizeof(out)); + + tstamp->ptp_info.n_pins = MLX5_GET(mtpps_reg, out, + cap_number_of_pps_pins); + tstamp->ptp_info.n_ext_ts = MLX5_GET(mtpps_reg, out, + cap_max_num_of_pps_in_pins); + tstamp->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out, + cap_max_num_of_pps_out_pins); + + tstamp->pps_pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode); + tstamp->pps_pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode); + tstamp->pps_pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode); + tstamp->pps_pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode); + tstamp->pps_pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode); + tstamp->pps_pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode); + tstamp->pps_pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode); + tstamp->pps_pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode); +} + +void mlx5e_pps_event_handler(struct mlx5e_priv *priv, + struct ptp_clock_event *event) +{ + struct mlx5e_tstamp *tstamp = &priv->tstamp; + + ptp_clock_event(tstamp->ptp, event); +} + void mlx5e_timestamp_init(struct mlx5e_priv *priv) { struct mlx5e_tstamp *tstamp = &priv->tstamp; @@ -272,6 +478,18 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) tstamp->ptp_info = mlx5e_ptp_clock_info; snprintf(tstamp->ptp_info.name, 16, "mlx5 ptp"); + /* Initialize 1PPS data structures */ +#define MAX_PIN_NUM 8 + tstamp->pps_pin_caps = kzalloc(sizeof(u8) * MAX_PIN_NUM, GFP_KERNEL); + if (tstamp->pps_pin_caps) { + if (MLX5_CAP_GEN(priv->mdev, pps)) + mlx5e_get_pps_caps(priv, tstamp); + if (tstamp->ptp_info.n_pins) + mlx5e_init_pin_config(tstamp); + } else { + mlx5_core_warn(priv->mdev, "1PPS initialization failed\n"); + } + tstamp->ptp = ptp_clock_register(&tstamp->ptp_info, &priv->mdev->pdev->dev); if (IS_ERR(tstamp->ptp)) { @@ -293,5 +511,8 @@ void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv) priv->tstamp.ptp = NULL; } + kfree(tstamp->pps_pin_caps); + kfree(tstamp->ptp_info.pin_config); + cancel_delayed_work_sync(&tstamp->overflow_work); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index f175518ff07a..bd898d8deda0 100644 --- 
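Note on the pin registration above: once mlx5e_init_pin_config() has published the mlx5_pps%d pins, they are driven through the standard PTP character-device ioctls. A hypothetical userspace example assigning pin 0 to periodic output; the /dev/ptp0 index is an assumption (find the device's clock via ethtool -T):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/ptp_clock.h>

int main(void)
{
	struct ptp_pin_desc desc;
	int fd = open("/dev/ptp0", O_RDWR);	/* assumed clock index */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	memset(&desc, 0, sizeof(desc));
	desc.index = 0;			/* "mlx5_pps0" */
	desc.func = PTP_PF_PEROUT;	/* mlx5e_ptp_verify() rejects only PHYSYNC */
	desc.chan = 0;
	if (ioctl(fd, PTP_PIN_SETFUNC, &desc))
		perror("PTP_PIN_SETFUNC");
	return 0;
}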
a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -89,16 +89,10 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) struct mlx5e_resources *res = &mdev->mlx5e_res; int err; - err = mlx5_alloc_map_uar(mdev, &res->cq_uar, false); - if (err) { - mlx5_core_err(mdev, "alloc_map uar failed, %d\n", err); - return err; - } - err = mlx5_core_alloc_pd(mdev, &res->pdn); if (err) { mlx5_core_err(mdev, "alloc pd failed, %d\n", err); - goto err_unmap_free_uar; + return err; } err = mlx5_core_alloc_transport_domain(mdev, &res->td.tdn); @@ -121,9 +115,6 @@ err_dealloc_transport_domain: mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); err_dealloc_pd: mlx5_core_dealloc_pd(mdev, res->pdn); -err_unmap_free_uar: - mlx5_unmap_free_uar(mdev, &res->cq_uar); - return err; } @@ -134,7 +125,6 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) mlx5_core_destroy_mkey(mdev, &res->mkey); mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); mlx5_core_dealloc_pd(mdev, res->pdn); - mlx5_unmap_free_uar(mdev, &res->cq_uar); } int mlx5e_refresh_tirs_self_loopback(struct mlx5_core_dev *mdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index bb67863aa361..cc80522b5854 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -170,7 +170,8 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset) case ETH_SS_STATS: return NUM_SW_COUNTERS + MLX5E_NUM_Q_CNTRS(priv) + - NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS + + NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS(priv) + + NUM_PCIE_COUNTERS(priv) + MLX5E_NUM_RQ_STATS(priv) + MLX5E_NUM_SQ_STATS(priv) + MLX5E_NUM_PFC_COUNTERS(priv) + @@ -218,6 +219,14 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2819_stats_desc[i].format); + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_phy_statistical_stats_desc[i].format); + + for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stats_desc[i].format); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) sprintf(data + (idx++) * ETH_GSTRING_LEN, @@ -330,6 +339,14 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters, pport_2819_stats_desc, i); + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv); i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, + pport_phy_statistical_stats_desc, i); + + for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++) + data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stats_desc, i); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], @@ -535,7 +552,7 @@ static void mlx5e_get_channels(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); - ch->max_combined = mlx5e_get_max_num_channels(priv->mdev); + ch->max_combined = priv->profile->max_nch(priv->mdev); ch->combined_count = priv->params.num_channels; } @@ -1459,8 +1476,6 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); 
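Note on the ethtool statistics additions above: they only stay consistent because mlx5e drives get_sset_count(), the string table and the value table from the same counter-descriptor arrays (the *_stats_desc tables and MLX5E_READ_CTR* accessors), so the three can never drift apart. The pattern in a standalone sketch with hypothetical names:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct counter_desc {
	const char *format;	/* ETH_GSTRING_LEN-style name */
	size_t offset;		/* offsetof() into the stats struct */
};

struct sw_stats {
	uint64_t rx_packets;
	uint64_t tx_packets;
};

static const struct counter_desc sw_stats_desc[] = {
	{ "rx_packets", offsetof(struct sw_stats, rx_packets) },
	{ "tx_packets", offsetof(struct sw_stats, tx_packets) },
};

#define NUM_SW_COUNTERS (sizeof(sw_stats_desc) / sizeof(sw_stats_desc[0]))

int main(void)
{
	struct sw_stats s = { 10, 20 };
	size_t i;

	for (i = 0; i < NUM_SW_COUNTERS; i++)	/* names and values in lockstep */
		printf("%s: %llu\n", sw_stats_desc[i].format,
		       (unsigned long long)*(const uint64_t *)
				((const char *)&s + sw_stats_desc[i].offset));
	return 0;
}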
struct mlx5_core_dev *mdev = priv->mdev; - int err = 0; - bool reset; if (!MLX5_CAP_GEN(mdev, cqe_compression)) return -EOPNOTSUPP; @@ -1470,17 +1485,10 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, return -EINVAL; } - reset = test_bit(MLX5E_STATE_OPENED, &priv->state); - - if (reset) - mlx5e_close_locked(netdev); - - MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, enable); + mlx5e_modify_rx_cqe_compression_locked(priv, enable); priv->params.rx_cqe_compress_def = enable; - if (reset) - err = mlx5e_open_locked(netdev); - return err; + return 0; } static int mlx5e_handle_pflag(struct net_device *netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index a0e5a69402b3..f2762e45c8ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -150,7 +150,8 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) enum mlx5e_vlan_rule_type { MLX5E_VLAN_RULE_TYPE_UNTAGGED, - MLX5E_VLAN_RULE_TYPE_ANY_VID, + MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, + MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, MLX5E_VLAN_RULE_TYPE_MATCH_VID, }; @@ -172,19 +173,31 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, dest.ft = priv->fs.l2.ft.t; spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag); + switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: rule_p = &priv->fs.vlan.untagged_rule; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); break; - case MLX5E_VLAN_RULE_TYPE_ANY_VID: - rule_p = &priv->fs.vlan.any_vlan_rule; - MLX5_SET(fte_match_param, spec->match_value, outer_headers.vlan_tag, 1); + case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID: + rule_p = &priv->fs.vlan.any_cvlan_rule; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1); + break; + case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID: + rule_p = &priv->fs.vlan.any_svlan_rule; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1); break; default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ rule_p = &priv->fs.vlan.active_vlans_rule[vid]; - MLX5_SET(fte_match_param, spec->match_value, outer_headers.vlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1); MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, @@ -235,10 +248,16 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, priv->fs.vlan.untagged_rule = NULL; } break; - case MLX5E_VLAN_RULE_TYPE_ANY_VID: - if (priv->fs.vlan.any_vlan_rule) { - mlx5_del_flow_rules(priv->fs.vlan.any_vlan_rule); - priv->fs.vlan.any_vlan_rule = NULL; + case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID: + if (priv->fs.vlan.any_cvlan_rule) { + mlx5_del_flow_rules(priv->fs.vlan.any_cvlan_rule); + priv->fs.vlan.any_cvlan_rule = NULL; + } + break; + case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID: + if (priv->fs.vlan.any_svlan_rule) { + mlx5_del_flow_rules(priv->fs.vlan.any_svlan_rule); + priv->fs.vlan.any_svlan_rule = NULL; } break; case MLX5E_VLAN_RULE_TYPE_MATCH_VID: @@ -252,6 +271,23 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, } } +static void 
mlx5e_del_any_vid_rules(struct mlx5e_priv *priv) +{ + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0); +} + +static int mlx5e_add_any_vid_rules(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); + if (err) + return err; + + return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0); +} + void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) { if (!priv->fs.vlan.filter_disabled) @@ -260,7 +296,7 @@ void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) priv->fs.vlan.filter_disabled = false; if (priv->netdev->flags & IFF_PROMISC) return; - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); + mlx5e_del_any_vid_rules(priv); } void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) @@ -271,7 +307,7 @@ void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) priv->fs.vlan.filter_disabled = true; if (priv->netdev->flags & IFF_PROMISC) return; - mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); + mlx5e_add_any_vid_rules(priv); } int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, @@ -308,7 +344,7 @@ static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv) if (priv->fs.vlan.filter_disabled && !(priv->netdev->flags & IFF_PROMISC)) - mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); + mlx5e_add_any_vid_rules(priv); } static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) @@ -323,7 +359,7 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) if (priv->fs.vlan.filter_disabled && !(priv->netdev->flags & IFF_PROMISC)) - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); + mlx5e_del_any_vid_rules(priv); } #define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ @@ -503,8 +539,7 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) if (enable_promisc) { mlx5e_add_l2_flow_rule(priv, &ea->promisc, MLX5E_PROMISC); if (!priv->fs.vlan.filter_disabled) - mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, - 0); + mlx5e_add_any_vid_rules(priv); } if (enable_allmulti) mlx5e_add_l2_flow_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); @@ -519,8 +554,7 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) mlx5e_del_l2_flow_rule(priv, &ea->allmulti); if (disable_promisc) { if (!priv->fs.vlan.filter_disabled) - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, - 0); + mlx5e_del_any_vid_rules(priv); mlx5e_del_l2_flow_rule(priv, &ea->promisc); } @@ -976,11 +1010,13 @@ err_destroy_flow_table: return err; } -#define MLX5E_NUM_VLAN_GROUPS 2 +#define MLX5E_NUM_VLAN_GROUPS 3 #define MLX5E_VLAN_GROUP0_SIZE BIT(12) #define MLX5E_VLAN_GROUP1_SIZE BIT(1) +#define MLX5E_VLAN_GROUP2_SIZE BIT(0) #define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\ - MLX5E_VLAN_GROUP1_SIZE) + MLX5E_VLAN_GROUP1_SIZE +\ + MLX5E_VLAN_GROUP2_SIZE) static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in, int inlen) @@ -991,7 +1027,7 @@ static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_VLAN_GROUP0_SIZE; @@ -1003,7 +1039,7 @@ static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in memset(in, 0, inlen); 
MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_VLAN_GROUP1_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); @@ -1012,6 +1048,17 @@ static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in goto err_destroy_groups; ft->num_groups++; + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + return 0; err_destroy_groups: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index f33f72d0237c..d55fff0ba388 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -237,9 +237,9 @@ static int set_flow_attrs(u32 *match_c, u32 *match_v, if ((fs->flow_type & FLOW_EXT) && (fs->m_ext.vlan_tci & cpu_to_be16(VLAN_VID_MASK))) { MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, - vlan_tag, 1); + cvlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, - vlan_tag, 1); + cvlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, first_vid, 0xfff); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f14ca3385fdd..3cce6281e075 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -31,6 +31,7 @@ */ #include <net/tc_act/tc_gact.h> +#include <linux/crash_dump.h> #include <net/pkt_cls.h> #include <linux/mlx5/fs.h> #include <net/vxlan.h> @@ -83,16 +84,20 @@ static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) priv->params.rq_wq_type = rq_type; switch (priv->params.rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; + priv->params.log_rq_size = is_kdump_kernel() ? + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW : + MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; priv->params.mpwqe_log_stride_sz = MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS) ? - MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS : - MLX5_MPWRQ_LOG_STRIDE_SIZE; + MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(priv->mdev) : + MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(priv->mdev); priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - priv->params.mpwqe_log_stride_sz; break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + priv->params.log_rq_size = is_kdump_kernel() ? 
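Note on the flow-group resizing above: a flow group can only hold rules that share one match-criteria mask, so group 0 keeps the 4096 per-VID C-tag rules (criteria cvlan_tag + first_vid), group 1 the two cvlan_tag-only rules (untagged and any-C-tag), and the new group 2 the single any-S-tag rule. The sizing arithmetic:

#include <stdio.h>

#define BIT(n) (1u << (n))

int main(void)
{
	unsigned int group0 = BIT(12);	/* cvlan_tag + first_vid: 4096 VIDs */
	unsigned int group1 = BIT(1);	/* cvlan_tag only: untagged, any-ctag */
	unsigned int group2 = BIT(0);	/* svlan_tag only: any-stag */

	printf("MLX5E_VLAN_TABLE_SIZE = %u\n", group0 + group1 + group2);
	return 0;
}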
+ MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : + MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; } priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, BIT(priv->params.log_rq_size)); @@ -268,6 +273,12 @@ static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + if (MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) { + out = pstats->phy_statistical_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + } + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { out = pstats->per_prio_counters[prio]; @@ -291,11 +302,34 @@ static void mlx5e_update_q_counter(struct mlx5e_priv *priv) &qcnt->rx_out_of_buffer); } +static void mlx5e_update_pcie_counters(struct mlx5e_priv *priv) +{ + struct mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie; + struct mlx5_core_dev *mdev = priv->mdev; + int sz = MLX5_ST_SZ_BYTES(mpcnt_reg); + void *out; + u32 *in; + + if (!MLX5_CAP_MCAM_FEATURE(mdev, pcie_performance_group)) + return; + + in = mlx5_vzalloc(sz); + if (!in) + return; + + out = pcie_stats->pcie_perf_counters; + MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); + + kvfree(in); +} + void mlx5e_update_stats(struct mlx5e_priv *priv) { - mlx5e_update_q_counter(priv); - mlx5e_update_vport_counters(priv); + mlx5e_update_pcie_counters(priv); mlx5e_update_pport_counters(priv); + mlx5e_update_vport_counters(priv); + mlx5e_update_q_counter(priv); mlx5e_update_sw_counters(priv); } @@ -317,6 +351,8 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, enum mlx5_dev_event event, unsigned long param) { struct mlx5e_priv *priv = vpriv; + struct ptp_clock_event ptp_event; + struct mlx5_eqe *eqe = NULL; if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state)) return; @@ -326,7 +362,15 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, case MLX5_DEV_EVENT_PORT_DOWN: queue_work(priv->wq, &priv->update_carrier_work); break; - + case MLX5_DEV_EVENT_PPS: + eqe = (struct mlx5_eqe *)param; + ptp_event.type = PTP_CLOCK_EXTTS; + ptp_event.index = eqe->data.pps.pin; + ptp_event.timestamp = + timecounter_cyc2time(&priv->tstamp.clock, + be64_to_cpu(eqe->data.pps.time_stamp)); + mlx5e_pps_event_handler(vpriv, &ptp_event); + break; default: break; } @@ -343,9 +387,6 @@ static void mlx5e_disable_async_events(struct mlx5e_priv *priv) synchronize_irq(mlx5_get_msix_vec(priv->mdev, MLX5_EQ_VEC_ASYNC)); } -#define MLX5E_HW2SW_MTU(hwmtu) (hwmtu - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) -#define MLX5E_SW2HW_MTU(swmtu) (swmtu + (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) - static inline int mlx5e_get_wqe_mtt_sz(void) { /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. 
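
The PPCNT and MPCNT counter groups queried in the hunks above come back as raw big-endian register dumps; 64-bit counters are addressed by the offset of their high 32-bit word, which is what the PPORT_*_GET / PCIE_PERF_GET macros later in this patch hand to MLX5_GET64. A small userspace sketch of that extraction, assuming only that layout (illustrative, not the kernel's MLX5_GET64 implementation):

        #include <arpa/inet.h>
        #include <stdint.h>
        #include <stdio.h>
        #include <string.h>

        /* Read a 64-bit big-endian counter whose _high word starts at
         * byte offset `off` in the register dump `out`. */
        static uint64_t get64_be(const uint8_t *out, size_t off)
        {
                uint32_t hi, lo;

                memcpy(&hi, out + off, 4);
                memcpy(&lo, out + off + 4, 4);
                return ((uint64_t)ntohl(hi) << 32) | ntohl(lo);
        }

        int main(void)
        {
                uint8_t out[16] = {0};

                out[3] = 1; /* high word = 1 */
                out[7] = 2; /* low word  = 2 */
                /* prints 4294967298 = (1 << 32) + 2 */
                printf("counter: %llu\n", (unsigned long long)get64_be(out, 0));
                return 0;
        }
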
@@ -372,7 +413,7 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_sq *sq, cseg->imm = rq->mkey_be; ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN; - ucseg->klm_octowords = + ucseg->xlt_octowords = cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE)); ucseg->bsf_octowords = cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset)); @@ -534,9 +575,13 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - rq->buff.map_dir = DMA_FROM_DEVICE; - if (rq->xdp_prog) + if (rq->xdp_prog) { rq->buff.map_dir = DMA_BIDIRECTIONAL; + rq->rx_headroom = XDP_PACKET_HEADROOM; + } else { + rq->buff.map_dir = DMA_FROM_DEVICE; + rq->rx_headroom = MLX5_RX_HEADROOM; + } switch (priv->params.rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: @@ -586,7 +631,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, byte_count = rq->buff.wqe_sz; /* calc the required page order */ - frag_sz = MLX5_RX_HEADROOM + + frag_sz = rq->rx_headroom + byte_count /* packet data */ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); frag_sz = SKB_DATA_ALIGN(frag_sz); @@ -967,10 +1012,11 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, sq->channel = c; sq->tc = tc; - err = mlx5_alloc_map_uar(mdev, &sq->uar, !!MLX5_CAP_GEN(mdev, bf)); + err = mlx5_alloc_bfreg(mdev, &sq->bfreg, MLX5_CAP_GEN(mdev, bf), false); if (err) return err; + sq->uar_map = sq->bfreg.map; param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, @@ -979,17 +1025,12 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, goto err_unmap_free_uar; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - if (sq->uar.bf_map) { + if (sq->bfreg.wc) set_bit(MLX5E_SQ_STATE_BF_ENABLE, &sq->state); - sq->uar_map = sq->uar.bf_map; - } else { - sq->uar_map = sq->uar.map; - } + sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; sq->max_inline = param->max_inline; - sq->min_inline_mode = - MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT ? - param->min_inline_mode : 0; + sq->min_inline_mode = param->min_inline_mode; err = mlx5e_alloc_sq_db(sq, cpu_to_node(c->cpu)); if (err) @@ -1012,7 +1053,7 @@ err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); err_unmap_free_uar: - mlx5_unmap_free_uar(mdev, &sq->uar); + mlx5_free_bfreg(mdev, &sq->bfreg); return err; } @@ -1024,7 +1065,7 @@ static void mlx5e_destroy_sq(struct mlx5e_sq *sq) mlx5e_free_sq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); - mlx5_unmap_free_uar(priv->mdev, &sq->uar); + mlx5_free_bfreg(priv->mdev, &sq->bfreg); } static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) @@ -1053,12 +1094,15 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) MLX5_SET(sqc, sqc, tis_num_0, param->type == MLX5E_SQ_ICO ? 0 : priv->tisn[sq->tc]); MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); - MLX5_SET(sqc, sqc, min_wqe_inline_mode, sq->min_inline_mode); + + if (MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) + MLX5_SET(sqc, sqc, min_wqe_inline_mode, sq->min_inline_mode); + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(sqc, sqc, tis_lst_sz, param->type == MLX5E_SQ_ICO ? 
0 : 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); - MLX5_SET(wq, wq, uar_page, sq->uar.index); + MLX5_SET(wq, wq, uar_page, sq->bfreg.index); MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); @@ -1216,7 +1260,6 @@ static int mlx5e_create_cq(struct mlx5e_channel *c, mcq->comp = mlx5e_completion_event; mcq->event = mlx5e_cq_error_event; mcq->irqn = irqn; - mcq->uar = &mdev->mlx5e_res.cq_uar; for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); @@ -1265,7 +1308,7 @@ static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode); MLX5_SET(cqc, cqc, c_eqn, eqn); - MLX5_SET(cqc, cqc, uar_page, mcq->uar->index); + MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); @@ -1472,6 +1515,14 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } +static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) +{ + return is_kdump_kernel() ? + MLX5E_MIN_NUM_CHANNELS : + min_t(int, mdev->priv.eq_table.num_comp_vectors, + MLX5E_MAX_NUM_CHANNELS); +} + static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_channel_param *cparam, struct mlx5e_channel **cp) @@ -1677,7 +1728,7 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, { void *cqc = param->cqc; - MLX5_SET(cqc, cqc, uar_page, priv->mdev->mlx5e_res.cq_uar.index); + MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index); } static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, @@ -1756,8 +1807,7 @@ static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); param->max_inline = priv->params.tx_max_inline; - /* FOR XDP SQs will support only L2 inline mode */ - param->min_inline_mode = MLX5_INLINE_MODE_NONE; + param->min_inline_mode = priv->params.tx_min_inline_mode; param->type = MLX5E_SQ_XDP; } @@ -2393,7 +2443,6 @@ static int mlx5e_create_drop_cq(struct mlx5e_priv *priv, mcq->comp = mlx5e_completion_event; mcq->event = mlx5e_cq_error_event; mcq->irqn = irqn; - mcq->uar = &mdev->mlx5e_res.cq_uar; cq->priv = priv; @@ -2686,7 +2735,7 @@ mqprio: return mlx5e_setup_tc(dev, tc->tc); } -static struct rtnl_link_stats64 * +static void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -2729,7 +2778,6 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->multicast = VPORT_COUNTER_GET(vstats, received_eth_multicast.packets); - return stats; } static void mlx5e_set_rx_mode(struct net_device *dev) @@ -2987,11 +3035,8 @@ static int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; - if (min_tx_rate) - return -EOPNOTSUPP; - return mlx5_eswitch_set_vport_rate(mdev->priv.eswitch, vf + 1, - max_tx_rate); + max_tx_rate, min_tx_rate); } static int mlx5_vport_link2ifla(u8 esw_link) @@ -3159,11 +3204,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) bool reset, was_opened; int i; - if (prog && prog->xdp_adjust_head) { - netdev_err(netdev, "Does not support bpf_xdp_adjust_head()\n"); - return -EOPNOTSUPP; - } - mutex_lock(&priv->state_lock); 
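
Aside on the is_kdump_kernel() checks threaded through en_main.c in this patch: under a crash-dump kernel, where memory is scarce, the driver falls back to minimum RQ/SQ ring sizes and a single channel rather than scaling with the number of completion vectors. A minimal userspace model of the channel-count clamp in mlx5e_get_max_num_channels() above; the numeric values are illustrative placeholders, not the driver's real defaults:

        #include <stdbool.h>
        #include <stdio.h>

        #define MIN_NUM_CHANNELS 1  /* placeholder */
        #define MAX_NUM_CHANNELS 64 /* placeholder */

        static int min_int(int a, int b) { return a < b ? a : b; }

        /* Mirrors mlx5e_get_max_num_channels(): one channel under kdump,
         * otherwise bounded by the number of completion vectors. */
        static int get_max_num_channels(bool kdump, int num_comp_vectors)
        {
                return kdump ? MIN_NUM_CHANNELS
                             : min_int(num_comp_vectors, MAX_NUM_CHANNELS);
        }

        int main(void)
        {
                printf("normal: %d channels\n", get_max_num_channels(false, 16));
                printf("kdump:  %d channels\n", get_max_num_channels(true, 16));
                return 0;
        }
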
if ((netdev->features & NETIF_F_LRO) && prog) { @@ -3432,22 +3472,6 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; } -static void mlx5e_query_min_inline(struct mlx5_core_dev *mdev, - u8 *min_inline_mode) -{ - switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) { - case MLX5_CAP_INLINE_MODE_L2: - *min_inline_mode = MLX5_INLINE_MODE_L2; - break; - case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: - mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode); - break; - case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: - *min_inline_mode = MLX5_INLINE_MODE_NONE; - break; - } -} - u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) { int i; @@ -3481,7 +3505,9 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->params.lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); - priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + priv->params.log_sq_size = is_kdump_kernel() ? + MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : + MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; /* set CQE compression */ priv->params.rx_cqe_compress_def = false; @@ -3507,7 +3533,11 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->params.tx_cq_moderation.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); - mlx5e_query_min_inline(mdev, &priv->params.tx_min_inline_mode); + mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode); + if (priv->params.tx_min_inline_mode == MLX5_INLINE_MODE_NONE && + !MLX5_CAP_ETH(mdev, wqe_vlan_insert)) + priv->params.tx_min_inline_mode = MLX5_INLINE_MODE_L2; + priv->params.num_tc = 1; priv->params.rss_hfunc = ETH_RSS_HASH_XOR; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 850378893b25..2c864574a9d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -374,13 +374,12 @@ int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, return -EINVAL; } -static struct rtnl_link_stats64 * +static void mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); memcpy(stats, &priv->stats.vf_vport, sizeof(*stats)); - return stats; } static const struct switchdev_ops mlx5e_rep_switchdev_ops = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 06d5e6fecb0a..b039b87742a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -33,6 +33,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/tcp.h> +#include <linux/bpf_trace.h> #include <net/busy_poll.h> #include "en.h" #include "en_tc.h" @@ -155,17 +156,15 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1; } -void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val) +void mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val) { bool was_opened; if (!MLX5_CAP_GEN(priv->mdev, cqe_compression)) return; - mutex_lock(&priv->state_lock); - if (MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS) == val) - goto unlock; + return; was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); if (was_opened) @@ -176,8 +175,6 @@ void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val) if 
(was_opened) mlx5e_open_locked(priv->netdev); -unlock: - mutex_unlock(&priv->state_lock); } #define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT) @@ -267,7 +264,7 @@ int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) if (unlikely(mlx5e_page_alloc_mapped(rq, di))) return -ENOMEM; - wqe->data.addr = cpu_to_be64(di->addr + MLX5_RX_HEADROOM); + wqe->data.addr = cpu_to_be64(di->addr + rq->rx_headroom); return 0; } @@ -647,10 +644,9 @@ static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq) mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } -static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, +static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, - unsigned int data_offset, - int len) + const struct xdp_buff *xdp) { struct mlx5e_sq *sq = &rq->channel->xdp_sq; struct mlx5_wq_cyc *wq = &sq->wq; @@ -661,10 +657,18 @@ static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_eth_seg *eseg = &wqe->eth; struct mlx5_wqe_data_seg *dseg; + u8 ds_cnt = MLX5E_XDP_TX_DS_COUNT; + + ptrdiff_t data_offset = xdp->data - xdp->data_hard_start; + dma_addr_t dma_addr = di->addr + data_offset; + unsigned int dma_len = xdp->data_end - xdp->data; - dma_addr_t dma_addr = di->addr + data_offset + MLX5E_XDP_MIN_INLINE; - unsigned int dma_len = len - MLX5E_XDP_MIN_INLINE; - void *data = page_address(di->page) + data_offset; + if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || + MLX5E_SW2HW_MTU(rq->netdev->mtu) < dma_len)) { + rq->stats.xdp_drop++; + mlx5e_page_release(rq, di, true); + return false; + } if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) { if (sq->db.xdp.doorbell) { @@ -674,7 +678,7 @@ static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, } rq->stats.xdp_tx_full++; mlx5e_page_release(rq, di, true); - return; + return false; } dma_sync_single_for_device(sq->pdev, dma_addr, dma_len, @@ -682,11 +686,17 @@ static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, memset(wqe, 0, sizeof(*wqe)); - /* copy the inline part */ - memcpy(eseg->inline_hdr_start, data, MLX5E_XDP_MIN_INLINE); - eseg->inline_hdr_sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); + dseg = (struct mlx5_wqe_data_seg *)eseg + 1; + /* copy the inline part if required */ + if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { + memcpy(eseg->inline_hdr.start, xdp->data, MLX5E_XDP_MIN_INLINE); + eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); + dma_len -= MLX5E_XDP_MIN_INLINE; + dma_addr += MLX5E_XDP_MIN_INLINE; - dseg = (struct mlx5_wqe_data_seg *)cseg + (MLX5E_XDP_TX_DS_COUNT - 1); + ds_cnt += MLX5E_XDP_IHS_DS_COUNT; + dseg++; + } /* write the dma part */ dseg->addr = cpu_to_be64(dma_addr); @@ -694,7 +704,7 @@ static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, dseg->lkey = sq->mkey_be; cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | MLX5E_XDP_TX_DS_COUNT); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); sq->db.xdp.di[pi] = *di; wi->opcode = MLX5_OPCODE_SEND; @@ -703,32 +713,39 @@ static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, sq->db.xdp.doorbell = true; rq->stats.xdp_tx++; + return true; } /* returns true if packet was consumed by xdp */ -static inline bool mlx5e_xdp_handle(struct mlx5e_rq *rq, - const struct bpf_prog *prog, - struct mlx5e_dma_info *di, - void *data, u16 len) +static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq, + struct mlx5e_dma_info *di, + void *va, u16 *rx_headroom, 
u32 *len) { + const struct bpf_prog *prog = READ_ONCE(rq->xdp_prog); struct xdp_buff xdp; u32 act; if (!prog) return false; - xdp.data = data; - xdp.data_end = xdp.data + len; + xdp.data = va + *rx_headroom; + xdp.data_end = xdp.data + *len; + xdp.data_hard_start = va; + act = bpf_prog_run_xdp(prog, &xdp); switch (act) { case XDP_PASS: + *rx_headroom = xdp.data - xdp.data_hard_start; + *len = xdp.data_end - xdp.data; return false; case XDP_TX: - mlx5e_xmit_xdp_frame(rq, di, MLX5_RX_HEADROOM, len); + if (unlikely(!mlx5e_xmit_xdp_frame(rq, di, &xdp))) + trace_xdp_exception(rq->netdev, prog, act); return true; default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: + trace_xdp_exception(rq->netdev, prog, act); case XDP_DROP: rq->stats.xdp_drop++; mlx5e_page_release(rq, di, true); @@ -743,15 +760,16 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct mlx5e_dma_info *di; struct sk_buff *skb; void *va, *data; + u16 rx_headroom = rq->rx_headroom; bool consumed; di = &rq->dma_info[wqe_counter]; va = page_address(di->page); - data = va + MLX5_RX_HEADROOM; + data = va + rx_headroom; dma_sync_single_range_for_cpu(rq->pdev, di->addr, - MLX5_RX_HEADROOM, + rx_headroom, rq->buff.wqe_sz, DMA_FROM_DEVICE); prefetch(data); @@ -763,8 +781,7 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, } rcu_read_lock(); - consumed = mlx5e_xdp_handle(rq, READ_ONCE(rq->xdp_prog), di, data, - cqe_bcnt); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt); rcu_read_unlock(); if (consumed) return NULL; /* page/packet was consumed by XDP */ @@ -780,7 +797,7 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, page_ref_inc(di->page); mlx5e_page_release(rq, di, true); - skb_reserve(skb, MLX5_RX_HEADROOM); + skb_reserve(skb, rx_headroom); skb_put(skb, cqe_bcnt); return skb; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index ba5db1dd23a9..53e4992d6511 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -39,7 +39,7 @@ #define MLX5E_READ_CTR32_CPU(ptr, dsc, i) \ (*(u32 *)((char *)ptr + dsc[i].offset)) #define MLX5E_READ_CTR32_BE(ptr, dsc, i) \ - be64_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset)) + be32_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset)) #define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld) #define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld) @@ -201,6 +201,12 @@ static const struct counter_desc vport_stats_desc[] = { #define PPORT_2819_GET(pstats, c) \ MLX5_GET64(ppcnt_reg, pstats->RFC_2819_counters, \ counter_set.eth_2819_cntrs_grp_data_layout.c##_high) +#define PPORT_PHY_STATISTICAL_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.phys_layer_statistical_cntrs.c##_high) +#define PPORT_PHY_STATISTICAL_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, (pstats)->phy_statistical_counters, \ + counter_set.phys_layer_statistical_cntrs.c##_high) #define PPORT_PER_PRIO_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.eth_per_prio_grp_data_layout.c##_high) @@ -215,6 +221,7 @@ struct mlx5e_pport_stats { __be64 RFC_2819_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 per_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 phy_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 phy_statistical_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; }; static const struct counter_desc pport_802_3_stats_desc[] = { @@ -260,6 +267,11 @@ static const struct counter_desc 
pport_2819_stats_desc[] = { { "rx_8192_to_10239_bytes_phy", PPORT_2819_OFF(ether_stats_pkts8192to10239octets) }, }; +static const struct counter_desc pport_phy_statistical_stats_desc[] = { + { "rx_symbol_errors_phy", PPORT_PHY_STATISTICAL_OFF(phy_symbol_errors) }, + { "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) }, +}; + static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { { "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) }, { "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) }, @@ -276,6 +288,21 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, }; +#define PCIE_PERF_OFF(c) \ + MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c) +#define PCIE_PERF_GET(pcie_stats, c) \ + MLX5_GET(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \ + counter_set.pcie_perf_cntrs_grp_data_layout.c) + +struct mlx5e_pcie_stats { + __be64 pcie_perf_counters[MLX5_ST_SZ_QW(mpcnt_reg)]; +}; + +static const struct counter_desc pcie_perf_stats_desc[] = { + { "rx_pci_signal_integrity", PCIE_PERF_OFF(rx_errors) }, + { "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) }, +}; + struct mlx5e_rq_stats { u64 packets; u64 bytes; @@ -360,15 +387,23 @@ static const struct counter_desc sq_stats_desc[] = { #define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc) #define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc) #define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc) +#define NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv) \ + (ARRAY_SIZE(pport_phy_statistical_stats_desc) * \ + MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group)) +#define NUM_PCIE_PERF_COUNTERS(priv) \ + (ARRAY_SIZE(pcie_perf_stats_desc) * \ + MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) #define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS \ ARRAY_SIZE(pport_per_prio_traffic_stats_desc) #define NUM_PPORT_PER_PRIO_PFC_COUNTERS \ ARRAY_SIZE(pport_per_prio_pfc_stats_desc) -#define NUM_PPORT_COUNTERS (NUM_PPORT_802_3_COUNTERS + \ +#define NUM_PPORT_COUNTERS(priv) (NUM_PPORT_802_3_COUNTERS + \ NUM_PPORT_2863_COUNTERS + \ NUM_PPORT_2819_COUNTERS + \ + NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv) + \ NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \ NUM_PPORT_PRIO) +#define NUM_PCIE_COUNTERS(priv) NUM_PCIE_PERF_COUNTERS(priv) #define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) @@ -378,6 +413,7 @@ struct mlx5e_stats { struct mlx5e_vport_stats vport; struct mlx5e_pport_stats pport; struct rtnl_link_stats64 vf_vport; + struct mlx5e_pcie_stats pcie; }; static const struct counter_desc mlx5e_pme_status_desc[] = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 2ebbe80d8126..44406a5ec15d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -298,6 +298,32 @@ vxlan_match_offload_err: MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); + } else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_dissector_key_ipv6_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, + f->key); + struct flow_dissector_key_ipv6_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, + f->mask); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + 
src_ipv4_src_ipv6.ipv6_layout.ipv6), + &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6); } /* Enforce DMAC when offloading incoming tunneled flows. @@ -358,12 +384,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, f->key); switch (key->addr_type) { case FLOW_DISSECTOR_KEY_IPV4_ADDRS: + case FLOW_DISSECTOR_KEY_IPV6_ADDRS: if (parse_tunnel_attr(priv, spec, f)) return -EOPNOTSUPP; break; - case FLOW_DISSECTOR_KEY_IPV6_ADDRS: - netdev_warn(priv->netdev, - "IPv6 tunnel decap offload isn't supported\n"); default: return -EOPNOTSUPP; } @@ -460,8 +484,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, FLOW_DISSECTOR_KEY_VLAN, f->mask); if (mask->vlan_id || mask->vlan_priority) { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, vlan_tag, 1); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, vlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id); MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id); @@ -644,15 +668,15 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return 0; } -static inline int cmp_encap_info(struct mlx5_encap_info *a, - struct mlx5_encap_info *b) +static inline int cmp_encap_info(struct ip_tunnel_key *a, + struct ip_tunnel_key *b) { return memcmp(a, b, sizeof(*a)); } -static inline int hash_encap_info(struct mlx5_encap_info *info) +static inline int hash_encap_info(struct ip_tunnel_key *key) { - return jhash(info, sizeof(*info), 0); + return jhash(key, sizeof(*key), 0); } static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, @@ -660,13 +684,11 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, struct net_device **out_dev, struct flowi4 *fl4, struct neighbour **out_n, - __be32 *saddr, int *out_ttl) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct rtable *rt; struct neighbour *n = NULL; - int ttl; #if IS_ENABLED(CONFIG_INET) int ret; @@ -684,16 +706,54 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, else *out_dev = rt->dst.dev; - ttl = ip4_dst_hoplimit(&rt->dst); + *out_ttl = ip4_dst_hoplimit(&rt->dst); n = dst_neigh_lookup(&rt->dst, &fl4->daddr); ip_rt_put(rt); if (!n) return -ENOMEM; *out_n = n; - *saddr = fl4->saddr; - *out_ttl = ttl; + return 0; +} + +static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct net_device **out_dev, + struct flowi6 *fl6, + struct neighbour **out_n, + int *out_ttl) +{ + struct neighbour *n = NULL; + struct dst_entry *dst; + +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int ret; + + dst = ip6_route_output(dev_net(mirred_dev), NULL, fl6); + ret = dst->error; + if (ret) { + dst_release(dst); + return ret; + } + + *out_ttl = ip6_dst_hoplimit(dst); + /* if the egress device isn't on the same HW e-switch, we use the uplink */ 
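
Aside: once the IPv6 route and neighbour are resolved here, the encap path builds the outer Ethernet/IPv6/UDP/VXLAN header with gen_vxlan_header_ipv6(), added just below. A back-of-envelope size check using stand-in structs (the kernel's real headers, with VXLAN_HLEN covering UDP plus VXLAN, give the same 70 bytes):

        #include <stdint.h>
        #include <stdio.h>

        /* Stand-ins for ethhdr / ipv6hdr / udphdr / vxlanhdr. */
        struct ethhdr_m   { uint8_t d[6], s[6]; uint16_t proto; };       /* 14 */
        struct ipv6hdr_m  { uint32_t vtf; uint16_t plen; uint8_t nh, hl;
                            uint8_t saddr[16], daddr[16]; };             /* 40 */
        struct udphdr_m   { uint16_t sport, dport, len, csum; };         /*  8 */
        struct vxlanhdr_m { uint32_t flags, vni; };                      /*  8 */

        int main(void)
        {
                size_t encap = sizeof(struct ethhdr_m) +
                               sizeof(struct ipv6hdr_m) +
                               sizeof(struct udphdr_m) +
                               sizeof(struct vxlanhdr_m);

                printf("ipv6 vxlan encap header: %zu bytes\n", encap); /* 70 */
                return 0;
        }
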
+ if (!switchdev_port_same_parent_id(priv->netdev, dst->dev)) + *out_dev = mlx5_eswitch_get_uplink_netdev(esw); + else + *out_dev = dst->dev; +#else + return -EOPNOTSUPP; +#endif + + n = dst_neigh_lookup(dst, &fl6->daddr); + dst_release(dst); + if (!n) + return -ENOMEM; + + *out_n = n; return 0; } @@ -733,19 +793,52 @@ static int gen_vxlan_header_ipv4(struct net_device *out_dev, return encap_size; } +static int gen_vxlan_header_ipv6(struct net_device *out_dev, + char buf[], + unsigned char h_dest[ETH_ALEN], + int ttl, + struct in6_addr *daddr, + struct in6_addr *saddr, + __be16 udp_dst_port, + __be32 vx_vni) +{ + int encap_size = VXLAN_HLEN + sizeof(struct ipv6hdr) + ETH_HLEN; + struct ethhdr *eth = (struct ethhdr *)buf; + struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr)); + struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr)); + struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); + + memset(buf, 0, encap_size); + + ether_addr_copy(eth->h_dest, h_dest); + ether_addr_copy(eth->h_source, out_dev->dev_addr); + eth->h_proto = htons(ETH_P_IPV6); + + ip6_flow_hdr(ip6h, 0, 0); + /* the HW fills up ipv6 payload len */ + ip6h->nexthdr = IPPROTO_UDP; + ip6h->hop_limit = ttl; + ip6h->daddr = *daddr; + ip6h->saddr = *saddr; + + udp->dest = udp_dst_port; + vxh->vx_flags = VXLAN_HF_VNI; + vxh->vx_vni = vxlan_vni_field(vx_vni); + + return encap_size; +} + static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct mlx5_encap_entry *e, struct net_device **out_dev) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + struct ip_tunnel_key *tun_key = &e->tun_info.key; + int encap_size, ttl, err; struct neighbour *n = NULL; struct flowi4 fl4 = {}; char *encap_header; - int encap_size; - __be32 saddr; - int ttl; - int err; encap_header = kzalloc(max_encap_size, GFP_KERNEL); if (!encap_header) @@ -754,37 +847,108 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, switch (e->tunnel_type) { case MLX5_HEADER_TYPE_VXLAN: fl4.flowi4_proto = IPPROTO_UDP; - fl4.fl4_dport = e->tun_info.tp_dst; + fl4.fl4_dport = tun_key->tp_dst; break; default: err = -EOPNOTSUPP; goto out; } - fl4.daddr = e->tun_info.daddr; + fl4.flowi4_tos = tun_key->tos; + fl4.daddr = tun_key->u.ipv4.dst; + fl4.saddr = tun_key->u.ipv4.src; err = mlx5e_route_lookup_ipv4(priv, mirred_dev, out_dev, - &fl4, &n, &saddr, &ttl); + &fl4, &n, &ttl); if (err) goto out; + if (!(n->nud_state & NUD_VALID)) { + pr_warn("%s: can't offload, neighbour to %pI4 invalid\n", __func__, &fl4.daddr); + err = -EOPNOTSUPP; + goto out; + } + e->n = n; e->out_dev = *out_dev; + neigh_ha_snapshot(e->h_dest, n, *out_dev); + + switch (e->tunnel_type) { + case MLX5_HEADER_TYPE_VXLAN: + encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header, + e->h_dest, ttl, + fl4.daddr, + fl4.saddr, tun_key->tp_dst, + tunnel_id_to_key32(tun_key->tun_id)); + break; + default: + err = -EOPNOTSUPP; + goto out; + } + + err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, + encap_size, encap_header, &e->encap_id); +out: + if (err && n) + neigh_release(n); + kfree(encap_header); + return err; +} + +static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5_encap_entry *e, + struct net_device **out_dev) + +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + struct ip_tunnel_key *tun_key = &e->tun_info.key; + int encap_size, err, ttl = 0; + struct neighbour *n = 
NULL; + struct flowi6 fl6 = {}; + char *encap_header; + + encap_header = kzalloc(max_encap_size, GFP_KERNEL); + if (!encap_header) + return -ENOMEM; + + switch (e->tunnel_type) { + case MLX5_HEADER_TYPE_VXLAN: + fl6.flowi6_proto = IPPROTO_UDP; + fl6.fl6_dport = tun_key->tp_dst; + break; + default: + err = -EOPNOTSUPP; + goto out; + } + + fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); + fl6.daddr = tun_key->u.ipv6.dst; + fl6.saddr = tun_key->u.ipv6.src; + + err = mlx5e_route_lookup_ipv6(priv, mirred_dev, out_dev, + &fl6, &n, &ttl); + if (err) + goto out; + if (!(n->nud_state & NUD_VALID)) { - pr_warn("%s: can't offload, neighbour to %pI4 invalid\n", __func__, &fl4.daddr); + pr_warn("%s: can't offload, neighbour to %pI6 invalid\n", __func__, &fl6.daddr); err = -EOPNOTSUPP; goto out; } + e->n = n; + e->out_dev = *out_dev; + neigh_ha_snapshot(e->h_dest, n, *out_dev); switch (e->tunnel_type) { case MLX5_HEADER_TYPE_VXLAN: - encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header, + encap_size = gen_vxlan_header_ipv6(*out_dev, encap_header, e->h_dest, ttl, - e->tun_info.daddr, - saddr, e->tun_info.tp_dst, - e->tun_info.tun_id); + &fl6.daddr, + &fl6.saddr, tun_key->tp_dst, + tunnel_id_to_key32(tun_key->tun_id)); break; default: err = -EOPNOTSUPP; @@ -808,13 +972,11 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; unsigned short family = ip_tunnel_info_af(tun_info); struct ip_tunnel_key *key = &tun_info->key; - struct mlx5_encap_info info; struct mlx5_encap_entry *e; struct net_device *out_dev; + int tunnel_type, err = -EOPNOTSUPP; uintptr_t hash_key; bool found = false; - int tunnel_type; - int err; /* udp dst port must be set */ if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst))) @@ -830,8 +992,6 @@ vxlan_encap_offload_err: if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) && MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) { - info.tp_dst = key->tp_dst; - info.tun_id = tunnel_id_to_key32(key->tun_id); tunnel_type = MLX5_HEADER_TYPE_VXLAN; } else { netdev_warn(priv->netdev, @@ -839,22 +999,11 @@ vxlan_encap_offload_err: return -EOPNOTSUPP; } - switch (family) { - case AF_INET: - info.daddr = key->u.ipv4.dst; - break; - case AF_INET6: - netdev_warn(priv->netdev, - "IPv6 tunnel encap offload isn't supported\n"); - default: - return -EOPNOTSUPP; - } - - hash_key = hash_encap_info(&info); + hash_key = hash_encap_info(key); hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, encap_hlist, hash_key) { - if (!cmp_encap_info(&e->tun_info, &info)) { + if (!cmp_encap_info(&e->tun_info.key, key)) { found = true; break; } @@ -869,11 +1018,15 @@ vxlan_encap_offload_err: if (!e) return -ENOMEM; - e->tun_info = info; + e->tun_info = *tun_info; e->tunnel_type = tunnel_type; INIT_LIST_HEAD(&e->flows); - err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev); + if (family == AF_INET) + err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev); + else if (family == AF_INET6) + err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e, &out_dev); + if (err) goto out_err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index cfb68371c397..f193128bac4b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -154,6 +154,8 @@ static inline unsigned int mlx5e_calc_min_inline(enum mlx5_inline_modes mode, int hlen; switch (mode) { + case MLX5_INLINE_MODE_NONE: + return 0; case 
MLX5_INLINE_MODE_TCP_UDP: hlen = eth_get_headlen(skb->data, skb_headlen(skb)); if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb)) @@ -283,21 +285,23 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) wi->num_bytes = num_bytes; - if (skb_vlan_tag_present(skb)) { - mlx5e_insert_vlan(eseg->inline_hdr_start, skb, ihs, &skb_data, - &skb_len); - ihs += VLAN_HLEN; - } else { - memcpy(eseg->inline_hdr_start, skb_data, ihs); - mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs); + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; + if (ihs) { + if (skb_vlan_tag_present(skb)) { + mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs, &skb_data, &skb_len); + ihs += VLAN_HLEN; + } else { + memcpy(eseg->inline_hdr.start, skb_data, ihs); + mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs); + } + eseg->inline_hdr.sz = cpu_to_be16(ihs); + ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr.start), MLX5_SEND_WQE_DS); + } else if (skb_vlan_tag_present(skb)) { + eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); + eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb)); } - eseg->inline_hdr_sz = cpu_to_be16(ihs); - - ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; - ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr_start), - MLX5_SEND_WQE_DS); - dseg = (struct mlx5_wqe_data_seg *)cseg + ds_cnt; + dseg = (struct mlx5_wqe_data_seg *)cseg + ds_cnt; wi->num_dma = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 8ffcc8808e50..ea5d8d37a75c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -54,6 +54,7 @@ enum { MLX5_NUM_SPARE_EQE = 0x80, MLX5_NUM_ASYNC_EQE = 0x100, MLX5_NUM_CMD_EQE = 32, + MLX5_NUM_PF_DRAIN = 64, }; enum { @@ -153,6 +154,8 @@ static const char *eqe_type_str(u8 type) return "MLX5_EVENT_TYPE_PAGE_REQUEST"; case MLX5_EVENT_TYPE_PAGE_FAULT: return "MLX5_EVENT_TYPE_PAGE_FAULT"; + case MLX5_EVENT_TYPE_PPS_EVENT: + return "MLX5_EVENT_TYPE_PPS_EVENT"; default: return "Unrecognized event"; } @@ -188,10 +191,193 @@ static void eq_update_ci(struct mlx5_eq *eq, int arm) mb(); } -static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +static void eqe_pf_action(struct work_struct *work) +{ + struct mlx5_pagefault *pfault = container_of(work, + struct mlx5_pagefault, + work); + struct mlx5_eq *eq = pfault->eq; + + mlx5_core_page_fault(eq->dev, pfault); + mempool_free(pfault, eq->pf_ctx.pool); +} + +static void eq_pf_process(struct mlx5_eq *eq) +{ + struct mlx5_core_dev *dev = eq->dev; + struct mlx5_eqe_page_fault *pf_eqe; + struct mlx5_pagefault *pfault; + struct mlx5_eqe *eqe; + int set_ci = 0; + + while ((eqe = next_eqe_sw(eq))) { + pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC); + if (!pfault) { + schedule_work(&eq->pf_ctx.work); + break; + } + + dma_rmb(); + pf_eqe = &eqe->data.page_fault; + pfault->event_subtype = eqe->sub_type; + pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed); + + mlx5_core_dbg(dev, + "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n", + eqe->sub_type, pfault->bytes_committed); + + switch (eqe->sub_type) { + case MLX5_PFAULT_SUBTYPE_RDMA: + /* RDMA based event */ + pfault->type = + be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24; + pfault->token = + be32_to_cpu(pf_eqe->rdma.pftype_token) & + MLX5_24BIT_MASK; + pfault->rdma.r_key = + be32_to_cpu(pf_eqe->rdma.r_key); + pfault->rdma.packet_size = + be16_to_cpu(pf_eqe->rdma.packet_length); + 
pfault->rdma.rdma_op_len = + be32_to_cpu(pf_eqe->rdma.rdma_op_len); + pfault->rdma.rdma_va = + be64_to_cpu(pf_eqe->rdma.rdma_va); + mlx5_core_dbg(dev, + "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n", + pfault->type, pfault->token, + pfault->rdma.r_key); + mlx5_core_dbg(dev, + "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n", + pfault->rdma.rdma_op_len, + pfault->rdma.rdma_va); + break; + + case MLX5_PFAULT_SUBTYPE_WQE: + /* WQE based event */ + pfault->type = + be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24; + pfault->token = + be32_to_cpu(pf_eqe->wqe.token); + pfault->wqe.wq_num = + be32_to_cpu(pf_eqe->wqe.pftype_wq) & + MLX5_24BIT_MASK; + pfault->wqe.wqe_index = + be16_to_cpu(pf_eqe->wqe.wqe_index); + pfault->wqe.packet_size = + be16_to_cpu(pf_eqe->wqe.packet_length); + mlx5_core_dbg(dev, + "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n", + pfault->type, pfault->token, + pfault->wqe.wq_num, + pfault->wqe.wqe_index); + break; + + default: + mlx5_core_warn(dev, + "Unsupported page fault event sub-type: 0x%02hhx\n", + eqe->sub_type); + /* Unsupported page faults should still be + * resolved by the page fault handler + */ + } + + pfault->eq = eq; + INIT_WORK(&pfault->work, eqe_pf_action); + queue_work(eq->pf_ctx.wq, &pfault->work); + + ++eq->cons_index; + ++set_ci; + + if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { + eq_update_ci(eq, 0); + set_ci = 0; + } + } + + eq_update_ci(eq, 1); +} + +static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr) +{ + struct mlx5_eq *eq = eq_ptr; + unsigned long flags; + + if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) { + eq_pf_process(eq); + spin_unlock_irqrestore(&eq->pf_ctx.lock, flags); + } else { + schedule_work(&eq->pf_ctx.work); + } + + return IRQ_HANDLED; +} + +/* mempool_refill() was proposed but unfortunately wasn't accepted + * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html + * Cheap workaround.
+ */ +static void mempool_refill(mempool_t *pool) +{ + while (pool->curr_nr < pool->min_nr) + mempool_free(mempool_alloc(pool, GFP_KERNEL), pool); +} + +static void eq_pf_action(struct work_struct *work) +{ + struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work); + + mempool_refill(eq->pf_ctx.pool); + + spin_lock_irq(&eq->pf_ctx.lock); + eq_pf_process(eq); + spin_unlock_irq(&eq->pf_ctx.lock); +} + +static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name) +{ + spin_lock_init(&pf_ctx->lock); + INIT_WORK(&pf_ctx->work, eq_pf_action); + + pf_ctx->wq = alloc_ordered_workqueue(name, + WQ_MEM_RECLAIM); + if (!pf_ctx->wq) + return -ENOMEM; + + pf_ctx->pool = mempool_create_kmalloc_pool + (MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault)); + if (!pf_ctx->pool) + goto err_wq; + + return 0; +err_wq: + destroy_workqueue(pf_ctx->wq); + return -ENOMEM; +} + +int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token, + u32 wq_num, u8 type, int error) +{ + u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0}; + + MLX5_SET(page_fault_resume_in, in, opcode, + MLX5_CMD_OP_PAGE_FAULT_RESUME); + MLX5_SET(page_fault_resume_in, in, error, !!error); + MLX5_SET(page_fault_resume_in, in, page_fault_type, type); + MLX5_SET(page_fault_resume_in, in, wq_number, wq_num); + MLX5_SET(page_fault_resume_in, in, token, token); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume); +#endif + +static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) { + struct mlx5_eq *eq = eq_ptr; + struct mlx5_core_dev *dev = eq->dev; struct mlx5_eqe *eqe; - int eqes_found = 0; int set_ci = 0; u32 cqn = -1; u32 rsn; @@ -276,12 +462,6 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) } break; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - case MLX5_EVENT_TYPE_PAGE_FAULT: - mlx5_eq_pagefault(dev, eqe); - break; -#endif - #ifdef CONFIG_MLX5_CORE_EN case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: mlx5_eswitch_vport_event(dev->priv.eswitch, eqe); @@ -292,6 +472,10 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) mlx5_port_module_event(dev, eqe); break; + case MLX5_EVENT_TYPE_PPS_EVENT: + if (dev->event) + dev->event(dev, MLX5_DEV_EVENT_PPS, (unsigned long)eqe); + break; default: mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", eqe->type, eq->eqn); @@ -299,7 +483,6 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) } ++eq->cons_index; - eqes_found = 1; ++set_ci; /* The HCA will think the queue has overflowed if we @@ -319,17 +502,6 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) if (cqn != -1) tasklet_schedule(&eq->tasklet_ctx.task); - return eqes_found; -} - -static irqreturn_t mlx5_msix_handler(int irq, void *eq_ptr) -{ - struct mlx5_eq *eq = eq_ptr; - struct mlx5_core_dev *dev = eq->dev; - - mlx5_eq_int(dev, eq); - - /* MSI-X vectors always belong to us */ return IRQ_HANDLED; } @@ -345,22 +517,32 @@ static void init_eq_buf(struct mlx5_eq *eq) } int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, - int nent, u64 mask, const char *name, struct mlx5_uar *uar) + int nent, u64 mask, const char *name, + enum mlx5_eq_type type) { u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; + irq_handler_t handler; __be64 *pas; void *eqc; int inlen; u32 *in; int err; + eq->type = type; eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE); eq->cons_index = 
0; err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf); if (err) return err; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (type == MLX5_EQ_TYPE_PF) + handler = mlx5_eq_pf_int; + else +#endif + handler = mlx5_eq_int; + init_eq_buf(eq); inlen = MLX5_ST_SZ_BYTES(create_eq_in) + @@ -380,7 +562,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry); MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent)); - MLX5_SET(eqc, eqc, uar_page, uar->index); + MLX5_SET(eqc, eqc, uar_page, priv->uar->index); MLX5_SET(eqc, eqc, intr, vecidx); MLX5_SET(eqc, eqc, log_page_size, eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); @@ -395,8 +577,8 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, eq->eqn = MLX5_GET(create_eq_out, out, eq_number); eq->irqn = priv->msix_arr[vecidx].vector; eq->dev = dev; - eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET; - err = request_irq(eq->irqn, mlx5_msix_handler, 0, + eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; + err = request_irq(eq->irqn, handler, 0, priv->irq_info[vecidx].name, eq); if (err) goto err_eq; @@ -405,11 +587,20 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, if (err) goto err_irq; - INIT_LIST_HEAD(&eq->tasklet_ctx.list); - INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); - spin_lock_init(&eq->tasklet_ctx.lock); - tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, - (unsigned long)&eq->tasklet_ctx); +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (type == MLX5_EQ_TYPE_PF) { + err = init_pf_ctx(&eq->pf_ctx, name); + if (err) + goto err_irq; + } else +#endif + { + INIT_LIST_HEAD(&eq->tasklet_ctx.list); + INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); + spin_lock_init(&eq->tasklet_ctx.lock); + tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, + (unsigned long)&eq->tasklet_ctx); + } /* EQs are created in ARMED state */ @@ -444,7 +635,16 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", eq->eqn); synchronize_irq(eq->irqn); - tasklet_disable(&eq->tasklet_ctx.task); + + if (eq->type == MLX5_EQ_TYPE_COMP) { + tasklet_disable(&eq->tasklet_ctx.task); +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + } else if (eq->type == MLX5_EQ_TYPE_PF) { + cancel_work_sync(&eq->pf_ctx.work); + destroy_workqueue(eq->pf_ctx.wq); + mempool_destroy(eq->pf_ctx.pool); +#endif + } mlx5_buf_free(dev, &eq->buf); return err; @@ -479,8 +679,6 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; int err; - if (MLX5_CAP_GEN(dev, pg)) - async_event_mask |= (1ull << MLX5_EVENT_TYPE_PAGE_FAULT); if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH && MLX5_CAP_GEN(dev, vport_group_manager) && @@ -492,9 +690,12 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) else mlx5_core_dbg(dev, "port_module_event is not set\n"); + if (MLX5_CAP_GEN(dev, pps)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT); + err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, - "mlx5_cmd_eq", &dev->priv.uuari.uars[0]); + "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); return err; @@ -504,7 +705,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC, MLX5_NUM_ASYNC_EQE, async_event_mask, - "mlx5_async_eq", 
&dev->priv.uuari.uars[0]); + "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); goto err1; @@ -514,13 +715,33 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) MLX5_EQ_VEC_PAGES, /* TODO: sriov max_vf + */ 1, 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq", - &dev->priv.uuari.uars[0]); + MLX5_EQ_TYPE_ASYNC); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); goto err2; } +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (MLX5_CAP_GEN(dev, pg)) { + err = mlx5_create_map_eq(dev, &table->pfault_eq, + MLX5_EQ_VEC_PFAULT, + MLX5_NUM_ASYNC_EQE, + 1 << MLX5_EVENT_TYPE_PAGE_FAULT, + "mlx5_page_fault_eq", + MLX5_EQ_TYPE_PF); + if (err) { + mlx5_core_warn(dev, "failed to create page fault EQ %d\n", + err); + goto err3; + } + } + return err; +err3: + mlx5_destroy_unmap_eq(dev, &table->pages_eq); +#else + return err; +#endif err2: mlx5_destroy_unmap_eq(dev, &table->async_eq); @@ -536,6 +757,14 @@ int mlx5_stop_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = &dev->priv.eq_table; int err; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (MLX5_CAP_GEN(dev, pg)) { + err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq); + if (err) + return err; + } +#endif + err = mlx5_destroy_unmap_eq(dev, &table->pages_eq); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d0c8bf014453..fcd5bc7e31db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -979,7 +979,7 @@ static int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.first_vid); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); @@ -1098,7 +1098,7 @@ static int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16); MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); @@ -1115,7 +1115,7 @@ static int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, memset(flow_group_in, 0, inlen); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); @@ -1254,7 +1254,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, } if (vport->info.vlan || vport->info.qos) - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, 
outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); if (vport->info.spoofchk) { MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16); @@ -1335,8 +1335,8 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, } /* Allowed vlan rule */ - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag); - MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag); MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan); @@ -1415,7 +1415,7 @@ static void esw_destroy_tsar(struct mlx5_eswitch *esw) } static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, - u32 initial_max_rate) + u32 initial_max_rate, u32 initial_bw_share) { u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; struct mlx5_vport *vport = &esw->vports[vport_num]; @@ -1439,6 +1439,7 @@ static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, esw->qos.root_tsar_id); MLX5_SET(scheduling_context, &sched_ctx, max_average_bw, initial_max_rate); + MLX5_SET(scheduling_context, &sched_ctx, bw_share, initial_bw_share); err = mlx5_create_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH, @@ -1473,7 +1474,7 @@ static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num) } static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num, - u32 max_rate) + u32 max_rate, u32 bw_share) { u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; struct mlx5_vport *vport = &esw->vports[vport_num]; @@ -1497,7 +1498,9 @@ static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num, esw->qos.root_tsar_id); MLX5_SET(scheduling_context, &sched_ctx, max_average_bw, max_rate); + MLX5_SET(scheduling_context, &sched_ctx, bw_share, bw_share); bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; err = mlx5_modify_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH, @@ -1563,7 +1566,8 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, esw_apply_vport_conf(esw, vport); /* Attach vport to the eswitch rate limiter */ - if (esw_vport_enable_qos(esw, vport_num, vport->info.max_rate)) + if (esw_vport_enable_qos(esw, vport_num, vport->info.max_rate, + vport->qos.bw_share)) esw_warn(esw->dev, "Failed to attach vport %d to eswitch rate limiter", vport_num); /* Sync with current vport context */ @@ -1952,6 +1956,7 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, ivi->qos = evport->info.qos; ivi->spoofchk = evport->info.spoofchk; ivi->trusted = evport->info.trusted; + ivi->min_tx_rate = evport->info.min_rate; ivi->max_tx_rate = evport->info.max_rate; mutex_unlock(&esw->state_lock); @@ -2046,23 +2051,103 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, return 0; } -int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, - int vport, u32 max_rate) +static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw) { + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); struct mlx5_vport *evport; + u32 max_guarantee = 0; + int i; + + for (i = 0; i <= esw->total_vports; i++) { + evport = &esw->vports[i]; + if (!evport->enabled || 
evport->info.min_rate < max_guarantee) + continue; + max_guarantee = evport->info.min_rate; + } + + return max_t(u32, max_guarantee / fw_max_bw_share, 1); +} + +static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_vport *evport; + u32 vport_max_rate; + u32 vport_min_rate; + u32 bw_share; + int err; + int i; + + for (i = 0; i <= esw->total_vports; i++) { + evport = &esw->vports[i]; + if (!evport->enabled) + continue; + vport_min_rate = evport->info.min_rate; + vport_max_rate = evport->info.max_rate; + bw_share = MLX5_MIN_BW_SHARE; + + if (vport_min_rate) + bw_share = MLX5_RATE_TO_BW_SHARE(vport_min_rate, + divider, + fw_max_bw_share); + + if (bw_share == evport->qos.bw_share) + continue; + + err = esw_vport_qos_config(esw, i, vport_max_rate, + bw_share); + if (!err) + evport->qos.bw_share = bw_share; + else + return err; + } + + return 0; +} + +int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, + u32 max_rate, u32 min_rate) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + bool min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && + fw_max_bw_share >= MLX5_MIN_BW_SHARE; + bool max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit); + struct mlx5_vport *evport; + u32 previous_min_rate; + u32 divider; int err = 0; if (!ESW_ALLOWED(esw)) return -EPERM; if (!LEGAL_VPORT(esw, vport)) return -EINVAL; + if ((min_rate && !min_rate_supported) || (max_rate && !max_rate_supported)) + return -EOPNOTSUPP; mutex_lock(&esw->state_lock); evport = &esw->vports[vport]; - err = esw_vport_qos_config(esw, vport, max_rate); + + if (min_rate == evport->info.min_rate) + goto set_max_rate; + + previous_min_rate = evport->info.min_rate; + evport->info.min_rate = min_rate; + divider = calculate_vports_min_rate_divider(esw); + err = normalize_vports_min_rate(esw, divider); + if (err) { + evport->info.min_rate = previous_min_rate; + goto unlock; + } + +set_max_rate: + if (max_rate == evport->info.max_rate) + goto unlock; + + err = esw_vport_qos_config(esw, vport, max_rate, evport->qos.bw_share); if (!err) evport->info.max_rate = max_rate; +unlock: mutex_unlock(&esw->state_lock); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 8661dd3f542c..5b78883d5654 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -36,6 +36,7 @@ #include <linux/if_ether.h> #include <linux/if_link.h> #include <net/devlink.h> +#include <net/ip_tunnels.h> #include <linux/mlx5/device.h> #define MLX5_MAX_UC_PER_VPORT(dev) \ @@ -49,6 +50,11 @@ #define FDB_UPLINK_VPORT 0xffff +#define MLX5_MIN_BW_SHARE 1 + +#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ + min_t(u32, max_t(u32, (rate) / (divider), MLX5_MIN_BW_SHARE), limit) + /* L2 -mac address based- hash helpers */ struct l2addr_node { struct hlist_node hlist; @@ -115,6 +121,7 @@ struct mlx5_vport_info { u8 qos; u64 node_guid; int link_state; + u32 min_rate; u32 max_rate; bool spoofchk; bool trusted; @@ -137,6 +144,7 @@ struct mlx5_vport { struct { bool enabled; u32 esw_tsar_ix; + u32 bw_share; } qos; bool enabled; @@ -248,8 +256,8 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, int vport, bool spoofchk); int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, int vport_num, bool setting); -int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, - int vport, u32 
max_rate); +int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, + u32 max_rate, u32 min_rate); int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int vport, struct ifla_vf_info *ivi); int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, @@ -274,18 +282,12 @@ enum { #define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP 0x40 #define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x80 -struct mlx5_encap_info { - __be32 daddr; - __be32 tun_id; - __be16 tp_dst; -}; - struct mlx5_encap_entry { struct hlist_node encap_hlist; struct list_head flows; u32 encap_id; struct neighbour *n; - struct mlx5_encap_info tun_info; + struct ip_tunnel_info tun_info; unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ struct net_device *out_dev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 595f7c7383b3..4f5b0d47d5f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -402,19 +402,18 @@ out: } #define MAX_PF_SQ 256 -#define ESW_OFFLOADS_NUM_ENTRIES (1 << 13) /* 8K */ #define ESW_OFFLOADS_NUM_GROUPS 4 static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int table_size, ix, esw_size, err = 0; struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb = NULL; struct mlx5_flow_group *g; u32 *flow_group_in; void *match_criteria; - int table_size, ix, err = 0; u32 flags = 0; flow_group_in = mlx5_vzalloc(inlen); @@ -428,15 +427,19 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) goto ns_err; } - esw_debug(dev, "Create offloads FDB table, log_max_size(%d)\n", - MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d)*groups(%d))\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size), + MLX5_CAP_GEN(dev, max_flow_counter), ESW_OFFLOADS_NUM_GROUPS); + + esw_size = min_t(int, MLX5_CAP_GEN(dev, max_flow_counter) * ESW_OFFLOADS_NUM_GROUPS, + 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN; fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, - ESW_OFFLOADS_NUM_ENTRIES, + esw_size, ESW_OFFLOADS_NUM_GROUPS, 0, flags); if (IS_ERR(fdb)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index b53fc85a2375..b64a781c7e85 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -473,10 +473,13 @@ int mlx5_encap_alloc(struct mlx5_core_dev *dev, int err; u32 *in; - if (size > MLX5_CAP_ESW(dev, max_encap_header_size)) + if (size > max_encap_size) { + mlx5_core_warn(dev, "encap size %zd too big, max supported is %d\n", + size, max_encap_size); return -EINVAL; + } - in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + max_encap_size, + in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + size, GFP_KERNEL); if (!in) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 6346a8f5883b..ce3d92106386 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1665,7 +1665,7 @@ static int 
create_leaf_prios(struct mlx5_flow_namespace *ns, int prio, #define FLOW_TABLE_BIT_SZ 1 #define GET_FLOW_TABLE_CAP(dev, offset) \ - ((be32_to_cpu(*((__be32 *)(dev->hca_caps_cur[MLX5_CAP_FLOW_TABLE]) + \ + ((be32_to_cpu(*((__be32 *)(dev->caps.hca_cur[MLX5_CAP_FLOW_TABLE]) + \ offset / 32)) >> \ (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ) static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 5718aada6605..d0bbefa08af7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -91,6 +91,20 @@ out: } EXPORT_SYMBOL(mlx5_core_query_vendor_id); +static int mlx5_get_pcam_reg(struct mlx5_core_dev *dev) +{ + return mlx5_query_pcam_reg(dev, dev->caps.pcam, + MLX5_PCAM_FEATURE_ENHANCED_FEATURES, + MLX5_PCAM_REGS_5000_TO_507F); +} + +static int mlx5_get_mcam_reg(struct mlx5_core_dev *dev) +{ + return mlx5_query_mcam_reg(dev, dev->caps.mcam, + MLX5_MCAM_FEATURE_ENHANCED_FEATURES, + MLX5_MCAM_REGS_FIRST_128); +} + int mlx5_query_hca_caps(struct mlx5_core_dev *dev) { int err; @@ -154,6 +168,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, pcam_reg)) + mlx5_get_pcam_reg(dev); + + if (MLX5_CAP_GEN(dev, mcam_reg)) + mlx5_get_mcam_reg(dev); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 5bcf93422ee0..d0515391d33b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -231,21 +231,6 @@ static const char *hsynd_str(u8 synd) } } -static u16 get_maj(u32 fw) -{ - return fw >> 28; -} - -static u16 get_min(u32 fw) -{ - return fw >> 16 & 0xfff; -} - -static u16 get_sub(u32 fw) -{ - return fw & 0xffff; -} - static void print_health_info(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; @@ -263,13 +248,14 @@ static void print_health_info(struct mlx5_core_dev *dev) dev_err(&dev->pdev->dev, "assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr)); dev_err(&dev->pdev->dev, "assert_callra 0x%08x\n", ioread32be(&h->assert_callra)); - fw = ioread32be(&h->fw_ver); - sprintf(fw_str, "%d.%d.%d", get_maj(fw), get_min(fw), get_sub(fw)); + sprintf(fw_str, "%d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev)); dev_err(&dev->pdev->dev, "fw_ver %s\n", fw_str); dev_err(&dev->pdev->dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id)); dev_err(&dev->pdev->dev, "irisc_index %d\n", ioread8(&h->irisc_index)); dev_err(&dev->pdev->dev, "synd 0x%x: %s\n", ioread8(&h->synd), hsynd_str(ioread8(&h->synd))); dev_err(&dev->pdev->dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd)); + fw = ioread32be(&h->fw_ver); + dev_err(&dev->pdev->dev, "raw fw_ver 0x%08x\n", fw); } static unsigned long get_next_poll_jiffies(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 3c315eb8d270..c4242a4e8130 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -152,6 +152,26 @@ static struct mlx5_profile profile[] = { .size = 8, .limit = 4 }, + .mr_cache[16] = { + .size = 8, + .limit = 4 + }, + .mr_cache[17] = { + .size = 8, + .limit = 4 + }, + .mr_cache[18] = { + .size = 8, + .limit = 4 + }, + .mr_cache[19] = { + .size = 4, + .limit = 2 + }, + .mr_cache[20] = { + .size = 4, + .limit = 2 + }, }, }; @@ 
-398,11 +418,11 @@ static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, switch (cap_mode) { case HCA_CAP_OPMOD_GET_MAX: - memcpy(dev->hca_caps_max[cap_type], hca_caps, + memcpy(dev->caps.hca_max[cap_type], hca_caps, MLX5_UN_SZ_BYTES(hca_cap_union)); break; case HCA_CAP_OPMOD_GET_CUR: - memcpy(dev->hca_caps_cur[cap_type], hca_caps, + memcpy(dev->caps.hca_cur[cap_type], hca_caps, MLX5_UN_SZ_BYTES(hca_cap_union)); break; default: @@ -493,7 +513,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); - memcpy(set_hca_cap, dev->hca_caps_cur[MLX5_CAP_GENERAL], + memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_GENERAL], MLX5_ST_SZ_BYTES(cmd_hca_cap)); mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n", @@ -517,8 +537,18 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) /* disable cmdif checksum */ MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0); + /* If the HCA supports 4K UARs use it */ + if (MLX5_CAP_GEN_MAX(dev, uar_4k)) + MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1); + MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12); + if (MLX5_CAP_GEN_MAX(dev, cache_line_128byte)) + MLX5_SET(cmd_hca_cap, + set_hca_cap, + cache_line_128byte, + cache_line_size() == 128 ? 1 : 0); + err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); @@ -739,7 +769,7 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev) snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); err = mlx5_create_map_eq(dev, eq, i + MLX5_EQ_VEC_COMP_BASE, nent, 0, - name, &dev->priv.uuari.uars[0]); + name, MLX5_EQ_TYPE_COMP); if (err) { kfree(eq); goto clean; @@ -899,8 +929,6 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto out; } - MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); - err = mlx5_init_cq_table(dev); if (err) { dev_err(&pdev->dev, "failed to initialize cq table\n"); @@ -1079,8 +1107,8 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_cleanup_once; } - err = mlx5_alloc_uuars(dev, &priv->uuari); - if (err) { + dev->priv.uar = mlx5_get_uars_page(dev); + if (!dev->priv.uar) { dev_err(&pdev->dev, "Failed allocating uar, aborting\n"); goto err_disable_msix; } @@ -1088,7 +1116,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, err = mlx5_start_eqs(dev); if (err) { dev_err(&pdev->dev, "Failed to start pages and async EQs\n"); - goto err_free_uar; + goto err_put_uars; } err = alloc_comp_eqs(dev); @@ -1154,8 +1182,8 @@ err_affinity_hints: err_stop_eqs: mlx5_stop_eqs(dev); -err_free_uar: - mlx5_free_uuars(dev, &priv->uuari); +err_put_uars: + mlx5_put_uars_page(dev, priv->uar); err_disable_msix: mlx5_disable_msix(dev); @@ -1218,7 +1246,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_irq_clear_affinity_hints(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); - mlx5_free_uuars(dev, &priv->uuari); + mlx5_put_uars_page(dev, priv->uar); mlx5_disable_msix(dev); if (cleanup) mlx5_cleanup_once(dev); @@ -1284,10 +1312,24 @@ static int init_one(struct pci_dev *pdev, spin_lock_init(&priv->ctx_lock); mutex_init(&dev->pci_status_mutex); mutex_init(&dev->intf_state_mutex); + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + err = init_srcu_struct(&priv->pfault_srcu); + if (err) { + dev_err(&pdev->dev, "init_srcu_struct failed with error code %d\n", + err); + goto clean_dev; + } +#endif + mutex_init(&priv->bfregs.reg_head.lock); + mutex_init(&priv->bfregs.wc_head.lock); + 
INIT_LIST_HEAD(&priv->bfregs.reg_head.list); + INIT_LIST_HEAD(&priv->bfregs.wc_head.list); + err = mlx5_pci_init(dev, priv); if (err) { dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err); - goto clean_dev; + goto clean_srcu; } err = mlx5_health_init(dev); @@ -1304,9 +1346,7 @@ static int init_one(struct pci_dev *pdev, goto clean_health; } - err = request_module_nowait(MLX5_IB_MOD); - if (err) - pr_info("failed request module on %s\n", MLX5_IB_MOD); + request_module_nowait(MLX5_IB_MOD); err = devlink_register(devlink, &pdev->dev); if (err) @@ -1321,7 +1361,11 @@ clean_health: mlx5_health_cleanup(dev); close_pci: mlx5_pci_close(dev, priv); +clean_srcu: +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + cleanup_srcu_struct(&priv->pfault_srcu); clean_dev: +#endif pci_set_drvdata(pdev, NULL); devlink_free(devlink); @@ -1346,6 +1390,9 @@ static void remove_one(struct pci_dev *pdev) mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); mlx5_pci_close(dev, priv); +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + cleanup_srcu_struct(&priv->pfault_srcu); +#endif pci_set_drvdata(pdev, NULL); devlink_free(devlink); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index d4a99c9757cb..b3dabe6e8836 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -86,6 +86,8 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev); int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); +void mlx5_core_page_fault(struct mlx5_core_dev *dev, + struct mlx5_pagefault *pfault); void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); void mlx5_enter_error_state(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); @@ -111,6 +113,11 @@ u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx); struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); void mlx5_cq_tasklet_cb(unsigned long data); +int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group, + u8 access_reg_group); +int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcap, u8 feature_group, + u8 access_reg_group); + void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_remove(struct mlx5_core_dev *dev); @@ -136,6 +143,11 @@ void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id); bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv); +int mlx5_query_mtpps(struct mlx5_core_dev *dev, u32 *mtpps, u32 mtpps_size); +int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size); +int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode); +int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode); + void mlx5e_init(void); void mlx5e_cleanup(void); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index fd12e0a377a5..141583daf5a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -74,6 +74,30 @@ out: } EXPORT_SYMBOL_GPL(mlx5_core_access_reg); +int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group, + u8 access_reg_group) +{ + u32 in[MLX5_ST_SZ_DW(pcam_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(pcam_reg); + + MLX5_SET(pcam_reg, in, feature_group, feature_group); + MLX5_SET(pcam_reg, in, 
access_reg_group, access_reg_group); + + return mlx5_core_access_reg(dev, in, sz, pcam, sz, MLX5_REG_PCAM, 0, 0); +} + +int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcam, u8 feature_group, + u8 access_reg_group) +{ + u32 in[MLX5_ST_SZ_DW(mcam_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(mcam_reg); + + MLX5_SET(mcam_reg, in, feature_group, feature_group); + MLX5_SET(mcam_reg, in, access_reg_group, access_reg_group); + + return mlx5_core_access_reg(dev, in, sz, mcam, sz, MLX5_REG_MCAM, 0, 0); +} + struct mlx5_reg_pcap { u8 rsvd0; u8 port_num; @@ -866,3 +890,51 @@ void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) module_num, mlx5_pme_status[module_status - 1], mlx5_pme_error[error_type]); } + +int mlx5_query_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size) +{ + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + + return mlx5_core_access_reg(mdev, in, sizeof(in), mtpps, + mtpps_size, MLX5_REG_MTPPS, 0, 0); +} + +int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size) +{ + u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + + return mlx5_core_access_reg(mdev, mtpps, mtpps_size, out, + sizeof(out), MLX5_REG_MTPPS, 0, 1); +} + +int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode) +{ + u32 out[MLX5_ST_SZ_DW(mtppse_reg)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtppse_reg)] = {0}; + int err = 0; + + MLX5_SET(mtppse_reg, in, pin, pin); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MTPPSE, 0, 0); + if (err) + return err; + + *arm = MLX5_GET(mtppse_reg, out, event_arm); + *mode = MLX5_GET(mtppse_reg, out, event_generation_mode); + + return err; +} + +int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode) +{ + u32 out[MLX5_ST_SZ_DW(mtppse_reg)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtppse_reg)] = {0}; + + MLX5_SET(mtppse_reg, in, pin, pin); + MLX5_SET(mtppse_reg, in, event_arm, arm); + MLX5_SET(mtppse_reg, in, event_generation_mode, mode); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MTPPSE, 0, 1); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index d0a4005fe63a..cbbcef2884be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -143,95 +143,6 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) mlx5_core_put_rsc(common); } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) -{ - struct mlx5_eqe_page_fault *pf_eqe = &eqe->data.page_fault; - int qpn = be32_to_cpu(pf_eqe->flags_qpn) & MLX5_QPN_MASK; - struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, qpn); - struct mlx5_core_qp *qp = - container_of(common, struct mlx5_core_qp, common); - struct mlx5_pagefault pfault; - - if (!qp) { - mlx5_core_warn(dev, "ODP event for non-existent QP %06x\n", - qpn); - return; - } - - pfault.event_subtype = eqe->sub_type; - pfault.flags = (be32_to_cpu(pf_eqe->flags_qpn) >> MLX5_QPN_BITS) & - (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE | MLX5_PFAULT_RDMA); - pfault.bytes_committed = be32_to_cpu( - pf_eqe->bytes_committed); - - mlx5_core_dbg(dev, - "PAGE_FAULT: subtype: 0x%02x, flags: 0x%02x,\n", - eqe->sub_type, pfault.flags); - - switch (eqe->sub_type) { - case MLX5_PFAULT_SUBTYPE_RDMA: - /* RDMA based event */ - pfault.rdma.r_key = - be32_to_cpu(pf_eqe->rdma.r_key); - pfault.rdma.packet_size = - be16_to_cpu(pf_eqe->rdma.packet_length); - pfault.rdma.rdma_op_len = -
be32_to_cpu(pf_eqe->rdma.rdma_op_len); - pfault.rdma.rdma_va = - be64_to_cpu(pf_eqe->rdma.rdma_va); - mlx5_core_dbg(dev, - "PAGE_FAULT: qpn: 0x%06x, r_key: 0x%08x,\n", - qpn, pfault.rdma.r_key); - mlx5_core_dbg(dev, - "PAGE_FAULT: rdma_op_len: 0x%08x,\n", - pfault.rdma.rdma_op_len); - mlx5_core_dbg(dev, - "PAGE_FAULT: rdma_va: 0x%016llx,\n", - pfault.rdma.rdma_va); - mlx5_core_dbg(dev, - "PAGE_FAULT: bytes_committed: 0x%06x\n", - pfault.bytes_committed); - break; - - case MLX5_PFAULT_SUBTYPE_WQE: - /* WQE based event */ - pfault.wqe.wqe_index = - be16_to_cpu(pf_eqe->wqe.wqe_index); - pfault.wqe.packet_size = - be16_to_cpu(pf_eqe->wqe.packet_length); - mlx5_core_dbg(dev, - "PAGE_FAULT: qpn: 0x%06x, wqe_index: 0x%04x,\n", - qpn, pfault.wqe.wqe_index); - mlx5_core_dbg(dev, - "PAGE_FAULT: bytes_committed: 0x%06x\n", - pfault.bytes_committed); - break; - - default: - mlx5_core_warn(dev, - "Unsupported page fault event sub-type: 0x%02hhx, QP %06x\n", - eqe->sub_type, qpn); - /* Unsupported page faults should still be resolved by the - * page fault handler - */ - } - - if (qp->pfault_handler) { - qp->pfault_handler(qp, &pfault); - } else { - mlx5_core_err(dev, - "ODP event for QP %08x, without a fault handler in QP\n", - qpn); - /* Page fault will remain unresolved. QP will hang until it is - * destroyed - */ - } - - mlx5_core_put_rsc(common); -} -#endif - static int create_qprqsq_common(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, int rsc_type) @@ -506,31 +417,6 @@ int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn) } EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn, - u8 flags, int error) -{ - u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0}; - - MLX5_SET(page_fault_resume_in, in, opcode, - MLX5_CMD_OP_PAGE_FAULT_RESUME); - MLX5_SET(page_fault_resume_in, in, qpn, qpn); - - if (flags & MLX5_PAGE_FAULT_RESUME_REQUESTOR) - MLX5_SET(page_fault_resume_in, in, req_res, 1); - if (flags & MLX5_PAGE_FAULT_RESUME_WRITE) - MLX5_SET(page_fault_resume_in, in, read_write, 1); - if (flags & MLX5_PAGE_FAULT_RESUME_RDMA) - MLX5_SET(page_fault_resume_in, in, rdma, 1); - if (error) - MLX5_SET(page_fault_resume_in, in, error, 1); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} -EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume); -#endif - int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, struct mlx5_core_qp *rq) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index ab0b896621a0..2e6b0f290ddc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -37,11 +37,6 @@ #include <linux/mlx5/cmd.h> #include "mlx5_core.h" -enum { - NUM_DRIVER_UARS = 4, - NUM_LOW_LAT_UUARS = 4, -}; - int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn) { u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {0}; @@ -67,167 +62,269 @@ int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn) } EXPORT_SYMBOL(mlx5_cmd_free_uar); -static int need_uuar_lock(int uuarn) +static int uars_per_sys_page(struct mlx5_core_dev *mdev) { - int tot_uuars = NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE; - - if (uuarn == 0 || tot_uuars - NUM_LOW_LAT_UUARS) - return 0; + if (MLX5_CAP_GEN(mdev, uar_4k)) + return MLX5_CAP_GEN(mdev, num_of_uars_per_page); return 1; } -int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info 
*uuari) +static u64 uar2pfn(struct mlx5_core_dev *mdev, u32 index) { - int tot_uuars = NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE; - struct mlx5_bf *bf; - phys_addr_t addr; - int err; + u32 system_page_index; + + if (MLX5_CAP_GEN(mdev, uar_4k)) + system_page_index = index >> (PAGE_SHIFT - MLX5_ADAPTER_PAGE_SHIFT); + else + system_page_index = index; + + return (pci_resource_start(mdev->pdev, 0) >> PAGE_SHIFT) + system_page_index; +} + +static void up_rel_func(struct kref *kref) +{ + struct mlx5_uars_page *up = container_of(kref, struct mlx5_uars_page, ref_count); + + list_del(&up->list); + if (mlx5_cmd_free_uar(up->mdev, up->index)) + mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index); + kfree(up->reg_bitmap); + kfree(up->fp_bitmap); + kfree(up); +} + +static struct mlx5_uars_page *alloc_uars_page(struct mlx5_core_dev *mdev, + bool map_wc) +{ + struct mlx5_uars_page *up; + int err = -ENOMEM; + phys_addr_t pfn; + int bfregs; int i; - uuari->num_uars = NUM_DRIVER_UARS; - uuari->num_low_latency_uuars = NUM_LOW_LAT_UUARS; + bfregs = uars_per_sys_page(mdev) * MLX5_BFREGS_PER_UAR; + up = kzalloc(sizeof(*up), GFP_KERNEL); + if (!up) + return ERR_PTR(err); - mutex_init(&uuari->lock); - uuari->uars = kcalloc(uuari->num_uars, sizeof(*uuari->uars), GFP_KERNEL); - if (!uuari->uars) - return -ENOMEM; + up->mdev = mdev; + up->reg_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL); + if (!up->reg_bitmap) + goto error1; - uuari->bfs = kcalloc(tot_uuars, sizeof(*uuari->bfs), GFP_KERNEL); - if (!uuari->bfs) { - err = -ENOMEM; - goto out_uars; - } + up->fp_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL); + if (!up->fp_bitmap) + goto error1; - uuari->bitmap = kcalloc(BITS_TO_LONGS(tot_uuars), sizeof(*uuari->bitmap), - GFP_KERNEL); - if (!uuari->bitmap) { - err = -ENOMEM; - goto out_bfs; - } + for (i = 0; i < bfregs; i++) + if ((i % MLX5_BFREGS_PER_UAR) < MLX5_NON_FP_BFREGS_PER_UAR) + set_bit(i, up->reg_bitmap); + else + set_bit(i, up->fp_bitmap); - uuari->count = kcalloc(tot_uuars, sizeof(*uuari->count), GFP_KERNEL); - if (!uuari->count) { - err = -ENOMEM; - goto out_bitmap; - } + up->bfregs = bfregs; + up->fp_avail = bfregs * MLX5_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR; + up->reg_avail = bfregs * MLX5_NON_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR; - for (i = 0; i < uuari->num_uars; i++) { - err = mlx5_cmd_alloc_uar(dev, &uuari->uars[i].index); - if (err) - goto out_count; + err = mlx5_cmd_alloc_uar(mdev, &up->index); + if (err) { + mlx5_core_warn(mdev, "mlx5_cmd_alloc_uar() failed, %d\n", err); + goto error1; + } - addr = dev->iseg_base + ((phys_addr_t)(uuari->uars[i].index) << PAGE_SHIFT); - uuari->uars[i].map = ioremap(addr, PAGE_SIZE); - if (!uuari->uars[i].map) { - mlx5_cmd_free_uar(dev, uuari->uars[i].index); + pfn = uar2pfn(mdev, up->index); + if (map_wc) { + up->map = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!up->map) { + err = -EAGAIN; + goto error2; + } + } else { + up->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!up->map) { err = -ENOMEM; - goto out_count; + goto error2; } - mlx5_core_dbg(dev, "allocated uar index 0x%x, mmaped at %p\n", - uuari->uars[i].index, uuari->uars[i].map); - } - - for (i = 0; i < tot_uuars; i++) { - bf = &uuari->bfs[i]; - - bf->buf_size = (1 << MLX5_CAP_GEN(dev, log_bf_reg_size)) / 2; - bf->uar = &uuari->uars[i / MLX5_BF_REGS_PER_PAGE]; - bf->regreg = uuari->uars[i / MLX5_BF_REGS_PER_PAGE].map; - bf->reg = NULL; /* Add WC support */ - bf->offset = (i % MLX5_BF_REGS_PER_PAGE) * - (1 << 
MLX5_CAP_GEN(dev, log_bf_reg_size)) + - MLX5_BF_OFFSET; - bf->need_lock = need_uuar_lock(i); - spin_lock_init(&bf->lock); - spin_lock_init(&bf->lock32); - bf->uuarn = i; } + kref_init(&up->ref_count); + mlx5_core_dbg(mdev, "allocated UAR page: index %d, total bfregs %d\n", + up->index, up->bfregs); + return up; + +error2: + if (mlx5_cmd_free_uar(mdev, up->index)) + mlx5_core_warn(mdev, "failed to free uar index %d\n", up->index); +error1: + kfree(up->fp_bitmap); + kfree(up->reg_bitmap); + kfree(up); + return ERR_PTR(err); +} - return 0; - -out_count: - for (i--; i >= 0; i--) { - iounmap(uuari->uars[i].map); - mlx5_cmd_free_uar(dev, uuari->uars[i].index); +struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev) +{ + struct mlx5_uars_page *ret; + + mutex_lock(&mdev->priv.bfregs.reg_head.lock); + if (list_empty(&mdev->priv.bfregs.reg_head.list)) { + ret = alloc_uars_page(mdev, false); + if (IS_ERR(ret)) { + ret = NULL; + goto out; + } + list_add(&ret->list, &mdev->priv.bfregs.reg_head.list); + } else { + ret = list_first_entry(&mdev->priv.bfregs.reg_head.list, + struct mlx5_uars_page, list); + kref_get(&ret->ref_count); } - kfree(uuari->count); +out: + mutex_unlock(&mdev->priv.bfregs.reg_head.lock); -out_bitmap: - kfree(uuari->bitmap); - -out_bfs: - kfree(uuari->bfs); + return ret; +} +EXPORT_SYMBOL(mlx5_get_uars_page); -out_uars: - kfree(uuari->uars); - return err; +void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up) +{ + mutex_lock(&mdev->priv.bfregs.reg_head.lock); + kref_put(&up->ref_count, up_rel_func); + mutex_unlock(&mdev->priv.bfregs.reg_head.lock); } +EXPORT_SYMBOL(mlx5_put_uars_page); -int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari) +static unsigned long map_offset(struct mlx5_core_dev *mdev, int dbi) { - int i = uuari->num_uars; + /* return the offset in bytes from the start of the page to the + * blue flame area of the UAR + */ + return dbi / MLX5_BFREGS_PER_UAR * MLX5_ADAPTER_PAGE_SIZE + + (dbi % MLX5_BFREGS_PER_UAR) * + (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) + MLX5_BF_OFFSET; +} - for (i--; i >= 0; i--) { - iounmap(uuari->uars[i].map); - mlx5_cmd_free_uar(dev, uuari->uars[i].index); +static int alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, + bool map_wc, bool fast_path) +{ + struct mlx5_bfreg_data *bfregs; + struct mlx5_uars_page *up; + struct list_head *head; + unsigned long *bitmap; + unsigned int *avail; + struct mutex *lock; /* pointer to right mutex */ + int dbi; + + bfregs = &mdev->priv.bfregs; + if (map_wc) { + head = &bfregs->wc_head.list; + lock = &bfregs->wc_head.lock; + } else { + head = &bfregs->reg_head.list; + lock = &bfregs->reg_head.lock; } - - kfree(uuari->count); - kfree(uuari->bitmap); - kfree(uuari->bfs); - kfree(uuari->uars); + mutex_lock(lock); + if (list_empty(head)) { + up = alloc_uars_page(mdev, map_wc); + if (IS_ERR(up)) { + mutex_unlock(lock); + return PTR_ERR(up); + } + list_add(&up->list, head); + } else { + up = list_entry(head->next, struct mlx5_uars_page, list); + kref_get(&up->ref_count); + } + if (fast_path) { + bitmap = up->fp_bitmap; + avail = &up->fp_avail; + } else { + bitmap = up->reg_bitmap; + avail = &up->reg_avail; + } + dbi = find_first_bit(bitmap, up->bfregs); + clear_bit(dbi, bitmap); + (*avail)--; + if (!(*avail)) + list_del(&up->list); + + bfreg->map = up->map + map_offset(mdev, dbi); + bfreg->up = up; + bfreg->wc = map_wc; + bfreg->index = up->index + dbi / MLX5_BFREGS_PER_UAR; + mutex_unlock(lock); return 0; } -int 
mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar, - bool map_wc) +int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, + bool map_wc, bool fast_path) { - phys_addr_t pfn; - phys_addr_t uar_bar_start; int err; - err = mlx5_cmd_alloc_uar(mdev, &uar->index); - if (err) { - mlx5_core_warn(mdev, "mlx5_cmd_alloc_uar() failed, %d\n", err); - return err; - } + err = alloc_bfreg(mdev, bfreg, map_wc, fast_path); + if (!err) + return 0; - uar_bar_start = pci_resource_start(mdev->pdev, 0); - pfn = (uar_bar_start >> PAGE_SHIFT) + uar->index; + if (err == -EAGAIN && map_wc) + return alloc_bfreg(mdev, bfreg, false, fast_path); - if (map_wc) { - uar->bf_map = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE); - if (!uar->bf_map) { - mlx5_core_warn(mdev, "ioremap_wc() failed\n"); - uar->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); - if (!uar->map) - goto err_free_uar; - } - } else { - uar->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); - if (!uar->map) - goto err_free_uar; - } + return err; +} +EXPORT_SYMBOL(mlx5_alloc_bfreg); - return 0; +static unsigned int addr_to_dbi_in_syspage(struct mlx5_core_dev *dev, + struct mlx5_uars_page *up, + struct mlx5_sq_bfreg *bfreg) +{ + unsigned int uar_idx; + unsigned int bfreg_idx; + unsigned int bf_reg_size; -err_free_uar: - mlx5_core_warn(mdev, "ioremap() failed\n"); - err = -ENOMEM; - mlx5_cmd_free_uar(mdev, uar->index); + bf_reg_size = 1 << MLX5_CAP_GEN(dev, log_bf_reg_size); - return err; + uar_idx = (bfreg->map - up->map) >> MLX5_ADAPTER_PAGE_SHIFT; + bfreg_idx = (((uintptr_t)bfreg->map % MLX5_ADAPTER_PAGE_SIZE) - MLX5_BF_OFFSET) / bf_reg_size; + + return uar_idx * MLX5_BFREGS_PER_UAR + bfreg_idx; } -EXPORT_SYMBOL(mlx5_alloc_map_uar); -void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar) +void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg) { - if (uar->map) - iounmap(uar->map); - else - iounmap(uar->bf_map); - mlx5_cmd_free_uar(mdev, uar->index); + struct mlx5_bfreg_data *bfregs; + struct mlx5_uars_page *up; + struct mutex *lock; /* pointer to right mutex */ + unsigned int dbi; + bool fp; + unsigned int *avail; + unsigned long *bitmap; + struct list_head *head; + + bfregs = &mdev->priv.bfregs; + if (bfreg->wc) { + head = &bfregs->wc_head.list; + lock = &bfregs->wc_head.lock; + } else { + head = &bfregs->reg_head.list; + lock = &bfregs->reg_head.lock; + } + up = bfreg->up; + dbi = addr_to_dbi_in_syspage(mdev, up, bfreg); + fp = (dbi % MLX5_BFREGS_PER_UAR) >= MLX5_NON_FP_BFREGS_PER_UAR; + if (fp) { + avail = &up->fp_avail; + bitmap = up->fp_bitmap; + } else { + avail = &up->reg_avail; + bitmap = up->reg_bitmap; + } + mutex_lock(lock); + (*avail)++; + set_bit(dbi, bitmap); + if (*avail == 1) + list_add_tail(&up->list, head); + + kref_put(&up->ref_count, up_rel_func); + mutex_unlock(lock); } -EXPORT_SYMBOL(mlx5_unmap_free_uar); +EXPORT_SYMBOL(mlx5_free_bfreg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 7129c30a2ab4..15c2294dd2b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -127,6 +127,23 @@ int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline); +void mlx5_query_min_inline(struct mlx5_core_dev *mdev, + u8 *min_inline_mode) +{ + switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_L2: + *min_inline_mode = MLX5_INLINE_MODE_L2; + break; + case 
MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode); + break; + case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: + *min_inline_mode = MLX5_INLINE_MODE_NONE; + break; + } +} +EXPORT_SYMBOL_GPL(mlx5_query_min_inline); + int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev, u16 vport, u8 min_inline) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index 16f44b9aa076..ef23eaedc2ff 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -73,6 +73,8 @@ config MLXSW_SWITCHX2 config MLXSW_SPECTRUM tristate "Mellanox Technologies Spectrum support" depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q + depends on PSAMPLE || PSAMPLE=n + select PARMAN default m ---help--- This driver supports Mellanox Technologies Spectrum Ethernet diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index fe8dadba15ab..6b6c30deee83 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_MLXSW_CORE) += mlxsw_core.o -mlxsw_core-objs := core.o +mlxsw_core-objs := core.o core_acl_flex_keys.o \ + core_acl_flex_actions.o mlxsw_core-$(CONFIG_MLXSW_CORE_HWMON) += core_hwmon.o mlxsw_core-$(CONFIG_MLXSW_CORE_THERMAL) += core_thermal.o obj-$(CONFIG_MLXSW_PCI) += mlxsw_pci.o @@ -13,7 +14,8 @@ mlxsw_switchx2-objs := switchx2.o obj-$(CONFIG_MLXSW_SPECTRUM) += mlxsw_spectrum.o mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_switchdev.o spectrum_router.o \ - spectrum_kvdl.o + spectrum_kvdl.o spectrum_acl_tcam.o \ + spectrum_acl.o spectrum_flower.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o mlxsw_minimal-objs := minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h index 56e19b0d2f8f..a1b48421648a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h +++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h @@ -1132,12 +1132,12 @@ static inline int mlxsw_cmd_sw2hw_eq(struct mlxsw_core *mlxsw_core, */ MLXSW_ITEM32(cmd_mbox, sw2hw_eq, int_msix, 0x00, 24, 1); -/* cmd_mbox_sw2hw_eq_int_oi +/* cmd_mbox_sw2hw_eq_oi * When set, overrun ignore is enabled. */ MLXSW_ITEM32(cmd_mbox, sw2hw_eq, oi, 0x00, 12, 1); -/* cmd_mbox_sw2hw_eq_int_st +/* cmd_mbox_sw2hw_eq_st * Event delivery state machine * 0x0 - FIRED * 0x1 - ARMED (Request for Notification) @@ -1146,19 +1146,19 @@ MLXSW_ITEM32(cmd_mbox, sw2hw_eq, oi, 0x00, 12, 1); */ MLXSW_ITEM32(cmd_mbox, sw2hw_eq, st, 0x00, 8, 2); -/* cmd_mbox_sw2hw_eq_int_log_eq_size +/* cmd_mbox_sw2hw_eq_log_eq_size * Log (base 2) of the EQ size (in entries). */ MLXSW_ITEM32(cmd_mbox, sw2hw_eq, log_eq_size, 0x00, 0, 4); -/* cmd_mbox_sw2hw_eq_int_producer_counter +/* cmd_mbox_sw2hw_eq_producer_counter * Producer Counter. The counter is incremented for each EQE that is written * by the HW to the EQ. * Maintained by HW (valid for the QUERY_EQ command only) */ MLXSW_ITEM32(cmd_mbox, sw2hw_eq, producer_counter, 0x04, 0, 16); -/* cmd_mbox_sw2hw_eq_int_pa +/* cmd_mbox_sw2hw_eq_pa * Physical Address. 
*/ MLXSW_ITEM64_INDEXED(cmd_mbox, sw2hw_eq, pa, 0x10, 11, 53, 0x08, 0x00, true); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 57a98849551b..a4c07841aaf6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1901,11 +1901,11 @@ int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay) } EXPORT_SYMBOL(mlxsw_core_schedule_dw); -int mlxsw_core_schedule_odw(struct delayed_work *dwork, unsigned long delay) +bool mlxsw_core_schedule_work(struct work_struct *work) { - return queue_delayed_work(mlxsw_owq, dwork, delay); + return queue_work(mlxsw_owq, work); } -EXPORT_SYMBOL(mlxsw_core_schedule_odw); +EXPORT_SYMBOL(mlxsw_core_schedule_work); void mlxsw_core_flush_owq(void) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index a7f94fbc898b..cf38cf9027f8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -207,7 +207,7 @@ enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core, u8 local_port); int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay); -int mlxsw_core_schedule_odw(struct delayed_work *dwork, unsigned long delay); +bool mlxsw_core_schedule_work(struct work_struct *work); void mlxsw_core_flush_owq(void); #define MLXSW_CONFIG_PROFILE_SWID_COUNT 8 diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c new file mode 100644 index 000000000000..5f337715a4da --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -0,0 +1,679 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/rhashtable.h> +#include <linux/list.h> + +#include "item.h" +#include "core_acl_flex_actions.h" + +enum mlxsw_afa_set_type { + MLXSW_AFA_SET_TYPE_NEXT, + MLXSW_AFA_SET_TYPE_GOTO, +}; + +/* afa_set_type + * Type of the record at the end of the action set. + */ +MLXSW_ITEM32(afa, set, type, 0xA0, 28, 4); + +/* afa_set_next_action_set_ptr + * A pointer to the next action set in the KVD Centralized database. + */ +MLXSW_ITEM32(afa, set, next_action_set_ptr, 0xA4, 0, 24); + +/* afa_set_goto_g + * group - When set, the binding is of an ACL group. When cleared, + * the binding is of an ACL. + * Must be set to 1 for Spectrum. + */ +MLXSW_ITEM32(afa, set, goto_g, 0xA4, 29, 1); + +enum mlxsw_afa_set_goto_binding_cmd { + /* continue to the next binding point */ + MLXSW_AFA_SET_GOTO_BINDING_CMD_NONE, + /* jump to the next binding point, no return */ + MLXSW_AFA_SET_GOTO_BINDING_CMD_JUMP, + /* terminate the acl binding */ + MLXSW_AFA_SET_GOTO_BINDING_CMD_TERM = 4, +}; + +/* afa_set_goto_binding_cmd */ +MLXSW_ITEM32(afa, set, goto_binding_cmd, 0xA4, 24, 3); + +/* afa_set_goto_next_binding + * ACL/ACL group identifier. If the g bit is set, this field should hold + * the acl_group_id, else it should hold the acl_id. + */ +MLXSW_ITEM32(afa, set, goto_next_binding, 0xA4, 0, 16); + +/* afa_all_action_type + * Action Type. + */ +MLXSW_ITEM32(afa, all, action_type, 0x00, 24, 6); + +struct mlxsw_afa { + unsigned int max_acts_per_set; + const struct mlxsw_afa_ops *ops; + void *ops_priv; + struct rhashtable set_ht; + struct rhashtable fwd_entry_ht; +}; + +#define MLXSW_AFA_SET_LEN 0xA8 + +struct mlxsw_afa_set_ht_key { + char enc_actions[MLXSW_AFA_SET_LEN]; /* Encoded set */ + bool is_first; +}; + +/* Set structure holds one action set record. It contains up to three + * actions (depends on size of particular actions). The set is either + * put directly to a rule, or it is stored in KVD linear area. + * To prevent duplicate entries in KVD linear area, a hashtable is + * used to track sets that were previously inserted and may be shared. + */ + +struct mlxsw_afa_set { + struct rhash_head ht_node; + struct mlxsw_afa_set_ht_key ht_key; + u32 kvdl_index; + bool shared; /* Inserted in hashtable (doesn't mean that + * kvdl_index is valid). + */ + unsigned int ref_count; + struct mlxsw_afa_set *next; /* Pointer to the next set. */ + struct mlxsw_afa_set *prev; /* Pointer to the previous set, + * note that set may have multiple + * sets from multiple blocks + * pointing at it. This is only + * usable until commit. 
+ */ +}; + +static const struct rhashtable_params mlxsw_afa_set_ht_params = { + .key_len = sizeof(struct mlxsw_afa_set_ht_key), + .key_offset = offsetof(struct mlxsw_afa_set, ht_key), + .head_offset = offsetof(struct mlxsw_afa_set, ht_node), + .automatic_shrinking = true, +}; + +struct mlxsw_afa_fwd_entry_ht_key { + u8 local_port; +}; + +struct mlxsw_afa_fwd_entry { + struct rhash_head ht_node; + struct mlxsw_afa_fwd_entry_ht_key ht_key; + u32 kvdl_index; + unsigned int ref_count; +}; + +static const struct rhashtable_params mlxsw_afa_fwd_entry_ht_params = { + .key_len = sizeof(struct mlxsw_afa_fwd_entry_ht_key), + .key_offset = offsetof(struct mlxsw_afa_fwd_entry, ht_key), + .head_offset = offsetof(struct mlxsw_afa_fwd_entry, ht_node), + .automatic_shrinking = true, +}; + +struct mlxsw_afa *mlxsw_afa_create(unsigned int max_acts_per_set, + const struct mlxsw_afa_ops *ops, + void *ops_priv) +{ + struct mlxsw_afa *mlxsw_afa; + int err; + + mlxsw_afa = kzalloc(sizeof(*mlxsw_afa), GFP_KERNEL); + if (!mlxsw_afa) + return ERR_PTR(-ENOMEM); + err = rhashtable_init(&mlxsw_afa->set_ht, &mlxsw_afa_set_ht_params); + if (err) + goto err_set_rhashtable_init; + err = rhashtable_init(&mlxsw_afa->fwd_entry_ht, + &mlxsw_afa_fwd_entry_ht_params); + if (err) + goto err_fwd_entry_rhashtable_init; + mlxsw_afa->max_acts_per_set = max_acts_per_set; + mlxsw_afa->ops = ops; + mlxsw_afa->ops_priv = ops_priv; + return mlxsw_afa; + +err_fwd_entry_rhashtable_init: + rhashtable_destroy(&mlxsw_afa->set_ht); +err_set_rhashtable_init: + kfree(mlxsw_afa); + return ERR_PTR(err); +} +EXPORT_SYMBOL(mlxsw_afa_create); + +void mlxsw_afa_destroy(struct mlxsw_afa *mlxsw_afa) +{ + rhashtable_destroy(&mlxsw_afa->fwd_entry_ht); + rhashtable_destroy(&mlxsw_afa->set_ht); + kfree(mlxsw_afa); +} +EXPORT_SYMBOL(mlxsw_afa_destroy); + +static void mlxsw_afa_set_goto_set(struct mlxsw_afa_set *set, + enum mlxsw_afa_set_goto_binding_cmd cmd, + u16 group_id) +{ + char *actions = set->ht_key.enc_actions; + + mlxsw_afa_set_type_set(actions, MLXSW_AFA_SET_TYPE_GOTO); + mlxsw_afa_set_goto_g_set(actions, true); + mlxsw_afa_set_goto_binding_cmd_set(actions, cmd); + mlxsw_afa_set_goto_next_binding_set(actions, group_id); +} + +static void mlxsw_afa_set_next_set(struct mlxsw_afa_set *set, + u32 next_set_kvdl_index) +{ + char *actions = set->ht_key.enc_actions; + + mlxsw_afa_set_type_set(actions, MLXSW_AFA_SET_TYPE_NEXT); + mlxsw_afa_set_next_action_set_ptr_set(actions, next_set_kvdl_index); +} + +static struct mlxsw_afa_set *mlxsw_afa_set_create(bool is_first) +{ + struct mlxsw_afa_set *set; + + set = kzalloc(sizeof(*set), GFP_KERNEL); + if (!set) + return NULL; + /* Need to initialize the set to pass by default */ + mlxsw_afa_set_goto_set(set, MLXSW_AFA_SET_GOTO_BINDING_CMD_TERM, 0); + set->ht_key.is_first = is_first; + set->ref_count = 1; + return set; +} + +static void mlxsw_afa_set_destroy(struct mlxsw_afa_set *set) +{ + kfree(set); +} + +static int mlxsw_afa_set_share(struct mlxsw_afa *mlxsw_afa, + struct mlxsw_afa_set *set) +{ + int err; + + err = rhashtable_insert_fast(&mlxsw_afa->set_ht, &set->ht_node, + mlxsw_afa_set_ht_params); + if (err) + return err; + err = mlxsw_afa->ops->kvdl_set_add(mlxsw_afa->ops_priv, + &set->kvdl_index, + set->ht_key.enc_actions, + set->ht_key.is_first); + if (err) + goto err_kvdl_set_add; + set->shared = true; + set->prev = NULL; + return 0; + +err_kvdl_set_add: + rhashtable_remove_fast(&mlxsw_afa->set_ht, &set->ht_node, + mlxsw_afa_set_ht_params); + return err; +} + +static void mlxsw_afa_set_unshare(struct 
mlxsw_afa *mlxsw_afa, + struct mlxsw_afa_set *set) +{ + mlxsw_afa->ops->kvdl_set_del(mlxsw_afa->ops_priv, + set->kvdl_index, + set->ht_key.is_first); + rhashtable_remove_fast(&mlxsw_afa->set_ht, &set->ht_node, + mlxsw_afa_set_ht_params); + set->shared = false; +} + +static void mlxsw_afa_set_put(struct mlxsw_afa *mlxsw_afa, + struct mlxsw_afa_set *set) +{ + if (--set->ref_count) + return; + if (set->shared) + mlxsw_afa_set_unshare(mlxsw_afa, set); + mlxsw_afa_set_destroy(set); +} + +static struct mlxsw_afa_set *mlxsw_afa_set_get(struct mlxsw_afa *mlxsw_afa, + struct mlxsw_afa_set *orig_set) +{ + struct mlxsw_afa_set *set; + int err; + + /* There is a hashtable of sets maintained. If a set with the exact + * same encoding exists, we reuse it. Otherwise, the current set + * is shared by making it available to others using the hash table. + */ + set = rhashtable_lookup_fast(&mlxsw_afa->set_ht, &orig_set->ht_key, + mlxsw_afa_set_ht_params); + if (set) { + set->ref_count++; + mlxsw_afa_set_put(mlxsw_afa, orig_set); + } else { + set = orig_set; + err = mlxsw_afa_set_share(mlxsw_afa, set); + if (err) + return ERR_PTR(err); + } + return set; +} + +/* Block structure holds a list of action sets. One action block + * represents one chain of actions executed upon match of a rule. + */ + +struct mlxsw_afa_block { + struct mlxsw_afa *afa; + bool finished; + struct mlxsw_afa_set *first_set; + struct mlxsw_afa_set *cur_set; + unsigned int cur_act_index; /* In current set. */ + struct list_head fwd_entry_ref_list; +}; + +struct mlxsw_afa_block *mlxsw_afa_block_create(struct mlxsw_afa *mlxsw_afa) +{ + struct mlxsw_afa_block *block; + + block = kzalloc(sizeof(*block), GFP_KERNEL); + if (!block) + return NULL; + INIT_LIST_HEAD(&block->fwd_entry_ref_list); + block->afa = mlxsw_afa; + + /* At least one action set is always present, so just create it here */ + block->first_set = mlxsw_afa_set_create(true); + if (!block->first_set) + goto err_first_set_create; + block->cur_set = block->first_set; + return block; + +err_first_set_create: + kfree(block); + return NULL; +} +EXPORT_SYMBOL(mlxsw_afa_block_create); + +static void mlxsw_afa_fwd_entry_refs_destroy(struct mlxsw_afa_block *block); + +void mlxsw_afa_block_destroy(struct mlxsw_afa_block *block) +{ + struct mlxsw_afa_set *set = block->first_set; + struct mlxsw_afa_set *next_set; + + do { + next_set = set->next; + mlxsw_afa_set_put(block->afa, set); + set = next_set; + } while (set); + mlxsw_afa_fwd_entry_refs_destroy(block); + kfree(block); +} +EXPORT_SYMBOL(mlxsw_afa_block_destroy); + +int mlxsw_afa_block_commit(struct mlxsw_afa_block *block) +{ + struct mlxsw_afa_set *set = block->cur_set; + struct mlxsw_afa_set *prev_set; + + block->cur_set = NULL; + block->finished = true; + + /* Go over all linked sets starting from last + * and try to find existing set in the hash table. + * In case it is not there, assign a KVD linear index + * and insert it. + */ + do { + prev_set = set->prev; + set = mlxsw_afa_set_get(block->afa, set); + if (IS_ERR(set)) + /* No rollback is needed since the chain is + * in consistent state and mlxsw_afa_block_destroy + * will take care of putting it away. 
+ */ + return PTR_ERR(set); + if (prev_set) { + prev_set->next = set; + mlxsw_afa_set_next_set(prev_set, set->kvdl_index); + set = prev_set; + } + } while (prev_set); + + block->first_set = set; + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_commit); + +char *mlxsw_afa_block_first_set(struct mlxsw_afa_block *block) +{ + return block->first_set->ht_key.enc_actions; +} +EXPORT_SYMBOL(mlxsw_afa_block_first_set); + +u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block) +{ + return block->first_set->kvdl_index; +} +EXPORT_SYMBOL(mlxsw_afa_block_first_set_kvdl_index); + +void mlxsw_afa_block_continue(struct mlxsw_afa_block *block) +{ + if (WARN_ON(block->finished)) + return; + mlxsw_afa_set_goto_set(block->cur_set, + MLXSW_AFA_SET_GOTO_BINDING_CMD_NONE, 0); + block->finished = true; +} +EXPORT_SYMBOL(mlxsw_afa_block_continue); + +void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id) +{ + if (WARN_ON(block->finished)) + return; + mlxsw_afa_set_goto_set(block->cur_set, + MLXSW_AFA_SET_GOTO_BINDING_CMD_JUMP, group_id); + block->finished = true; +} +EXPORT_SYMBOL(mlxsw_afa_block_jump); + +static struct mlxsw_afa_fwd_entry * +mlxsw_afa_fwd_entry_create(struct mlxsw_afa *mlxsw_afa, u8 local_port) +{ + struct mlxsw_afa_fwd_entry *fwd_entry; + int err; + + fwd_entry = kzalloc(sizeof(*fwd_entry), GFP_KERNEL); + if (!fwd_entry) + return ERR_PTR(-ENOMEM); + fwd_entry->ht_key.local_port = local_port; + fwd_entry->ref_count = 1; + + err = rhashtable_insert_fast(&mlxsw_afa->fwd_entry_ht, + &fwd_entry->ht_node, + mlxsw_afa_fwd_entry_ht_params); + if (err) + goto err_rhashtable_insert; + + err = mlxsw_afa->ops->kvdl_fwd_entry_add(mlxsw_afa->ops_priv, + &fwd_entry->kvdl_index, + local_port); + if (err) + goto err_kvdl_fwd_entry_add; + return fwd_entry; + +err_kvdl_fwd_entry_add: + rhashtable_remove_fast(&mlxsw_afa->fwd_entry_ht, &fwd_entry->ht_node, + mlxsw_afa_fwd_entry_ht_params); +err_rhashtable_insert: + kfree(fwd_entry); + return ERR_PTR(err); +} + +static void mlxsw_afa_fwd_entry_destroy(struct mlxsw_afa *mlxsw_afa, + struct mlxsw_afa_fwd_entry *fwd_entry) +{ + mlxsw_afa->ops->kvdl_fwd_entry_del(mlxsw_afa->ops_priv, + fwd_entry->kvdl_index); + rhashtable_remove_fast(&mlxsw_afa->fwd_entry_ht, &fwd_entry->ht_node, + mlxsw_afa_fwd_entry_ht_params); + kfree(fwd_entry); +} + +static struct mlxsw_afa_fwd_entry * +mlxsw_afa_fwd_entry_get(struct mlxsw_afa *mlxsw_afa, u8 local_port) +{ + struct mlxsw_afa_fwd_entry_ht_key ht_key = {0}; + struct mlxsw_afa_fwd_entry *fwd_entry; + + ht_key.local_port = local_port; + fwd_entry = rhashtable_lookup_fast(&mlxsw_afa->fwd_entry_ht, &ht_key, + mlxsw_afa_fwd_entry_ht_params); + if (fwd_entry) { + fwd_entry->ref_count++; + return fwd_entry; + } + return mlxsw_afa_fwd_entry_create(mlxsw_afa, local_port); +} + +static void mlxsw_afa_fwd_entry_put(struct mlxsw_afa *mlxsw_afa, + struct mlxsw_afa_fwd_entry *fwd_entry) +{ + if (--fwd_entry->ref_count) + return; + mlxsw_afa_fwd_entry_destroy(mlxsw_afa, fwd_entry); +} + +struct mlxsw_afa_fwd_entry_ref { + struct list_head list; + struct mlxsw_afa_fwd_entry *fwd_entry; +}; + +static struct mlxsw_afa_fwd_entry_ref * +mlxsw_afa_fwd_entry_ref_create(struct mlxsw_afa_block *block, u8 local_port) +{ + struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref; + struct mlxsw_afa_fwd_entry *fwd_entry; + int err; + + fwd_entry_ref = kzalloc(sizeof(*fwd_entry_ref), GFP_KERNEL); + if (!fwd_entry_ref) + return ERR_PTR(-ENOMEM); + fwd_entry = mlxsw_afa_fwd_entry_get(block->afa, local_port); + if (IS_ERR(fwd_entry)) { + err = 
PTR_ERR(fwd_entry); + goto err_fwd_entry_get; + } + fwd_entry_ref->fwd_entry = fwd_entry; + list_add(&fwd_entry_ref->list, &block->fwd_entry_ref_list); + return fwd_entry_ref; + +err_fwd_entry_get: + kfree(fwd_entry_ref); + return ERR_PTR(err); +} + +static void +mlxsw_afa_fwd_entry_ref_destroy(struct mlxsw_afa_block *block, + struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref) +{ + list_del(&fwd_entry_ref->list); + mlxsw_afa_fwd_entry_put(block->afa, fwd_entry_ref->fwd_entry); + kfree(fwd_entry_ref); +} + +static void mlxsw_afa_fwd_entry_refs_destroy(struct mlxsw_afa_block *block) +{ + struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref; + struct mlxsw_afa_fwd_entry_ref *tmp; + + list_for_each_entry_safe(fwd_entry_ref, tmp, + &block->fwd_entry_ref_list, list) + mlxsw_afa_fwd_entry_ref_destroy(block, fwd_entry_ref); +} + +#define MLXSW_AFA_ONE_ACTION_LEN 32 +#define MLXSW_AFA_PAYLOAD_OFFSET 4 + +static char *mlxsw_afa_block_append_action(struct mlxsw_afa_block *block, + u8 action_code, u8 action_size) +{ + char *oneact; + char *actions; + + if (WARN_ON(block->finished)) + return NULL; + if (block->cur_act_index + action_size > + block->afa->max_acts_per_set) { + struct mlxsw_afa_set *set; + + /* The appended action won't fit into the current action set, + * so create a new set. + */ + set = mlxsw_afa_set_create(false); + if (!set) + return NULL; + set->prev = block->cur_set; + block->cur_act_index = 0; + block->cur_set->next = set; + block->cur_set = set; + } + + actions = block->cur_set->ht_key.enc_actions; + oneact = actions + block->cur_act_index * MLXSW_AFA_ONE_ACTION_LEN; + block->cur_act_index += action_size; + mlxsw_afa_all_action_type_set(oneact, action_code); + return oneact + MLXSW_AFA_PAYLOAD_OFFSET; +} + +/* Trap / Discard Action + * --------------------- + * The Trap / Discard action enables trapping / mirroring packets to the CPU + * as well as discarding packets. + * The ACL Trap / Discard separates the forward/discard control from CPU + * trap control. In addition, the Trap / Discard action enables activating + * SPAN (port mirroring). + */ + +#define MLXSW_AFA_TRAPDISC_CODE 0x03 +#define MLXSW_AFA_TRAPDISC_SIZE 1 + +enum mlxsw_afa_trapdisc_forward_action { + MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD = 3, +}; + +/* afa_trapdisc_forward_action + * Forward Action. + */ +MLXSW_ITEM32(afa, trapdisc, forward_action, 0x00, 0, 4); + +static inline void +mlxsw_afa_trapdisc_pack(char *payload, + enum mlxsw_afa_trapdisc_forward_action forward_action) +{ + mlxsw_afa_trapdisc_forward_action_set(payload, forward_action); +} + +int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_TRAPDISC_CODE, + MLXSW_AFA_TRAPDISC_SIZE); + + if (!act) + return -ENOBUFS; + mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_drop); + +/* Forwarding Action + * ----------------- + * Forwarding Action can be used to implement Policy Based Switching (PBS) + * as well as OpenFlow related "Output" action. + */ + +#define MLXSW_AFA_FORWARD_CODE 0x07 +#define MLXSW_AFA_FORWARD_SIZE 1 + +enum mlxsw_afa_forward_type { + /* PBS, Policy Based Switching */ + MLXSW_AFA_FORWARD_TYPE_PBS, + /* Output, OpenFlow output type */ + MLXSW_AFA_FORWARD_TYPE_OUTPUT, +}; + +/* afa_forward_type */ +MLXSW_ITEM32(afa, forward, type, 0x00, 24, 2); + +/* afa_forward_pbs_ptr + * A pointer to the PBS entry configured by PPBS register. + * Reserved when in_port is set. 
+ */ +MLXSW_ITEM32(afa, forward, pbs_ptr, 0x08, 0, 24); + +/* afa_forward_in_port + * Packet is forwarded back to the ingress port. + */ +MLXSW_ITEM32(afa, forward, in_port, 0x0C, 0, 1); + +static inline void +mlxsw_afa_forward_pack(char *payload, enum mlxsw_afa_forward_type type, + u32 pbs_ptr, bool in_port) +{ + mlxsw_afa_forward_type_set(payload, type); + mlxsw_afa_forward_pbs_ptr_set(payload, pbs_ptr); + mlxsw_afa_forward_in_port_set(payload, in_port); +} + +int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, + u8 local_port, bool in_port) +{ + struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref; + u32 kvdl_index; + char *act; + int err; + + if (in_port) + return -EOPNOTSUPP; + fwd_entry_ref = mlxsw_afa_fwd_entry_ref_create(block, local_port); + if (IS_ERR(fwd_entry_ref)) + return PTR_ERR(fwd_entry_ref); + kvdl_index = fwd_entry_ref->fwd_entry->kvdl_index; + + act = mlxsw_afa_block_append_action(block, MLXSW_AFA_FORWARD_CODE, + MLXSW_AFA_FORWARD_SIZE); + if (!act) { + err = -ENOBUFS; + goto err_append_action; + } + mlxsw_afa_forward_pack(act, MLXSW_AFA_FORWARD_TYPE_PBS, + kvdl_index, in_port); + return 0; + +err_append_action: + mlxsw_afa_fwd_entry_ref_destroy(block, fwd_entry_ref); + return err; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_fwd); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h new file mode 100644 index 000000000000..43f78dcfe394 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -0,0 +1,66 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _MLXSW_CORE_ACL_FLEX_ACTIONS_H +#define _MLXSW_CORE_ACL_FLEX_ACTIONS_H + +#include <linux/types.h> + +struct mlxsw_afa; +struct mlxsw_afa_block; + +struct mlxsw_afa_ops { + int (*kvdl_set_add)(void *priv, u32 *p_kvdl_index, + char *enc_actions, bool is_first); + void (*kvdl_set_del)(void *priv, u32 kvdl_index, bool is_first); + int (*kvdl_fwd_entry_add)(void *priv, u32 *p_kvdl_index, u8 local_port); + void (*kvdl_fwd_entry_del)(void *priv, u32 kvdl_index); +}; + +struct mlxsw_afa *mlxsw_afa_create(unsigned int max_acts_per_set, + const struct mlxsw_afa_ops *ops, + void *ops_priv); +void mlxsw_afa_destroy(struct mlxsw_afa *mlxsw_afa); +struct mlxsw_afa_block *mlxsw_afa_block_create(struct mlxsw_afa *mlxsw_afa); +void mlxsw_afa_block_destroy(struct mlxsw_afa_block *block); +int mlxsw_afa_block_commit(struct mlxsw_afa_block *block); +char *mlxsw_afa_block_first_set(struct mlxsw_afa_block *block); +u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block); +void mlxsw_afa_block_continue(struct mlxsw_afa_block *block); +void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); +int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block); +int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, + u8 local_port, bool in_port); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c new file mode 100644 index 000000000000..b32a00972e83 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c @@ -0,0 +1,475 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/list.h> +#include <linux/errno.h> + +#include "item.h" +#include "core_acl_flex_keys.h" + +struct mlxsw_afk { + struct list_head key_info_list; + unsigned int max_blocks; + const struct mlxsw_afk_block *blocks; + unsigned int blocks_count; +}; + +static bool mlxsw_afk_blocks_check(struct mlxsw_afk *mlxsw_afk) +{ + int i; + int j; + + for (i = 0; i < mlxsw_afk->blocks_count; i++) { + const struct mlxsw_afk_block *block = &mlxsw_afk->blocks[i]; + + for (j = 0; j < block->instances_count; j++) { + struct mlxsw_afk_element_inst *elinst; + + elinst = &block->instances[j]; + if (elinst->type != elinst->info->type || + elinst->item.size.bits != + elinst->info->item.size.bits) + return false; + } + } + return true; +} + +struct mlxsw_afk *mlxsw_afk_create(unsigned int max_blocks, + const struct mlxsw_afk_block *blocks, + unsigned int blocks_count) +{ + struct mlxsw_afk *mlxsw_afk; + + mlxsw_afk = kzalloc(sizeof(*mlxsw_afk), GFP_KERNEL); + if (!mlxsw_afk) + return NULL; + INIT_LIST_HEAD(&mlxsw_afk->key_info_list); + mlxsw_afk->max_blocks = max_blocks; + mlxsw_afk->blocks = blocks; + mlxsw_afk->blocks_count = blocks_count; + WARN_ON(!mlxsw_afk_blocks_check(mlxsw_afk)); + return mlxsw_afk; +} +EXPORT_SYMBOL(mlxsw_afk_create); + +void mlxsw_afk_destroy(struct mlxsw_afk *mlxsw_afk) +{ + WARN_ON(!list_empty(&mlxsw_afk->key_info_list)); + kfree(mlxsw_afk); +} +EXPORT_SYMBOL(mlxsw_afk_destroy); + +struct mlxsw_afk_key_info { + struct list_head list; + unsigned int ref_count; + unsigned int blocks_count; + int element_to_block[MLXSW_AFK_ELEMENT_MAX]; /* index is element, value + * is index inside "blocks" + */ + struct mlxsw_afk_element_usage elusage; + const struct mlxsw_afk_block *blocks[0]; +}; + +static bool +mlxsw_afk_key_info_elements_eq(struct mlxsw_afk_key_info *key_info, + struct mlxsw_afk_element_usage *elusage) +{ + return memcmp(&key_info->elusage, elusage, sizeof(*elusage)) == 0; +} + +static struct mlxsw_afk_key_info * +mlxsw_afk_key_info_find(struct mlxsw_afk *mlxsw_afk, + struct mlxsw_afk_element_usage *elusage) +{ + struct mlxsw_afk_key_info *key_info; + + list_for_each_entry(key_info, &mlxsw_afk->key_info_list, list) { + if (mlxsw_afk_key_info_elements_eq(key_info, elusage)) + return key_info; + } + return NULL; +} + +struct mlxsw_afk_picker { + struct { + DECLARE_BITMAP(element, MLXSW_AFK_ELEMENT_MAX); + unsigned int total; + } hits[0]; +}; + +static void mlxsw_afk_picker_count_hits(struct mlxsw_afk *mlxsw_afk, + struct mlxsw_afk_picker *picker, + enum mlxsw_afk_element element) +{ + int i; + int j; + + for (i = 0; i < mlxsw_afk->blocks_count; i++) { + const struct mlxsw_afk_block *block = &mlxsw_afk->blocks[i]; + + for (j = 0; j < block->instances_count; j++) { + struct mlxsw_afk_element_inst *elinst; + + elinst = &block->instances[j]; + if (elinst->info->element == element) { + __set_bit(element, picker->hits[i].element); + picker->hits[i].total++; + } + } + } +} + +static void mlxsw_afk_picker_subtract_hits(struct mlxsw_afk *mlxsw_afk, + struct mlxsw_afk_picker *picker, + int block_index) +{ + DECLARE_BITMAP(hits_element, MLXSW_AFK_ELEMENT_MAX); + int i; + int j; + + memcpy(&hits_element, &picker->hits[block_index].element, + sizeof(hits_element)); + + for (i = 0; i < mlxsw_afk->blocks_count; i++) { + for_each_set_bit(j, hits_element, MLXSW_AFK_ELEMENT_MAX) { + if (__test_and_clear_bit(j, picker->hits[i].element)) + picker->hits[i].total--; + } + } +} + +static int mlxsw_afk_picker_most_hits_get(struct 
mlxsw_afk *mlxsw_afk, + struct mlxsw_afk_picker *picker) +{ + int most_index = -EINVAL; /* Should never be returned */ + int most_hits = 0; + int i; + + for (i = 0; i < mlxsw_afk->blocks_count; i++) { + if (picker->hits[i].total > most_hits) { + most_hits = picker->hits[i].total; + most_index = i; + } + } + return most_index; +} + +static int mlxsw_afk_picker_key_info_add(struct mlxsw_afk *mlxsw_afk, + struct mlxsw_afk_picker *picker, + int block_index, + struct mlxsw_afk_key_info *key_info) +{ + enum mlxsw_afk_element element; + + if (key_info->blocks_count == mlxsw_afk->max_blocks) + return -EINVAL; + + for_each_set_bit(element, picker->hits[block_index].element, + MLXSW_AFK_ELEMENT_MAX) { + key_info->element_to_block[element] = key_info->blocks_count; + mlxsw_afk_element_usage_add(&key_info->elusage, element); + } + + key_info->blocks[key_info->blocks_count] = + &mlxsw_afk->blocks[block_index]; + key_info->blocks_count++; + return 0; +} + +static int mlxsw_afk_picker(struct mlxsw_afk *mlxsw_afk, + struct mlxsw_afk_key_info *key_info, + struct mlxsw_afk_element_usage *elusage) +{ + struct mlxsw_afk_picker *picker; + enum mlxsw_afk_element element; + size_t alloc_size; + int err; + + alloc_size = sizeof(picker->hits[0]) * mlxsw_afk->blocks_count; + picker = kzalloc(alloc_size, GFP_KERNEL); + if (!picker) + return -ENOMEM; + + /* Since the same elements could be present in multiple blocks, + * we must find an optimal block list in order to keep the + * block count as low as possible. + * + * First, we count hits. We go over all available blocks and count + * how many of the requested elements are covered by each. + * + * Then, in a loop, we find the block with the most hits and add it + * to the output key_info. We then subtract this block's hits so + * that the next iteration finds the most suitable block for + * the rest of the requested elements.
+ */ + + mlxsw_afk_element_usage_for_each(element, elusage) + mlxsw_afk_picker_count_hits(mlxsw_afk, picker, element); + + do { + int block_index; + + block_index = mlxsw_afk_picker_most_hits_get(mlxsw_afk, picker); + if (block_index < 0) { + err = block_index; + goto out; + } + err = mlxsw_afk_picker_key_info_add(mlxsw_afk, picker, + block_index, key_info); + if (err) + goto out; + mlxsw_afk_picker_subtract_hits(mlxsw_afk, picker, block_index); + } while (!mlxsw_afk_key_info_elements_eq(key_info, elusage)); + + err = 0; +out: + kfree(picker); + return err; +} + +static struct mlxsw_afk_key_info * +mlxsw_afk_key_info_create(struct mlxsw_afk *mlxsw_afk, + struct mlxsw_afk_element_usage *elusage) +{ + struct mlxsw_afk_key_info *key_info; + size_t alloc_size; + int err; + + alloc_size = sizeof(*key_info) + + sizeof(key_info->blocks[0]) * mlxsw_afk->max_blocks; + key_info = kzalloc(alloc_size, GFP_KERNEL); + if (!key_info) + return ERR_PTR(-ENOMEM); + err = mlxsw_afk_picker(mlxsw_afk, key_info, elusage); + if (err) + goto err_picker; + list_add(&key_info->list, &mlxsw_afk->key_info_list); + key_info->ref_count = 1; + return key_info; + +err_picker: + kfree(key_info); + return ERR_PTR(err); +} + +static void mlxsw_afk_key_info_destroy(struct mlxsw_afk_key_info *key_info) +{ + list_del(&key_info->list); + kfree(key_info); +} + +struct mlxsw_afk_key_info * +mlxsw_afk_key_info_get(struct mlxsw_afk *mlxsw_afk, + struct mlxsw_afk_element_usage *elusage) +{ + struct mlxsw_afk_key_info *key_info; + + key_info = mlxsw_afk_key_info_find(mlxsw_afk, elusage); + if (key_info) { + key_info->ref_count++; + return key_info; + } + return mlxsw_afk_key_info_create(mlxsw_afk, elusage); +} +EXPORT_SYMBOL(mlxsw_afk_key_info_get); + +void mlxsw_afk_key_info_put(struct mlxsw_afk_key_info *key_info) +{ + if (--key_info->ref_count) + return; + mlxsw_afk_key_info_destroy(key_info); +} +EXPORT_SYMBOL(mlxsw_afk_key_info_put); + +bool mlxsw_afk_key_info_subset(struct mlxsw_afk_key_info *key_info, + struct mlxsw_afk_element_usage *elusage) +{ + return mlxsw_afk_element_usage_subset(elusage, &key_info->elusage); +} +EXPORT_SYMBOL(mlxsw_afk_key_info_subset); + +static const struct mlxsw_afk_element_inst * +mlxsw_afk_block_elinst_get(const struct mlxsw_afk_block *block, + enum mlxsw_afk_element element) +{ + int i; + + for (i = 0; i < block->instances_count; i++) { + struct mlxsw_afk_element_inst *elinst; + + elinst = &block->instances[i]; + if (elinst->info->element == element) + return elinst; + } + return NULL; +} + +static const struct mlxsw_afk_element_inst * +mlxsw_afk_key_info_elinst_get(struct mlxsw_afk_key_info *key_info, + enum mlxsw_afk_element element, + int *p_block_index) +{ + const struct mlxsw_afk_element_inst *elinst; + const struct mlxsw_afk_block *block; + int block_index; + + if (WARN_ON(!test_bit(element, key_info->elusage.usage))) + return NULL; + block_index = key_info->element_to_block[element]; + block = key_info->blocks[block_index]; + + elinst = mlxsw_afk_block_elinst_get(block, element); + if (WARN_ON(!elinst)) + return NULL; + + *p_block_index = block_index; + return elinst; +} + +u16 +mlxsw_afk_key_info_block_encoding_get(const struct mlxsw_afk_key_info *key_info, + int block_index) +{ + return key_info->blocks[block_index]->encoding; +} +EXPORT_SYMBOL(mlxsw_afk_key_info_block_encoding_get); + +unsigned int +mlxsw_afk_key_info_blocks_count_get(const struct mlxsw_afk_key_info *key_info) +{ + return key_info->blocks_count; +} +EXPORT_SYMBOL(mlxsw_afk_key_info_blocks_count_get); + +void 
mlxsw_afk_values_add_u32(struct mlxsw_afk_element_values *values, + enum mlxsw_afk_element element, + u32 key_value, u32 mask_value) +{ + const struct mlxsw_afk_element_info *elinfo = + &mlxsw_afk_element_infos[element]; + const struct mlxsw_item *storage_item = &elinfo->item; + + if (!mask_value) + return; + if (WARN_ON(elinfo->type != MLXSW_AFK_ELEMENT_TYPE_U32)) + return; + __mlxsw_item_set32(values->storage.key, storage_item, 0, key_value); + __mlxsw_item_set32(values->storage.mask, storage_item, 0, mask_value); + mlxsw_afk_element_usage_add(&values->elusage, element); +} +EXPORT_SYMBOL(mlxsw_afk_values_add_u32); + +void mlxsw_afk_values_add_buf(struct mlxsw_afk_element_values *values, + enum mlxsw_afk_element element, + const char *key_value, const char *mask_value, + unsigned int len) +{ + const struct mlxsw_afk_element_info *elinfo = + &mlxsw_afk_element_infos[element]; + const struct mlxsw_item *storage_item = &elinfo->item; + + if (!memchr_inv(mask_value, 0, len)) /* If mask is zero */ + return; + if (WARN_ON(elinfo->type != MLXSW_AFK_ELEMENT_TYPE_BUF) || + WARN_ON(elinfo->item.size.bytes != len)) + return; + __mlxsw_item_memcpy_to(values->storage.key, key_value, + storage_item, 0); + __mlxsw_item_memcpy_to(values->storage.mask, mask_value, + storage_item, 0); + mlxsw_afk_element_usage_add(&values->elusage, element); +} +EXPORT_SYMBOL(mlxsw_afk_values_add_buf); + +static void mlxsw_afk_encode_u32(const struct mlxsw_item *storage_item, + const struct mlxsw_item *output_item, + char *storage, char *output_indexed) +{ + u32 value; + + value = __mlxsw_item_get32(storage, storage_item, 0); + __mlxsw_item_set32(output_indexed, output_item, 0, value); +} + +static void mlxsw_afk_encode_buf(const struct mlxsw_item *storage_item, + const struct mlxsw_item *output_item, + char *storage, char *output_indexed) +{ + char *storage_data = __mlxsw_item_data(storage, storage_item, 0); + char *output_data = __mlxsw_item_data(output_indexed, output_item, 0); + size_t len = output_item->size.bytes; + + memcpy(output_data, storage_data, len); +} + +#define MLXSW_AFK_KEY_BLOCK_SIZE 16 + +static void mlxsw_afk_encode_one(const struct mlxsw_afk_element_inst *elinst, + int block_index, char *storage, char *output) +{ + char *output_indexed = output + block_index * MLXSW_AFK_KEY_BLOCK_SIZE; + const struct mlxsw_item *storage_item = &elinst->info->item; + const struct mlxsw_item *output_item = &elinst->item; + + if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_U32) + mlxsw_afk_encode_u32(storage_item, output_item, + storage, output_indexed); + else if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_BUF) + mlxsw_afk_encode_buf(storage_item, output_item, + storage, output_indexed); +} + +void mlxsw_afk_encode(struct mlxsw_afk_key_info *key_info, + struct mlxsw_afk_element_values *values, + char *key, char *mask) +{ + const struct mlxsw_afk_element_inst *elinst; + enum mlxsw_afk_element element; + int block_index; + + mlxsw_afk_element_usage_for_each(element, &values->elusage) { + elinst = mlxsw_afk_key_info_elinst_get(key_info, element, + &block_index); + if (!elinst) + continue; + mlxsw_afk_encode_one(elinst, block_index, + values->storage.key, key); + mlxsw_afk_encode_one(elinst, block_index, + values->storage.mask, mask); + } +} +EXPORT_SYMBOL(mlxsw_afk_encode); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h new file mode 100644 index 000000000000..e4fcba7c2af2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h @@ 
-0,0 +1,238 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_CORE_ACL_FLEX_KEYS_H +#define _MLXSW_CORE_ACL_FLEX_KEYS_H + +#include <linux/types.h> +#include <linux/bitmap.h> + +#include "item.h" + +enum mlxsw_afk_element { + MLXSW_AFK_ELEMENT_SRC_SYS_PORT, + MLXSW_AFK_ELEMENT_DMAC, + MLXSW_AFK_ELEMENT_SMAC, + MLXSW_AFK_ELEMENT_ETHERTYPE, + MLXSW_AFK_ELEMENT_IP_PROTO, + MLXSW_AFK_ELEMENT_SRC_IP4, + MLXSW_AFK_ELEMENT_DST_IP4, + MLXSW_AFK_ELEMENT_SRC_IP6_HI, + MLXSW_AFK_ELEMENT_SRC_IP6_LO, + MLXSW_AFK_ELEMENT_DST_IP6_HI, + MLXSW_AFK_ELEMENT_DST_IP6_LO, + MLXSW_AFK_ELEMENT_DST_L4_PORT, + MLXSW_AFK_ELEMENT_SRC_L4_PORT, + MLXSW_AFK_ELEMENT_MAX, +}; + +enum mlxsw_afk_element_type { + MLXSW_AFK_ELEMENT_TYPE_U32, + MLXSW_AFK_ELEMENT_TYPE_BUF, +}; + +struct mlxsw_afk_element_info { + enum mlxsw_afk_element element; /* element ID */ + enum mlxsw_afk_element_type type; + struct mlxsw_item item; /* element geometry in internal storage */ +}; + +#define MLXSW_AFK_ELEMENT_INFO(_type, _element, _offset, _shift, _size) \ + [MLXSW_AFK_ELEMENT_##_element] = { \ + .element = MLXSW_AFK_ELEMENT_##_element, \ + .type = _type, \ + .item = { \ + .offset = _offset, \ + .shift = _shift, \ + .size = {.bits = _size}, \ + .name = #_element, \ + }, \ + } + +#define MLXSW_AFK_ELEMENT_INFO_U32(_element, _offset, _shift, _size) \ + MLXSW_AFK_ELEMENT_INFO(MLXSW_AFK_ELEMENT_TYPE_U32, \ + _element, _offset, _shift, _size) + +#define MLXSW_AFK_ELEMENT_INFO_BUF(_element, _offset, _size) \ + MLXSW_AFK_ELEMENT_INFO(MLXSW_AFK_ELEMENT_TYPE_BUF, \ + _element, _offset, 0, _size) + +/* For the purpose of the driver, define an internal storage scratchpad + * that will be used to store key/mask values.
For each defined element type + * define an internal storage geometry. + */ +static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { + MLXSW_AFK_ELEMENT_INFO_U32(SRC_SYS_PORT, 0x00, 16, 16), + MLXSW_AFK_ELEMENT_INFO_BUF(DMAC, 0x04, 6), + MLXSW_AFK_ELEMENT_INFO_BUF(SMAC, 0x0A, 6), + MLXSW_AFK_ELEMENT_INFO_U32(ETHERTYPE, 0x00, 0, 16), + MLXSW_AFK_ELEMENT_INFO_U32(IP_PROTO, 0x10, 0, 8), + MLXSW_AFK_ELEMENT_INFO_U32(SRC_IP4, 0x18, 0, 32), + MLXSW_AFK_ELEMENT_INFO_U32(DST_IP4, 0x1C, 0, 32), + MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_HI, 0x18, 8), + MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_LO, 0x20, 8), + MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP6_HI, 0x28, 8), + MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP6_LO, 0x30, 8), + MLXSW_AFK_ELEMENT_INFO_U32(DST_L4_PORT, 0x14, 0, 16), + MLXSW_AFK_ELEMENT_INFO_U32(SRC_L4_PORT, 0x14, 16, 16), +}; + +#define MLXSW_AFK_ELEMENT_STORAGE_SIZE 0x38 + +struct mlxsw_afk_element_inst { /* element instance in actual block */ + const struct mlxsw_afk_element_info *info; + enum mlxsw_afk_element_type type; + struct mlxsw_item item; /* element geometry in block */ +}; + +#define MLXSW_AFK_ELEMENT_INST(_type, _element, _offset, _shift, _size) \ + { \ + .info = &mlxsw_afk_element_infos[MLXSW_AFK_ELEMENT_##_element], \ + .type = _type, \ + .item = { \ + .offset = _offset, \ + .shift = _shift, \ + .size = {.bits = _size}, \ + .name = #_element, \ + }, \ + } + +#define MLXSW_AFK_ELEMENT_INST_U32(_element, _offset, _shift, _size) \ + MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_U32, \ + _element, _offset, _shift, _size) + +#define MLXSW_AFK_ELEMENT_INST_BUF(_element, _offset, _size) \ + MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_BUF, \ + _element, _offset, 0, _size) + +struct mlxsw_afk_block { + u16 encoding; /* block ID */ + struct mlxsw_afk_element_inst *instances; + unsigned int instances_count; +}; + +#define MLXSW_AFK_BLOCK(_encoding, _instances) \ + { \ + .encoding = _encoding, \ + .instances = _instances, \ + .instances_count = ARRAY_SIZE(_instances), \ + } + +struct mlxsw_afk_element_usage { + DECLARE_BITMAP(usage, MLXSW_AFK_ELEMENT_MAX); +}; + +#define mlxsw_afk_element_usage_for_each(element, elusage) \ + for_each_set_bit(element, (elusage)->usage, MLXSW_AFK_ELEMENT_MAX) + +static inline void +mlxsw_afk_element_usage_add(struct mlxsw_afk_element_usage *elusage, + enum mlxsw_afk_element element) +{ + __set_bit(element, elusage->usage); +} + +static inline void +mlxsw_afk_element_usage_zero(struct mlxsw_afk_element_usage *elusage) +{ + bitmap_zero(elusage->usage, MLXSW_AFK_ELEMENT_MAX); +} + +static inline void +mlxsw_afk_element_usage_fill(struct mlxsw_afk_element_usage *elusage, + const enum mlxsw_afk_element *elements, + unsigned int elements_count) +{ + int i; + + mlxsw_afk_element_usage_zero(elusage); + for (i = 0; i < elements_count; i++) + mlxsw_afk_element_usage_add(elusage, elements[i]); +} + +static inline bool +mlxsw_afk_element_usage_subset(struct mlxsw_afk_element_usage *elusage_small, + struct mlxsw_afk_element_usage *elusage_big) +{ + int i; + + for (i = 0; i < MLXSW_AFK_ELEMENT_MAX; i++) + if (test_bit(i, elusage_small->usage) && + !test_bit(i, elusage_big->usage)) + return false; + return true; +} + +struct mlxsw_afk; + +struct mlxsw_afk *mlxsw_afk_create(unsigned int max_blocks, + const struct mlxsw_afk_block *blocks, + unsigned int blocks_count); +void mlxsw_afk_destroy(struct mlxsw_afk *mlxsw_afk); + +struct mlxsw_afk_key_info; + +struct mlxsw_afk_key_info * +mlxsw_afk_key_info_get(struct mlxsw_afk *mlxsw_afk, + struct mlxsw_afk_element_usage *elusage); 
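The get/put pair above and below implements reference-counted sharing of key_info objects across consumers with identical element sets. As a rough usage sketch (not code from this patch; example_key_info_use and the chosen elements are made up for illustration, and linux/err.h is assumed for IS_ERR/PTR_ERR), a consumer would describe the elements it needs with the usage helpers, look the key_info up, and later drop the reference with mlxsw_afk_key_info_put() declared just below:

static const enum mlxsw_afk_element example_elements[] = {
	MLXSW_AFK_ELEMENT_SRC_SYS_PORT,
	MLXSW_AFK_ELEMENT_ETHERTYPE,
	MLXSW_AFK_ELEMENT_SRC_IP4,
};

static int example_key_info_use(struct mlxsw_afk *mlxsw_afk)
{
	struct mlxsw_afk_element_usage elusage;
	struct mlxsw_afk_key_info *key_info;

	/* Describe the needed elements as a usage bitmap. */
	mlxsw_afk_element_usage_fill(&elusage, example_elements,
				     ARRAY_SIZE(example_elements));
	/* Reuses a matching key_info, or builds one via the picker. */
	key_info = mlxsw_afk_key_info_get(mlxsw_afk, &elusage);
	if (IS_ERR(key_info))
		return PTR_ERR(key_info);
	/* ... encode keys/masks with mlxsw_afk_encode() and program
	 * the TCAM region here ...
	 */
	mlxsw_afk_key_info_put(key_info);
	return 0;
}

If no existing key_info covers the same element set, the greedy picker in core_acl_flex_keys.c composes a fresh block list for it.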
+void mlxsw_afk_key_info_put(struct mlxsw_afk_key_info *key_info); +bool mlxsw_afk_key_info_subset(struct mlxsw_afk_key_info *key_info, + struct mlxsw_afk_element_usage *elusage); + +u16 +mlxsw_afk_key_info_block_encoding_get(const struct mlxsw_afk_key_info *key_info, + int block_index); +unsigned int +mlxsw_afk_key_info_blocks_count_get(const struct mlxsw_afk_key_info *key_info); + +struct mlxsw_afk_element_values { + struct mlxsw_afk_element_usage elusage; + struct { + char key[MLXSW_AFK_ELEMENT_STORAGE_SIZE]; + char mask[MLXSW_AFK_ELEMENT_STORAGE_SIZE]; + } storage; +}; + +void mlxsw_afk_values_add_u32(struct mlxsw_afk_element_values *values, + enum mlxsw_afk_element element, + u32 key_value, u32 mask_value); +void mlxsw_afk_values_add_buf(struct mlxsw_afk_element_values *values, + enum mlxsw_afk_element element, + const char *key_value, const char *mask_value, + unsigned int len); +void mlxsw_afk_encode(struct mlxsw_afk_key_info *key_info, + struct mlxsw_afk_element_values *values, + char *key, char *mask); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index e50c8db2602a..12c3a4449120 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -338,7 +338,7 @@ mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num, return -EIO; } - return err > 0 ? 0 : err; + return 0; } /* Routine executes I2C command. */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/item.h b/drivers/net/ethernet/mellanox/mlxsw/item.h index 3c95e3ddd9c2..28427f0758c7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/item.h +++ b/drivers/net/ethernet/mellanox/mlxsw/item.h @@ -1,7 +1,7 @@ /* * drivers/net/ethernet/mellanox/mlxsw/item.h - * Copyright (c) 2015 Mellanox Technologies. All rights reserved. - * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved. 
+ * Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com> * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com> * * Redistribution and use in source and binary forms, with or without @@ -72,6 +72,40 @@ __mlxsw_item_offset(const struct mlxsw_item *item, unsigned short index, typesize); } +static inline u8 __mlxsw_item_get8(const char *buf, + const struct mlxsw_item *item, + unsigned short index) +{ + unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u8)); + u8 *b = (u8 *) buf; + u8 tmp; + + tmp = b[offset]; + tmp >>= item->shift; + tmp &= GENMASK(item->size.bits - 1, 0); + if (item->no_real_shift) + tmp <<= item->shift; + return tmp; +} + +static inline void __mlxsw_item_set8(char *buf, const struct mlxsw_item *item, + unsigned short index, u8 val) +{ + unsigned int offset = __mlxsw_item_offset(item, index, + sizeof(u8)); + u8 *b = (u8 *) buf; + u8 mask = GENMASK(item->size.bits - 1, 0) << item->shift; + u8 tmp; + + if (!item->no_real_shift) + val <<= item->shift; + val &= mask; + tmp = b[offset]; + tmp &= ~mask; + tmp |= val; + b[offset] = tmp; +} + static inline u16 __mlxsw_item_get16(const char *buf, const struct mlxsw_item *item, unsigned short index) @@ -191,6 +225,14 @@ static inline void __mlxsw_item_memcpy_to(char *buf, const char *src, memcpy(&buf[offset], src, item->size.bytes); } +static inline char *__mlxsw_item_data(char *buf, const struct mlxsw_item *item, + unsigned short index) +{ + unsigned int offset = __mlxsw_item_offset(item, index, sizeof(char)); + + return &buf[offset]; +} + static inline u16 __mlxsw_item_bit_array_offset(const struct mlxsw_item *item, u16 index, u8 *shift) @@ -253,6 +295,47 @@ static inline void __mlxsw_item_bit_array_set(char *buf, * _iname: item name within the container */ +#define MLXSW_ITEM8(_type, _cname, _iname, _offset, _shift, _sizebits) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .shift = _shift, \ + .size = {.bits = _sizebits,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline u8 mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf) \ +{ \ + return __mlxsw_item_get8(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \ +} \ +static inline void mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, u8 val)\ +{ \ + __mlxsw_item_set8(buf, &__ITEM_NAME(_type, _cname, _iname), 0, val); \ +} + +#define MLXSW_ITEM8_INDEXED(_type, _cname, _iname, _offset, _shift, _sizebits, \ + _step, _instepoffset, _norealshift) \ +static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ + .offset = _offset, \ + .step = _step, \ + .in_step_offset = _instepoffset, \ + .shift = _shift, \ + .no_real_shift = _norealshift, \ + .size = {.bits = _sizebits,}, \ + .name = #_type "_" #_cname "_" #_iname, \ +}; \ +static inline u8 \ +mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, unsigned short index)\ +{ \ + return __mlxsw_item_get8(buf, &__ITEM_NAME(_type, _cname, _iname), \ + index); \ +} \ +static inline void \ +mlxsw_##_type##_##_cname##_##_iname##_set(char *buf, unsigned short index, \ + u8 val) \ +{ \ + __mlxsw_item_set8(buf, &__ITEM_NAME(_type, _cname, _iname), \ + index, val); \ +} + #define MLXSW_ITEM16(_type, _cname, _iname, _offset, _shift, _sizebits) \ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = { \ .offset = _offset, \ @@ -393,6 +476,11 @@ mlxsw_##_type##_##_cname##_##_iname##_memcpy_to(char *buf, const char *src) \ { \ __mlxsw_item_memcpy_to(buf, src, \ &__ITEM_NAME(_type, _cname, _iname), 0); \ +} \ +static inline char * \ 
+mlxsw_##_type##_##_cname##_##_iname##_data(char *buf) \ +{ \ + return __mlxsw_item_data(buf, &__ITEM_NAME(_type, _cname, _iname), 0); \ } #define MLXSW_ITEM_BUF_INDEXED(_type, _cname, _iname, _offset, _sizebytes, \ @@ -419,6 +507,12 @@ mlxsw_##_type##_##_cname##_##_iname##_memcpy_to(char *buf, \ { \ __mlxsw_item_memcpy_to(buf, src, \ &__ITEM_NAME(_type, _cname, _iname), index); \ +} \ +static inline char * \ +mlxsw_##_type##_##_cname##_##_iname##_data(char *buf, unsigned short index) \ +{ \ + return __mlxsw_item_data(buf, \ + &__ITEM_NAME(_type, _cname, _iname), index); \ } #define MLXSW_ITEM_BIT_ARRAY(_type, _cname, _iname, _offset, _sizebytes, \ diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 1357fe04391b..0899e2d310e2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -1,9 +1,9 @@ /* * drivers/net/ethernet/mellanox/mlxsw/reg.h - * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved. * Copyright (c) 2015-2016 Ido Schimmel <idosch@mellanox.com> * Copyright (c) 2015 Elad Raz <eladr@mellanox.com> - * Copyright (c) 2015-2016 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com> * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com> * * Redistribution and use in source and binary forms, with or without @@ -1757,6 +1757,505 @@ static inline void mlxsw_reg_spvmlr_pack(char *payload, u8 local_port, } } +/* PPBT - Policy-Engine Port Binding Table + * --------------------------------------- + * This register is used for configuration of the Port Binding Table. + */ +#define MLXSW_REG_PPBT_ID 0x3002 +#define MLXSW_REG_PPBT_LEN 0x14 + +MLXSW_REG_DEFINE(ppbt, MLXSW_REG_PPBT_ID, MLXSW_REG_PPBT_LEN); + +enum mlxsw_reg_pxbt_e { + MLXSW_REG_PXBT_E_IACL, + MLXSW_REG_PXBT_E_EACL, +}; + +/* reg_ppbt_e + * Access: Index + */ +MLXSW_ITEM32(reg, ppbt, e, 0x00, 31, 1); + +enum mlxsw_reg_pxbt_op { + MLXSW_REG_PXBT_OP_BIND, + MLXSW_REG_PXBT_OP_UNBIND, +}; + +/* reg_ppbt_op + * Access: RW + */ +MLXSW_ITEM32(reg, ppbt, op, 0x00, 28, 3); + +/* reg_ppbt_local_port + * Local port. Not including CPU port. + * Access: Index + */ +MLXSW_ITEM32(reg, ppbt, local_port, 0x00, 16, 8); + +/* reg_ppbt_g + * group - When set, the binding is of an ACL group. When cleared, + * the binding is of an ACL. + * Must be set to 1 for Spectrum. + * Access: RW + */ +MLXSW_ITEM32(reg, ppbt, g, 0x10, 31, 1); + +/* reg_ppbt_acl_info + * ACL/ACL group identifier. If the g bit is set, this field should hold + * the acl_group_id, else it should hold the acl_id. + * Access: RW + */ +MLXSW_ITEM32(reg, ppbt, acl_info, 0x10, 0, 16); + +static inline void mlxsw_reg_ppbt_pack(char *payload, enum mlxsw_reg_pxbt_e e, + enum mlxsw_reg_pxbt_op op, + u8 local_port, u16 acl_info) +{ + MLXSW_REG_ZERO(ppbt, payload); + mlxsw_reg_ppbt_e_set(payload, e); + mlxsw_reg_ppbt_op_set(payload, op); + mlxsw_reg_ppbt_local_port_set(payload, local_port); + mlxsw_reg_ppbt_g_set(payload, true); + mlxsw_reg_ppbt_acl_info_set(payload, acl_info); +} + +/* PACL - Policy-Engine ACL Register + * --------------------------------- + * This register is used for configuration of the ACL. + */ +#define MLXSW_REG_PACL_ID 0x3004 +#define MLXSW_REG_PACL_LEN 0x70 + +MLXSW_REG_DEFINE(pacl, MLXSW_REG_PACL_ID, MLXSW_REG_PACL_LEN); + +/* reg_pacl_v + * Valid. Setting the v bit makes the ACL valid. 
It should not be cleared + * while the ACL is bound to either a port, a VLAN or an ACL rule. + * Access: RW + */ +MLXSW_ITEM32(reg, pacl, v, 0x00, 24, 1); + +/* reg_pacl_acl_id + * An identifier representing the ACL (managed by software). + * Range 0 .. cap_max_acl_regions - 1 + * Access: Index + */ +MLXSW_ITEM32(reg, pacl, acl_id, 0x08, 0, 16); + +#define MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN 16 + +/* reg_pacl_tcam_region_info + * Opaque object that represents a TCAM region. + * Obtained through the PTAR register. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, pacl, tcam_region_info, 0x30, + MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN); + +static inline void mlxsw_reg_pacl_pack(char *payload, u16 acl_id, + bool valid, const char *tcam_region_info) +{ + MLXSW_REG_ZERO(pacl, payload); + mlxsw_reg_pacl_acl_id_set(payload, acl_id); + mlxsw_reg_pacl_v_set(payload, valid); + mlxsw_reg_pacl_tcam_region_info_memcpy_to(payload, tcam_region_info); +} + +/* PAGT - Policy-Engine ACL Group Table + * ------------------------------------ + * This register is used for configuration of the ACL Group Table. + */ +#define MLXSW_REG_PAGT_ID 0x3005 +#define MLXSW_REG_PAGT_BASE_LEN 0x30 +#define MLXSW_REG_PAGT_ACL_LEN 4 +#define MLXSW_REG_PAGT_ACL_MAX_NUM 16 +#define MLXSW_REG_PAGT_LEN (MLXSW_REG_PAGT_BASE_LEN + \ + MLXSW_REG_PAGT_ACL_MAX_NUM * MLXSW_REG_PAGT_ACL_LEN) + +MLXSW_REG_DEFINE(pagt, MLXSW_REG_PAGT_ID, MLXSW_REG_PAGT_LEN); + +/* reg_pagt_size + * Number of ACLs in the group. + * Size 0 invalidates a group. + * Range 0 .. cap_max_acl_group_size (hard-coded to 16 for now) + * Total number of ACLs in all groups must be less than or equal + * to cap_max_acl_tot_groups + * Note: a group which is bound must not be invalidated + * Access: Index + */ +MLXSW_ITEM32(reg, pagt, size, 0x00, 0, 8); + +/* reg_pagt_acl_group_id + * An identifier (numbered from 0..cap_max_acl_groups-1) representing + * the ACL Group identifier (managed by software). + * Access: Index + */ +MLXSW_ITEM32(reg, pagt, acl_group_id, 0x08, 0, 16); + +/* reg_pagt_acl_id + * ACL identifier + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, pagt, acl_id, 0x30, 0, 16, 0x04, 0x00, false); + +static inline void mlxsw_reg_pagt_pack(char *payload, u16 acl_group_id) +{ + MLXSW_REG_ZERO(pagt, payload); + mlxsw_reg_pagt_acl_group_id_set(payload, acl_group_id); +} + +static inline void mlxsw_reg_pagt_acl_id_pack(char *payload, int index, + u16 acl_id) +{ + u8 size = mlxsw_reg_pagt_size_get(payload); + + if (index >= size) + mlxsw_reg_pagt_size_set(payload, index + 1); + mlxsw_reg_pagt_acl_id_set(payload, index, acl_id); +} +
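To make the PAGT flow concrete, here is a minimal sketch (not part of the patch; example_pagt_bind_two and all of the ID values are hypothetical) of how a caller could populate a group of two ACLs with the pack helpers above. Note how mlxsw_reg_pagt_acl_id_pack() grows the size field as higher indexes are packed:

static int example_pagt_bind_two(struct mlxsw_core *mlxsw_core,
				 u16 acl_group_id, u16 acl_id0, u16 acl_id1)
{
	char pagt_pl[MLXSW_REG_PAGT_LEN];

	mlxsw_reg_pagt_pack(pagt_pl, acl_group_id);	 /* size starts at 0 */
	mlxsw_reg_pagt_acl_id_pack(pagt_pl, 0, acl_id0); /* size becomes 1 */
	mlxsw_reg_pagt_acl_id_pack(pagt_pl, 1, acl_id1); /* size becomes 2 */
	return mlxsw_reg_write(mlxsw_core, MLXSW_REG(pagt), pagt_pl);
}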
+/* PTAR - Policy-Engine TCAM Allocation Register + * --------------------------------------------- + * This register is used for allocation of regions in the TCAM. + * Note: Query method is not supported on this register. + */ +#define MLXSW_REG_PTAR_ID 0x3006 +#define MLXSW_REG_PTAR_BASE_LEN 0x20 +#define MLXSW_REG_PTAR_KEY_ID_LEN 1 +#define MLXSW_REG_PTAR_KEY_ID_MAX_NUM 16 +#define MLXSW_REG_PTAR_LEN (MLXSW_REG_PTAR_BASE_LEN + \ + MLXSW_REG_PTAR_KEY_ID_MAX_NUM * MLXSW_REG_PTAR_KEY_ID_LEN) + +MLXSW_REG_DEFINE(ptar, MLXSW_REG_PTAR_ID, MLXSW_REG_PTAR_LEN); + +enum mlxsw_reg_ptar_op { + /* allocate a TCAM region */ + MLXSW_REG_PTAR_OP_ALLOC, + /* resize a TCAM region */ + MLXSW_REG_PTAR_OP_RESIZE, + /* deallocate TCAM region */ + MLXSW_REG_PTAR_OP_FREE, + /* test allocation */ + MLXSW_REG_PTAR_OP_TEST, +}; + +/* reg_ptar_op + * Access: OP + */ +MLXSW_ITEM32(reg, ptar, op, 0x00, 28, 4); + +/* reg_ptar_action_set_type + * Type of action set to be used on this region. + * For Spectrum, this is always type 2 - "flexible" + * Access: WO + */ +MLXSW_ITEM32(reg, ptar, action_set_type, 0x00, 16, 8); + +/* reg_ptar_key_type + * TCAM key type for the region. + * For Spectrum, this is always type 0x50 - "FLEX_KEY" + * Access: WO + */ +MLXSW_ITEM32(reg, ptar, key_type, 0x00, 0, 8); + +/* reg_ptar_region_size + * TCAM region size. When allocating/resizing, this is the requested size; + * the response is the actual size. Note that the actual size may be + * larger than requested. + * Allowed range 1 .. cap_max_rules-1 + * Reserved during op deallocate. + * Access: WO + */ +MLXSW_ITEM32(reg, ptar, region_size, 0x04, 0, 16); + +/* reg_ptar_region_id + * Region identifier + * Range 0 .. cap_max_regions-1 + * Access: Index + */ +MLXSW_ITEM32(reg, ptar, region_id, 0x08, 0, 16); + +/* reg_ptar_tcam_region_info + * Opaque object that represents the TCAM region. + * Returned when allocating a region. + * Provided by software for ACL generation and region deallocation and resize. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, ptar, tcam_region_info, 0x10, + MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN); + +/* reg_ptar_flexible_key_id + * Identifier of the Flexible Key. + * Only valid if key_type == "FLEX_KEY" + * The key size will be rounded up to one of the following values: + * 9B, 18B, 36B, 54B. + * This field is reserved during the resize operation. + * Access: WO + */ +MLXSW_ITEM8_INDEXED(reg, ptar, flexible_key_id, 0x20, 0, 8, + MLXSW_REG_PTAR_KEY_ID_LEN, 0x00, false); + +static inline void mlxsw_reg_ptar_pack(char *payload, enum mlxsw_reg_ptar_op op, + u16 region_size, u16 region_id, + const char *tcam_region_info) +{ + MLXSW_REG_ZERO(ptar, payload); + mlxsw_reg_ptar_op_set(payload, op); + mlxsw_reg_ptar_action_set_type_set(payload, 2); /* "flexible" */ + mlxsw_reg_ptar_key_type_set(payload, 0x50); /* "FLEX_KEY" */ + mlxsw_reg_ptar_region_size_set(payload, region_size); + mlxsw_reg_ptar_region_id_set(payload, region_id); + mlxsw_reg_ptar_tcam_region_info_memcpy_to(payload, tcam_region_info); +} + +static inline void mlxsw_reg_ptar_key_id_pack(char *payload, int index, + u16 key_id) +{ + mlxsw_reg_ptar_flexible_key_id_set(payload, index, key_id); +} + +static inline void mlxsw_reg_ptar_unpack(char *payload, char *tcam_region_info) +{ + mlxsw_reg_ptar_tcam_region_info_memcpy_from(payload, tcam_region_info); +} + +/* PPBS - Policy-Engine Policy Based Switching Register + * ---------------------------------------------------- + * This register retrieves and sets Policy Based Switching Table entries. + */ +#define MLXSW_REG_PPBS_ID 0x300C +#define MLXSW_REG_PPBS_LEN 0x14 + +MLXSW_REG_DEFINE(ppbs, MLXSW_REG_PPBS_ID, MLXSW_REG_PPBS_LEN); + +/* reg_ppbs_pbs_ptr + * Index into the PBS table. + * For Spectrum, the index points to the KVD Linear.
+ * Access: Index + */ +MLXSW_ITEM32(reg, ppbs, pbs_ptr, 0x08, 0, 24); + +/* reg_ppbs_system_port + * Unique port identifier for the final destination of the packet. + * Access: RW + */ +MLXSW_ITEM32(reg, ppbs, system_port, 0x10, 0, 16); + +static inline void mlxsw_reg_ppbs_pack(char *payload, u32 pbs_ptr, + u16 system_port) +{ + MLXSW_REG_ZERO(ppbs, payload); + mlxsw_reg_ppbs_pbs_ptr_set(payload, pbs_ptr); + mlxsw_reg_ppbs_system_port_set(payload, system_port); +} + +/* PRCR - Policy-Engine Rules Copy Register + * ---------------------------------------- + * This register is used for accessing rules within a TCAM region. + */ +#define MLXSW_REG_PRCR_ID 0x300D +#define MLXSW_REG_PRCR_LEN 0x40 + +MLXSW_REG_DEFINE(prcr, MLXSW_REG_PRCR_ID, MLXSW_REG_PRCR_LEN); + +enum mlxsw_reg_prcr_op { + /* Move rules. Moves the rules from "tcam_region_info" starting + * at offset "offset" to "dest_tcam_region_info" + * at offset "dest_offset". + */ + MLXSW_REG_PRCR_OP_MOVE, + /* Copy rules. Copies the rules from "tcam_region_info" starting + * at offset "offset" to "dest_tcam_region_info" + * at offset "dest_offset". + */ + MLXSW_REG_PRCR_OP_COPY, +}; + +/* reg_prcr_op + * Access: OP + */ +MLXSW_ITEM32(reg, prcr, op, 0x00, 28, 4); + +/* reg_prcr_offset + * Offset within the source region to copy/move from. + * Access: Index + */ +MLXSW_ITEM32(reg, prcr, offset, 0x00, 0, 16); + +/* reg_prcr_size + * The number of rules to copy/move. + * Access: WO + */ +MLXSW_ITEM32(reg, prcr, size, 0x04, 0, 16); + +/* reg_prcr_tcam_region_info + * Opaque object that represents the source TCAM region. + * Access: Index + */ +MLXSW_ITEM_BUF(reg, prcr, tcam_region_info, 0x10, + MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN); + +/* reg_prcr_dest_offset + * Offset within the destination region to copy/move to. + * Access: Index + */ +MLXSW_ITEM32(reg, prcr, dest_offset, 0x20, 0, 16); + +/* reg_prcr_dest_tcam_region_info + * Opaque object that represents the destination TCAM region. + * Access: Index + */ +MLXSW_ITEM_BUF(reg, prcr, dest_tcam_region_info, 0x30, + MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN); + +static inline void mlxsw_reg_prcr_pack(char *payload, enum mlxsw_reg_prcr_op op, + const char *src_tcam_region_info, + u16 src_offset, + const char *dest_tcam_region_info, + u16 dest_offset, u16 size) +{ + MLXSW_REG_ZERO(prcr, payload); + mlxsw_reg_prcr_op_set(payload, op); + mlxsw_reg_prcr_offset_set(payload, src_offset); + mlxsw_reg_prcr_size_set(payload, size); + mlxsw_reg_prcr_tcam_region_info_memcpy_to(payload, + src_tcam_region_info); + mlxsw_reg_prcr_dest_offset_set(payload, dest_offset); + mlxsw_reg_prcr_dest_tcam_region_info_memcpy_to(payload, + dest_tcam_region_info); +} + +/* PEFA - Policy-Engine Extended Flexible Action Register + * ------------------------------------------------------ + * This register is used for accessing an extended flexible action entry + * in the central KVD Linear Database. + */ +#define MLXSW_REG_PEFA_ID 0x300F +#define MLXSW_REG_PEFA_LEN 0xB0 + +MLXSW_REG_DEFINE(pefa, MLXSW_REG_PEFA_ID, MLXSW_REG_PEFA_LEN); + +/* reg_pefa_index + * Index in the KVD Linear Centralized Database. + * Access: Index + */ +MLXSW_ITEM32(reg, pefa, index, 0x00, 0, 24); + +#define MLXSW_REG_PXXX_FLEX_ACTION_SET_LEN 0xA8 + +/* reg_pefa_flex_action_set + * Action set to perform when the rule is matched. + * Must be zero-padded if the action set is shorter.
+ * Access: RW + */ +MLXSW_ITEM_BUF(reg, pefa, flex_action_set, 0x08, + MLXSW_REG_PXXX_FLEX_ACTION_SET_LEN); + +static inline void mlxsw_reg_pefa_pack(char *payload, u32 index, + const char *flex_action_set) +{ + MLXSW_REG_ZERO(pefa, payload); + mlxsw_reg_pefa_index_set(payload, index); + mlxsw_reg_pefa_flex_action_set_memcpy_to(payload, flex_action_set); +} + +/* PTCE-V2 - Policy-Engine TCAM Entry Register Version 2 + * ----------------------------------------------------- + * This register is used for accessing rules within a TCAM region. + * It is a new version of PTCE in order to support wider key, + * mask and action within a TCAM region. This register is not supported + * by SwitchX and SwitchX-2. + */ +#define MLXSW_REG_PTCE2_ID 0x3017 +#define MLXSW_REG_PTCE2_LEN 0x1D8 + +MLXSW_REG_DEFINE(ptce2, MLXSW_REG_PTCE2_ID, MLXSW_REG_PTCE2_LEN); + +/* reg_ptce2_v + * Valid. + * Access: RW + */ +MLXSW_ITEM32(reg, ptce2, v, 0x00, 31, 1); + +/* reg_ptce2_a + * Activity. Set if a packet lookup has hit on the specific entry. + * To clear the "a" bit, use "clear activity" op or "clear on read" op. + * Access: RO + */ +MLXSW_ITEM32(reg, ptce2, a, 0x00, 30, 1); + +enum mlxsw_reg_ptce2_op { + /* Read operation. */ + MLXSW_REG_PTCE2_OP_QUERY_READ = 0, + /* Clear on read operation. Used to read the entry + * and clear the activity bit. + */ + MLXSW_REG_PTCE2_OP_QUERY_CLEAR_ON_READ = 1, + /* Write operation. Used to write a new entry to the table. + * All R/W fields are relevant for the new entry. The activity bit is + * set for new entries. Note that a write with v = 0 will delete + * the entry. + */ + MLXSW_REG_PTCE2_OP_WRITE_WRITE = 0, + /* Update action. Only the action set will be updated. */ + MLXSW_REG_PTCE2_OP_WRITE_UPDATE = 1, + /* Clear activity. The "a" bit is cleared for the entry. */ + MLXSW_REG_PTCE2_OP_WRITE_CLEAR_ACTIVITY = 2, +}; + +/* reg_ptce2_op + * Access: OP + */ +MLXSW_ITEM32(reg, ptce2, op, 0x00, 20, 3); + +/* reg_ptce2_offset + * Access: Index + */ +MLXSW_ITEM32(reg, ptce2, offset, 0x00, 0, 16); + +/* reg_ptce2_tcam_region_info + * Opaque object that represents the TCAM region. + * Access: Index + */ +MLXSW_ITEM_BUF(reg, ptce2, tcam_region_info, 0x10, + MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN); + +#define MLXSW_REG_PTCE2_FLEX_KEY_BLOCKS_LEN 96 + +/* reg_ptce2_flex_key_blocks + * ACL Key. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, ptce2, flex_key_blocks, 0x20, + MLXSW_REG_PTCE2_FLEX_KEY_BLOCKS_LEN); + +/* reg_ptce2_mask + * Mask - same size as the key. A bit that is set directs the TCAM + * to compare the corresponding bit in the key. A bit that is clear directs + * the TCAM to ignore the corresponding bit in the key. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, ptce2, mask, 0x80, + MLXSW_REG_PTCE2_FLEX_KEY_BLOCKS_LEN); + +/* reg_ptce2_flex_action_set + * ACL action set.
+ * Access: RW + */ +MLXSW_ITEM_BUF(reg, ptce2, flex_action_set, 0xE0, + MLXSW_REG_PXXX_FLEX_ACTION_SET_LEN); + +static inline void mlxsw_reg_ptce2_pack(char *payload, bool valid, + enum mlxsw_reg_ptce2_op op, + const char *tcam_region_info, + u16 offset) +{ + MLXSW_REG_ZERO(ptce2, payload); + mlxsw_reg_ptce2_v_set(payload, valid); + mlxsw_reg_ptce2_op_set(payload, op); + mlxsw_reg_ptce2_offset_set(payload, offset); + mlxsw_reg_ptce2_tcam_region_info_memcpy_to(payload, tcam_region_info); +} + /* QPCR - QoS Policer Configuration Register * ----------------------------------------- * The QPCR register is used to create policers - that limit @@ -3154,7 +3653,7 @@ static inline void mlxsw_reg_pspa_pack(char *payload, u8 swid, u8 local_port) * Configures the properties for forwarding to CPU. */ #define MLXSW_REG_HTGT_ID 0x7002 -#define MLXSW_REG_HTGT_LEN 0x100 +#define MLXSW_REG_HTGT_LEN 0x20 MLXSW_REG_DEFINE(htgt, MLXSW_REG_HTGT_ID, MLXSW_REG_HTGT_LEN); @@ -4965,6 +5464,46 @@ static inline void mlxsw_reg_mlcr_pack(char *payload, u8 local_port, MLXSW_REG_MLCR_DURATION_MAX : 0); } +/* MPSC - Monitoring Packet Sampling Configuration Register + * -------------------------------------------------------- + * MPSC Register is used to configure the Packet Sampling mechanism. + */ +#define MLXSW_REG_MPSC_ID 0x9080 +#define MLXSW_REG_MPSC_LEN 0x1C + +MLXSW_REG_DEFINE(mpsc, MLXSW_REG_MPSC_ID, MLXSW_REG_MPSC_LEN); + +/* reg_mpsc_local_port + * Local port number + * Not supported for CPU port + * Access: Index + */ +MLXSW_ITEM32(reg, mpsc, local_port, 0x00, 16, 8); + +/* reg_mpsc_e + * Enable sampling on port local_port + * Access: RW + */ +MLXSW_ITEM32(reg, mpsc, e, 0x04, 30, 1); + +#define MLXSW_REG_MPSC_RATE_MAX 3500000000UL + +/* reg_mpsc_rate + * Sampling rate = 1 out of rate packets (with randomization around + * the point). Valid values are: 1 to MLXSW_REG_MPSC_RATE_MAX + * Access: RW + */ +MLXSW_ITEM32(reg, mpsc, rate, 0x08, 0, 32); + +static inline void mlxsw_reg_mpsc_pack(char *payload, u8 local_port, bool e, + u32 rate) +{ + MLXSW_REG_ZERO(mpsc, payload); + mlxsw_reg_mpsc_local_port_set(payload, local_port); + mlxsw_reg_mpsc_e_set(payload, e); + mlxsw_reg_mpsc_rate_set(payload, rate); +} + /* SBPR - Shared Buffer Pools Register * ----------------------------------- * The SBPR configures and retrieves the shared buffer pools and configuration. @@ -5394,6 +5933,14 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(svpe), MLXSW_REG(sfmr), MLXSW_REG(spvmlr), + MLXSW_REG(ppbt), + MLXSW_REG(pacl), + MLXSW_REG(pagt), + MLXSW_REG(ptar), + MLXSW_REG(ppbs), + MLXSW_REG(prcr), + MLXSW_REG(pefa), + MLXSW_REG(ptce2), MLXSW_REG(qpcr), MLXSW_REG(qtct), MLXSW_REG(qeec), @@ -5429,6 +5976,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(mpat), MLXSW_REG(mpar), MLXSW_REG(mlcr), + MLXSW_REG(mpsc), MLXSW_REG(sbpr), MLXSW_REG(sbcm), MLXSW_REG(sbpm), diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index 3c2171dbdba4..bce8c2e00630 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -1,7 +1,7 @@ /* * drivers/net/ethernet/mellanox/mlxsw/resources.h - * Copyright (c) 2016 Mellanox Technologies. All rights reserved. - * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved. 
+ * Copyright (c) 2016-2017 Jiri Pirko <jiri@mellanox.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,6 +48,14 @@ enum mlxsw_res_id { MLXSW_RES_ID_MAX_LAG, MLXSW_RES_ID_MAX_LAG_MEMBERS, MLXSW_RES_ID_MAX_BUFFER_SIZE, + MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS, + MLXSW_RES_ID_ACL_MAX_TCAM_RULES, + MLXSW_RES_ID_ACL_MAX_REGIONS, + MLXSW_RES_ID_ACL_MAX_GROUPS, + MLXSW_RES_ID_ACL_MAX_GROUP_SIZE, + MLXSW_RES_ID_ACL_FLEX_KEYS, + MLXSW_RES_ID_ACL_MAX_ACTION_PER_RULE, + MLXSW_RES_ID_ACL_ACTIONS_PER_SET, MLXSW_RES_ID_MAX_CPU_POLICERS, MLXSW_RES_ID_MAX_VRS, MLXSW_RES_ID_MAX_RIFS, @@ -72,6 +80,14 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_MAX_LAG] = 0x2520, [MLXSW_RES_ID_MAX_LAG_MEMBERS] = 0x2521, [MLXSW_RES_ID_MAX_BUFFER_SIZE] = 0x2802, /* Bytes */ + [MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS] = 0x2901, + [MLXSW_RES_ID_ACL_MAX_TCAM_RULES] = 0x2902, + [MLXSW_RES_ID_ACL_MAX_REGIONS] = 0x2903, + [MLXSW_RES_ID_ACL_MAX_GROUPS] = 0x2904, + [MLXSW_RES_ID_ACL_MAX_GROUP_SIZE] = 0x2905, + [MLXSW_RES_ID_ACL_FLEX_KEYS] = 0x2910, + [MLXSW_RES_ID_ACL_MAX_ACTION_PER_RULE] = 0x2911, + [MLXSW_RES_ID_ACL_ACTIONS_PER_SET] = 0x2912, [MLXSW_RES_ID_MAX_CPU_POLICERS] = 0x2A13, [MLXSW_RES_ID_MAX_VRS] = 0x2C01, [MLXSW_RES_ID_MAX_RIFS] = 0x2C02, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 003093abb170..16484f24b7db 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1,7 +1,7 @@ /* * drivers/net/ethernet/mellanox/mlxsw/spectrum.c - * Copyright (c) 2015 Mellanox Technologies. All rights reserved. - * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com> * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com> * Copyright (c) 2015 Elad Raz <eladr@mellanox.com> * @@ -57,6 +57,7 @@ #include <net/pkt_cls.h> #include <net/tc_act/tc_mirred.h> #include <net/netevent.h> +#include <net/tc_act/tc_sample.h> #include "spectrum.h" #include "pci.h" @@ -137,8 +138,6 @@ MLXSW_ITEM32(tx, hdr, fid, 0x08, 0, 16); */ MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4); -static bool mlxsw_sp_port_dev_check(const struct net_device *dev); - static void mlxsw_sp_txhdr_construct(struct sk_buff *skb, const struct mlxsw_tx_info *tx_info) { @@ -469,6 +468,16 @@ static void mlxsw_sp_span_mirror_remove(struct mlxsw_sp_port *from, mlxsw_sp_span_inspected_port_unbind(from, span_entry, type); } +static int mlxsw_sp_port_sample_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool enable, u32 rate) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char mpsc_pl[MLXSW_REG_MPSC_LEN]; + + mlxsw_reg_mpsc_pack(mpsc_pl, mlxsw_sp_port->local_port, enable, rate); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpsc), mpsc_pl); +} + static int mlxsw_sp_port_admin_status_set(struct mlxsw_sp_port *mlxsw_sp_port, bool is_up) { @@ -948,15 +957,13 @@ out: /* Return the stats from a cache that is updated periodically, * as this function might get called in an atomic context. 
*/ -static struct rtnl_link_stats64 * +static void mlxsw_sp_port_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); memcpy(stats, mlxsw_sp_port->hw_stats.cache, sizeof(*stats)); - - return stats; } int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, @@ -1164,8 +1171,8 @@ static int mlxsw_sp_port_get_phys_port_name(struct net_device *dev, char *name, } static struct mlxsw_sp_port_mall_tc_entry * -mlxsw_sp_port_mirror_entry_find(struct mlxsw_sp_port *port, - unsigned long cookie) { +mlxsw_sp_port_mall_tc_entry_find(struct mlxsw_sp_port *port, + unsigned long cookie) { struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry; list_for_each_entry(mall_tc_entry, &port->mall_tc_list, list) @@ -1177,17 +1184,15 @@ mlxsw_sp_port_mirror_entry_find(struct mlxsw_sp_port *port, static int mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port, - struct tc_cls_matchall_offload *cls, + struct mlxsw_sp_port_mall_mirror_tc_entry *mirror, const struct tc_action *a, bool ingress) { - struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry; struct net *net = dev_net(mlxsw_sp_port->dev); enum mlxsw_sp_span_type span_type; struct mlxsw_sp_port *to_port; struct net_device *to_dev; int ifindex; - int err; ifindex = tcf_mirred_ifindex(a); to_dev = __dev_get_by_index(net, ifindex); @@ -1198,90 +1203,149 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port, if (!mlxsw_sp_port_dev_check(to_dev)) { netdev_err(mlxsw_sp_port->dev, "Cannot mirror to a non-spectrum port"); - return -ENOTSUPP; + return -EOPNOTSUPP; } to_port = netdev_priv(to_dev); - mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL); - if (!mall_tc_entry) - return -ENOMEM; + mirror->to_local_port = to_port->local_port; + mirror->ingress = ingress; + span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS; + return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_port, span_type); +} - mall_tc_entry->cookie = cls->cookie; - mall_tc_entry->type = MLXSW_SP_PORT_MALL_MIRROR; - mall_tc_entry->mirror.to_local_port = to_port->local_port; - mall_tc_entry->mirror.ingress = ingress; - list_add_tail(&mall_tc_entry->list, &mlxsw_sp_port->mall_tc_list); +static void +mlxsw_sp_port_del_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_port_mall_mirror_tc_entry *mirror) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + enum mlxsw_sp_span_type span_type; + struct mlxsw_sp_port *to_port; - span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS; - err = mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_port, span_type); + to_port = mlxsw_sp->ports[mirror->to_local_port]; + span_type = mirror->ingress ? 
+ MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS; + mlxsw_sp_span_mirror_remove(mlxsw_sp_port, to_port, span_type); +} + +static int +mlxsw_sp_port_add_cls_matchall_sample(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_cls_matchall_offload *cls, + const struct tc_action *a, + bool ingress) +{ + int err; + + if (!mlxsw_sp_port->sample) + return -EOPNOTSUPP; + if (rtnl_dereference(mlxsw_sp_port->sample->psample_group)) { + netdev_err(mlxsw_sp_port->dev, "sample already active\n"); + return -EEXIST; + } + if (tcf_sample_rate(a) > MLXSW_REG_MPSC_RATE_MAX) { + netdev_err(mlxsw_sp_port->dev, "sample rate not supported\n"); + return -EOPNOTSUPP; + } + + rcu_assign_pointer(mlxsw_sp_port->sample->psample_group, + tcf_sample_psample_group(a)); + mlxsw_sp_port->sample->truncate = tcf_sample_truncate(a); + mlxsw_sp_port->sample->trunc_size = tcf_sample_trunc_size(a); + mlxsw_sp_port->sample->rate = tcf_sample_rate(a); + + err = mlxsw_sp_port_sample_set(mlxsw_sp_port, true, tcf_sample_rate(a)); if (err) - goto err_mirror_add; + goto err_port_sample_set; return 0; -err_mirror_add: - list_del(&mall_tc_entry->list); - kfree(mall_tc_entry); +err_port_sample_set: + RCU_INIT_POINTER(mlxsw_sp_port->sample->psample_group, NULL); return err; } +static void +mlxsw_sp_port_del_cls_matchall_sample(struct mlxsw_sp_port *mlxsw_sp_port) +{ + if (!mlxsw_sp_port->sample) + return; + + mlxsw_sp_port_sample_set(mlxsw_sp_port, false, 1); + RCU_INIT_POINTER(mlxsw_sp_port->sample->psample_group, NULL); +} + static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, __be16 protocol, struct tc_cls_matchall_offload *cls, bool ingress) { + struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry; const struct tc_action *a; LIST_HEAD(actions); int err; if (!tc_single_action(cls->exts)) { netdev_err(mlxsw_sp_port->dev, "only singular actions are supported\n"); - return -ENOTSUPP; + return -EOPNOTSUPP; } - tcf_exts_to_list(cls->exts, &actions); - list_for_each_entry(a, &actions, list) { - if (!is_tcf_mirred_egress_mirror(a) || - protocol != htons(ETH_P_ALL)) { - return -ENOTSUPP; - } + mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL); + if (!mall_tc_entry) + return -ENOMEM; + mall_tc_entry->cookie = cls->cookie; - err = mlxsw_sp_port_add_cls_matchall_mirror(mlxsw_sp_port, cls, + tcf_exts_to_list(cls->exts, &actions); + a = list_first_entry(&actions, struct tc_action, list); + + if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) { + struct mlxsw_sp_port_mall_mirror_tc_entry *mirror; + + mall_tc_entry->type = MLXSW_SP_PORT_MALL_MIRROR; + mirror = &mall_tc_entry->mirror; + err = mlxsw_sp_port_add_cls_matchall_mirror(mlxsw_sp_port, + mirror, a, ingress); + } else if (is_tcf_sample(a) && protocol == htons(ETH_P_ALL)) { + mall_tc_entry->type = MLXSW_SP_PORT_MALL_SAMPLE; + err = mlxsw_sp_port_add_cls_matchall_sample(mlxsw_sp_port, cls, a, ingress); - if (err) - return err; + } else { + err = -EOPNOTSUPP; } + if (err) + goto err_add_action; + + list_add_tail(&mall_tc_entry->list, &mlxsw_sp_port->mall_tc_list); return 0; + +err_add_action: + kfree(mall_tc_entry); + return err; } static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_cls_matchall_offload *cls) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry; - enum mlxsw_sp_span_type span_type; - struct mlxsw_sp_port *to_port; - mall_tc_entry = mlxsw_sp_port_mirror_entry_find(mlxsw_sp_port, - cls->cookie); + mall_tc_entry = 
mlxsw_sp_port_mall_tc_entry_find(mlxsw_sp_port, + cls->cookie); if (!mall_tc_entry) { netdev_dbg(mlxsw_sp_port->dev, "tc entry not found on port\n"); return; } + list_del(&mall_tc_entry->list); switch (mall_tc_entry->type) { case MLXSW_SP_PORT_MALL_MIRROR: - to_port = mlxsw_sp->ports[mall_tc_entry->mirror.to_local_port]; - span_type = mall_tc_entry->mirror.ingress ? - MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS; - - mlxsw_sp_span_mirror_remove(mlxsw_sp_port, to_port, span_type); + mlxsw_sp_port_del_cls_matchall_mirror(mlxsw_sp_port, + &mall_tc_entry->mirror); + break; + case MLXSW_SP_PORT_MALL_SAMPLE: + mlxsw_sp_port_del_cls_matchall_sample(mlxsw_sp_port); break; default: WARN_ON(1); } - list_del(&mall_tc_entry->list); kfree(mall_tc_entry); } @@ -1291,7 +1355,8 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, u32 handle, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS); - if (tc->type == TC_SETUP_MATCHALL) { + switch (tc->type) { + case TC_SETUP_MATCHALL: switch (tc->cls_mall->command) { case TC_CLSMATCHALL_REPLACE: return mlxsw_sp_port_add_cls_matchall(mlxsw_sp_port, @@ -1305,9 +1370,21 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, u32 handle, default: return -EINVAL; } + case TC_SETUP_CLSFLOWER: + switch (tc->cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return mlxsw_sp_flower_replace(mlxsw_sp_port, ingress, + proto, tc->cls_flower); + case TC_CLSFLOWER_DESTROY: + mlxsw_sp_flower_destroy(mlxsw_sp_port, ingress, + tc->cls_flower); + return 0; + default: + return -EOPNOTSUPP; + } } - return -ENOTSUPP; + return -EOPNOTSUPP; } static const struct net_device_ops mlxsw_sp_port_netdev_ops = { @@ -1323,8 +1400,6 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_get_offload_stats = mlxsw_sp_port_get_offload_stats, .ndo_vlan_rx_add_vid = mlxsw_sp_port_add_vid, .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, - .ndo_neigh_construct = mlxsw_sp_router_neigh_construct, - .ndo_neigh_destroy = mlxsw_sp_router_neigh_destroy, .ndo_fdb_add = switchdev_port_fdb_add, .ndo_fdb_del = switchdev_port_fdb_del, .ndo_fdb_dump = switchdev_port_fdb_dump, @@ -1650,7 +1725,7 @@ mlxsw_sp_get_hw_stats_by_group(struct mlxsw_sp_port_hw_stats **p_hw_stats, break; default: WARN_ON(1); - return -ENOTSUPP; + return -EOPNOTSUPP; } return 0; } @@ -2256,6 +2331,13 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_alloc_stats; } + mlxsw_sp_port->sample = kzalloc(sizeof(*mlxsw_sp_port->sample), + GFP_KERNEL); + if (!mlxsw_sp_port->sample) { + err = -ENOMEM; + goto err_alloc_sample; + } + mlxsw_sp_port->hw_stats.cache = kzalloc(sizeof(*mlxsw_sp_port->hw_stats.cache), GFP_KERNEL); @@ -2384,6 +2466,8 @@ err_dev_addr_init: err_port_swid_set: kfree(mlxsw_sp_port->hw_stats.cache); err_alloc_hw_stats: + kfree(mlxsw_sp_port->sample); +err_alloc_sample: free_percpu(mlxsw_sp_port->pcpu_stats); err_alloc_stats: kfree(mlxsw_sp_port->untagged_vlans); @@ -2429,8 +2513,9 @@ static void __mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_sp_port_dcb_fini(mlxsw_sp_port); mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port); - free_percpu(mlxsw_sp_port->pcpu_stats); kfree(mlxsw_sp_port->hw_stats.cache); + kfree(mlxsw_sp_port->sample); + free_percpu(mlxsw_sp_port->pcpu_stats); kfree(mlxsw_sp_port->untagged_vlans); kfree(mlxsw_sp_port->active_vlans); WARN_ON_ONCE(!list_empty(&mlxsw_sp_port->vports_list)); 
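A note on the sampling fast path added in the next hunk: the control path (mlxsw_sp_port_add_cls_matchall_sample above) publishes the psample group with rcu_assign_pointer() and clears it with RCU_INIT_POINTER(), so the trap handler can read it under rcu_read_lock() without taking any lock. A minimal sketch of that publish/read pattern, using hypothetical foo_* names rather than the driver's own structures, against the psample API of this kernel era:

#include <linux/rcupdate.h>
#include <linux/skbuff.h>
#include <net/psample.h>

struct foo_sample {
	struct psample_group __rcu *group;	/* written by control path only */
	u32 rate;
};

/* Control path: publish the group so the hot path starts sampling. */
static void foo_sample_enable(struct foo_sample *s, struct psample_group *g,
			      u32 rate)
{
	s->rate = rate;
	rcu_assign_pointer(s->group, g);	/* release: rate visible first */
}

/* Control path: unpublish; readers see NULL on their next lookup. */
static void foo_sample_disable(struct foo_sample *s)
{
	RCU_INIT_POINTER(s->group, NULL);
}

/* Hot path: lockless read, mirroring mlxsw_sp_rx_listener_sample_func(). */
static void foo_sample_rx(struct foo_sample *s, struct sk_buff *skb)
{
	struct psample_group *g;

	rcu_read_lock();
	g = rcu_dereference(s->group);
	if (g)
		psample_sample_packet(g, skb, skb->len, 0, 0, s->rate);
	rcu_read_unlock();
}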
@@ -2731,6 +2816,41 @@ static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port, return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv); } +static void mlxsw_sp_rx_listener_sample_func(struct sk_buff *skb, u8 local_port, + void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; + struct psample_group *psample_group; + u32 size; + + if (unlikely(!mlxsw_sp_port)) { + dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: sample skb received for non-existent port\n", + local_port); + goto out; + } + if (unlikely(!mlxsw_sp_port->sample)) { + dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: sample skb received on unsupported port\n", + local_port); + goto out; + } + + size = mlxsw_sp_port->sample->truncate ? + mlxsw_sp_port->sample->trunc_size : skb->len; + + rcu_read_lock(); + psample_group = rcu_dereference(mlxsw_sp_port->sample->psample_group); + if (!psample_group) + goto out_unlock; + psample_sample_packet(psample_group, skb, size, + mlxsw_sp_port->dev->ifindex, 0, + mlxsw_sp_port->sample->rate); +out_unlock: + rcu_read_unlock(); +out: + consume_skb(skb); +} + #define MLXSW_SP_RXL_NO_MARK(_trap_id, _action, _trap_group, _is_ctrl) \ MLXSW_RXL(mlxsw_sp_rx_listener_no_mark_func, _trap_id, _action, \ _is_ctrl, SP_##_trap_group, DISCARD) @@ -2766,6 +2886,9 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { MLXSW_SP_RXL_NO_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false), MLXSW_SP_RXL_NO_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, ARP_MISS, false), MLXSW_SP_RXL_NO_MARK(BGP_IPV4, TRAP_TO_CPU, BGP_IPV4, false), + /* PKT Sample trap */ + MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU, + false, SP_IP2ME, DISCARD) }; static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) @@ -2950,10 +3073,16 @@ static int __mlxsw_sp_flood_init(struct mlxsw_core *mlxsw_core, else table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST; - if (type == MLXSW_REG_SFGC_TYPE_UNKNOWN_UNICAST) + switch (type) { + case MLXSW_REG_SFGC_TYPE_UNKNOWN_UNICAST: flood_table = MLXSW_SP_FLOOD_TABLE_UC; - else - flood_table = MLXSW_SP_FLOOD_TABLE_BM; + break; + case MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4: + flood_table = MLXSW_SP_FLOOD_TABLE_MC; + break; + default: + flood_table = MLXSW_SP_FLOOD_TABLE_BC; + } mlxsw_reg_sfgc_pack(sfgc_pl, type, bridge_type, table_type, flood_table); @@ -3089,6 +3218,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_span_init; } + err = mlxsw_sp_acl_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL\n"); + goto err_acl_init; + } + err = mlxsw_sp_ports_create(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n"); @@ -3098,6 +3233,8 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, return 0; err_ports_create: + mlxsw_sp_acl_fini(mlxsw_sp); +err_acl_init: mlxsw_sp_span_fini(mlxsw_sp); err_span_init: mlxsw_sp_router_fini(mlxsw_sp); @@ -3118,6 +3255,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); mlxsw_sp_ports_remove(mlxsw_sp); + mlxsw_sp_acl_fini(mlxsw_sp); mlxsw_sp_span_fini(mlxsw_sp); mlxsw_sp_router_fini(mlxsw_sp); mlxsw_sp_switchdev_fini(mlxsw_sp); @@ -3138,9 +3276,9 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { .used_flood_tables = 1, .used_flood_mode = 1, .flood_mode = 3, - .max_fid_offset_flood_tables = 2, + .max_fid_offset_flood_tables = 3, 
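/* The bump from 2 to 3 flood tables here and on max_fid_flood_tables below
 * reflects the split of the former combined broadcast/multicast table (BM)
 * into separate broadcast (BC) and unregistered-multicast (MC) tables; see
 * the MLXSW_SP_FLOOD_TABLE_* enum change in spectrum.h further down.
 */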
.fid_offset_flood_table_size = VLAN_N_VID - 1, - .max_fid_flood_tables = 2, + .max_fid_flood_tables = 3, .fid_flood_table_size = MLXSW_SP_VFID_MAX, .used_max_ib_mc = 1, .max_ib_mc = 0, @@ -3183,7 +3321,7 @@ static struct mlxsw_driver mlxsw_sp_driver = { .profile = &mlxsw_sp_config_profile, }; -static bool mlxsw_sp_port_dev_check(const struct net_device *dev) +bool mlxsw_sp_port_dev_check(const struct net_device *dev) { return dev->netdev_ops == &mlxsw_sp_port_netdev_ops; } @@ -3341,6 +3479,8 @@ mlxsw_sp_rif_alloc(u16 rif, struct net_device *l3_dev, struct mlxsw_sp_fid *f) if (!r) return NULL; + INIT_LIST_HEAD(&r->nexthop_list); + INIT_LIST_HEAD(&r->neigh_list); ether_addr_copy(r->addr, l3_dev->dev_addr); r->mtu = l3_dev->mtu; r->ref_count = 1; @@ -3409,6 +3549,8 @@ static void mlxsw_sp_vport_rif_sp_destroy(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid = f->fid; u16 rif = r->rif; + mlxsw_sp_router_rif_gone_sync(mlxsw_sp, r); + mlxsw_sp->rifs[rif] = NULL; f->r = NULL; @@ -3553,7 +3695,7 @@ static int mlxsw_sp_router_port_flood_set(struct mlxsw_sp *mlxsw_sp, u16 fid, table_type = mlxsw_sp_flood_table_type_get(fid); index = mlxsw_sp_flood_table_index_get(fid); - mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, index, table_type, + mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BC, index, table_type, 1, MLXSW_PORT_ROUTER_PORT, set); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); @@ -3638,6 +3780,8 @@ void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f = r->f; u16 rif = r->rif; + mlxsw_sp_router_rif_gone_sync(mlxsw_sp, r); + mlxsw_sp->rifs[rif] = NULL; f->r = NULL; @@ -3927,6 +4071,9 @@ static int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port->learning = 1; mlxsw_sp_port->learning_sync = 1; mlxsw_sp_port->uc_flood = 1; + mlxsw_sp_port->mc_flood = 1; + mlxsw_sp_port->mc_router = 0; + mlxsw_sp_port->mc_disabled = 1; mlxsw_sp_port->bridged = 1; return 0; @@ -3943,6 +4090,8 @@ static void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port) mlxsw_sp_port->learning = 0; mlxsw_sp_port->learning_sync = 0; mlxsw_sp_port->uc_flood = 0; + mlxsw_sp_port->mc_flood = 0; + mlxsw_sp_port->mc_router = 0; mlxsw_sp_port->bridged = 0; /* Add implicit VLAN interface in the device, so that untagged @@ -4605,6 +4754,9 @@ static int mlxsw_sp_vport_bridge_join(struct mlxsw_sp_port *mlxsw_sp_vport, mlxsw_sp_vport->learning = 1; mlxsw_sp_vport->learning_sync = 1; mlxsw_sp_vport->uc_flood = 1; + mlxsw_sp_vport->mc_flood = 1; + mlxsw_sp_vport->mc_router = 0; + mlxsw_sp_vport->mc_disabled = 1; mlxsw_sp_vport->bridged = 1; return 0; @@ -4625,6 +4777,8 @@ static void mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport) mlxsw_sp_vport->learning = 0; mlxsw_sp_vport->learning_sync = 0; mlxsw_sp_vport->uc_flood = 0; + mlxsw_sp_vport->mc_flood = 0; + mlxsw_sp_vport->mc_router = 0; mlxsw_sp_vport->bridged = 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index cc1af19d699a..13ec85e7c392 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -1,7 +1,7 @@ /* * drivers/net/ethernet/mellanox/mlxsw/spectrum.h - * Copyright (c) 2015 Mellanox Technologies. All rights reserved. - * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved. 
+ * Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com> * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com> * Copyright (c) 2015 Elad Raz <eladr@mellanox.com> * @@ -46,12 +46,16 @@ #include <linux/dcbnl.h> #include <linux/in6.h> #include <linux/notifier.h> +#include <net/psample.h> +#include <net/pkt_cls.h> #include "port.h" #include "core.h" +#include "core_acl_flex_keys.h" +#include "core_acl_flex_actions.h" #define MLXSW_SP_VFID_BASE VLAN_N_VID -#define MLXSW_SP_VFID_MAX 6656 /* Bridged VLAN interfaces */ +#define MLXSW_SP_VFID_MAX 1024 /* Bridged VLAN interfaces */ #define MLXSW_SP_RFID_BASE 15360 #define MLXSW_SP_INVALID_RIF 0xffff @@ -104,6 +108,8 @@ struct mlxsw_sp_fid { }; struct mlxsw_sp_rif { + struct list_head nexthop_list; + struct list_head neigh_list; struct net_device *dev; unsigned int ref_count; struct mlxsw_sp_fid *f; @@ -229,6 +235,7 @@ struct mlxsw_sp_span_entry { enum mlxsw_sp_port_mall_action_type { MLXSW_SP_PORT_MALL_MIRROR, + MLXSW_SP_PORT_MALL_SAMPLE, }; struct mlxsw_sp_port_mall_mirror_tc_entry { @@ -249,17 +256,20 @@ struct mlxsw_sp_router { struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT]; struct mlxsw_sp_vr *vrs; struct rhashtable neigh_ht; + struct rhashtable nexthop_group_ht; + struct rhashtable nexthop_ht; struct { struct delayed_work dw; unsigned long interval; /* ms */ } neighs_update; struct delayed_work nexthop_probe_dw; #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */ - struct list_head nexthop_group_list; struct list_head nexthop_neighs_list; bool aborted; }; +struct mlxsw_sp_acl; + struct mlxsw_sp { struct { struct list_head list; @@ -289,6 +299,7 @@ struct mlxsw_sp { u8 port_to_module[MLXSW_PORT_MAX_PORTS]; struct mlxsw_sp_sb sb; struct mlxsw_sp_router router; + struct mlxsw_sp_acl *acl; struct { DECLARE_BITMAP(usage, MLXSW_SP_KVD_LINEAR_SIZE); } kvdl; @@ -315,15 +326,25 @@ struct mlxsw_sp_port_pcpu_stats { u32 tx_dropped; }; +struct mlxsw_sp_port_sample { + struct psample_group __rcu *psample_group; + u32 trunc_size; + u32 rate; + bool truncate; +}; + struct mlxsw_sp_port { struct net_device *dev; struct mlxsw_sp_port_pcpu_stats __percpu *pcpu_stats; struct mlxsw_sp *mlxsw_sp; u8 local_port; u8 stp_state; - u8 learning:1, + u16 learning:1, learning_sync:1, uc_flood:1, + mc_flood:1, + mc_router:1, + mc_disabled:1, bridged:1, lagged:1, split:1; @@ -361,8 +382,10 @@ struct mlxsw_sp_port { struct rtnl_link_stats64 *cache; struct delayed_work update_dw; } hw_stats; + struct mlxsw_sp_port_sample *sample; }; +bool mlxsw_sp_port_dev_check(const struct net_device *dev); struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev); void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port); @@ -489,7 +512,8 @@ mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, enum mlxsw_sp_flood_table { MLXSW_SP_FLOOD_TABLE_UC, - MLXSW_SP_FLOOD_TABLE_BM, + MLXSW_SP_FLOOD_TABLE_BC, + MLXSW_SP_FLOOD_TABLE_MC, }; int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp); @@ -582,14 +606,107 @@ static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port) int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); -int mlxsw_sp_router_neigh_construct(struct net_device *dev, - struct neighbour *n); -void mlxsw_sp_router_neigh_destroy(struct net_device *dev, - struct neighbour *n); int mlxsw_sp_router_netevent_event(struct notifier_block *unused, unsigned long event, void *ptr); +void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *r); 
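/* mlxsw_sp_router_rif_gone_sync() is the new hook that both RIF destroy
 * paths (mlxsw_sp_vport_rif_sp_destroy() and mlxsw_sp_rif_bridge_destroy()
 * in the spectrum.c hunks above) call before clearing mlxsw_sp->rifs[rif],
 * letting the router flush whatever is still linked on the RIF's newly
 * added neigh_list and nexthop_list.
 */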
int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count); void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index); +struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl); + +struct mlxsw_sp_acl_rule_info { + unsigned int priority; + struct mlxsw_afk_element_values values; + struct mlxsw_afa_block *act_block; +}; + +enum mlxsw_sp_acl_profile { + MLXSW_SP_ACL_PROFILE_FLOWER, +}; + +struct mlxsw_sp_acl_profile_ops { + size_t ruleset_priv_size; + int (*ruleset_add)(struct mlxsw_sp *mlxsw_sp, + void *priv, void *ruleset_priv); + void (*ruleset_del)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv); + int (*ruleset_bind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, + struct net_device *dev, bool ingress); + void (*ruleset_unbind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv); + size_t rule_priv_size; + int (*rule_add)(struct mlxsw_sp *mlxsw_sp, + void *ruleset_priv, void *rule_priv, + struct mlxsw_sp_acl_rule_info *rulei); + void (*rule_del)(struct mlxsw_sp *mlxsw_sp, void *rule_priv); +}; + +struct mlxsw_sp_acl_ops { + size_t priv_size; + int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv); + void (*fini)(struct mlxsw_sp *mlxsw_sp, void *priv); + const struct mlxsw_sp_acl_profile_ops * + (*profile_ops)(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_acl_profile profile); +}; + +struct mlxsw_sp_acl_ruleset; + +struct mlxsw_sp_acl_ruleset * +mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev, bool ingress, + enum mlxsw_sp_acl_profile profile); +void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ruleset *ruleset); + +struct mlxsw_sp_acl_rule_info * +mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl); +void mlxsw_sp_acl_rulei_destroy(struct mlxsw_sp_acl_rule_info *rulei); +int mlxsw_sp_acl_rulei_commit(struct mlxsw_sp_acl_rule_info *rulei); +void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei, + unsigned int priority); +void mlxsw_sp_acl_rulei_keymask_u32(struct mlxsw_sp_acl_rule_info *rulei, + enum mlxsw_afk_element element, + u32 key_value, u32 mask_value); +void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei, + enum mlxsw_afk_element element, + const char *key_value, + const char *mask_value, unsigned int len); +void mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei); +void mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, + u16 group_id); +int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei); +int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct net_device *out_dev); + +struct mlxsw_sp_acl_rule; + +struct mlxsw_sp_acl_rule * +mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ruleset *ruleset, + unsigned long cookie); +void mlxsw_sp_acl_rule_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule); +int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule); +void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule); +struct mlxsw_sp_acl_rule * +mlxsw_sp_acl_rule_lookup(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ruleset *ruleset, + unsigned long cookie); +struct mlxsw_sp_acl_rule_info * +mlxsw_sp_acl_rule_rulei(struct mlxsw_sp_acl_rule *rule); + +int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp); + +extern const struct mlxsw_sp_acl_ops mlxsw_sp_acl_tcam_ops; + +int mlxsw_sp_flower_replace(struct mlxsw_sp_port 
*mlxsw_sp_port, bool ingress, + __be16 protocol, struct tc_cls_flower_offload *f); +void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, + struct tc_cls_flower_offload *f); + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c new file mode 100644 index 000000000000..8a18b3aa70dc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -0,0 +1,572 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/list.h> +#include <linux/string.h> +#include <linux/rhashtable.h> +#include <linux/netdevice.h> + +#include "reg.h" +#include "core.h" +#include "resources.h" +#include "spectrum.h" +#include "core_acl_flex_keys.h" +#include "core_acl_flex_actions.h" +#include "spectrum_acl_flex_keys.h" + +struct mlxsw_sp_acl { + struct mlxsw_afk *afk; + struct mlxsw_afa *afa; + const struct mlxsw_sp_acl_ops *ops; + struct rhashtable ruleset_ht; + unsigned long priv[0]; + /* priv has to be always the last item */ +}; + +struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl) +{ + return acl->afk; +} + +struct mlxsw_sp_acl_ruleset_ht_key { + struct net_device *dev; /* dev this ruleset is bound to */ + bool ingress; + const struct mlxsw_sp_acl_profile_ops *ops; +}; + +struct mlxsw_sp_acl_ruleset { + struct rhash_head ht_node; /* Member of acl HT */ + struct mlxsw_sp_acl_ruleset_ht_key ht_key; + struct rhashtable rule_ht; + unsigned int ref_count; + unsigned long priv[0]; + /* priv has to be always the last item */ +}; + +struct mlxsw_sp_acl_rule { + struct rhash_head ht_node; /* Member of rule HT */ + unsigned long cookie; /* HT key */ + struct mlxsw_sp_acl_ruleset *ruleset; + struct mlxsw_sp_acl_rule_info *rulei; + unsigned long priv[0]; + /* priv has to be always the last item */ +}; + +static const struct rhashtable_params mlxsw_sp_acl_ruleset_ht_params = { + .key_len = sizeof(struct mlxsw_sp_acl_ruleset_ht_key), + .key_offset = offsetof(struct mlxsw_sp_acl_ruleset, ht_key), + .head_offset = offsetof(struct mlxsw_sp_acl_ruleset, ht_node), + .automatic_shrinking = true, +}; + +static const struct rhashtable_params mlxsw_sp_acl_rule_ht_params = { + .key_len = sizeof(unsigned long), + .key_offset = offsetof(struct mlxsw_sp_acl_rule, cookie), + .head_offset = offsetof(struct mlxsw_sp_acl_rule, ht_node), + .automatic_shrinking = true, +}; + +static struct mlxsw_sp_acl_ruleset * +mlxsw_sp_acl_ruleset_create(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_acl_profile_ops *ops) +{ + struct mlxsw_sp_acl *acl = mlxsw_sp->acl; + struct mlxsw_sp_acl_ruleset *ruleset; + size_t alloc_size; + int err; + + alloc_size = sizeof(*ruleset) + ops->ruleset_priv_size; + ruleset = kzalloc(alloc_size, GFP_KERNEL); + if (!ruleset) + return ERR_PTR(-ENOMEM); + ruleset->ref_count = 1; + ruleset->ht_key.ops = ops; + + err = rhashtable_init(&ruleset->rule_ht, &mlxsw_sp_acl_rule_ht_params); + if (err) + goto err_rhashtable_init; + + err = ops->ruleset_add(mlxsw_sp, acl->priv, ruleset->priv); + if (err) + goto err_ops_ruleset_add; + + return ruleset; + +err_ops_ruleset_add: + rhashtable_destroy(&ruleset->rule_ht); +err_rhashtable_init: + kfree(ruleset); + return ERR_PTR(err); +} + +static void mlxsw_sp_acl_ruleset_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ruleset *ruleset) +{ + const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + + ops->ruleset_del(mlxsw_sp, ruleset->priv); + rhashtable_destroy(&ruleset->rule_ht); + kfree(ruleset); +} + +static int mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ruleset *ruleset, + struct net_device *dev, bool ingress) +{ + const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + struct mlxsw_sp_acl *acl = mlxsw_sp->acl; + int err; + + ruleset->ht_key.dev = dev; + ruleset->ht_key.ingress = ingress; + err = rhashtable_insert_fast(&acl->ruleset_ht, &ruleset->ht_node, + mlxsw_sp_acl_ruleset_ht_params); + if (err) + return err; + 
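/* Ordering note: the ruleset is inserted into acl->ruleset_ht first and the
 * hardware bind is attempted below; on failure the err_ops_ruleset_bind
 * unwind removes the hash table entry again, leaving no half-bound ruleset
 * behind for later mlxsw_sp_acl_ruleset_get() lookups.
 */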
err = ops->ruleset_bind(mlxsw_sp, ruleset->priv, dev, ingress); + if (err) + goto err_ops_ruleset_bind; + return 0; + +err_ops_ruleset_bind: + rhashtable_remove_fast(&acl->ruleset_ht, &ruleset->ht_node, + mlxsw_sp_acl_ruleset_ht_params); + return err; +} + +static void mlxsw_sp_acl_ruleset_unbind(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ruleset *ruleset) +{ + const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + struct mlxsw_sp_acl *acl = mlxsw_sp->acl; + + ops->ruleset_unbind(mlxsw_sp, ruleset->priv); + rhashtable_remove_fast(&acl->ruleset_ht, &ruleset->ht_node, + mlxsw_sp_acl_ruleset_ht_params); +} + +static void mlxsw_sp_acl_ruleset_ref_inc(struct mlxsw_sp_acl_ruleset *ruleset) +{ + ruleset->ref_count++; +} + +static void mlxsw_sp_acl_ruleset_ref_dec(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ruleset *ruleset) +{ + if (--ruleset->ref_count) + return; + mlxsw_sp_acl_ruleset_unbind(mlxsw_sp, ruleset); + mlxsw_sp_acl_ruleset_destroy(mlxsw_sp, ruleset); +} + +struct mlxsw_sp_acl_ruleset * +mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev, bool ingress, + enum mlxsw_sp_acl_profile profile) +{ + const struct mlxsw_sp_acl_profile_ops *ops; + struct mlxsw_sp_acl *acl = mlxsw_sp->acl; + struct mlxsw_sp_acl_ruleset_ht_key ht_key; + struct mlxsw_sp_acl_ruleset *ruleset; + int err; + + ops = acl->ops->profile_ops(mlxsw_sp, profile); + if (!ops) + return ERR_PTR(-EINVAL); + + memset(&ht_key, 0, sizeof(ht_key)); + ht_key.dev = dev; + ht_key.ingress = ingress; + ht_key.ops = ops; + ruleset = rhashtable_lookup_fast(&acl->ruleset_ht, &ht_key, + mlxsw_sp_acl_ruleset_ht_params); + if (ruleset) { + mlxsw_sp_acl_ruleset_ref_inc(ruleset); + return ruleset; + } + ruleset = mlxsw_sp_acl_ruleset_create(mlxsw_sp, ops); + if (IS_ERR(ruleset)) + return ruleset; + err = mlxsw_sp_acl_ruleset_bind(mlxsw_sp, ruleset, dev, ingress); + if (err) + goto err_ruleset_bind; + return ruleset; + +err_ruleset_bind: + mlxsw_sp_acl_ruleset_destroy(mlxsw_sp, ruleset); + return ERR_PTR(err); +} + +void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ruleset *ruleset) +{ + mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset); +} + +struct mlxsw_sp_acl_rule_info * +mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl) +{ + struct mlxsw_sp_acl_rule_info *rulei; + int err; + + rulei = kzalloc(sizeof(*rulei), GFP_KERNEL); + if (!rulei) + return NULL; + rulei->act_block = mlxsw_afa_block_create(acl->afa); + if (IS_ERR(rulei->act_block)) { + err = PTR_ERR(rulei->act_block); + goto err_afa_block_create; + } + return rulei; + +err_afa_block_create: + kfree(rulei); + return ERR_PTR(err); +} + +void mlxsw_sp_acl_rulei_destroy(struct mlxsw_sp_acl_rule_info *rulei) +{ + mlxsw_afa_block_destroy(rulei->act_block); + kfree(rulei); +} + +int mlxsw_sp_acl_rulei_commit(struct mlxsw_sp_acl_rule_info *rulei) +{ + return mlxsw_afa_block_commit(rulei->act_block); +} + +void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei, + unsigned int priority) +{ + rulei->priority = priority; +} + +void mlxsw_sp_acl_rulei_keymask_u32(struct mlxsw_sp_acl_rule_info *rulei, + enum mlxsw_afk_element element, + u32 key_value, u32 mask_value) +{ + mlxsw_afk_values_add_u32(&rulei->values, element, + key_value, mask_value); +} + +void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei, + enum mlxsw_afk_element element, + const char *key_value, + const char *mask_value, unsigned int len) +{ + mlxsw_afk_values_add_buf(&rulei->values, element, + key_value, mask_value, 
len); +} + +void mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei) +{ + mlxsw_afa_block_continue(rulei->act_block); +} + +void mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, + u16 group_id) +{ + mlxsw_afa_block_jump(rulei->act_block, group_id); +} + +int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei) +{ + return mlxsw_afa_block_append_drop(rulei->act_block); +} + +int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct net_device *out_dev) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + u8 local_port; + bool in_port; + + if (out_dev) { + if (!mlxsw_sp_port_dev_check(out_dev)) + return -EINVAL; + mlxsw_sp_port = netdev_priv(out_dev); + if (mlxsw_sp_port->mlxsw_sp != mlxsw_sp) + return -EINVAL; + local_port = mlxsw_sp_port->local_port; + in_port = false; + } else { + /* If out_dev is NULL, the caller wants to + * forward to the ingress port. + */ + local_port = 0; + in_port = true; + } + return mlxsw_afa_block_append_fwd(rulei->act_block, + local_port, in_port); +} + +struct mlxsw_sp_acl_rule * +mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ruleset *ruleset, + unsigned long cookie) +{ + const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + struct mlxsw_sp_acl_rule *rule; + int err; + + mlxsw_sp_acl_ruleset_ref_inc(ruleset); + rule = kzalloc(sizeof(*rule) + ops->rule_priv_size, GFP_KERNEL); + if (!rule) { + err = -ENOMEM; + goto err_alloc; + } + rule->cookie = cookie; + rule->ruleset = ruleset; + + rule->rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl); + if (IS_ERR(rule->rulei)) { + err = PTR_ERR(rule->rulei); + goto err_rulei_create; + } + return rule; + +err_rulei_create: + kfree(rule); +err_alloc: + mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset); + return ERR_PTR(err); +} + +void mlxsw_sp_acl_rule_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule) +{ + struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; + + mlxsw_sp_acl_rulei_destroy(rule->rulei); + kfree(rule); + mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset); +} + +int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule) +{ + struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; + const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + int err; + + err = ops->rule_add(mlxsw_sp, ruleset->priv, rule->priv, rule->rulei); + if (err) + return err; + + err = rhashtable_insert_fast(&ruleset->rule_ht, &rule->ht_node, + mlxsw_sp_acl_rule_ht_params); + if (err) + goto err_rhashtable_insert; + + return 0; + +err_rhashtable_insert: + ops->rule_del(mlxsw_sp, rule->priv); + return err; +} + +void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule) +{ + struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; + const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + + rhashtable_remove_fast(&ruleset->rule_ht, &rule->ht_node, + mlxsw_sp_acl_rule_ht_params); + ops->rule_del(mlxsw_sp, rule->priv); +} + +struct mlxsw_sp_acl_rule * +mlxsw_sp_acl_rule_lookup(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ruleset *ruleset, + unsigned long cookie) +{ + return rhashtable_lookup_fast(&ruleset->rule_ht, &cookie, + mlxsw_sp_acl_rule_ht_params); +} + +struct mlxsw_sp_acl_rule_info * +mlxsw_sp_acl_rule_rulei(struct mlxsw_sp_acl_rule *rule) +{ + return rule->rulei; +} + +#define MLXSW_SP_KDVL_ACT_EXT_SIZE 1 + +static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index, + char *enc_actions, bool is_first) +{ +
struct mlxsw_sp *mlxsw_sp = priv; + char pefa_pl[MLXSW_REG_PEFA_LEN]; + u32 kvdl_index; + int ret; + int err; + + /* The first action set of a TCAM entry is stored directly in TCAM, + * not KVD linear area. + */ + if (is_first) + return 0; + + ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KDVL_ACT_EXT_SIZE); + if (ret < 0) + return ret; + kvdl_index = ret; + mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, enc_actions); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl); + if (err) + goto err_pefa_write; + *p_kvdl_index = kvdl_index; + return 0; + +err_pefa_write: + mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); + return err; +} + +static void mlxsw_sp_act_kvdl_set_del(void *priv, u32 kvdl_index, + bool is_first) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + if (is_first) + return; + mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); +} + +static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index, + u8 local_port) +{ + struct mlxsw_sp *mlxsw_sp = priv; + char ppbs_pl[MLXSW_REG_PPBS_LEN]; + u32 kvdl_index; + int ret; + int err; + + ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1); + if (ret < 0) + return ret; + kvdl_index = ret; + mlxsw_reg_ppbs_pack(ppbs_pl, kvdl_index, local_port); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbs), ppbs_pl); + if (err) + goto err_ppbs_write; + *p_kvdl_index = kvdl_index; + return 0; + +err_ppbs_write: + mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); + return err; +} + +static void mlxsw_sp_act_kvdl_fwd_entry_del(void *priv, u32 kvdl_index) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); +} + +static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = { + .kvdl_set_add = mlxsw_sp_act_kvdl_set_add, + .kvdl_set_del = mlxsw_sp_act_kvdl_set_del, + .kvdl_fwd_entry_add = mlxsw_sp_act_kvdl_fwd_entry_add, + .kvdl_fwd_entry_del = mlxsw_sp_act_kvdl_fwd_entry_del, +}; + +int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) +{ + const struct mlxsw_sp_acl_ops *acl_ops = &mlxsw_sp_acl_tcam_ops; + struct mlxsw_sp_acl *acl; + int err; + + acl = kzalloc(sizeof(*acl) + acl_ops->priv_size, GFP_KERNEL); + if (!acl) + return -ENOMEM; + mlxsw_sp->acl = acl; + + acl->afk = mlxsw_afk_create(MLXSW_CORE_RES_GET(mlxsw_sp->core, + ACL_FLEX_KEYS), + mlxsw_sp_afk_blocks, + MLXSW_SP_AFK_BLOCKS_COUNT); + if (!acl->afk) { + err = -ENOMEM; + goto err_afk_create; + } + + acl->afa = mlxsw_afa_create(MLXSW_CORE_RES_GET(mlxsw_sp->core, + ACL_ACTIONS_PER_SET), + &mlxsw_sp_act_afa_ops, mlxsw_sp); + if (IS_ERR(acl->afa)) { + err = PTR_ERR(acl->afa); + goto err_afa_create; + } + + err = rhashtable_init(&acl->ruleset_ht, + &mlxsw_sp_acl_ruleset_ht_params); + if (err) + goto err_rhashtable_init; + + err = acl_ops->init(mlxsw_sp, acl->priv); + if (err) + goto err_acl_ops_init; + + acl->ops = acl_ops; + return 0; + +err_acl_ops_init: + rhashtable_destroy(&acl->ruleset_ht); +err_rhashtable_init: + mlxsw_afa_destroy(acl->afa); +err_afa_create: + mlxsw_afk_destroy(acl->afk); +err_afk_create: + kfree(acl); + return err; +} + +void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_acl *acl = mlxsw_sp->acl; + const struct mlxsw_sp_acl_ops *acl_ops = acl->ops; + + acl_ops->fini(mlxsw_sp, acl->priv); + rhashtable_destroy(&acl->ruleset_ht); + mlxsw_afa_destroy(acl->afa); + mlxsw_afk_destroy(acl->afk); + kfree(acl); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h new file mode 100644 index 000000000000..82b81cf7f4a7 --- /dev/null +++ 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h @@ -0,0 +1,109 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H +#define _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H + +#include "core_acl_flex_keys.h" + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_dmac[] = { + MLXSW_AFK_ELEMENT_INST_BUF(DMAC, 0x00, 6), + MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac[] = { + MLXSW_AFK_ELEMENT_INST_BUF(SMAC, 0x00, 6), + MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac_ex[] = { + MLXSW_AFK_ELEMENT_INST_BUF(SMAC, 0x02, 6), + MLXSW_AFK_ELEMENT_INST_U32(ETHERTYPE, 0x0C, 0, 16), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_sip[] = { + MLXSW_AFK_ELEMENT_INST_U32(SRC_IP4, 0x00, 0, 32), + MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8), + MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = { + MLXSW_AFK_ELEMENT_INST_U32(DST_IP4, 0x00, 0, 32), + MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8), + MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_ex[] = { + MLXSW_AFK_ELEMENT_INST_U32(SRC_L4_PORT, 0x08, 0, 16), + MLXSW_AFK_ELEMENT_INST_U32(DST_L4_PORT, 0x0C, 0, 16), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_dip[] = { + MLXSW_AFK_ELEMENT_INST_BUF(DST_IP6_LO, 0x00, 8), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_ex1[] = { + MLXSW_AFK_ELEMENT_INST_BUF(DST_IP6_HI, 0x00, 8), + MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_sip[] = { + MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP6_LO, 0x00, 8), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_sip_ex[] = { + MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP6_HI, 0x00, 8), +}; + +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_packet_type[] = { + MLXSW_AFK_ELEMENT_INST_U32(ETHERTYPE, 0x00, 0, 16), +}; + +static const struct mlxsw_afk_block mlxsw_sp_afk_blocks[] = { + MLXSW_AFK_BLOCK(0x10, mlxsw_sp_afk_element_info_l2_dmac), + MLXSW_AFK_BLOCK(0x11, mlxsw_sp_afk_element_info_l2_smac), + MLXSW_AFK_BLOCK(0x12, mlxsw_sp_afk_element_info_l2_smac_ex), + MLXSW_AFK_BLOCK(0x30, mlxsw_sp_afk_element_info_ipv4_sip), + MLXSW_AFK_BLOCK(0x31, mlxsw_sp_afk_element_info_ipv4_dip), + MLXSW_AFK_BLOCK(0x33, mlxsw_sp_afk_element_info_ipv4_ex), + MLXSW_AFK_BLOCK(0x60, mlxsw_sp_afk_element_info_ipv6_dip), + MLXSW_AFK_BLOCK(0x65, mlxsw_sp_afk_element_info_ipv6_ex1), + MLXSW_AFK_BLOCK(0x62, mlxsw_sp_afk_element_info_ipv6_sip), + MLXSW_AFK_BLOCK(0x63, mlxsw_sp_afk_element_info_ipv6_sip_ex), + MLXSW_AFK_BLOCK(0xB0, mlxsw_sp_afk_element_info_packet_type), +}; + +#define MLXSW_SP_AFK_BLOCKS_COUNT ARRAY_SIZE(mlxsw_sp_afk_blocks) + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c new file mode 100644 index 000000000000..7382832215fa --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -0,0 +1,1084 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. 
+ * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/bitops.h> +#include <linux/list.h> +#include <linux/rhashtable.h> +#include <linux/netdevice.h> +#include <linux/parman.h> + +#include "reg.h" +#include "core.h" +#include "resources.h" +#include "spectrum.h" +#include "core_acl_flex_keys.h" + +struct mlxsw_sp_acl_tcam { + unsigned long *used_regions; /* bit array */ + unsigned int max_regions; + unsigned long *used_groups; /* bit array */ + unsigned int max_groups; + unsigned int max_group_size; +}; + +static int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv) +{ + struct mlxsw_sp_acl_tcam *tcam = priv; + u64 max_tcam_regions; + u64 max_regions; + u64 max_groups; + size_t alloc_size; + int err; + + max_tcam_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core, + ACL_MAX_TCAM_REGIONS); + max_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_REGIONS); + + /* Use 1:1 mapping between ACL region and TCAM region */ + if (max_tcam_regions < max_regions) + max_regions = max_tcam_regions; + + alloc_size = sizeof(tcam->used_regions[0]) * BITS_TO_LONGS(max_regions); + tcam->used_regions = kzalloc(alloc_size, GFP_KERNEL); + if (!tcam->used_regions) + return -ENOMEM; + tcam->max_regions = max_regions; + + max_groups = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUPS); + alloc_size = sizeof(tcam->used_groups[0]) * BITS_TO_LONGS(max_groups); + tcam->used_groups = kzalloc(alloc_size, GFP_KERNEL); + if (!tcam->used_groups) { + err = -ENOMEM; + goto err_alloc_used_groups; + } + tcam->max_groups = max_groups; + tcam->max_group_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, + ACL_MAX_GROUP_SIZE); + return 0; + +err_alloc_used_groups: + kfree(tcam->used_regions); + return err; +} + +static void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp 
*mlxsw_sp, void *priv) +{ + struct mlxsw_sp_acl_tcam *tcam = priv; + + kfree(tcam->used_groups); + kfree(tcam->used_regions); +} + +static int mlxsw_sp_acl_tcam_region_id_get(struct mlxsw_sp_acl_tcam *tcam, + u16 *p_id) +{ + u16 id; + + id = find_first_zero_bit(tcam->used_regions, tcam->max_regions); + if (id < tcam->max_regions) { + __set_bit(id, tcam->used_regions); + *p_id = id; + return 0; + } + return -ENOBUFS; +} + +static void mlxsw_sp_acl_tcam_region_id_put(struct mlxsw_sp_acl_tcam *tcam, + u16 id) +{ + __clear_bit(id, tcam->used_regions); +} + +static int mlxsw_sp_acl_tcam_group_id_get(struct mlxsw_sp_acl_tcam *tcam, + u16 *p_id) +{ + u16 id; + + id = find_first_zero_bit(tcam->used_groups, tcam->max_groups); + if (id < tcam->max_groups) { + __set_bit(id, tcam->used_groups); + *p_id = id; + return 0; + } + return -ENOBUFS; +} + +static void mlxsw_sp_acl_tcam_group_id_put(struct mlxsw_sp_acl_tcam *tcam, + u16 id) +{ + __clear_bit(id, tcam->used_groups); +} + +struct mlxsw_sp_acl_tcam_pattern { + const enum mlxsw_afk_element *elements; + unsigned int elements_count; +}; + +struct mlxsw_sp_acl_tcam_group { + struct mlxsw_sp_acl_tcam *tcam; + u16 id; + struct list_head region_list; + unsigned int region_count; + struct rhashtable chunk_ht; + struct { + u16 local_port; + bool ingress; + } bound; + struct mlxsw_sp_acl_tcam_group_ops *ops; + const struct mlxsw_sp_acl_tcam_pattern *patterns; + unsigned int patterns_count; +}; + +struct mlxsw_sp_acl_tcam_region { + struct list_head list; /* Member of a TCAM group */ + struct list_head chunk_list; /* List of chunks under this region */ + struct parman *parman; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_acl_tcam_group *group; + u16 id; /* ACL ID and region ID - they are same */ + char tcam_region_info[MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN]; + struct mlxsw_afk_key_info *key_info; + struct { + struct parman_prio parman_prio; + struct parman_item parman_item; + struct mlxsw_sp_acl_rule_info *rulei; + } catchall; +}; + +struct mlxsw_sp_acl_tcam_chunk { + struct list_head list; /* Member of a TCAM region */ + struct rhash_head ht_node; /* Member of a chunk HT */ + unsigned int priority; /* Priority within the region and group */ + struct parman_prio parman_prio; + struct mlxsw_sp_acl_tcam_group *group; + struct mlxsw_sp_acl_tcam_region *region; + unsigned int ref_count; +}; + +struct mlxsw_sp_acl_tcam_entry { + struct parman_item parman_item; + struct mlxsw_sp_acl_tcam_chunk *chunk; +}; + +static const struct rhashtable_params mlxsw_sp_acl_tcam_chunk_ht_params = { + .key_len = sizeof(unsigned int), + .key_offset = offsetof(struct mlxsw_sp_acl_tcam_chunk, priority), + .head_offset = offsetof(struct mlxsw_sp_acl_tcam_chunk, ht_node), + .automatic_shrinking = true, +}; + +static int mlxsw_sp_acl_tcam_group_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group) +{ + struct mlxsw_sp_acl_tcam_region *region; + char pagt_pl[MLXSW_REG_PAGT_LEN]; + int acl_index = 0; + + mlxsw_reg_pagt_pack(pagt_pl, group->id); + list_for_each_entry(region, &group->region_list, list) + mlxsw_reg_pagt_acl_id_pack(pagt_pl, acl_index++, region->id); + mlxsw_reg_pagt_size_set(pagt_pl, acl_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pagt), pagt_pl); +} + +static int +mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam, + struct mlxsw_sp_acl_tcam_group *group, + const struct mlxsw_sp_acl_tcam_pattern *patterns, + unsigned int patterns_count) +{ + int err; + + group->tcam = tcam; + group->patterns = 
patterns; + group->patterns_count = patterns_count; + INIT_LIST_HEAD(&group->region_list); + err = mlxsw_sp_acl_tcam_group_id_get(tcam, &group->id); + if (err) + return err; + + err = mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group); + if (err) + goto err_group_update; + + err = rhashtable_init(&group->chunk_ht, + &mlxsw_sp_acl_tcam_chunk_ht_params); + if (err) + goto err_rhashtable_init; + + return 0; + +err_rhashtable_init: +err_group_update: + mlxsw_sp_acl_tcam_group_id_put(tcam, group->id); + return err; +} + +static void mlxsw_sp_acl_tcam_group_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group) +{ + struct mlxsw_sp_acl_tcam *tcam = group->tcam; + + rhashtable_destroy(&group->chunk_ht); + mlxsw_sp_acl_tcam_group_id_put(tcam, group->id); + WARN_ON(!list_empty(&group->region_list)); +} + +static int +mlxsw_sp_acl_tcam_group_bind(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group, + struct net_device *dev, bool ingress) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + char ppbt_pl[MLXSW_REG_PPBT_LEN]; + + if (!mlxsw_sp_port_dev_check(dev)) + return -EINVAL; + + mlxsw_sp_port = netdev_priv(dev); + group->bound.local_port = mlxsw_sp_port->local_port; + group->bound.ingress = ingress; + mlxsw_reg_ppbt_pack(ppbt_pl, + group->bound.ingress ? MLXSW_REG_PXBT_E_IACL : + MLXSW_REG_PXBT_E_EACL, + MLXSW_REG_PXBT_OP_BIND, group->bound.local_port, + group->id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbt), ppbt_pl); +} + +static void +mlxsw_sp_acl_tcam_group_unbind(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group) +{ + char ppbt_pl[MLXSW_REG_PPBT_LEN]; + + mlxsw_reg_ppbt_pack(ppbt_pl, + group->bound.ingress ? MLXSW_REG_PXBT_E_IACL : + MLXSW_REG_PXBT_E_EACL, + MLXSW_REG_PXBT_OP_UNBIND, group->bound.local_port, + group->id); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbt), ppbt_pl); +} + +static unsigned int +mlxsw_sp_acl_tcam_region_prio(struct mlxsw_sp_acl_tcam_region *region) +{ + struct mlxsw_sp_acl_tcam_chunk *chunk; + + if (list_empty(®ion->chunk_list)) + return 0; + /* As a priority of a region, return priority of the first chunk */ + chunk = list_first_entry(®ion->chunk_list, typeof(*chunk), list); + return chunk->priority; +} + +static unsigned int +mlxsw_sp_acl_tcam_region_max_prio(struct mlxsw_sp_acl_tcam_region *region) +{ + struct mlxsw_sp_acl_tcam_chunk *chunk; + + if (list_empty(®ion->chunk_list)) + return 0; + chunk = list_last_entry(®ion->chunk_list, typeof(*chunk), list); + return chunk->priority; +} + +static void +mlxsw_sp_acl_tcam_group_list_add(struct mlxsw_sp_acl_tcam_group *group, + struct mlxsw_sp_acl_tcam_region *region) +{ + struct mlxsw_sp_acl_tcam_region *region2; + struct list_head *pos; + + /* Position the region inside the list according to priority */ + list_for_each(pos, &group->region_list) { + region2 = list_entry(pos, typeof(*region2), list); + if (mlxsw_sp_acl_tcam_region_prio(region2) > + mlxsw_sp_acl_tcam_region_prio(region)) + break; + } + list_add_tail(®ion->list, pos); + group->region_count++; +} + +static void +mlxsw_sp_acl_tcam_group_list_del(struct mlxsw_sp_acl_tcam_group *group, + struct mlxsw_sp_acl_tcam_region *region) +{ + group->region_count--; + list_del(®ion->list); +} + +static int +mlxsw_sp_acl_tcam_group_region_attach(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group, + struct mlxsw_sp_acl_tcam_region *region) +{ + int err; + + if (group->region_count == group->tcam->max_group_size) + return -ENOBUFS; + + mlxsw_sp_acl_tcam_group_list_add(group, region); + + err = 
mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group); + if (err) + goto err_group_update; + region->group = group; + + return 0; + +err_group_update: + mlxsw_sp_acl_tcam_group_list_del(group, region); + mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group); + return err; +} + +static void +mlxsw_sp_acl_tcam_group_region_detach(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region) +{ + struct mlxsw_sp_acl_tcam_group *group = region->group; + + mlxsw_sp_acl_tcam_group_list_del(group, region); + mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group); +} + +static struct mlxsw_sp_acl_tcam_region * +mlxsw_sp_acl_tcam_group_region_find(struct mlxsw_sp_acl_tcam_group *group, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage, + bool *p_need_split) +{ + struct mlxsw_sp_acl_tcam_region *region, *region2; + struct list_head *pos; + bool issubset; + + list_for_each(pos, &group->region_list) { + region = list_entry(pos, typeof(*region), list); + + /* First, check if the requested priority does not rather belong + * under some of the next regions. + */ + if (pos->next != &group->region_list) { /* not last */ + region2 = list_entry(pos->next, typeof(*region2), list); + if (priority >= mlxsw_sp_acl_tcam_region_prio(region2)) + continue; + } + + issubset = mlxsw_afk_key_info_subset(region->key_info, elusage); + + /* If requested element usage would not fit and the priority + * is lower than the currently inspected region we cannot + * use this region, so return NULL to indicate new region has + * to be created. + */ + if (!issubset && + priority < mlxsw_sp_acl_tcam_region_prio(region)) + return NULL; + + /* If requested element usage would not fit and the priority + * is higher than the currently inspected region we cannot + * use this region. There is still some hope that the next + * region would be the fit. So let it be processed and + * eventually break at the check right above this. + */ + if (!issubset && + priority > mlxsw_sp_acl_tcam_region_max_prio(region)) + continue; + + /* Indicate if the region needs to be split in order to add + * the requested priority. Split is needed when requested + * element usage won't fit into the found region. + */ + *p_need_split = !issubset; + return region; + } + return NULL; /* New region has to be created. 
*/ +} + +static void +mlxsw_sp_acl_tcam_group_use_patterns(struct mlxsw_sp_acl_tcam_group *group, + struct mlxsw_afk_element_usage *elusage, + struct mlxsw_afk_element_usage *out) +{ + const struct mlxsw_sp_acl_tcam_pattern *pattern; + int i; + + for (i = 0; i < group->patterns_count; i++) { + pattern = &group->patterns[i]; + mlxsw_afk_element_usage_fill(out, pattern->elements, + pattern->elements_count); + if (mlxsw_afk_element_usage_subset(elusage, out)) + return; + } + memcpy(out, elusage, sizeof(*out)); +} + +#define MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT 16 +#define MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP 16 + +static int +mlxsw_sp_acl_tcam_region_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region) +{ + struct mlxsw_afk_key_info *key_info = region->key_info; + char ptar_pl[MLXSW_REG_PTAR_LEN]; + unsigned int encodings_count; + int i; + int err; + + mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_ALLOC, + MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT, + region->id, region->tcam_region_info); + encodings_count = mlxsw_afk_key_info_blocks_count_get(key_info); + for (i = 0; i < encodings_count; i++) { + u16 encoding; + + encoding = mlxsw_afk_key_info_block_encoding_get(key_info, i); + mlxsw_reg_ptar_key_id_pack(ptar_pl, i, encoding); + } + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptar), ptar_pl); + if (err) + return err; + mlxsw_reg_ptar_unpack(ptar_pl, region->tcam_region_info); + return 0; +} + +static void +mlxsw_sp_acl_tcam_region_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region) +{ + char ptar_pl[MLXSW_REG_PTAR_LEN]; + + mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_FREE, 0, region->id, + region->tcam_region_info); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptar), ptar_pl); +} + +static int +mlxsw_sp_acl_tcam_region_resize(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region, + u16 new_size) +{ + char ptar_pl[MLXSW_REG_PTAR_LEN]; + + mlxsw_reg_ptar_pack(ptar_pl, MLXSW_REG_PTAR_OP_RESIZE, + new_size, region->id, region->tcam_region_info); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptar), ptar_pl); +} + +static int +mlxsw_sp_acl_tcam_region_enable(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region) +{ + char pacl_pl[MLXSW_REG_PACL_LEN]; + + mlxsw_reg_pacl_pack(pacl_pl, region->id, true, + region->tcam_region_info); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pacl), pacl_pl); +} + +static void +mlxsw_sp_acl_tcam_region_disable(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region) +{ + char pacl_pl[MLXSW_REG_PACL_LEN]; + + mlxsw_reg_pacl_pack(pacl_pl, region->id, false, + region->tcam_region_info); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pacl), pacl_pl); +} + +static int +mlxsw_sp_acl_tcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region, + unsigned int offset, + struct mlxsw_sp_acl_rule_info *rulei) +{ + char ptce2_pl[MLXSW_REG_PTCE2_LEN]; + char *act_set; + char *mask; + char *key; + + mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_WRITE_WRITE, + region->tcam_region_info, offset); + key = mlxsw_reg_ptce2_flex_key_blocks_data(ptce2_pl); + mask = mlxsw_reg_ptce2_mask_data(ptce2_pl); + mlxsw_afk_encode(region->key_info, &rulei->values, key, mask); + + /* Only the first action set belongs here, the rest is in KVD */ + act_set = mlxsw_afa_block_first_set(rulei->act_block); + mlxsw_reg_ptce2_flex_action_set_memcpy_to(ptce2_pl, act_set); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl); +} + +static void 
+mlxsw_sp_acl_tcam_region_entry_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region, + unsigned int offset) +{ + char ptce2_pl[MLXSW_REG_PTCE2_LEN]; + + mlxsw_reg_ptce2_pack(ptce2_pl, false, MLXSW_REG_PTCE2_OP_WRITE_WRITE, + region->tcam_region_info, offset); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl); +} + +#define MLXSW_SP_ACL_TCAM_CATCHALL_PRIO (~0U) + +static int +mlxsw_sp_acl_tcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region) +{ + struct parman_prio *parman_prio = ®ion->catchall.parman_prio; + struct parman_item *parman_item = ®ion->catchall.parman_item; + struct mlxsw_sp_acl_rule_info *rulei; + int err; + + parman_prio_init(region->parman, parman_prio, + MLXSW_SP_ACL_TCAM_CATCHALL_PRIO); + err = parman_item_add(region->parman, parman_prio, parman_item); + if (err) + goto err_parman_item_add; + + rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl); + if (IS_ERR(rulei)) { + err = PTR_ERR(rulei); + goto err_rulei_create; + } + + mlxsw_sp_acl_rulei_act_continue(rulei); + err = mlxsw_sp_acl_rulei_commit(rulei); + if (err) + goto err_rulei_commit; + + err = mlxsw_sp_acl_tcam_region_entry_insert(mlxsw_sp, region, + parman_item->index, rulei); + region->catchall.rulei = rulei; + if (err) + goto err_rule_insert; + + return 0; + +err_rule_insert: +err_rulei_commit: + mlxsw_sp_acl_rulei_destroy(rulei); +err_rulei_create: + parman_item_remove(region->parman, parman_prio, parman_item); +err_parman_item_add: + parman_prio_fini(parman_prio); + return err; +} + +static void +mlxsw_sp_acl_tcam_region_catchall_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region) +{ + struct parman_prio *parman_prio = ®ion->catchall.parman_prio; + struct parman_item *parman_item = ®ion->catchall.parman_item; + struct mlxsw_sp_acl_rule_info *rulei = region->catchall.rulei; + + mlxsw_sp_acl_tcam_region_entry_remove(mlxsw_sp, region, + parman_item->index); + mlxsw_sp_acl_rulei_destroy(rulei); + parman_item_remove(region->parman, parman_prio, parman_item); + parman_prio_fini(parman_prio); +} + +static void +mlxsw_sp_acl_tcam_region_move(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region, + u16 src_offset, u16 dst_offset, u16 size) +{ + char prcr_pl[MLXSW_REG_PRCR_LEN]; + + mlxsw_reg_prcr_pack(prcr_pl, MLXSW_REG_PRCR_OP_MOVE, + region->tcam_region_info, src_offset, + region->tcam_region_info, dst_offset, size); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(prcr), prcr_pl); +} + +static int mlxsw_sp_acl_tcam_region_parman_resize(void *priv, + unsigned long new_count) +{ + struct mlxsw_sp_acl_tcam_region *region = priv; + struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp; + u64 max_tcam_rules; + + max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES); + if (new_count > max_tcam_rules) + return -EINVAL; + return mlxsw_sp_acl_tcam_region_resize(mlxsw_sp, region, new_count); +} + +static void mlxsw_sp_acl_tcam_region_parman_move(void *priv, + unsigned long from_index, + unsigned long to_index, + unsigned long count) +{ + struct mlxsw_sp_acl_tcam_region *region = priv; + struct mlxsw_sp *mlxsw_sp = region->mlxsw_sp; + + mlxsw_sp_acl_tcam_region_move(mlxsw_sp, region, + from_index, to_index, count); +} + +static const struct parman_ops mlxsw_sp_acl_tcam_region_parman_ops = { + .base_count = MLXSW_SP_ACL_TCAM_REGION_BASE_COUNT, + .resize_step = MLXSW_SP_ACL_TCAM_REGION_RESIZE_STEP, + .resize = mlxsw_sp_acl_tcam_region_parman_resize, + .move = mlxsw_sp_acl_tcam_region_parman_move, + .algo = 
PARMAN_ALGO_TYPE_LSORT, +}; + +static struct mlxsw_sp_acl_tcam_region * +mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam, + struct mlxsw_afk_element_usage *elusage) +{ + struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl); + struct mlxsw_sp_acl_tcam_region *region; + int err; + + region = kzalloc(sizeof(*region), GFP_KERNEL); + if (!region) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&region->chunk_list); + region->mlxsw_sp = mlxsw_sp; + + region->parman = parman_create(&mlxsw_sp_acl_tcam_region_parman_ops, + region); + if (!region->parman) { + err = -ENOMEM; + goto err_parman_create; + } + + region->key_info = mlxsw_afk_key_info_get(afk, elusage); + if (IS_ERR(region->key_info)) { + err = PTR_ERR(region->key_info); + goto err_key_info_get; + } + + err = mlxsw_sp_acl_tcam_region_id_get(tcam, &region->id); + if (err) + goto err_region_id_get; + + err = mlxsw_sp_acl_tcam_region_alloc(mlxsw_sp, region); + if (err) + goto err_tcam_region_alloc; + + err = mlxsw_sp_acl_tcam_region_enable(mlxsw_sp, region); + if (err) + goto err_tcam_region_enable; + + err = mlxsw_sp_acl_tcam_region_catchall_add(mlxsw_sp, region); + if (err) + goto err_tcam_region_catchall_add; + + return region; + +err_tcam_region_catchall_add: + mlxsw_sp_acl_tcam_region_disable(mlxsw_sp, region); +err_tcam_region_enable: + mlxsw_sp_acl_tcam_region_free(mlxsw_sp, region); +err_tcam_region_alloc: + mlxsw_sp_acl_tcam_region_id_put(tcam, region->id); +err_region_id_get: + mlxsw_afk_key_info_put(region->key_info); +err_key_info_get: + parman_destroy(region->parman); +err_parman_create: + kfree(region); + return ERR_PTR(err); +} + +static void +mlxsw_sp_acl_tcam_region_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region) +{ + mlxsw_sp_acl_tcam_region_catchall_del(mlxsw_sp, region); + mlxsw_sp_acl_tcam_region_disable(mlxsw_sp, region); + mlxsw_sp_acl_tcam_region_free(mlxsw_sp, region); + mlxsw_sp_acl_tcam_region_id_put(region->group->tcam, region->id); + mlxsw_afk_key_info_put(region->key_info); + parman_destroy(region->parman); + kfree(region); +} + +static int +mlxsw_sp_acl_tcam_chunk_assoc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage, + struct mlxsw_sp_acl_tcam_chunk *chunk) +{ + struct mlxsw_sp_acl_tcam_region *region; + bool region_created = false; + bool need_split; + int err; + + region = mlxsw_sp_acl_tcam_group_region_find(group, priority, elusage, + &need_split); + if (region && need_split) { + /* According to priority, the chunk should belong to an + * existing region. However, this chunk needs elements + * that region does not contain. We need to split the existing + * region into two and create a new region for this chunk + * in between. This is not supported now. 
+ */ + return -EOPNOTSUPP; + } + if (!region) { + struct mlxsw_afk_element_usage region_elusage; + + mlxsw_sp_acl_tcam_group_use_patterns(group, elusage, + &region_elusage); + region = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, group->tcam, + &region_elusage); + if (IS_ERR(region)) + return PTR_ERR(region); + region_created = true; + } + + chunk->region = region; + list_add_tail(&chunk->list, &region->chunk_list); + + if (!region_created) + return 0; + + err = mlxsw_sp_acl_tcam_group_region_attach(mlxsw_sp, group, region); + if (err) + goto err_group_region_attach; + + return 0; + +err_group_region_attach: + mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, region); + return err; +} + +static void +mlxsw_sp_acl_tcam_chunk_deassoc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_chunk *chunk) +{ + struct mlxsw_sp_acl_tcam_region *region = chunk->region; + + list_del(&chunk->list); + if (list_empty(&region->chunk_list)) { + mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, region); + mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, region); + } +} + +static struct mlxsw_sp_acl_tcam_chunk * +mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage) +{ + struct mlxsw_sp_acl_tcam_chunk *chunk; + int err; + + if (priority == MLXSW_SP_ACL_TCAM_CATCHALL_PRIO) + return ERR_PTR(-EINVAL); + + chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); + if (!chunk) + return ERR_PTR(-ENOMEM); + chunk->priority = priority; + chunk->group = group; + chunk->ref_count = 1; + + err = mlxsw_sp_acl_tcam_chunk_assoc(mlxsw_sp, group, priority, + elusage, chunk); + if (err) + goto err_chunk_assoc; + + parman_prio_init(chunk->region->parman, &chunk->parman_prio, priority); + + err = rhashtable_insert_fast(&group->chunk_ht, &chunk->ht_node, + mlxsw_sp_acl_tcam_chunk_ht_params); + if (err) + goto err_rhashtable_insert; + + return chunk; + +err_rhashtable_insert: + parman_prio_fini(&chunk->parman_prio); + mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk); +err_chunk_assoc: + kfree(chunk); + return ERR_PTR(err); +} + +static void +mlxsw_sp_acl_tcam_chunk_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_chunk *chunk) +{ + struct mlxsw_sp_acl_tcam_group *group = chunk->group; + + rhashtable_remove_fast(&group->chunk_ht, &chunk->ht_node, + mlxsw_sp_acl_tcam_chunk_ht_params); + parman_prio_fini(&chunk->parman_prio); + mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk); + kfree(chunk); +} + +static struct mlxsw_sp_acl_tcam_chunk * +mlxsw_sp_acl_tcam_chunk_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage) +{ + struct mlxsw_sp_acl_tcam_chunk *chunk; + + chunk = rhashtable_lookup_fast(&group->chunk_ht, &priority, + mlxsw_sp_acl_tcam_chunk_ht_params); + if (chunk) { + if (WARN_ON(!mlxsw_afk_key_info_subset(chunk->region->key_info, + elusage))) + return ERR_PTR(-EINVAL); + chunk->ref_count++; + return chunk; + } + return mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, group, + priority, elusage); +} + +static void mlxsw_sp_acl_tcam_chunk_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_chunk *chunk) +{ + if (--chunk->ref_count) + return; + mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, chunk); +} + +static int mlxsw_sp_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group, + struct mlxsw_sp_acl_tcam_entry *entry, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp_acl_tcam_chunk *chunk; + struct mlxsw_sp_acl_tcam_region 
*region; + int err; + + chunk = mlxsw_sp_acl_tcam_chunk_get(mlxsw_sp, group, rulei->priority, + &rulei->values.elusage); + if (IS_ERR(chunk)) + return PTR_ERR(chunk); + + region = chunk->region; + err = parman_item_add(region->parman, &chunk->parman_prio, + &entry->parman_item); + if (err) + goto err_parman_item_add; + + err = mlxsw_sp_acl_tcam_region_entry_insert(mlxsw_sp, region, + entry->parman_item.index, + rulei); + if (err) + goto err_rule_insert; + entry->chunk = chunk; + + return 0; + +err_rule_insert: + parman_item_remove(region->parman, &chunk->parman_prio, + &entry->parman_item); +err_parman_item_add: + mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk); + return err; +} + +static void mlxsw_sp_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_entry *entry) +{ + struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk; + struct mlxsw_sp_acl_tcam_region *region = chunk->region; + + mlxsw_sp_acl_tcam_region_entry_remove(mlxsw_sp, region, + entry->parman_item.index); + parman_item_remove(region->parman, &chunk->parman_prio, + &entry->parman_item); + mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk); +} + +static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = { + MLXSW_AFK_ELEMENT_SRC_SYS_PORT, + MLXSW_AFK_ELEMENT_DMAC, + MLXSW_AFK_ELEMENT_SMAC, + MLXSW_AFK_ELEMENT_ETHERTYPE, + MLXSW_AFK_ELEMENT_IP_PROTO, + MLXSW_AFK_ELEMENT_SRC_IP4, + MLXSW_AFK_ELEMENT_DST_IP4, + MLXSW_AFK_ELEMENT_DST_L4_PORT, + MLXSW_AFK_ELEMENT_SRC_L4_PORT, +}; + +static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv6[] = { + MLXSW_AFK_ELEMENT_ETHERTYPE, + MLXSW_AFK_ELEMENT_IP_PROTO, + MLXSW_AFK_ELEMENT_SRC_IP6_HI, + MLXSW_AFK_ELEMENT_SRC_IP6_LO, + MLXSW_AFK_ELEMENT_DST_IP6_HI, + MLXSW_AFK_ELEMENT_DST_IP6_LO, + MLXSW_AFK_ELEMENT_DST_L4_PORT, + MLXSW_AFK_ELEMENT_SRC_L4_PORT, +}; + +static const struct mlxsw_sp_acl_tcam_pattern mlxsw_sp_acl_tcam_patterns[] = { + { + .elements = mlxsw_sp_acl_tcam_pattern_ipv4, + .elements_count = ARRAY_SIZE(mlxsw_sp_acl_tcam_pattern_ipv4), + }, + { + .elements = mlxsw_sp_acl_tcam_pattern_ipv6, + .elements_count = ARRAY_SIZE(mlxsw_sp_acl_tcam_pattern_ipv6), + }, +}; + +#define MLXSW_SP_ACL_TCAM_PATTERNS_COUNT \ + ARRAY_SIZE(mlxsw_sp_acl_tcam_patterns) + +struct mlxsw_sp_acl_tcam_flower_ruleset { + struct mlxsw_sp_acl_tcam_group group; +}; + +struct mlxsw_sp_acl_tcam_flower_rule { + struct mlxsw_sp_acl_tcam_entry entry; +}; + +static int +mlxsw_sp_acl_tcam_flower_ruleset_add(struct mlxsw_sp *mlxsw_sp, + void *priv, void *ruleset_priv) +{ + struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv; + struct mlxsw_sp_acl_tcam *tcam = priv; + + return mlxsw_sp_acl_tcam_group_add(mlxsw_sp, tcam, &ruleset->group, + mlxsw_sp_acl_tcam_patterns, + MLXSW_SP_ACL_TCAM_PATTERNS_COUNT); +} + +static void +mlxsw_sp_acl_tcam_flower_ruleset_del(struct mlxsw_sp *mlxsw_sp, + void *ruleset_priv) +{ + struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv; + + mlxsw_sp_acl_tcam_group_del(mlxsw_sp, &ruleset->group); +} + +static int +mlxsw_sp_acl_tcam_flower_ruleset_bind(struct mlxsw_sp *mlxsw_sp, + void *ruleset_priv, + struct net_device *dev, bool ingress) +{ + struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv; + + return mlxsw_sp_acl_tcam_group_bind(mlxsw_sp, &ruleset->group, + dev, ingress); +} + +static void +mlxsw_sp_acl_tcam_flower_ruleset_unbind(struct mlxsw_sp *mlxsw_sp, + void *ruleset_priv) +{ + struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv; + + mlxsw_sp_acl_tcam_group_unbind(mlxsw_sp, &ruleset->group); +} + 
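/* Editorial sketch, not part of this patch: a minimal illustration of how
 * a consumer such as spectrum_acl.c might drive the profile ops defined
 * around here. The function name, the kzalloc()-based priv handling and
 * the error labels are illustrative assumptions only; the real caller
 * embeds the profile-sized priv area in its own ruleset object.
 */
static int example_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp,
				   const struct mlxsw_sp_acl_profile_ops *ops,
				   void *priv, struct net_device *dev,
				   bool ingress, void **p_ruleset_priv)
{
	void *ruleset_priv;
	int err;

	/* The profile dictates how large its private ruleset area is */
	ruleset_priv = kzalloc(ops->ruleset_priv_size, GFP_KERNEL);
	if (!ruleset_priv)
		return -ENOMEM;

	err = ops->ruleset_add(mlxsw_sp, priv, ruleset_priv);
	if (err)
		goto err_ruleset_add;
	err = ops->ruleset_bind(mlxsw_sp, ruleset_priv, dev, ingress);
	if (err)
		goto err_ruleset_bind;

	*p_ruleset_priv = ruleset_priv;
	return 0;

err_ruleset_bind:
	ops->ruleset_del(mlxsw_sp, ruleset_priv);
err_ruleset_add:
	kfree(ruleset_priv);
	return err;
}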
+static int +mlxsw_sp_acl_tcam_flower_rule_add(struct mlxsw_sp *mlxsw_sp, + void *ruleset_priv, void *rule_priv, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv; + struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv; + + return mlxsw_sp_acl_tcam_entry_add(mlxsw_sp, &ruleset->group, + &rule->entry, rulei); +} + +static void +mlxsw_sp_acl_tcam_flower_rule_del(struct mlxsw_sp *mlxsw_sp, void *rule_priv) +{ + struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv; + + mlxsw_sp_acl_tcam_entry_del(mlxsw_sp, &rule->entry); +} + +static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = { + .ruleset_priv_size = sizeof(struct mlxsw_sp_acl_tcam_flower_ruleset), + .ruleset_add = mlxsw_sp_acl_tcam_flower_ruleset_add, + .ruleset_del = mlxsw_sp_acl_tcam_flower_ruleset_del, + .ruleset_bind = mlxsw_sp_acl_tcam_flower_ruleset_bind, + .ruleset_unbind = mlxsw_sp_acl_tcam_flower_ruleset_unbind, + .rule_priv_size = sizeof(struct mlxsw_sp_acl_tcam_flower_rule), + .rule_add = mlxsw_sp_acl_tcam_flower_rule_add, + .rule_del = mlxsw_sp_acl_tcam_flower_rule_del, +}; + +static const struct mlxsw_sp_acl_profile_ops * +mlxsw_sp_acl_tcam_profile_ops_arr[] = { + [MLXSW_SP_ACL_PROFILE_FLOWER] = &mlxsw_sp_acl_tcam_flower_ops, +}; + +static const struct mlxsw_sp_acl_profile_ops * +mlxsw_sp_acl_tcam_profile_ops(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_acl_profile profile) +{ + const struct mlxsw_sp_acl_profile_ops *ops; + + if (WARN_ON(profile >= ARRAY_SIZE(mlxsw_sp_acl_tcam_profile_ops_arr))) + return NULL; + ops = mlxsw_sp_acl_tcam_profile_ops_arr[profile]; + if (WARN_ON(!ops)) + return NULL; + return ops; +} + +const struct mlxsw_sp_acl_ops mlxsw_sp_acl_tcam_ops = { + .priv_size = sizeof(struct mlxsw_sp_acl_tcam), + .init = mlxsw_sp_acl_tcam_init, + .fini = mlxsw_sp_acl_tcam_fini, + .profile_ops = mlxsw_sp_acl_tcam_profile_ops, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c new file mode 100644 index 000000000000..22ab42925377 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -0,0 +1,316 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/netdevice.h> +#include <net/flow_dissector.h> +#include <net/pkt_cls.h> +#include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_mirred.h> + +#include "spectrum.h" +#include "core_acl_flex_keys.h" + +static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev, + struct mlxsw_sp_acl_rule_info *rulei, + struct tcf_exts *exts) +{ + const struct tc_action *a; + LIST_HEAD(actions); + int err; + + if (tc_no_actions(exts)) + return 0; + + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { + if (is_tcf_gact_shot(a)) { + err = mlxsw_sp_acl_rulei_act_drop(rulei); + if (err) + return err; + } else if (is_tcf_mirred_egress_redirect(a)) { + int ifindex = tcf_mirred_ifindex(a); + struct net_device *out_dev; + + out_dev = __dev_get_by_index(dev_net(dev), ifindex); + if (out_dev == dev) + out_dev = NULL; + + err = mlxsw_sp_acl_rulei_act_fwd(mlxsw_sp, rulei, + out_dev); + if (err) + return err; + } else { + dev_err(mlxsw_sp->bus_info->dev, "Unsupported action\n"); + return -EOPNOTSUPP; + } + } + return 0; +} + +static void mlxsw_sp_flower_parse_ipv4(struct mlxsw_sp_acl_rule_info *rulei, + struct tc_cls_flower_offload *f) +{ + struct flow_dissector_key_ipv4_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + f->key); + struct flow_dissector_key_ipv4_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + f->mask); + + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_SRC_IP4, + ntohl(key->src), ntohl(mask->src)); + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_DST_IP4, + ntohl(key->dst), ntohl(mask->dst)); +} + +static void mlxsw_sp_flower_parse_ipv6(struct mlxsw_sp_acl_rule_info *rulei, + struct tc_cls_flower_offload *f) +{ + struct flow_dissector_key_ipv6_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + f->key); + struct flow_dissector_key_ipv6_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + f->mask); + size_t addr_half_size = sizeof(key->src) / 2; + + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP6_HI, + &key->src.s6_addr[0], + &mask->src.s6_addr[0], + addr_half_size); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP6_LO, + &key->src.s6_addr[addr_half_size], + &mask->src.s6_addr[addr_half_size], + addr_half_size); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP6_HI, + &key->dst.s6_addr[0], + &mask->dst.s6_addr[0], + addr_half_size); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP6_LO, + &key->dst.s6_addr[addr_half_size], + &mask->dst.s6_addr[addr_half_size], + addr_half_size); +} + +static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct tc_cls_flower_offload *f, + u8 ip_proto) +{ + struct flow_dissector_key_ports *key, *mask; + + if 
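/* Editorial note, not part of this patch: every dissector key above is
 * read twice through skb_flow_dissector_target(), once from f->key and
 * once from f->mask; a set mask bit marks a key bit that must match.
 * The check below additionally refuses L4 port keys unless ip_proto is
 * pinned to TCP or UDP.
 */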
(!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) + return 0; + + if (ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) { + dev_err(mlxsw_sp->bus_info->dev, "Only UDP and TCP keys are supported\n"); + return -EINVAL; + } + + key = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_PORTS, + f->key); + mask = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_PORTS, + f->mask); + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_DST_L4_PORT, + ntohs(key->dst), ntohs(mask->dst)); + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_SRC_L4_PORT, + ntohs(key->src), ntohs(mask->src)); + return 0; +} + +static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev, + struct mlxsw_sp_acl_rule_info *rulei, + struct tc_cls_flower_offload *f) +{ + u16 addr_type = 0; + u8 ip_proto = 0; + int err; + + if (f->dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_PORTS))) { + dev_err(mlxsw_sp->bus_info->dev, "Unsupported key\n"); + return -EOPNOTSUPP; + } + + mlxsw_sp_acl_rulei_priority(rulei, f->prio); + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_dissector_key_control *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_CONTROL, + f->key); + addr_type = key->addr_type; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_dissector_key_basic *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->key); + struct flow_dissector_key_basic *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->mask); + u16 n_proto_key = ntohs(key->n_proto); + u16 n_proto_mask = ntohs(mask->n_proto); + + if (n_proto_key == ETH_P_ALL) { + n_proto_key = 0; + n_proto_mask = 0; + } + mlxsw_sp_acl_rulei_keymask_u32(rulei, + MLXSW_AFK_ELEMENT_ETHERTYPE, + n_proto_key, n_proto_mask); + + ip_proto = key->ip_proto; + mlxsw_sp_acl_rulei_keymask_u32(rulei, + MLXSW_AFK_ELEMENT_IP_PROTO, + key->ip_proto, mask->ip_proto); + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_dissector_key_eth_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + f->key); + struct flow_dissector_key_eth_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + f->mask); + + mlxsw_sp_acl_rulei_keymask_buf(rulei, + MLXSW_AFK_ELEMENT_DMAC, + key->dst, mask->dst, + sizeof(key->dst)); + mlxsw_sp_acl_rulei_keymask_buf(rulei, + MLXSW_AFK_ELEMENT_SMAC, + key->src, mask->src, + sizeof(key->src)); + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) + mlxsw_sp_flower_parse_ipv4(rulei, f); + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) + mlxsw_sp_flower_parse_ipv6(rulei, f); + + err = mlxsw_sp_flower_parse_ports(mlxsw_sp, rulei, f, ip_proto); + if (err) + return err; + + return mlxsw_sp_flower_parse_actions(mlxsw_sp, dev, rulei, f->exts); +} + +int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, + __be16 protocol, struct tc_cls_flower_offload *f) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct net_device *dev = mlxsw_sp_port->dev; + struct mlxsw_sp_acl_rule_info *rulei; + struct mlxsw_sp_acl_ruleset *ruleset; + struct mlxsw_sp_acl_rule *rule; + int err; + + ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, dev, ingress, + 
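/* Editorial note, not part of this patch: the (dev, ingress, profile)
 * triple identifies a shared ruleset; mlxsw_sp_flower_destroy() further
 * below repeats this same lookup to locate the ruleset, and then the
 * rule by f->cookie, when tc removes the filter.
 */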
MLXSW_SP_ACL_PROFILE_FLOWER); + if (IS_ERR(ruleset)) + return PTR_ERR(ruleset); + + rule = mlxsw_sp_acl_rule_create(mlxsw_sp, ruleset, f->cookie); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto err_rule_create; + } + + rulei = mlxsw_sp_acl_rule_rulei(rule); + err = mlxsw_sp_flower_parse(mlxsw_sp, dev, rulei, f); + if (err) + goto err_flower_parse; + + err = mlxsw_sp_acl_rulei_commit(rulei); + if (err) + goto err_rulei_commit; + + err = mlxsw_sp_acl_rule_add(mlxsw_sp, rule); + if (err) + goto err_rule_add; + + mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset); + return 0; + +err_rule_add: +err_rulei_commit: +err_flower_parse: + mlxsw_sp_acl_rule_destroy(mlxsw_sp, rule); +err_rule_create: + mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset); + return err; +} + +void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, + struct tc_cls_flower_offload *f) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_acl_ruleset *ruleset; + struct mlxsw_sp_acl_rule *rule; + + ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, mlxsw_sp_port->dev, + ingress, + MLXSW_SP_ACL_PROFILE_FLOWER); + if (WARN_ON(IS_ERR(ruleset))) + return; + + rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset, f->cookie); + if (!WARN_ON(!rule)) { + mlxsw_sp_acl_rule_del(mlxsw_sp, rule); + mlxsw_sp_acl_rule_destroy(mlxsw_sp, rule); + } + + mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 9e494a446b7e..d7ac22d7f940 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -40,6 +40,7 @@ #include <linux/bitops.h> #include <linux/in6.h> #include <linux/notifier.h> +#include <linux/inetdevice.h> #include <net/netevent.h> #include <net/neighbour.h> #include <net/arp.h> @@ -108,7 +109,6 @@ mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage, } struct mlxsw_sp_fib_key { - struct net_device *dev; unsigned char addr[sizeof(struct in6_addr)]; unsigned char prefix_len; }; @@ -121,95 +121,39 @@ enum mlxsw_sp_fib_entry_type { struct mlxsw_sp_nexthop_group; -struct mlxsw_sp_fib_entry { - struct rhash_head ht_node; +struct mlxsw_sp_fib_node { + struct list_head entry_list; struct list_head list; + struct rhash_head ht_node; + struct mlxsw_sp_vr *vr; struct mlxsw_sp_fib_key key; +}; + +struct mlxsw_sp_fib_entry_params { + u32 tb_id; + u32 prio; + u8 tos; + u8 type; +}; + +struct mlxsw_sp_fib_entry { + struct list_head list; + struct mlxsw_sp_fib_node *fib_node; enum mlxsw_sp_fib_entry_type type; - unsigned int ref_count; - u16 rif; /* used for action local */ - struct mlxsw_sp_vr *vr; - struct fib_info *fi; struct list_head nexthop_group_node; struct mlxsw_sp_nexthop_group *nh_group; + struct mlxsw_sp_fib_entry_params params; + bool offloaded; }; struct mlxsw_sp_fib { struct rhashtable ht; - struct list_head entry_list; + struct list_head node_list; unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT]; struct mlxsw_sp_prefix_usage prefix_usage; }; -static const struct rhashtable_params mlxsw_sp_fib_ht_params = { - .key_offset = offsetof(struct mlxsw_sp_fib_entry, key), - .head_offset = offsetof(struct mlxsw_sp_fib_entry, ht_node), - .key_len = sizeof(struct mlxsw_sp_fib_key), - .automatic_shrinking = true, -}; - -static int mlxsw_sp_fib_entry_insert(struct mlxsw_sp_fib *fib, - struct mlxsw_sp_fib_entry *fib_entry) -{ - unsigned char prefix_len = fib_entry->key.prefix_len; - int err; - - err = 
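/* Editorial note, not part of this patch: the hunks around here drop the
 * net_device from mlxsw_sp_fib_key, so routes are keyed purely by prefix;
 * a new mlxsw_sp_fib_node owns the per-prefix entry_list, and entries
 * within it are told apart by the tb_id/prio/tos/type params.
 */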
rhashtable_insert_fast(&fib->ht, &fib_entry->ht_node, - mlxsw_sp_fib_ht_params); - if (err) - return err; - list_add_tail(&fib_entry->list, &fib->entry_list); - if (fib->prefix_ref_count[prefix_len]++ == 0) - mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len); - return 0; -} - -static void mlxsw_sp_fib_entry_remove(struct mlxsw_sp_fib *fib, - struct mlxsw_sp_fib_entry *fib_entry) -{ - unsigned char prefix_len = fib_entry->key.prefix_len; - - if (--fib->prefix_ref_count[prefix_len] == 0) - mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len); - list_del(&fib_entry->list); - rhashtable_remove_fast(&fib->ht, &fib_entry->ht_node, - mlxsw_sp_fib_ht_params); -} - -static struct mlxsw_sp_fib_entry * -mlxsw_sp_fib_entry_create(struct mlxsw_sp_fib *fib, const void *addr, - size_t addr_len, unsigned char prefix_len, - struct net_device *dev) -{ - struct mlxsw_sp_fib_entry *fib_entry; - - fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL); - if (!fib_entry) - return NULL; - fib_entry->key.dev = dev; - memcpy(fib_entry->key.addr, addr, addr_len); - fib_entry->key.prefix_len = prefix_len; - return fib_entry; -} - -static void mlxsw_sp_fib_entry_destroy(struct mlxsw_sp_fib_entry *fib_entry) -{ - kfree(fib_entry); -} - -static struct mlxsw_sp_fib_entry * -mlxsw_sp_fib_entry_lookup(struct mlxsw_sp_fib *fib, const void *addr, - size_t addr_len, unsigned char prefix_len, - struct net_device *dev) -{ - struct mlxsw_sp_fib_key key; - - memset(&key, 0, sizeof(key)); - key.dev = dev; - memcpy(key.addr, addr, addr_len); - key.prefix_len = prefix_len; - return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params); -} +static const struct rhashtable_params mlxsw_sp_fib_ht_params; static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void) { @@ -222,7 +166,7 @@ static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void) err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params); if (err) goto err_rhashtable_init; - INIT_LIST_HEAD(&fib->entry_list); + INIT_LIST_HEAD(&fib->node_list); return fib; err_rhashtable_init: @@ -232,6 +176,7 @@ err_rhashtable_init: static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib) { + WARN_ON(!list_empty(&fib->node_list)); rhashtable_destroy(&fib->ht); kfree(fib); } @@ -610,12 +555,11 @@ struct mlxsw_sp_neigh_key { }; struct mlxsw_sp_neigh_entry { + struct list_head rif_list_node; struct rhash_head ht_node; struct mlxsw_sp_neigh_key key; u16 rif; - bool offloaded; - struct delayed_work dw; - struct mlxsw_sp_port *mlxsw_sp_port; + bool connected; unsigned char ha[ETH_ALEN]; struct list_head nexthop_list; /* list of nexthops using * this neigh entry @@ -629,105 +573,91 @@ static const struct rhashtable_params mlxsw_sp_neigh_ht_params = { .key_len = sizeof(struct mlxsw_sp_neigh_key), }; -static int -mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_neigh_entry *neigh_entry) -{ - return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht, - &neigh_entry->ht_node, - mlxsw_sp_neigh_ht_params); -} - -static void -mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_neigh_entry *neigh_entry) -{ - rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht, - &neigh_entry->ht_node, - mlxsw_sp_neigh_ht_params); -} - -static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work); - static struct mlxsw_sp_neigh_entry * -mlxsw_sp_neigh_entry_create(struct neighbour *n, u16 rif) +mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n, + u16 rif) { struct mlxsw_sp_neigh_entry *neigh_entry; - neigh_entry = 
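/* Editorial note, not part of this patch: the allocation below moves from
 * GFP_ATOMIC to GFP_KERNEL because neigh entries are no longer created
 * from the atomic netevent notifier; creation now happens in the
 * process-context work item added later in this patch, under RTNL.
 */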
kzalloc(sizeof(*neigh_entry), GFP_ATOMIC); + neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL); if (!neigh_entry) return NULL; + neigh_entry->key.n = n; neigh_entry->rif = rif; - INIT_DELAYED_WORK(&neigh_entry->dw, mlxsw_sp_router_neigh_update_hw); INIT_LIST_HEAD(&neigh_entry->nexthop_list); + return neigh_entry; } -static void -mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp_neigh_entry *neigh_entry) +static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry) { kfree(neigh_entry); } -static struct mlxsw_sp_neigh_entry * -mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) +static int +mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) { - struct mlxsw_sp_neigh_key key; + return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht, + &neigh_entry->ht_node, + mlxsw_sp_neigh_ht_params); +} - key.n = n; - return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht, - &key, mlxsw_sp_neigh_ht_params); +static void +mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht, + &neigh_entry->ht_node, + mlxsw_sp_neigh_ht_params); } -int mlxsw_sp_router_neigh_construct(struct net_device *dev, - struct neighbour *n) +static struct mlxsw_sp_neigh_entry * +mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) { - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_neigh_entry *neigh_entry; struct mlxsw_sp_rif *r; int err; - if (n->tbl != &arp_tbl) - return 0; - - neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); - if (neigh_entry) - return 0; - r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev); - if (WARN_ON(!r)) - return -EINVAL; + if (!r) + return ERR_PTR(-EINVAL); - neigh_entry = mlxsw_sp_neigh_entry_create(n, r->rif); + neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, r->rif); if (!neigh_entry) - return -ENOMEM; + return ERR_PTR(-ENOMEM); + err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); if (err) goto err_neigh_entry_insert; - return 0; + + list_add(&neigh_entry->rif_list_node, &r->neigh_list); + + return neigh_entry; err_neigh_entry_insert: - mlxsw_sp_neigh_entry_destroy(neigh_entry); - return err; + mlxsw_sp_neigh_entry_free(neigh_entry); + return ERR_PTR(err); } -void mlxsw_sp_router_neigh_destroy(struct net_device *dev, - struct neighbour *n) +static void +mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) { - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - struct mlxsw_sp_neigh_entry *neigh_entry; + list_del(&neigh_entry->rif_list_node); + mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); + mlxsw_sp_neigh_entry_free(neigh_entry); +} - if (n->tbl != &arp_tbl) - return; +static struct mlxsw_sp_neigh_entry * +mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) +{ + struct mlxsw_sp_neigh_key key; - neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); - if (!neigh_entry) - return; - mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); - mlxsw_sp_neigh_entry_destroy(neigh_entry); + key.n = n; + return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht, + &key, mlxsw_sp_neigh_ht_params); } static void @@ -866,13 +796,11 @@ static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp) /* Take RTNL mutex here to prevent lists from changes */ rtnl_lock(); 
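/* Editorial note, not part of this patch: the list_empty() guard is
 * dropped below because entries now sit on nexthop_neighs_list only while
 * at least one nexthop references them, so every listed neigh can be
 * kicked with neigh_event_send() unconditionally.
 */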
list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list, - nexthop_neighs_list_node) { + nexthop_neighs_list_node) /* If this neigh have nexthops, make the kernel think this neigh * is active regardless of the traffic. */ - if (!list_empty(&neigh_entry->nexthop_list)) - neigh_event_send(neigh_entry->key.n, NULL); - } + neigh_event_send(neigh_entry->key.n, NULL); rtnl_unlock(); } @@ -916,11 +844,9 @@ static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work) */ rtnl_lock(); list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list, - nexthop_neighs_list_node) { - if (!(neigh_entry->key.n->nud_state & NUD_VALID) && - !list_empty(&neigh_entry->nexthop_list)) + nexthop_neighs_list_node) + if (!neigh_entry->connected) neigh_event_send(neigh_entry->key.n, NULL); - } rtnl_unlock(); mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, @@ -932,79 +858,101 @@ mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, bool removing); -static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work) +static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding) +{ + return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD : + MLXSW_REG_RAUHT_OP_WRITE_DELETE; +} + +static void +mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + enum mlxsw_reg_rauht_op op) { - struct mlxsw_sp_neigh_entry *neigh_entry = - container_of(work, struct mlxsw_sp_neigh_entry, dw.work); struct neighbour *n = neigh_entry->key.n; - struct mlxsw_sp_port *mlxsw_sp_port = neigh_entry->mlxsw_sp_port; - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u32 dip = ntohl(*((__be32 *) n->primary_key)); char rauht_pl[MLXSW_REG_RAUHT_LEN]; - struct net_device *dev; + + mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha, + dip); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); +} + +static void +mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + bool adding) +{ + if (!adding && !neigh_entry->connected) + return; + neigh_entry->connected = adding; + if (neigh_entry->key.n->tbl == &arp_tbl) + mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry, + mlxsw_sp_rauht_op(adding)); + else + WARN_ON_ONCE(1); +} + +struct mlxsw_sp_neigh_event_work { + struct work_struct work; + struct mlxsw_sp *mlxsw_sp; + struct neighbour *n; +}; + +static void mlxsw_sp_router_neigh_event_work(struct work_struct *work) +{ + struct mlxsw_sp_neigh_event_work *neigh_work = + container_of(work, struct mlxsw_sp_neigh_event_work, work); + struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp; + struct mlxsw_sp_neigh_entry *neigh_entry; + struct neighbour *n = neigh_work->n; + unsigned char ha[ETH_ALEN]; bool entry_connected; u8 nud_state, dead; - bool updating; - bool removing; - bool adding; - u32 dip; - int err; + /* If these parameters are changed after we release the lock, + * then we are guaranteed to receive another event letting us + * know about it. 
+ */ read_lock_bh(&n->lock); - dip = ntohl(*((__be32 *) n->primary_key)); - memcpy(neigh_entry->ha, n->ha, sizeof(neigh_entry->ha)); + memcpy(ha, n->ha, ETH_ALEN); nud_state = n->nud_state; dead = n->dead; - dev = n->dev; read_unlock_bh(&n->lock); + rtnl_lock(); entry_connected = nud_state & NUD_VALID && !dead; - adding = (!neigh_entry->offloaded) && entry_connected; - updating = neigh_entry->offloaded && entry_connected; - removing = neigh_entry->offloaded && !entry_connected; - - if (adding || updating) { - mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_ADD, - neigh_entry->rif, - neigh_entry->ha, dip); - err = mlxsw_reg_write(mlxsw_sp->core, - MLXSW_REG(rauht), rauht_pl); - if (err) { - netdev_err(dev, "Could not add neigh %pI4h\n", &dip); - neigh_entry->offloaded = false; - } else { - neigh_entry->offloaded = true; - } - mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, false); - } else if (removing) { - mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE, - neigh_entry->rif, - neigh_entry->ha, dip); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), - rauht_pl); - if (err) { - netdev_err(dev, "Could not delete neigh %pI4h\n", &dip); - neigh_entry->offloaded = true; - } else { - neigh_entry->offloaded = false; - } - mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, true); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); + if (!entry_connected && !neigh_entry) + goto out; + if (!neigh_entry) { + neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n); + if (IS_ERR(neigh_entry)) + goto out; } + memcpy(neigh_entry->ha, ha, ETH_ALEN); + mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected); + mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected); + + if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list)) + mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); + +out: + rtnl_unlock(); neigh_release(n); - mlxsw_sp_port_dev_put(mlxsw_sp_port); + kfree(neigh_work); } int mlxsw_sp_router_netevent_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct mlxsw_sp_neigh_entry *neigh_entry; + struct mlxsw_sp_neigh_event_work *neigh_work; struct mlxsw_sp_port *mlxsw_sp_port; struct mlxsw_sp *mlxsw_sp; unsigned long interval; - struct net_device *dev; struct neigh_parms *p; struct neighbour *n; - u32 dip; switch (event) { case NETEVENT_DELAY_PROBE_TIME_UPDATE: @@ -1029,33 +977,31 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, break; case NETEVENT_NEIGH_UPDATE: n = ptr; - dev = n->dev; if (n->tbl != &arp_tbl) return NOTIFY_DONE; - mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(dev); + mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev); if (!mlxsw_sp_port) return NOTIFY_DONE; - mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - dip = ntohl(*((__be32 *) n->primary_key)); - neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); - if (WARN_ON(!neigh_entry)) { + neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC); + if (!neigh_work) { mlxsw_sp_port_dev_put(mlxsw_sp_port); - return NOTIFY_DONE; + return NOTIFY_BAD; } - neigh_entry->mlxsw_sp_port = mlxsw_sp_port; + + INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work); + neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + neigh_work->n = n; /* Take a reference to ensure the neighbour won't be * destructed until we drop the reference in delayed * work. 
*/ neigh_clone(n); - if (!mlxsw_core_schedule_dw(&neigh_entry->dw, 0)) { - neigh_release(n); - mlxsw_sp_port_dev_put(mlxsw_sp_port); - } + mlxsw_core_schedule_work(&neigh_work->work); + mlxsw_sp_port_dev_put(mlxsw_sp_port); break; } @@ -1093,11 +1039,40 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp) rhashtable_destroy(&mlxsw_sp->router.neigh_ht); } +static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_rif *r) +{ + char rauht_pl[MLXSW_REG_RAUHT_LEN]; + + mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL, + r->rif, r->addr); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); +} + +static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *r) +{ + struct mlxsw_sp_neigh_entry *neigh_entry, *tmp; + + mlxsw_sp_neigh_rif_flush(mlxsw_sp, r); + list_for_each_entry_safe(neigh_entry, tmp, &r->neigh_list, + rif_list_node) + mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); +} + +struct mlxsw_sp_nexthop_key { + struct fib_nh *fib_nh; +}; + struct mlxsw_sp_nexthop { struct list_head neigh_list_node; /* member of neigh entry list */ + struct list_head rif_list_node; struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group * this belongs to */ + struct rhash_head ht_node; + struct mlxsw_sp_nexthop_key key; + struct mlxsw_sp_rif *r; u8 should_offload:1, /* set indicates this neigh is connected and * should be put to KVD linear area of this group. */ @@ -1110,16 +1085,81 @@ struct mlxsw_sp_nexthop { struct mlxsw_sp_neigh_entry *neigh_entry; }; +struct mlxsw_sp_nexthop_group_key { + struct fib_info *fi; +}; + struct mlxsw_sp_nexthop_group { - struct list_head list; /* node in mlxsw->router.nexthop_group_list */ + struct rhash_head ht_node; struct list_head fib_list; /* list of fib entries that use this group */ - u8 adj_index_valid:1; + struct mlxsw_sp_nexthop_group_key key; + u8 adj_index_valid:1, + gateway:1; /* routes using the group use a gateway */ u32 adj_index; u16 ecmp_size; u16 count; struct mlxsw_sp_nexthop nexthops[0]; +#define nh_rif nexthops[0].r }; +static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_nexthop_group, key), + .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node), + .key_len = sizeof(struct mlxsw_sp_nexthop_group_key), +}; + +static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_group_ht, + &nh_grp->ht_node, + mlxsw_sp_nexthop_group_ht_params); +} + +static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + rhashtable_remove_fast(&mlxsw_sp->router.nexthop_group_ht, + &nh_grp->ht_node, + mlxsw_sp_nexthop_group_ht_params); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop_group_lookup(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group_key key) +{ + return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_group_ht, &key, + mlxsw_sp_nexthop_group_ht_params); +} + +static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_nexthop, key), + .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node), + .key_len = sizeof(struct mlxsw_sp_nexthop_key), +}; + +static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_ht, + &nh->ht_node, 
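/* Editorial note, not part of this patch: nexthops are indexed by their
 * struct fib_nh pointer and nexthop groups by their struct fib_info
 * pointer, turning lookups into rhashtable hits and making the linear
 * mlxsw_sp_nexthop_group_match() scan removed later in this patch
 * unnecessary.
 */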
mlxsw_sp_nexthop_ht_params); +} + +static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + rhashtable_remove_fast(&mlxsw_sp->router.nexthop_ht, &nh->ht_node, + mlxsw_sp_nexthop_ht_params); +} + +static struct mlxsw_sp_nexthop * +mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_key key) +{ + return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_ht, &key, + mlxsw_sp_nexthop_ht_params); +} + static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, u32 adj_index, u16 ecmp_size, @@ -1144,9 +1184,9 @@ static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp, int err; list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { - if (vr == fib_entry->vr) + if (vr == fib_entry->fib_node->vr) continue; - vr = fib_entry->vr; + vr = fib_entry->fib_node->vr; err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr, old_adj_index, old_ecmp_size, @@ -1234,6 +1274,11 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, int i; int err; + if (!nh_grp->gateway) { + mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); + return; + } + for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; @@ -1336,42 +1381,63 @@ mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_nexthop *nh; - /* Take RTNL mutex here to prevent lists from changes */ - rtnl_lock(); list_for_each_entry(nh, &neigh_entry->nexthop_list, neigh_list_node) { __mlxsw_sp_nexthop_neigh_update(nh, removing); mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); } - rtnl_unlock(); } -static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group *nh_grp, - struct mlxsw_sp_nexthop *nh, - struct fib_nh *fib_nh) +static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh, + struct mlxsw_sp_rif *r) +{ + if (nh->r) + return; + + nh->r = r; + list_add(&nh->rif_list_node, &r->nexthop_list); +} + +static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh) +{ + if (!nh->r) + return; + + list_del(&nh->rif_list_node); + nh->r = NULL; +} + +static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) { struct mlxsw_sp_neigh_entry *neigh_entry; - struct net_device *dev = fib_nh->nh_dev; + struct fib_nh *fib_nh = nh->key.fib_nh; struct neighbour *n; u8 nud_state, dead; + int err; + + if (!nh->nh_grp->gateway || nh->neigh_entry) + return 0; /* Take a reference of neigh here ensuring that neigh would * not be detructed before the nexthop entry is finished. * The reference is taken either in neigh_lookup() or - * in neith_create() in case n is not found. + * in neigh_create() in case n is not found. 
*/ - n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, dev); + n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev); if (!n) { - n = neigh_create(&arp_tbl, &fib_nh->nh_gw, dev); + n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev); if (IS_ERR(n)) return PTR_ERR(n); neigh_event_send(n, NULL); } neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); if (!neigh_entry) { - neigh_release(n); - return -EINVAL; + neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n); + if (IS_ERR(neigh_entry)) { + err = -EINVAL; + goto err_neigh_entry_create; + } } /* If that is the first nexthop connected to that neigh, add to @@ -1381,7 +1447,6 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, list_add_tail(&neigh_entry->nexthop_neighs_list_node, &mlxsw_sp->router.nexthop_neighs_list); - nh->nh_grp = nh_grp; nh->neigh_entry = neigh_entry; list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list); read_lock_bh(&n->lock); @@ -1391,23 +1456,126 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead)); return 0; + +err_neigh_entry_create: + neigh_release(n); + return err; } -static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh) +static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) { struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; + struct neighbour *n; + + if (!neigh_entry) + return; + n = neigh_entry->key.n; __mlxsw_sp_nexthop_neigh_update(nh, true); list_del(&nh->neigh_list_node); + nh->neigh_entry = NULL; /* If that is the last nexthop connected to that neigh, remove from * nexthop_neighs_list */ - if (list_empty(&nh->neigh_entry->nexthop_list)) - list_del(&nh->neigh_entry->nexthop_neighs_list_node); + if (list_empty(&neigh_entry->nexthop_list)) + list_del(&neigh_entry->nexthop_neighs_list_node); - neigh_release(neigh_entry->key.n); + if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list)) + mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); + + neigh_release(n); +} + +static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + struct fib_nh *fib_nh) +{ + struct net_device *dev = fib_nh->nh_dev; + struct in_device *in_dev; + struct mlxsw_sp_rif *r; + int err; + + nh->nh_grp = nh_grp; + nh->key.fib_nh = fib_nh; + err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh); + if (err) + return err; + + in_dev = __in_dev_get_rtnl(dev); + if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + fib_nh->nh_flags & RTNH_F_LINKDOWN) + return 0; + + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!r) + return 0; + mlxsw_sp_nexthop_rif_init(nh, r); + + err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); + if (err) + goto err_nexthop_neigh_init; + + return 0; + +err_nexthop_neigh_init: + mlxsw_sp_nexthop_remove(mlxsw_sp, nh); + return err; +} + +static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop_rif_fini(nh); + mlxsw_sp_nexthop_remove(mlxsw_sp, nh); +} + +static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp, + unsigned long event, struct fib_nh *fib_nh) +{ + struct mlxsw_sp_nexthop_key key; + struct mlxsw_sp_nexthop *nh; + struct mlxsw_sp_rif *r; + + if (mlxsw_sp->router.aborted) + return; + + key.fib_nh = fib_nh; + nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key); + if (WARN_ON_ONCE(!nh)) + return; + + r = 
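/* Editorial note, not part of this patch: FIB_EVENT_NH_ADD/NH_DEL are
 * resolved back to the affected nexthop through the rhashtable, and the
 * RIF lookup below decides whether the nexthop can be attached to a
 * router interface before its group is refreshed in hardware.
 */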
mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev); + if (!r) + return; + + switch (event) { + case FIB_EVENT_NH_ADD: + mlxsw_sp_nexthop_rif_init(nh, r); + mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); + break; + case FIB_EVENT_NH_DEL: + mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop_rif_fini(nh); + break; + } + + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); +} + +static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *r) +{ + struct mlxsw_sp_nexthop *nh, *tmp; + + list_for_each_entry_safe(nh, tmp, &r->nexthop_list, rif_list_node) { + mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop_rif_fini(nh); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); + } } static struct mlxsw_sp_nexthop_group * @@ -1426,7 +1594,9 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) if (!nh_grp) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&nh_grp->fib_list); + nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK; nh_grp->count = fi->fib_nhs; + nh_grp->key.fi = fi; for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; fib_nh = &fi->fib_nh[i]; @@ -1434,13 +1604,18 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) if (err) goto err_nexthop_init; } - list_add_tail(&nh_grp->list, &mlxsw_sp->router.nexthop_group_list); + err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp); + if (err) + goto err_nexthop_group_insert; mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); return nh_grp; +err_nexthop_group_insert: err_nexthop_init: - for (i--; i >= 0; i--) + for (i--; i >= 0; i--) { + nh = &nh_grp->nexthops[i]; mlxsw_sp_nexthop_fini(mlxsw_sp, nh); + } kfree(nh_grp); return ERR_PTR(err); } @@ -1452,7 +1627,7 @@ mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh; int i; - list_del(&nh_grp->list); + mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp); for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; mlxsw_sp_nexthop_fini(mlxsw_sp, nh); @@ -1462,59 +1637,15 @@ mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp, kfree(nh_grp); } -static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop *nh, - struct fib_info *fi) -{ - int i; - - for (i = 0; i < fi->fib_nhs; i++) { - struct fib_nh *fib_nh = &fi->fib_nh[i]; - struct neighbour *n = nh->neigh_entry->key.n; - - if (memcmp(n->primary_key, &fib_nh->nh_gw, - sizeof(fib_nh->nh_gw)) == 0 && - n->dev == fib_nh->nh_dev) - return true; - } - return false; -} - -static bool mlxsw_sp_nexthop_group_match(struct mlxsw_sp_nexthop_group *nh_grp, - struct fib_info *fi) -{ - int i; - - if (nh_grp->count != fi->fib_nhs) - return false; - for (i = 0; i < nh_grp->count; i++) { - struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; - - if (!mlxsw_sp_nexthop_match(nh, fi)) - return false; - } - return true; -} - -static struct mlxsw_sp_nexthop_group * -mlxsw_sp_nexthop_group_find(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) -{ - struct mlxsw_sp_nexthop_group *nh_grp; - - list_for_each_entry(nh_grp, &mlxsw_sp->router.nexthop_group_list, - list) { - if (mlxsw_sp_nexthop_group_match(nh_grp, fi)) - return nh_grp; - } - return NULL; -} - static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, struct fib_info *fi) { + struct mlxsw_sp_nexthop_group_key key; struct mlxsw_sp_nexthop_group *nh_grp; - nh_grp = mlxsw_sp_nexthop_group_find(mlxsw_sp, fi); + key.fi = fi; + nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key); if (!nh_grp) { nh_grp = 
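/* Editorial note, not part of this patch: a classic get/put pattern. The
 * group keyed by this fib_info is reused on a lookup hit and only created
 * on a miss; mlxsw_sp_nexthop_group_put() destroys it once the last fib
 * entry on its fib_list lets go.
 */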
mlxsw_sp_nexthop_group_create(mlxsw_sp, fi); if (IS_ERR(nh_grp)) @@ -1536,13 +1667,82 @@ static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp); } +static bool +mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group; + + if (fib_entry->params.tos) + return false; + + switch (fib_entry->type) { + case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: + return !!nh_group->adj_index_valid; + case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: + return !!nh_group->nh_rif; + default: + return false; + } +} + +static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +{ + fib_entry->offloaded = true; + + switch (fib_entry->fib_node->vr->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + fib_info_offload_inc(fib_entry->nh_group->key.fi); + break; + case MLXSW_SP_L3_PROTO_IPV6: + WARN_ON_ONCE(1); + } +} + +static void +mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) +{ + switch (fib_entry->fib_node->vr->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + fib_info_offload_dec(fib_entry->nh_group->key.fi); + break; + case MLXSW_SP_L3_PROTO_IPV6: + WARN_ON_ONCE(1); + } + + fib_entry->offloaded = false; +} + +static void +mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op, int err) +{ + switch (op) { + case MLXSW_REG_RALUE_OP_WRITE_DELETE: + if (!fib_entry->offloaded) + return; + return mlxsw_sp_fib_entry_offload_unset(fib_entry); + case MLXSW_REG_RALUE_OP_WRITE_WRITE: + if (err) + return; + if (mlxsw_sp_fib_entry_should_offload(fib_entry) && + !fib_entry->offloaded) + mlxsw_sp_fib_entry_offload_set(fib_entry); + else if (!mlxsw_sp_fib_entry_should_offload(fib_entry) && + fib_entry->offloaded) + mlxsw_sp_fib_entry_offload_unset(fib_entry); + return; + default: + return; + } +} + static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) { char ralue_pl[MLXSW_REG_RALUE_LEN]; - u32 *p_dip = (u32 *) fib_entry->key.addr; - struct mlxsw_sp_vr *vr = fib_entry->vr; + u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; + struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr; enum mlxsw_reg_ralue_trap_action trap_action; u16 trap_id = 0; u32 adjacency_index = 0; @@ -1552,7 +1752,7 @@ static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, * with provided ECMP size. Otherwise, setup trap and pass * traffic to kernel. 
*/ - if (fib_entry->nh_group->adj_index_valid) { + if (mlxsw_sp_fib_entry_should_offload(fib_entry)) { trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; adjacency_index = fib_entry->nh_group->adj_index; ecmp_size = fib_entry->nh_group->ecmp_size; @@ -1563,7 +1763,8 @@ static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, mlxsw_reg_ralue_pack4(ralue_pl, (enum mlxsw_reg_ralxx_protocol) vr->proto, op, - vr->id, fib_entry->key.prefix_len, *p_dip); + vr->id, fib_entry->fib_node->key.prefix_len, + *p_dip); mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id, adjacency_index, ecmp_size); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); @@ -1573,16 +1774,27 @@ static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) { + struct mlxsw_sp_rif *r = fib_entry->nh_group->nh_rif; + enum mlxsw_reg_ralue_trap_action trap_action; char ralue_pl[MLXSW_REG_RALUE_LEN]; - u32 *p_dip = (u32 *) fib_entry->key.addr; - struct mlxsw_sp_vr *vr = fib_entry->vr; + u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; + struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr; + u16 trap_id = 0; + u16 rif = 0; + + if (mlxsw_sp_fib_entry_should_offload(fib_entry)) { + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; + rif = r->rif; + } else { + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP; + trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; + } mlxsw_reg_ralue_pack4(ralue_pl, (enum mlxsw_reg_ralxx_protocol) vr->proto, op, - vr->id, fib_entry->key.prefix_len, *p_dip); - mlxsw_reg_ralue_act_local_pack(ralue_pl, - MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0, - fib_entry->rif); + vr->id, fib_entry->fib_node->key.prefix_len, + *p_dip); + mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, rif); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } @@ -1591,12 +1803,13 @@ static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp, enum mlxsw_reg_ralue_op op) { char ralue_pl[MLXSW_REG_RALUE_LEN]; - u32 *p_dip = (u32 *) fib_entry->key.addr; - struct mlxsw_sp_vr *vr = fib_entry->vr; + u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; + struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr; mlxsw_reg_ralue_pack4(ralue_pl, (enum mlxsw_reg_ralxx_protocol) vr->proto, op, - vr->id, fib_entry->key.prefix_len, *p_dip); + vr->id, fib_entry->fib_node->key.prefix_len, + *p_dip); mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } @@ -1620,13 +1833,17 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) { - switch (fib_entry->vr->proto) { + int err = -EINVAL; + + switch (fib_entry->fib_node->vr->proto) { case MLXSW_SP_L3_PROTO_IPV4: - return mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op); + err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op); + break; case MLXSW_SP_L3_PROTO_IPV6: - return -EINVAL; + return err; } - return -EINVAL; + mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err); + return err; } static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, @@ -1644,14 +1861,11 @@ static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp, } static int -mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp, - const struct fib_entry_notifier_info *fen_info, - struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, + const struct fib_entry_notifier_info *fen_info, + struct mlxsw_sp_fib_entry *fib_entry) { struct fib_info *fi = fen_info->fi; 
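/* Editorial note, not part of this patch: the rewritten helper keeps only
 * the protocol-level decision: RTN_LOCAL and RTN_BROADCAST trap to the
 * CPU, and unicast routes become LOCAL or REMOTE depending on whether the
 * first nexthop has link scope. The per-nexthop RIF scan removed below is
 * handled at nexthop init time instead.
 */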
- struct mlxsw_sp_rif *r = NULL; - int nhsel; - int err; if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) { fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; @@ -1659,58 +1873,177 @@ mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp, } if (fen_info->type != RTN_UNICAST) return -EINVAL; + if (fi->fib_nh->nh_scope != RT_SCOPE_LINK) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; + else + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; + return 0; +} - for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { - const struct fib_nh *nh = &fi->fib_nh[nhsel]; +static struct mlxsw_sp_fib_entry * +mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node, + const struct fib_entry_notifier_info *fen_info) +{ + struct mlxsw_sp_fib_entry *fib_entry; + int err; - if (!nh->nh_dev) - continue; - r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, nh->nh_dev); - if (!r) { - /* In case router interface is not found for - * at least one of the nexthops, that means - * the nexthop points to some device unrelated - * to us. Set trap and pass the packets for - * this prefix to kernel. - */ - break; - } + fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL); + if (!fib_entry) { + err = -ENOMEM; + goto err_fib_entry_alloc; } - if (!r) { - fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; - return 0; - } + err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry); + if (err) + goto err_fib4_entry_type_set; - if (fi->fib_scope != RT_SCOPE_UNIVERSE) { - fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; - fib_entry->rif = r->rif; - } else { - fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; - err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi); - if (err) - return err; - } - fib_info_offload_inc(fen_info->fi); - return 0; + err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fen_info->fi); + if (err) + goto err_nexthop_group_get; + + fib_entry->params.prio = fen_info->fi->fib_priority; + fib_entry->params.tb_id = fen_info->tb_id; + fib_entry->params.type = fen_info->type; + fib_entry->params.tos = fen_info->tos; + + fib_entry->fib_node = fib_node; + + return fib_entry; + +err_nexthop_group_get: +err_fib4_entry_type_set: + kfree(fib_entry); +err_fib_entry_alloc: + return ERR_PTR(err); } -static void -mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) +static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { - if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP) - fib_info_offload_dec(fib_entry->fi); - if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_REMOTE) - mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry); + mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry); + kfree(fib_entry); } +static struct mlxsw_sp_fib_node * +mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp, + const struct fib_entry_notifier_info *fen_info); + static struct mlxsw_sp_fib_entry * -mlxsw_sp_fib_entry_get(struct mlxsw_sp *mlxsw_sp, - const struct fib_entry_notifier_info *fen_info) +mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp, + const struct fib_entry_notifier_info *fen_info) { struct mlxsw_sp_fib_entry *fib_entry; - struct fib_info *fi = fen_info->fi; + struct mlxsw_sp_fib_node *fib_node; + + fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info); + if (IS_ERR(fib_node)) + return NULL; + + list_for_each_entry(fib_entry, &fib_node->entry_list, list) { + if (fib_entry->params.tb_id == fen_info->tb_id && + fib_entry->params.tos == fen_info->tos && + fib_entry->params.type == fen_info->type && + 
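mlxsw_sp_fib4_entry_create() above uses the kernel's standard goto-unwind shape: every step that can fail gets a label, and the labels run in reverse order of acquisition. A minimal standalone sketch of the idiom, with invented my_ names standing in for the nexthop-group binding:

#include <linux/err.h>
#include <linux/slab.h>

struct my_entry {
	int resource;
};

static int my_resource_get(struct my_entry *entry)
{
	entry->resource = 1;	/* stands in for mlxsw_sp_nexthop_group_get() */
	return 0;
}

static struct my_entry *my_entry_create(void)
{
	struct my_entry *entry;
	int err;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return ERR_PTR(-ENOMEM);

	err = my_resource_get(entry);
	if (err)
		goto err_resource_get;	/* labels unwind in reverse order */

	return entry;

err_resource_get:
	kfree(entry);
	return ERR_PTR(err);
}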
fib_entry->nh_group->key.fi == fen_info->fi) { + return fib_entry; + } + } + + return NULL; +} + +static const struct rhashtable_params mlxsw_sp_fib_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_fib_node, key), + .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node), + .key_len = sizeof(struct mlxsw_sp_fib_key), + .automatic_shrinking = true, +}; + +static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib, + struct mlxsw_sp_fib_node *fib_node) +{ + return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node, + mlxsw_sp_fib_ht_params); +} + +static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib, + struct mlxsw_sp_fib_node *fib_node) +{ + rhashtable_remove_fast(&fib->ht, &fib_node->ht_node, + mlxsw_sp_fib_ht_params); +} + +static struct mlxsw_sp_fib_node * +mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr, + size_t addr_len, unsigned char prefix_len) +{ + struct mlxsw_sp_fib_key key; + + memset(&key, 0, sizeof(key)); + memcpy(key.addr, addr, addr_len); + key.prefix_len = prefix_len; + return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params); +} + +static struct mlxsw_sp_fib_node * +mlxsw_sp_fib_node_create(struct mlxsw_sp_vr *vr, const void *addr, + size_t addr_len, unsigned char prefix_len) +{ + struct mlxsw_sp_fib_node *fib_node; + + fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL); + if (!fib_node) + return NULL; + + INIT_LIST_HEAD(&fib_node->entry_list); + list_add(&fib_node->list, &vr->fib->node_list); + memcpy(fib_node->key.addr, addr, addr_len); + fib_node->key.prefix_len = prefix_len; + mlxsw_sp_fib_node_insert(vr->fib, fib_node); + fib_node->vr = vr; + + return fib_node; +} + +static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node) +{ + mlxsw_sp_fib_node_remove(fib_node->vr->fib, fib_node); + list_del(&fib_node->list); + WARN_ON(!list_empty(&fib_node->entry_list)); + kfree(fib_node); +} + +static bool +mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node, + const struct mlxsw_sp_fib_entry *fib_entry) +{ + return list_first_entry(&fib_node->entry_list, + struct mlxsw_sp_fib_entry, list) == fib_entry; +} + +static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node) +{ + unsigned char prefix_len = fib_node->key.prefix_len; + struct mlxsw_sp_fib *fib = fib_node->vr->fib; + + if (fib->prefix_ref_count[prefix_len]++ == 0) + mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len); +} + +static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node) +{ + unsigned char prefix_len = fib_node->key.prefix_len; + struct mlxsw_sp_fib *fib = fib_node->vr->fib; + + if (--fib->prefix_ref_count[prefix_len] == 0) + mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len); +} + +static struct mlxsw_sp_fib_node * +mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp, + const struct fib_entry_notifier_info *fen_info) +{ + struct mlxsw_sp_fib_node *fib_node; struct mlxsw_sp_vr *vr; int err; @@ -1719,113 +2052,258 @@ mlxsw_sp_fib_entry_get(struct mlxsw_sp *mlxsw_sp, if (IS_ERR(vr)) return ERR_CAST(vr); - fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst, - sizeof(fen_info->dst), - fen_info->dst_len, fi->fib_dev); - if (fib_entry) { - /* Already exists, just take a reference */ - fib_entry->ref_count++; - return fib_entry; - } - fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fen_info->dst, - sizeof(fen_info->dst), - fen_info->dst_len, fi->fib_dev); - if (!fib_entry) { + fib_node = mlxsw_sp_fib_node_lookup(vr->fib, &fen_info->dst, + sizeof(fen_info->dst), + 
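With no obj_cmpfn in the params, rhashtable compares keys bytewise over key_len; that is why mlxsw_sp_fib_node_lookup() above memsets the on-stack key before filling it, since uninitialized padding in addr[] would defeat the comparison. A reduced sketch of the same setup (my_ types invented):

#include <linux/rhashtable.h>
#include <linux/stddef.h>
#include <linux/string.h>

struct my_key {
	unsigned char addr[16];		/* zero-padded beyond addr_len */
	unsigned char prefix_len;
};

struct my_node {
	struct rhash_head ht_node;
	struct my_key key;
};

static const struct rhashtable_params my_ht_params = {
	.key_offset = offsetof(struct my_node, key),
	.head_offset = offsetof(struct my_node, ht_node),
	.key_len = sizeof(struct my_key),
	.automatic_shrinking = true,
};

static struct my_node *my_node_lookup(struct rhashtable *ht, const void *addr,
				      size_t addr_len, unsigned char prefix_len)
{
	struct my_key key;

	memset(&key, 0, sizeof(key));	/* padding takes part in the compare */
	memcpy(key.addr, addr, addr_len);
	key.prefix_len = prefix_len;
	return rhashtable_lookup_fast(ht, &key, my_ht_params);
}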
fen_info->dst_len); + if (fib_node) + return fib_node; + + fib_node = mlxsw_sp_fib_node_create(vr, &fen_info->dst, + sizeof(fen_info->dst), + fen_info->dst_len); + if (!fib_node) { err = -ENOMEM; - goto err_fib_entry_create; + goto err_fib_node_create; } - fib_entry->vr = vr; - fib_entry->fi = fi; - fib_entry->ref_count = 1; - err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fen_info, fib_entry); - if (err) - goto err_fib4_entry_init; + return fib_node; - return fib_entry; - -err_fib4_entry_init: - mlxsw_sp_fib_entry_destroy(fib_entry); -err_fib_entry_create: +err_fib_node_create: mlxsw_sp_vr_put(mlxsw_sp, vr); - return ERR_PTR(err); } +static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) +{ + struct mlxsw_sp_vr *vr = fib_node->vr; + + if (!list_empty(&fib_node->entry_list)) + return; + mlxsw_sp_fib_node_destroy(fib_node); + mlxsw_sp_vr_put(mlxsw_sp, vr); +} + static struct mlxsw_sp_fib_entry * -mlxsw_sp_fib_entry_find(struct mlxsw_sp *mlxsw_sp, - const struct fib_entry_notifier_info *fen_info) +mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, + const struct mlxsw_sp_fib_entry_params *params) { - struct mlxsw_sp_vr *vr; + struct mlxsw_sp_fib_entry *fib_entry; - vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id, - MLXSW_SP_L3_PROTO_IPV4); - if (!vr) - return NULL; + list_for_each_entry(fib_entry, &fib_node->entry_list, list) { + if (fib_entry->params.tb_id > params->tb_id) + continue; + if (fib_entry->params.tb_id != params->tb_id) + break; + if (fib_entry->params.tos > params->tos) + continue; + if (fib_entry->params.prio >= params->prio || + fib_entry->params.tos < params->tos) + return fib_entry; + } - return mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst, - sizeof(fen_info->dst), - fen_info->dst_len, - fen_info->fi->fib_dev); + return NULL; } -static void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) +static int mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib_entry *fib_entry, + struct mlxsw_sp_fib_entry *new_entry) { - struct mlxsw_sp_vr *vr = fib_entry->vr; + struct mlxsw_sp_fib_node *fib_node; + + if (WARN_ON(!fib_entry)) + return -EINVAL; - if (--fib_entry->ref_count == 0) { - mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry); - mlxsw_sp_fib_entry_destroy(fib_entry); + fib_node = fib_entry->fib_node; + list_for_each_entry_from(fib_entry, &fib_node->entry_list, list) { + if (fib_entry->params.tb_id != new_entry->params.tb_id || + fib_entry->params.tos != new_entry->params.tos || + fib_entry->params.prio != new_entry->params.prio) + break; } - mlxsw_sp_vr_put(mlxsw_sp, vr); + + list_add_tail(&new_entry->list, &fib_entry->list); + return 0; } -static void mlxsw_sp_fib_entry_put_all(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) +static int +mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib_node *fib_node, + struct mlxsw_sp_fib_entry *new_entry, + bool replace, bool append) { - unsigned int last_ref_count; + struct mlxsw_sp_fib_entry *fib_entry; - do { - last_ref_count = fib_entry->ref_count; - mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry); - } while (last_ref_count != 1); + fib_entry = mlxsw_sp_fib4_node_entry_find(fib_node, &new_entry->params); + + if (append) + return mlxsw_sp_fib4_node_list_append(fib_entry, new_entry); + if (replace && WARN_ON(!fib_entry)) + return -EINVAL; + + /* Insert new entry before replaced one, so that we can later + * remove the second. 
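mlxsw_sp_fib4_node_entry_find() only works because the per-node entry list is kept sorted: descending table ID, then descending TOS, then ascending priority, so the list head is the route that should sit in hardware. One reading of that order as a comparator (my_params is an invented mirror of mlxsw_sp_fib_entry_params; this is an interpretation of the loop above, not driver API):

#include <linux/types.h>

struct my_params {
	u32 tb_id;
	u32 prio;
	u8 tos;
};

/* True when 'a' belongs in front of 'b' in a node's entry list. */
static bool my_params_before(const struct my_params *a,
			     const struct my_params *b)
{
	if (a->tb_id != b->tb_id)
		return a->tb_id > b->tb_id;	/* local (255) before main (254) */
	if (a->tos != b->tos)
		return a->tos > b->tos;
	return a->prio < b->prio;		/* lower metric is preferred */
}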
+ */ + if (fib_entry) { + list_add_tail(&new_entry->list, &fib_entry->list); + } else { + struct mlxsw_sp_fib_entry *last; + + list_for_each_entry(last, &fib_node->entry_list, list) { + if (new_entry->params.tb_id > last->params.tb_id) + break; + fib_entry = last; + } + + if (fib_entry) + list_add(&new_entry->list, &fib_entry->list); + else + list_add(&new_entry->list, &fib_node->entry_list); + } + + return 0; +} + +static void +mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib_entry *fib_entry) +{ + list_del(&fib_entry->list); +} + +static int +mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_fib_node *fib_node, + struct mlxsw_sp_fib_entry *fib_entry) +{ + if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry)) + return 0; + + /* To prevent packet loss, overwrite the previously offloaded + * entry. + */ + if (!list_is_singular(&fib_node->entry_list)) { + enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE; + struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list); + + mlxsw_sp_fib_entry_offload_refresh(n, op, 0); + } + + return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); } -static int mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, - struct fib_entry_notifier_info *fen_info) +static void +mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_fib_node *fib_node, + struct mlxsw_sp_fib_entry *fib_entry) +{ + if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry)) + return; + + /* Promote the next entry by overwriting the deleted entry */ + if (!list_is_singular(&fib_node->entry_list)) { + struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list); + enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE; + + mlxsw_sp_fib_entry_update(mlxsw_sp, n); + mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0); + return; + } + + mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); +} + +static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + bool replace, bool append) +{ + struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + int err; + + err = mlxsw_sp_fib4_node_list_insert(fib_node, fib_entry, replace, + append); + if (err) + return err; + + err = mlxsw_sp_fib4_node_entry_add(mlxsw_sp, fib_node, fib_entry); + if (err) + goto err_fib4_node_entry_add; + + mlxsw_sp_fib_node_prefix_inc(fib_node); + + return 0; + +err_fib4_node_entry_add: + mlxsw_sp_fib4_node_list_remove(fib_entry); + return err; +} + +static void +mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + + mlxsw_sp_fib_node_prefix_dec(fib_node); + mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry); + mlxsw_sp_fib4_node_list_remove(fib_entry); +} + +static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + bool replace) +{ + struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + struct mlxsw_sp_fib_entry *replaced; + + if (!replace) + return; + + /* We inserted the new entry before replaced one */ + replaced = list_next_entry(fib_entry, list); + + mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced); + mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); +} + +static int +mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, + const struct fib_entry_notifier_info *fen_info, + bool replace, bool append) { struct mlxsw_sp_fib_entry *fib_entry; - struct mlxsw_sp_vr *vr; + struct 
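Only the first entry of a fib node is actually programmed, so both add and delete above have to consider promotion: the delete path overwrites the LPM key with the runner-up before the old entry is dropped, leaving no window in which lookups miss. The same logic with the driver specifics stripped away (my_ names invented; the hw helpers stand in for the RALUE updates):

#include <linux/list.h>

struct my_node {
	struct list_head entry_list;
};

struct my_entry {
	struct list_head list;
};

static void my_hw_write(struct my_entry *entry) { /* RALUE write */ }
static void my_hw_del(struct my_entry *entry) { /* RALUE delete */ }

static void my_node_entry_del(struct my_node *node, struct my_entry *entry)
{
	if (entry != list_first_entry(&node->entry_list,
				      struct my_entry, list))
		return;		/* shadowed entry, never in hardware */

	if (!list_is_singular(&node->entry_list))
		/* Promote the runner-up by overwriting the same key. */
		my_hw_write(list_next_entry(entry, list));
	else
		my_hw_del(entry);
}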
mlxsw_sp_fib_node *fib_node; int err; if (mlxsw_sp->router.aborted) return 0; - fib_entry = mlxsw_sp_fib_entry_get(mlxsw_sp, fen_info); - if (IS_ERR(fib_entry)) { - dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB4 entry being added.\n"); - return PTR_ERR(fib_entry); + fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info); + if (IS_ERR(fib_node)) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n"); + return PTR_ERR(fib_node); } - if (fib_entry->ref_count != 1) - return 0; + fib_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info); + if (IS_ERR(fib_entry)) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n"); + err = PTR_ERR(fib_entry); + goto err_fib4_entry_create; + } - vr = fib_entry->vr; - err = mlxsw_sp_fib_entry_insert(vr->fib, fib_entry); + err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib_entry, replace, + append); if (err) { - dev_warn(mlxsw_sp->bus_info->dev, "Failed to insert FIB4 entry being added.\n"); - goto err_fib_entry_insert; + dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n"); + goto err_fib4_node_entry_link; } - err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); - if (err) - goto err_fib_entry_add; + + mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib_entry, replace); + return 0; -err_fib_entry_add: - mlxsw_sp_fib_entry_remove(vr->fib, fib_entry); -err_fib_entry_insert: - mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry); +err_fib4_node_entry_link: + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry); +err_fib4_entry_create: + mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); return err; } @@ -1833,20 +2311,19 @@ static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, struct fib_entry_notifier_info *fen_info) { struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib_node *fib_node; if (mlxsw_sp->router.aborted) return; - fib_entry = mlxsw_sp_fib_entry_find(mlxsw_sp, fen_info); - if (!fib_entry) + fib_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info); + if (WARN_ON(!fib_entry)) return; + fib_node = fib_entry->fib_node; - if (fib_entry->ref_count == 1) { - mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); - mlxsw_sp_fib_entry_remove(fib_entry->vr->fib, fib_entry); - } - - mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry); + mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry); + mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); } static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) @@ -1880,10 +2357,42 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } +static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) +{ + struct mlxsw_sp_fib_entry *fib_entry, *tmp; + + list_for_each_entry_safe(fib_entry, tmp, &fib_node->entry_list, list) { + bool do_break = &tmp->list == &fib_node->entry_list; + + mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry); + mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); + /* Break when entry list is empty and node was freed. + * Otherwise, we'll access freed memory in the next + * iteration. 
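For FIB_EVENT_ENTRY_REPLACE the sequence above is deliberate: the new entry is linked directly in front of the one it replaces, takes over the hardware slot by virtue of being first, and only then is the old entry unlinked. Continuing the my_ sketch from above, the tail end of a replace reduces to:

#include <linux/slab.h>

static void my_route_replace_finish(struct my_entry *new_entry)
{
	/* The new entry was inserted immediately before the replaced
	 * one, so the victim is simply its list successor. */
	struct my_entry *old = list_next_entry(new_entry, list);

	/* new_entry already owns the LPM key; removing 'old' needs no
	 * further device access. */
	list_del(&old->list);
	kfree(old);
}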
+ */ + if (do_break) + break; + } +} + +static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) +{ + switch (fib_node->vr->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node); + break; + case MLXSW_SP_L3_PROTO_IPV6: + WARN_ON_ONCE(1); + break; + } +} + static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) { - struct mlxsw_sp_fib_entry *fib_entry; - struct mlxsw_sp_fib_entry *tmp; + struct mlxsw_sp_fib_node *fib_node, *tmp; struct mlxsw_sp_vr *vr; int i; @@ -1893,14 +2402,11 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) if (!vr->used) continue; - list_for_each_entry_safe(fib_entry, tmp, - &vr->fib->entry_list, list) { - bool do_break = &tmp->list == &vr->fib->entry_list; + list_for_each_entry_safe(fib_node, tmp, &vr->fib->node_list, + list) { + bool do_break = &tmp->list == &vr->fib->node_list; - mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); - mlxsw_sp_fib_entry_remove(fib_entry->vr->fib, - fib_entry); - mlxsw_sp_fib_entry_put_all(mlxsw_sp, fib_entry); + mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node); if (do_break) break; } @@ -1921,6 +2427,28 @@ static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n"); } +static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) +{ + char ritr_pl[MLXSW_REG_RITR_LEN]; + int err; + + mlxsw_reg_ritr_rif_pack(ritr_pl, rif); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); + if (WARN_ON_ONCE(err)) + return err; + + mlxsw_reg_ritr_enable_set(ritr_pl, false); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *r) +{ + mlxsw_sp_router_rif_disable(mlxsw_sp, r->rif); + mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, r); + mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, r); +} + static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { char rgcr_pl[MLXSW_REG_RGCR_LEN]; @@ -1964,8 +2492,11 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) } struct mlxsw_sp_fib_event_work { - struct delayed_work dw; - struct fib_entry_notifier_info fen_info; + struct work_struct work; + union { + struct fib_entry_notifier_info fen_info; + struct fib_nh_notifier_info fnh_info; + }; struct mlxsw_sp *mlxsw_sp; unsigned long event; }; @@ -1973,15 +2504,21 @@ struct mlxsw_sp_fib_event_work { static void mlxsw_sp_router_fib_event_work(struct work_struct *work) { struct mlxsw_sp_fib_event_work *fib_work = - container_of(work, struct mlxsw_sp_fib_event_work, dw.work); + container_of(work, struct mlxsw_sp_fib_event_work, work); struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; + bool replace, append; int err; /* Protect internal structures from changes */ rtnl_lock(); switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_APPEND: /* fall through */ case FIB_EVENT_ENTRY_ADD: - err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info); + replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; + append = fib_work->event == FIB_EVENT_ENTRY_APPEND; + err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info, + replace, append); if (err) mlxsw_sp_router_fib4_abort(mlxsw_sp); fib_info_put(fib_work->fen_info.fi); @@ -1994,6 +2531,12 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) case FIB_EVENT_RULE_DEL: mlxsw_sp_router_fib4_abort(mlxsw_sp); break; + case FIB_EVENT_NH_ADD: /* fall through */ + case 
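The do_break test in mlxsw_sp_fib4_node_flush() above covers a lifetime trap its comment describes: destroying the last entry also frees the node, and with it the list head that list_for_each_entry_safe() has already cached a pointer into. The idiom in isolation (my_ names invented; in the real driver the destroy step may free 'node' itself once the list empties, which this stub does not model):

#include <linux/list.h>
#include <linux/slab.h>

/* my_node/my_entry as in the promotion sketch above */

static void my_entry_destroy(struct my_entry *entry)
{
	list_del(&entry->list);	/* real driver may also free the node */
	kfree(entry);
}

static void my_node_flush(struct my_node *node)
{
	struct my_entry *entry, *tmp;

	list_for_each_entry_safe(entry, tmp, &node->entry_list, list) {
		/* True on the last element: 'tmp' then points at the
		 * list head embedded in 'node', soon to be freed. */
		bool do_break = &tmp->list == &node->entry_list;

		my_entry_destroy(entry);
		if (do_break)
			break;
	}
}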
FIB_EVENT_NH_DEL: + mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event, + fib_work->fnh_info.fib_nh); + fib_info_put(fib_work->fnh_info.fib_nh->nh_parent); + break; } rtnl_unlock(); kfree(fib_work); @@ -2014,11 +2557,13 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, if (WARN_ON(!fib_work)) return NOTIFY_BAD; - INIT_DELAYED_WORK(&fib_work->dw, mlxsw_sp_router_fib_event_work); + INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work); fib_work->mlxsw_sp = mlxsw_sp; fib_work->event = event; switch (event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_APPEND: /* fall through */ case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info)); @@ -2027,9 +2572,14 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, */ fib_info_hold(fib_work->fen_info.fi); break; + case FIB_EVENT_NH_ADD: /* fall through */ + case FIB_EVENT_NH_DEL: + memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info)); + fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent); + break; } - mlxsw_core_schedule_odw(&fib_work->dw, 0); + mlxsw_core_schedule_work(&fib_work->work); return NOTIFY_DONE; } @@ -2051,11 +2601,20 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) int err; INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list); - INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list); err = __mlxsw_sp_router_init(mlxsw_sp); if (err) return err; + err = rhashtable_init(&mlxsw_sp->router.nexthop_ht, + &mlxsw_sp_nexthop_ht_params); + if (err) + goto err_nexthop_ht_init; + + err = rhashtable_init(&mlxsw_sp->router.nexthop_group_ht, + &mlxsw_sp_nexthop_group_ht_params); + if (err) + goto err_nexthop_group_ht_init; + mlxsw_sp_lpm_init(mlxsw_sp); err = mlxsw_sp_vrs_init(mlxsw_sp); if (err) @@ -2078,6 +2637,10 @@ err_register_fib_notifier: err_neigh_init: mlxsw_sp_vrs_fini(mlxsw_sp); err_vrs_init: + rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht); +err_nexthop_group_ht_init: + rhashtable_destroy(&mlxsw_sp->router.nexthop_ht); +err_nexthop_ht_init: __mlxsw_sp_router_fini(mlxsw_sp); return err; } @@ -2087,5 +2650,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) unregister_fib_notifier(&mlxsw_sp->fib_nb); mlxsw_sp_neigh_fini(mlxsw_sp); mlxsw_sp_vrs_fini(mlxsw_sp); + rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht); + rhashtable_destroy(&mlxsw_sp->router.nexthop_ht); __mlxsw_sp_router_fini(mlxsw_sp); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index b87ba7d36bc4..598727d578c1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -71,8 +71,21 @@ mlxsw_sp_port_orig_get(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port) { struct mlxsw_sp_port *mlxsw_sp_vport; + struct mlxsw_sp_fid *fid; u16 vid; + if (netif_is_bridge_master(dev)) { + fid = mlxsw_sp_vfid_find(mlxsw_sp_port->mlxsw_sp, + dev); + if (fid) { + mlxsw_sp_vport = + mlxsw_sp_port_vport_find_by_fid(mlxsw_sp_port, + fid->fid); + WARN_ON(!mlxsw_sp_vport); + return mlxsw_sp_vport; + } + } + if (!is_vlan_dev(dev)) return mlxsw_sp_port; @@ -166,9 +179,10 @@ static int mlxsw_sp_port_attr_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_sp_port_stp_state_set(mlxsw_sp_port, state); } -static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, - u16 idx_begin, u16 idx_end, bool uc_set, - bool bm_set) +static int 
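The notifier rework above moves from a delayed work to a plain one and grows nexthop events; the constant is the deferral pattern itself. FIB notifiers can fire in atomic context, so the handler copies the payload into a work item and takes a reference that keeps the fib_info alive until the work runs under RTNL. A minimal sketch, assuming an entry event (my_ names invented):

#include <linux/notifier.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/workqueue.h>
#include <net/ip_fib.h>

struct my_fib_event_work {
	struct work_struct work;
	struct fib_entry_notifier_info fen_info;
	unsigned long event;
};

static void my_fib_event_work_cb(struct work_struct *work)
{
	struct my_fib_event_work *fib_work =
		container_of(work, struct my_fib_event_work, work);

	rtnl_lock();
	/* ... program the device from fib_work->fen_info ... */
	rtnl_unlock();
	fib_info_put(fib_work->fen_info.fi);	/* pairs with hold below */
	kfree(fib_work);
}

static int my_fib_event(struct notifier_block *nb, unsigned long event,
			void *ptr)
{
	struct my_fib_event_work *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (!fib_work)
		return NOTIFY_BAD;

	INIT_WORK(&fib_work->work, my_fib_event_work_cb);
	fib_work->event = event;
	memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
	fib_info_hold(fib_work->fen_info.fi);	/* keep fi past the notifier */

	schedule_work(&fib_work->work);
	return NOTIFY_DONE;
}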
__mlxsw_sp_port_flood_table_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 idx_begin, u16 idx_end, + enum mlxsw_sp_flood_table table, + bool set) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u16 local_port = mlxsw_sp_port->local_port; @@ -186,31 +200,48 @@ static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, if (!sftr_pl) return -ENOMEM; - mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_UC, idx_begin, - table_type, range, local_port, uc_set); + mlxsw_reg_sftr_pack(sftr_pl, table, idx_begin, + table_type, range, local_port, set); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); + + kfree(sftr_pl); + return err; +} + +static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 idx_begin, u16 idx_end, bool uc_set, + bool bc_set, bool mc_set) +{ + int err; + + err = __mlxsw_sp_port_flood_table_set(mlxsw_sp_port, idx_begin, idx_end, + MLXSW_SP_FLOOD_TABLE_UC, uc_set); if (err) - goto buffer_out; + return err; - mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, idx_begin, - table_type, range, local_port, bm_set); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); + err = __mlxsw_sp_port_flood_table_set(mlxsw_sp_port, idx_begin, idx_end, + MLXSW_SP_FLOOD_TABLE_BC, bc_set); if (err) goto err_flood_bm_set; - goto buffer_out; + err = __mlxsw_sp_port_flood_table_set(mlxsw_sp_port, idx_begin, idx_end, + MLXSW_SP_FLOOD_TABLE_MC, mc_set); + if (err) + goto err_flood_mc_set; + return 0; +err_flood_mc_set: + __mlxsw_sp_port_flood_table_set(mlxsw_sp_port, idx_begin, idx_end, + MLXSW_SP_FLOOD_TABLE_BC, !bc_set); err_flood_bm_set: - mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_UC, idx_begin, - table_type, range, local_port, !uc_set); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); -buffer_out: - kfree(sftr_pl); + __mlxsw_sp_port_flood_table_set(mlxsw_sp_port, idx_begin, idx_end, + MLXSW_SP_FLOOD_TABLE_UC, !uc_set); return err; } -static int mlxsw_sp_port_uc_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, - bool set) +static int mlxsw_sp_port_flood_table_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_sp_flood_table table, + bool set) { struct net_device *dev = mlxsw_sp_port->dev; u16 vid, last_visited_vid; @@ -220,13 +251,13 @@ static int mlxsw_sp_port_uc_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid = mlxsw_sp_vport_fid_get(mlxsw_sp_port)->fid; u16 vfid = mlxsw_sp_fid_to_vfid(fid); - return __mlxsw_sp_port_flood_set(mlxsw_sp_port, vfid, vfid, - set, true); + return __mlxsw_sp_port_flood_table_set(mlxsw_sp_port, vfid, + vfid, table, set); } for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) { - err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid, vid, set, - true); + err = __mlxsw_sp_port_flood_table_set(mlxsw_sp_port, vid, vid, + table, set); if (err) { last_visited_vid = vid; goto err_port_flood_set; @@ -237,21 +268,53 @@ static int mlxsw_sp_port_uc_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, err_port_flood_set: for_each_set_bit(vid, mlxsw_sp_port->active_vlans, last_visited_vid) - __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid, vid, !set, true); + __mlxsw_sp_port_flood_table_set(mlxsw_sp_port, vid, vid, table, + !set); netdev_err(dev, "Failed to configure unicast flooding\n"); return err; } +static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct switchdev_trans *trans, + bool mc_disabled) +{ + int set; + int err = 0; + + if (switchdev_trans_ph_prepare(trans)) + return 0; + + if (mlxsw_sp_port->mc_router != mlxsw_sp_port->mc_flood) { + set = 
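__mlxsw_sp_port_flood_set() now programs three tables and unwinds in reverse on failure; note the rollback writes the complemented value, which mirrors the driver's assumption that the previous state was the opposite of what was being set. The shape of the pattern, detached from the SFTR register (my_ names invented):

struct my_port;

enum { MY_FLOOD_TABLE_UC, MY_FLOOD_TABLE_BC, MY_FLOOD_TABLE_MC };

static int my_flood_table_set(struct my_port *port, int table, bool set)
{
	return 0;	/* stands in for the SFTR register write */
}

static int my_flood_set(struct my_port *port, bool uc, bool bc, bool mc)
{
	int err;

	err = my_flood_table_set(port, MY_FLOOD_TABLE_UC, uc);
	if (err)
		return err;
	err = my_flood_table_set(port, MY_FLOOD_TABLE_BC, bc);
	if (err)
		goto err_bc_set;
	err = my_flood_table_set(port, MY_FLOOD_TABLE_MC, mc);
	if (err)
		goto err_mc_set;
	return 0;

err_mc_set:
	my_flood_table_set(port, MY_FLOOD_TABLE_BC, !bc);	/* undo BC */
err_bc_set:
	my_flood_table_set(port, MY_FLOOD_TABLE_UC, !uc);	/* undo UC */
	return err;
}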
mc_disabled ? + mlxsw_sp_port->mc_flood : mlxsw_sp_port->mc_router; + err = mlxsw_sp_port_flood_table_set(mlxsw_sp_port, + MLXSW_SP_FLOOD_TABLE_MC, + set); + } + + if (!err) + mlxsw_sp_port->mc_disabled = mc_disabled; + + return err; +} + int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid, bool set) { + bool mc_set = set; u16 vfid; /* In case of vFIDs, index into the flooding table is relative to * the start of the vFIDs range. */ vfid = mlxsw_sp_fid_to_vfid(fid); - return __mlxsw_sp_port_flood_set(mlxsw_sp_vport, vfid, vfid, set, set); + + if (set) + mc_set = mlxsw_sp_vport->mc_disabled ? + mlxsw_sp_vport->mc_flood : mlxsw_sp_vport->mc_router; + + return __mlxsw_sp_port_flood_set(mlxsw_sp_vport, vfid, vfid, set, set, + mc_set); } static int mlxsw_sp_port_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, @@ -297,8 +360,9 @@ static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, return 0; if ((uc_flood ^ brport_flags) & BR_FLOOD) { - err = mlxsw_sp_port_uc_flood_set(mlxsw_sp_port, - !mlxsw_sp_port->uc_flood); + err = mlxsw_sp_port_flood_table_set(mlxsw_sp_port, + MLXSW_SP_FLOOD_TABLE_UC, + !mlxsw_sp_port->uc_flood); if (err) return err; } @@ -318,8 +382,9 @@ static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, err_port_learning_set: if ((uc_flood ^ brport_flags) & BR_FLOOD) - mlxsw_sp_port_uc_flood_set(mlxsw_sp_port, - mlxsw_sp_port->uc_flood); + mlxsw_sp_port_flood_table_set(mlxsw_sp_port, + MLXSW_SP_FLOOD_TABLE_UC, + mlxsw_sp_port->uc_flood); return err; } @@ -371,6 +436,22 @@ static int mlxsw_sp_port_attr_br_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, return 0; } +static int mlxsw_sp_port_attr_mc_router_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct switchdev_trans *trans, + bool is_port_mc_router) +{ + if (switchdev_trans_ph_prepare(trans)) + return 0; + + mlxsw_sp_port->mc_router = is_port_mc_router; + if (!mlxsw_sp_port->mc_disabled) + return mlxsw_sp_port_flood_table_set(mlxsw_sp_port, + MLXSW_SP_FLOOD_TABLE_MC, + is_port_mc_router); + + return 0; +} + static int mlxsw_sp_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, struct switchdev_trans *trans) @@ -400,6 +481,14 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, attr->orig_dev, attr->u.vlan_filtering); break; + case SWITCHDEV_ATTR_ID_PORT_MROUTER: + err = mlxsw_sp_port_attr_mc_router_set(mlxsw_sp_port, trans, + attr->u.mrouter); + break; + case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED: + err = mlxsw_sp_port_mc_disabled_set(mlxsw_sp_port, trans, + attr->u.mc_disabled); + break; default: err = -EOPNOTSUPP; break; @@ -545,6 +634,7 @@ static int mlxsw_sp_port_fid_map(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid, static int mlxsw_sp_port_fid_join(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid_begin, u16 fid_end) { + bool mc_flood; int fid, err; for (fid = fid_begin; fid <= fid_end; fid++) { @@ -553,8 +643,12 @@ static int mlxsw_sp_port_fid_join(struct mlxsw_sp_port *mlxsw_sp_port, goto err_port_fid_join; } + mc_flood = mlxsw_sp_port->mc_disabled ? 
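Both hunks above encode the same policy for the MC flood table; one reading, as a single helper (my_ names invented): when IGMP snooping is disabled on the bridge the per-port mc_flood flag decides, and when snooping is active only ports marked as multicast routers receive the flood.

#include <linux/types.h>

struct my_port_mc_state {
	bool mc_disabled;	/* bridge: multicast snooping turned off */
	bool mc_flood;		/* port: flood unregistered multicast */
	bool mc_router;		/* port: attached multicast router */
};

static bool my_port_mc_flood(const struct my_port_mc_state *p)
{
	return p->mc_disabled ? p->mc_flood : p->mc_router;
}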
+ mlxsw_sp_port->mc_flood : mlxsw_sp_port->mc_router; + err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, fid_begin, fid_end, - mlxsw_sp_port->uc_flood, true); + mlxsw_sp_port->uc_flood, true, + mc_flood); if (err) goto err_port_flood_set; @@ -570,7 +664,7 @@ err_port_fid_map: for (fid--; fid >= fid_begin; fid--) mlxsw_sp_port_fid_map(mlxsw_sp_port, fid, false); __mlxsw_sp_port_flood_set(mlxsw_sp_port, fid_begin, fid_end, false, - false); + false, false); err_port_flood_set: fid = fid_end; err_port_fid_join: @@ -588,7 +682,7 @@ static void mlxsw_sp_port_fid_leave(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port_fid_map(mlxsw_sp_port, fid, false); __mlxsw_sp_port_flood_set(mlxsw_sp_port, fid_begin, fid_end, false, - false); + false, false); for (fid = fid_begin; fid <= fid_end; fid++) __mlxsw_sp_port_fid_leave(mlxsw_sp_port, fid); diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 2e88115e8735..ec1e886d4566 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -382,7 +382,7 @@ static int mlxsw_sx_port_change_mtu(struct net_device *dev, int mtu) return 0; } -static struct rtnl_link_stats64 * +static void mlxsw_sx_port_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { @@ -411,7 +411,6 @@ mlxsw_sx_port_get_stats64(struct net_device *dev, tx_dropped += p->tx_dropped; } stats->tx_dropped = tx_dropped; - return stats; } static int mlxsw_sx_port_get_phys_port_name(struct net_device *dev, char *name, @@ -734,7 +733,7 @@ static u32 mlxsw_sx_from_ptys_advert_link(u32 ptys_eth_proto) } static void mlxsw_sx_from_ptys_speed_duplex(bool carrier_ok, u32 ptys_eth_proto, - struct ethtool_cmd *cmd) + struct ethtool_link_ksettings *cmd) { u32 speed = SPEED_UNKNOWN; u8 duplex = DUPLEX_UNKNOWN; @@ -751,8 +750,8 @@ static void mlxsw_sx_from_ptys_speed_duplex(bool carrier_ok, u32 ptys_eth_proto, } } out: - ethtool_cmd_speed_set(cmd, speed); - cmd->duplex = duplex; + cmd->base.speed = speed; + cmd->base.duplex = duplex; } static u8 mlxsw_sx_port_connector_port(u32 ptys_eth_proto) @@ -777,8 +776,9 @@ static u8 mlxsw_sx_port_connector_port(u32 ptys_eth_proto) return PORT_OTHER; } -static int mlxsw_sx_port_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int +mlxsw_sx_port_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev); struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; @@ -786,6 +786,7 @@ static int mlxsw_sx_port_get_settings(struct net_device *dev, u32 eth_proto_cap; u32 eth_proto_admin; u32 eth_proto_oper; + u32 supported, advertising, lp_advertising; int err; mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0); @@ -797,18 +798,24 @@ static int mlxsw_sx_port_get_settings(struct net_device *dev, mlxsw_reg_ptys_eth_unpack(ptys_pl, ð_proto_cap, ð_proto_admin, ð_proto_oper); - cmd->supported = mlxsw_sx_from_ptys_supported_port(eth_proto_cap) | + supported = mlxsw_sx_from_ptys_supported_port(eth_proto_cap) | mlxsw_sx_from_ptys_supported_link(eth_proto_cap) | SUPPORTED_Pause | SUPPORTED_Asym_Pause; - cmd->advertising = mlxsw_sx_from_ptys_advert_link(eth_proto_admin); + advertising = mlxsw_sx_from_ptys_advert_link(eth_proto_admin); mlxsw_sx_from_ptys_speed_duplex(netif_carrier_ok(dev), eth_proto_oper, cmd); eth_proto_oper = eth_proto_oper ? 
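The switchx2 change tracks the net core making ndo_get_stats64 return void: the core hands the callback a zeroed rtnl_link_stats64 and always uses that buffer, so returning a pointer to it was redundant. A converted callback in miniature, assuming invented my_ types and skipping the u64_stats sequence counters (the tx_dropped sum above skips them too):

#include <linux/netdevice.h>
#include <linux/percpu.h>

struct my_pcpu_stats {
	u32 tx_dropped;
};

struct my_port {
	struct my_pcpu_stats __percpu *pcpu_stats;
};

static void my_get_stats64(struct net_device *dev,
			   struct rtnl_link_stats64 *stats)
{
	struct my_port *port = netdev_priv(dev);
	u32 tx_dropped = 0;
	int i;

	for_each_possible_cpu(i)
		tx_dropped += per_cpu_ptr(port->pcpu_stats, i)->tx_dropped;

	stats->tx_dropped = tx_dropped;		/* nothing to return */
}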
eth_proto_oper : eth_proto_cap; - cmd->port = mlxsw_sx_port_connector_port(eth_proto_oper); - cmd->lp_advertising = mlxsw_sx_from_ptys_advert_link(eth_proto_oper); + cmd->base.port = mlxsw_sx_port_connector_port(eth_proto_oper); + lp_advertising = mlxsw_sx_from_ptys_advert_link(eth_proto_oper); + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising, + lp_advertising); - cmd->transceiver = XCVR_INTERNAL; return 0; } @@ -848,8 +855,9 @@ static u32 mlxsw_sx_to_ptys_upper_speed(u32 upper_speed) return ptys_proto; } -static int mlxsw_sx_port_set_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int +mlxsw_sx_port_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev); struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx; @@ -858,13 +866,17 @@ static int mlxsw_sx_port_set_settings(struct net_device *dev, u32 eth_proto_new; u32 eth_proto_cap; u32 eth_proto_admin; + u32 advertising; bool is_up; int err; - speed = ethtool_cmd_speed(cmd); + speed = cmd->base.speed; + + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); - eth_proto_new = cmd->autoneg == AUTONEG_ENABLE ? - mlxsw_sx_to_ptys_advert_link(cmd->advertising) : + eth_proto_new = cmd->base.autoneg == AUTONEG_ENABLE ? + mlxsw_sx_to_ptys_advert_link(advertising) : mlxsw_sx_to_ptys_speed(speed); mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0); @@ -921,8 +933,8 @@ static const struct ethtool_ops mlxsw_sx_port_ethtool_ops = { .get_strings = mlxsw_sx_port_get_strings, .get_ethtool_stats = mlxsw_sx_port_get_stats, .get_sset_count = mlxsw_sx_port_get_sset_count, - .get_settings = mlxsw_sx_port_get_settings, - .set_settings = mlxsw_sx_port_set_settings, + .get_link_ksettings = mlxsw_sx_port_get_link_ksettings, + .set_link_ksettings = mlxsw_sx_port_set_link_ksettings, }; static int mlxsw_sx_port_attr_get(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 7ab275deacac..02ea48b15eb5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -54,6 +54,7 @@ enum { MLXSW_TRAP_ID_IGMP_V2_REPORT = 0x32, MLXSW_TRAP_ID_IGMP_V2_LEAVE = 0x33, MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34, + MLXSW_TRAP_ID_PKT_SAMPLE = 0x38, MLXSW_TRAP_ID_ARPBC = 0x50, MLXSW_TRAP_ID_ARPUC = 0x51, MLXSW_TRAP_ID_MTUERROR = 0x52,
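Both converted switchx2 callbacks lean on the ethtool helpers that translate between the old u32 SUPPORTED_/ADVERTISED_ masks and the new link-mode bitmaps, so the PTYS decoding logic did not have to change. The recipe in miniature, with arbitrary modes picked purely for illustration:

#include <linux/ethtool.h>
#include <linux/netdevice.h>

static int my_get_link_ksettings(struct net_device *dev,
				 struct ethtool_link_ksettings *cmd)
{
	/* Legacy masks, as an unconverted driver would compute them. */
	u32 supported = SUPPORTED_1000baseKX_Full | SUPPORTED_Pause;
	u32 advertising = ADVERTISED_1000baseKX_Full;

	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
						supported);
	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
						advertising);

	cmd->base.speed = SPEED_1000;
	cmd->base.duplex = DUPLEX_FULL;
	cmd->base.port = PORT_OTHER;
	return 0;
}

In the set direction, ethtool_convert_link_mode_to_legacy_u32() returns false when the request carries modes with no legacy u32 equivalent, which a u32-only driver can treat as unsupported.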