// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2020 Mellanox Technologies. */ #include #include #include #include #include #include #include #include #include #include "neigh.h" #include "tc.h" #include "en_rep.h" #include "fs_core.h" #include "diag/en_rep_tracepoint.h" static unsigned long mlx5e_rep_ipv6_interval(void) { if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl) return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME); return ~0UL; } static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv) { unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); unsigned long ipv6_interval = mlx5e_rep_ipv6_interval(); struct net_device *netdev = rpriv->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval); mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval); } void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv) { struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; mlx5_fc_queue_stats_work(priv->mdev, &neigh_update->neigh_stats_work, neigh_update->min_interval); } static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe) { return refcount_inc_not_zero(&nhe->refcnt); } static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe); void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe) { if (refcount_dec_and_test(&nhe->refcnt)) { mlx5e_rep_neigh_entry_remove(nhe); kfree_rcu(nhe, rcu); } } static struct mlx5e_neigh_hash_entry * mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv, struct mlx5e_neigh_hash_entry *nhe) { struct mlx5e_neigh_hash_entry *next = NULL; rcu_read_lock(); for (next = nhe ? list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, &nhe->neigh_list, struct mlx5e_neigh_hash_entry, neigh_list) : list_first_or_null_rcu(&rpriv->neigh_update.neigh_list, struct mlx5e_neigh_hash_entry, neigh_list); next; next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list, &next->neigh_list, struct mlx5e_neigh_hash_entry, neigh_list)) if (mlx5e_rep_neigh_entry_hold(next)) break; rcu_read_unlock(); if (nhe) mlx5e_rep_neigh_entry_release(nhe); return next; } static void mlx5e_rep_neigh_stats_work(struct work_struct *work) { struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv, neigh_update.neigh_stats_work.work); struct net_device *netdev = rpriv->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_neigh_hash_entry *nhe = NULL; rtnl_lock(); if (!list_empty(&rpriv->neigh_update.neigh_list)) mlx5e_rep_queue_neigh_stats_work(priv); while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL) mlx5e_tc_update_neigh_used_value(nhe); rtnl_unlock(); } struct neigh_update_work { struct work_struct work; struct neighbour *n; struct mlx5e_neigh_hash_entry *nhe; }; static void mlx5e_release_neigh_update_work(struct neigh_update_work *update_work) { neigh_release(update_work->n); mlx5e_rep_neigh_entry_release(update_work->nhe); kfree(update_work); } static void mlx5e_rep_neigh_update(struct work_struct *work) { struct neigh_update_work *update_work = container_of(work, struct neigh_update_work, work); struct mlx5e_neigh_hash_entry *nhe = update_work->nhe; struct neighbour *n = update_work->n; bool neigh_connected, same_dev; struct mlx5e_encap_entry *e; unsigned char ha[ETH_ALEN]; struct mlx5e_priv *priv; u8 nud_state, dead; rtnl_lock(); /* If these parameters are changed after we release the lock, * we'll receive another event letting us know about it. * We use this lock to avoid inconsistency between the neigh validity * and it's hw address. */ read_lock_bh(&n->lock); memcpy(ha, n->ha, ETH_ALEN); nud_state = n->nud_state; dead = n->dead; same_dev = READ_ONCE(nhe->neigh_dev) == n->dev; read_unlock_bh(&n->lock); neigh_connected = (nud_state & NUD_VALID) && !dead; trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected); if (!same_dev) goto out; list_for_each_entry(e, &nhe->encap_list, encap_list) { if (!mlx5e_encap_take(e)) continue; priv = netdev_priv(e->out_dev); mlx5e_rep_update_flows(priv, e, neigh_connected, ha); mlx5e_encap_put(priv, e); } out: rtnl_unlock(); mlx5e_release_neigh_update_work(update_work); } static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv *priv, struct neighbour *n) { struct neigh_update_work *update_work; struct mlx5e_neigh_hash_entry *nhe; struct mlx5e_neigh m_neigh = {}; update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC); if (WARN_ON(!update_work)) return NULL; m_neigh.family = n->ops->family; memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); /* Obtain reference to nhe as last step in order not to release it in * atomic context. */ rcu_read_lock(); nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh); rcu_read_unlock(); if (!nhe) { kfree(update_work); return NULL; } INIT_WORK(&update_work->work, mlx5e_rep_neigh_update); neigh_hold(n); update_work->n = n; update_work->nhe = nhe; return update_work; } static int mlx5e_rep_netevent_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, neigh_update.netevent_nb); struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; struct net_device *netdev = rpriv->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_neigh_hash_entry *nhe = NULL; struct neigh_update_work *update_work; struct neigh_parms *p; struct neighbour *n; bool found = false; switch (event) { case NETEVENT_NEIGH_UPDATE: n = ptr; #if IS_ENABLED(CONFIG_IPV6) if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) #else if (n->tbl != &arp_tbl) #endif return NOTIFY_DONE; update_work = mlx5e_alloc_neigh_update_work(priv, n); if (!update_work) return NOTIFY_DONE; queue_work(priv->wq, &update_work->work); break; case NETEVENT_DELAY_PROBE_TIME_UPDATE: p = ptr; /* We check the device is present since we don't care about * changes in the default table, we only care about changes * done per device delay prob time parameter. */ #if IS_ENABLED(CONFIG_IPV6) if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl)) #else if (!p->dev || p->tbl != &arp_tbl) #endif return NOTIFY_DONE; rcu_read_lock(); list_for_each_entry_rcu(nhe, &neigh_update->neigh_list, neigh_list) { if (p->dev == READ_ONCE(nhe->neigh_dev)) { found = true; break; } } rcu_read_unlock(); if (!found) return NOTIFY_DONE; neigh_update->min_interval = min_t(unsigned long, NEIGH_VAR(p, DELAY_PROBE_TIME), neigh_update->min_interval); mlx5_fc_update_sampling_interval(priv->mdev, neigh_update->min_interval); break; } return NOTIFY_DONE; } static const struct rhashtable_params mlx5e_neigh_ht_params = { .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node), .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh), .key_len = sizeof(struct mlx5e_neigh), .automatic_shrinking = true, }; int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) { struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; int err; err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params); if (err) goto out_err; INIT_LIST_HEAD(&neigh_update->neigh_list); mutex_init(&neigh_update->encap_lock); INIT_DELAYED_WORK(&neigh_update->neigh_stats_work, mlx5e_rep_neigh_stats_work); mlx5e_rep_neigh_update_init_interval(rpriv); neigh_update->netevent_nb.notifier_call = mlx5e_rep_netevent_event; err = register_netevent_notifier(&neigh_update->netevent_nb); if (err) goto out_notifier; return 0; out_notifier: neigh_update->netevent_nb.notifier_call = NULL; rhashtable_destroy(&neigh_update->neigh_ht); out_err: netdev_warn(rpriv->netdev, "Failed to initialize neighbours handling for vport %d\n", rpriv->rep->vport); return err; } void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) { struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); if (!rpriv->neigh_update.netevent_nb.notifier_call) return; unregister_netevent_notifier(&neigh_update->netevent_nb); flush_workqueue(priv->wq); /* flush neigh update works */ cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work); mutex_destroy(&neigh_update->encap_lock); rhashtable_destroy(&neigh_update->neigh_ht); } static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv, struct mlx5e_neigh_hash_entry *nhe) { struct mlx5e_rep_priv *rpriv = priv->ppriv; int err; err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht, &nhe->rhash_node, mlx5e_neigh_ht_params); if (err) return err; list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list); return err; } static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe) { struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv; mutex_lock(&rpriv->neigh_update.encap_lock); list_del_rcu(&nhe->neigh_list); rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht, &nhe->rhash_node, mlx5e_neigh_ht_params); mutex_unlock(&rpriv->neigh_update.encap_lock); } /* This function must only be called under the representor's encap_lock or * inside rcu read lock section. */ struct mlx5e_neigh_hash_entry * mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, struct mlx5e_neigh *m_neigh) { struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; struct mlx5e_neigh_hash_entry *nhe; nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh, mlx5e_neigh_ht_params); return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL; } int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, struct mlx5e_neigh *m_neigh, struct net_device *neigh_dev, struct mlx5e_neigh_hash_entry **nhe) { int err; *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL); if (!*nhe) return -ENOMEM; (*nhe)->priv = priv; memcpy(&(*nhe)->m_neigh, m_neigh, sizeof(*m_neigh)); spin_lock_init(&(*nhe)->encap_list_lock); INIT_LIST_HEAD(&(*nhe)->encap_list); refcount_set(&(*nhe)->refcnt, 1); WRITE_ONCE((*nhe)->neigh_dev, neigh_dev); err = mlx5e_rep_neigh_entry_insert(priv, *nhe); if (err) goto out_free; return 0; out_free: kfree(*nhe); return err; }