// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/module.h>
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif

#define MLX5_MAX_IRQ_NAME (32)

struct mlx5_irq {
	struct atomic_notifier_head nh;
	cpumask_var_t mask;
	char name[MLX5_MAX_IRQ_NAME];
};

struct mlx5_irq_table {
	struct mlx5_irq *irq;
	int nvec;
#ifdef CONFIG_RFS_ACCEL
	struct cpu_rmap *rmap;
#endif
};

int mlx5_irq_table_init(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table;

	if (mlx5_core_is_sf(dev))
		return 0;

	irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL);
	if (!irq_table)
		return -ENOMEM;

	dev->priv.irq_table = irq_table;
	return 0;
}

void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
{
	if (mlx5_core_is_sf(dev))
		return;

	kvfree(dev->priv.irq_table);
}

int mlx5_irq_get_num_comp(struct mlx5_irq_table *table)
{
	return table->nvec - MLX5_IRQ_VEC_COMP_BASE;
}

static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx)
{
	struct mlx5_irq_table *irq_table = dev->priv.irq_table;

	return &irq_table->irq[vecidx];
}

/**
 * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
 *                                   to be assigned to each VF.
 * @dev: PF to work on
 * @num_vfs: Number of enabled VFs
 */
int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
{
	int num_vf_msix, min_msix, max_msix;

	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
	if (!num_vf_msix)
		return 0;

	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);

	/* Limit maximum number of MSI-X vectors so the default configuration
	 * has some available in the pool. This will allow the user to increase
	 * the number of vectors in a VF without having to first size-down other
	 * VFs.
	 */
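	/* Example (hypothetical values): with num_vf_msix = 1024, num_vfs = 16,
	 * max_msix = 128 and min_msix = 2, the result below is
	 * max(min(1024 / 16, 128 / 2), 2) = 64 vectors per VF, leaving half of
	 * each VF's maximum available in the pool.
	 */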
	return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
}

/**
 * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
 * @dev: PF to work on
 * @function_id: Internal PCI VF function ID
 * @msix_vec_count: Number of MSI-X vectors to set
 */
int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
			    int msix_vec_count)
{
	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
	void *hca_cap = NULL, *query_cap = NULL, *cap;
	int num_vf_msix, min_msix, max_msix;
	int ret;

	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
	if (!num_vf_msix)
		return 0;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
		return -EOPNOTSUPP;

	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);

	if (msix_vec_count < min_msix)
		return -EINVAL;

	if (msix_vec_count > max_msix)
		return -EOVERFLOW;

	query_cap = kzalloc(query_sz, GFP_KERNEL);
	hca_cap = kzalloc(set_sz, GFP_KERNEL);
	if (!hca_cap || !query_cap) {
		ret = -ENOMEM;
		goto out;
	}

	ret = mlx5_vport_get_other_func_cap(dev, function_id, query_cap);
	if (ret)
		goto out;

	cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
	memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
	       MLX5_UN_SZ_BYTES(hca_cap_union));
	MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);

	MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
	MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
	MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);

	MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
	ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
out:
	kfree(hca_cap);
	kfree(query_cap);
	return ret;
}

int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
		       struct notifier_block *nb)
{
	struct mlx5_irq *irq;

	irq = &irq_table->irq[vecidx];
	return atomic_notifier_chain_register(&irq->nh, nb);
}

int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
		       struct notifier_block *nb)
{
	struct mlx5_irq *irq;

	irq = &irq_table->irq[vecidx];
	return atomic_notifier_chain_unregister(&irq->nh, nb);
}

static irqreturn_t mlx5_irq_int_handler(int irq, void *nh)
{
	atomic_notifier_call_chain(nh, 0, NULL);
	return IRQ_HANDLED;
}

static void irq_set_name(char *name, int vecidx)
{
	if (vecidx == 0) {
		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async");
		return;
	}

	snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d",
		 vecidx - MLX5_IRQ_VEC_COMP_BASE);
}

static int request_irqs(struct mlx5_core_dev *dev, int nvec)
{
	char name[MLX5_MAX_IRQ_NAME];
	int err;
	int i;

	for (i = 0; i < nvec; i++) {
		struct mlx5_irq *irq = mlx5_irq_get(dev, i);
		int irqn = pci_irq_vector(dev->pdev, i);

		irq_set_name(name, i);
		ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
		snprintf(irq->name, MLX5_MAX_IRQ_NAME,
			 "%s@pci:%s", name, pci_name(dev->pdev));
		err = request_irq(irqn, mlx5_irq_int_handler, 0, irq->name,
				  &irq->nh);
		if (err) {
			mlx5_core_err(dev, "Failed to request irq\n");
			goto err_request_irq;
		}
	}
	return 0;

err_request_irq:
	while (i--) {
		struct mlx5_irq *irq = mlx5_irq_get(dev, i);
		int irqn = pci_irq_vector(dev->pdev, i);

		free_irq(irqn, &irq->nh);
	}
	return err;
}

static void irq_clear_rmap(struct mlx5_core_dev *dev)
{
#ifdef CONFIG_RFS_ACCEL
	struct mlx5_irq_table *irq_table = dev->priv.irq_table;

	free_irq_cpu_rmap(irq_table->rmap);
#endif
}

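/* Populate the aRFS reverse IRQ map: one entry per completion vector, so
 * accelerated RFS can steer flows to the CPU servicing the queue's
 * interrupt. When CONFIG_RFS_ACCEL is not set this compiles down to a stub
 * that just returns 0.
 */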
static int irq_set_rmap(struct mlx5_core_dev *mdev)
{
	int err = 0;
#ifdef CONFIG_RFS_ACCEL
	struct mlx5_irq_table *irq_table = mdev->priv.irq_table;
	int num_affinity_vec;
	int vecidx;

	num_affinity_vec = mlx5_irq_get_num_comp(irq_table);
	irq_table->rmap = alloc_irq_cpu_rmap(num_affinity_vec);
	if (!irq_table->rmap) {
		err = -ENOMEM;
		mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err);
		goto err_out;
	}

	vecidx = MLX5_IRQ_VEC_COMP_BASE;
	for (; vecidx < irq_table->nvec; vecidx++) {
		err = irq_cpu_rmap_add(irq_table->rmap,
				       pci_irq_vector(mdev->pdev, vecidx));
		if (err) {
			mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d",
				      err);
			goto err_irq_cpu_rmap_add;
		}
	}
	return 0;

err_irq_cpu_rmap_add:
	irq_clear_rmap(mdev);
err_out:
#endif
	return err;
}

/* Completion IRQ vectors */

static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
{
	int vecidx = MLX5_IRQ_VEC_COMP_BASE + i;
	struct mlx5_irq *irq;
	int irqn;

	irq = mlx5_irq_get(mdev, vecidx);
	irqn = pci_irq_vector(mdev->pdev, vecidx);
	if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
		mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
		return -ENOMEM;
	}

	cpumask_set_cpu(cpumask_local_spread(i, mdev->priv.numa_node),
			irq->mask);
	if (IS_ENABLED(CONFIG_SMP) &&
	    irq_set_affinity_hint(irqn, irq->mask))
		mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x",
			       irqn);

	return 0;
}

static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i)
{
	int vecidx = MLX5_IRQ_VEC_COMP_BASE + i;
	struct mlx5_irq *irq;
	int irqn;

	irq = mlx5_irq_get(mdev, vecidx);
	irqn = pci_irq_vector(mdev->pdev, vecidx);
	irq_set_affinity_hint(irqn, NULL);
	free_cpumask_var(irq->mask);
}

static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev)
{
	int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table);
	int err;
	int i;

	for (i = 0; i < nvec; i++) {
		err = set_comp_irq_affinity_hint(mdev, i);
		if (err)
			goto err_out;
	}

	return 0;

err_out:
	for (i--; i >= 0; i--)
		clear_comp_irq_affinity_hint(mdev, i);

	return err;
}

static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev)
{
	int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table);
	int i;

	for (i = 0; i < nvec; i++)
		clear_comp_irq_affinity_hint(mdev, i);
}

struct cpumask *
mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx)
{
	return irq_table->irq[vecidx].mask;
}

#ifdef CONFIG_RFS_ACCEL
struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *irq_table)
{
	return irq_table->rmap;
}
#endif

static void unrequest_irqs(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;
	int i;

	for (i = 0; i < table->nvec; i++)
		free_irq(pci_irq_vector(dev->pdev, i),
			 &mlx5_irq_get(dev, i)->nh);
}

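/* Size the vector table: MLX5_IRQ_VEC_COMP_BASE vectors are reserved for
 * async/control EQs and the remainder, up to one per online CPU per port,
 * serve completion EQs. The total is capped by the number of EQs the
 * device supports.
 */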
int mlx5_irq_table_create(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;
	struct mlx5_irq_table *table = priv->irq_table;
	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
		      MLX5_CAP_GEN(dev, max_num_eqs) :
		      1 << MLX5_CAP_GEN(dev, log_max_eq);
	int nvec;
	int err;

	if (mlx5_core_is_sf(dev))
		return 0;

	nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
	       MLX5_IRQ_VEC_COMP_BASE;
	nvec = min_t(int, nvec, num_eqs);
	if (nvec <= MLX5_IRQ_VEC_COMP_BASE)
		return -ENOMEM;

	table->irq = kcalloc(nvec, sizeof(*table->irq), GFP_KERNEL);
	if (!table->irq)
		return -ENOMEM;

	nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1,
				     nvec, PCI_IRQ_MSIX);
	if (nvec < 0) {
		err = nvec;
		goto err_free_irq;
	}

	table->nvec = nvec;

	err = irq_set_rmap(dev);
	if (err)
		goto err_set_rmap;

	err = request_irqs(dev, nvec);
	if (err)
		goto err_request_irqs;

	err = set_comp_irq_affinity_hints(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n");
		goto err_set_affinity;
	}

	return 0;

err_set_affinity:
	unrequest_irqs(dev);
err_request_irqs:
	irq_clear_rmap(dev);
err_set_rmap:
	pci_free_irq_vectors(dev->pdev);
err_free_irq:
	kfree(table->irq);
	return err;
}

void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;
	int i;

	if (mlx5_core_is_sf(dev))
		return;

	/* free_irq requires that the affinity hint and rmap be cleared
	 * before calling it. This is why there is asymmetry with set_rmap,
	 * which should be called after alloc_irq but before request_irq.
	 */
	irq_clear_rmap(dev);
	clear_comp_irqs_affinity_hints(dev);
	for (i = 0; i < table->nvec; i++)
		free_irq(pci_irq_vector(dev->pdev, i),
			 &mlx5_irq_get(dev, i)->nh);
	pci_free_irq_vectors(dev->pdev);
	kfree(table->irq);
}

struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
{
#ifdef CONFIG_MLX5_SF
	if (mlx5_core_is_sf(dev))
		return dev->priv.parent_mdev->priv.irq_table;
#endif
	return dev->priv.irq_table;
}