author		Linus Torvalds <torvalds@linux-foundation.org>	2020-01-28 16:02:33 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2020-01-28 16:02:33 -0800
commit		bd2463ac7d7ec51d432f23bf0e893fb371a908cd (patch)
tree		3da32c23be83adb9d9bda7e51b51fa39f69f2447 /drivers/net/ethernet/sfc
parent		a78208e2436963d0b2c7d186277d6e1a9755029a (diff)
parent		f76e4c167ea2212e23c15ee7e601a865e822c291 (diff)
download	linux-bd2463ac7d7ec51d432f23bf0e893fb371a908cd.tar.gz
		linux-bd2463ac7d7ec51d432f23bf0e893fb371a908cd.tar.bz2
		linux-bd2463ac7d7ec51d432f23bf0e893fb371a908cd.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from David Miller:
1) Add WireGuard
2) Add HE and TWT support to ath11k driver, from John Crispin.
3) Add ESP in TCP encapsulation support, from Sabrina Dubroca.
4) Add variable window congestion control to TIPC, from Jon Maloy.
5) Add BCM84881 PHY driver, from Russell King.
6) Start adding netlink support for ethtool operations, from Michal
Kubecek.
7) Add XDP drop and TX action support to ena driver, from Sameeh
Jubran.
8) Add new ipv4 route notifications so that mlxsw driver does not have
to handle identical routes itself. From Ido Schimmel.
9) Add BPF dynamic program extensions, from Alexei Starovoitov.
10) Support RX and TX timestamping in igc, from Vinicius Costa Gomes.
11) Add support for macsec HW offloading, from Antoine Tenart.
12) Add initial support for MPTCP protocol, from Christoph Paasch,
Matthieu Baerts, Florian Westphal, Peter Krystad, and many others.
13) Add Octeontx2 PF support, from Sunil Goutham, Geetha sowjanya, Linu
Cherian, and others.
* git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1469 commits)
net: phy: add default ARCH_BCM_IPROC for MDIO_BCM_IPROC
udp: segment looped gso packets correctly
netem: change mailing list
qed: FW 8.42.2.0 debug features
qed: rt init valid initialization changed
qed: Debug feature: ilt and mdump
qed: FW 8.42.2.0 Add fw overlay feature
qed: FW 8.42.2.0 HSI changes
qed: FW 8.42.2.0 iscsi/fcoe changes
qed: Add abstraction for different hsi values per chip
qed: FW 8.42.2.0 Additional ll2 type
qed: Use dmae to write to widebus registers in fw_funcs
qed: FW 8.42.2.0 Parser offsets modified
qed: FW 8.42.2.0 Queue Manager changes
qed: FW 8.42.2.0 Expose new registers and change windows
qed: FW 8.42.2.0 Internal ram offsets modifications
MAINTAINERS: Add entry for Marvell OcteonTX2 Physical Function driver
Documentation: net: octeontx2: Add RVU HW and drivers overview
octeontx2-pf: ethtool RSS config support
octeontx2-pf: Add basic ethtool support
...
Diffstat (limited to 'drivers/net/ethernet/sfc')
35 files changed, 7972 insertions, 7282 deletions
diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig
index 5f36774bf4b8..ea5a9220196c 100644
--- a/drivers/net/ethernet/sfc/Kconfig
+++ b/drivers/net/ethernet/sfc/Kconfig
@@ -21,8 +21,6 @@ config SFC
 	depends on PCI
 	select MDIO
 	select CRC32
-	select I2C
-	select I2C_ALGOBIT
 	imply PTP_1588_CLOCK
 	---help---
 	  This driver supports 10/40-gigabit Ethernet cards based on
diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile
index c5c297e78d06..87d093da22ca 100644
--- a/drivers/net/ethernet/sfc/Makefile
+++ b/drivers/net/ethernet/sfc/Makefile
@@ -1,7 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0
-sfc-y			+= efx.o nic.o farch.o siena.o ef10.o tx.o rx.o \
-			   selftest.o ethtool.o ptp.o tx_tso.o \
-			   mcdi.o mcdi_port.o mcdi_mon.o
+sfc-y			+= efx.o efx_common.o efx_channels.o nic.o \
+			   farch.o siena.o ef10.o \
+			   tx.o tx_common.o tx_tso.o rx.o rx_common.o \
+			   selftest.o ethtool.o ethtool_common.o ptp.o \
+			   mcdi.o mcdi_port.o mcdi_port_common.o \
+			   mcdi_functions.o mcdi_filters.o mcdi_mon.o
 sfc-$(CONFIG_SFC_MTD)	+= mtd.o
 sfc-$(CONFIG_SFC_SRIOV)	+= sriov.o siena_sriov.o ef10_sriov.o
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index a6ae2cdc1986..52113b7529d6 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -5,11 +5,15 @@
  */
 
 #include "net_driver.h"
+#include "rx_common.h"
 #include "ef10_regs.h"
 #include "io.h"
 #include "mcdi.h"
 #include "mcdi_pcol.h"
+#include "mcdi_port_common.h"
+#include "mcdi_functions.h"
 #include "nic.h"
+#include "mcdi_filters.h"
 #include "workarounds.h"
 #include "selftest.h"
 #include "ef10_sriov.h"
@@ -25,28 +29,6 @@ enum {
 	EFX_EF10_TEST = 1,
 	EFX_EF10_REFILL,
 };
-
-/* The maximum size of a shared RSS context */
-/* TODO: this should really be from the mcdi protocol export */
-#define EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE 64UL
-
-/* The filter table(s) are managed by firmware and we have write-only
- * access.  When removing filters we must identify them to the
- * firmware by a 64-bit handle, but this is too wide for Linux kernel
- * interfaces (32-bit for RX NFC, 16-bit for RFS).  Also, we need to
- * be able to tell in advance whether a requested insertion will
- * replace an existing filter.  Therefore we maintain a software hash
- * table, which should be at least as large as the hardware hash
- * table.
- *
- * Huntington has a single 8K filter table shared between all filter
- * types and both ports.
- */
-#define HUNT_FILTER_TBL_ROWS 8192
-
-#define EFX_EF10_FILTER_ID_INVALID 0xffff
-
-#define EFX_EF10_FILTER_DEV_UC_MAX	32
-#define EFX_EF10_FILTER_DEV_MC_MAX	256
 
 /* VLAN list entry */
 struct efx_ef10_vlan {
@@ -54,95 +36,8 @@ struct efx_ef10_vlan {
 	u16 vid;
 };
 
-enum efx_ef10_default_filters {
-	EFX_EF10_BCAST,
-	EFX_EF10_UCDEF,
-	EFX_EF10_MCDEF,
-	EFX_EF10_VXLAN4_UCDEF,
-	EFX_EF10_VXLAN4_MCDEF,
-	EFX_EF10_VXLAN6_UCDEF,
-	EFX_EF10_VXLAN6_MCDEF,
-	EFX_EF10_NVGRE4_UCDEF,
-	EFX_EF10_NVGRE4_MCDEF,
-	EFX_EF10_NVGRE6_UCDEF,
-	EFX_EF10_NVGRE6_MCDEF,
-	EFX_EF10_GENEVE4_UCDEF,
-	EFX_EF10_GENEVE4_MCDEF,
-	EFX_EF10_GENEVE6_UCDEF,
-	EFX_EF10_GENEVE6_MCDEF,
-
-	EFX_EF10_NUM_DEFAULT_FILTERS
-};
-
-/* Per-VLAN filters information */
-struct efx_ef10_filter_vlan {
-	struct list_head list;
-	u16 vid;
-	u16 uc[EFX_EF10_FILTER_DEV_UC_MAX];
-	u16 mc[EFX_EF10_FILTER_DEV_MC_MAX];
-	u16 default_filters[EFX_EF10_NUM_DEFAULT_FILTERS];
-};
-
-struct efx_ef10_dev_addr {
-	u8 addr[ETH_ALEN];
-};
-
-struct efx_ef10_filter_table {
-/* The MCDI match masks supported by this fw & hw, in order of priority */
-	u32 rx_match_mcdi_flags[
-		MC_CMD_GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES_MAXNUM * 2];
-	unsigned int rx_match_count;
-
-	struct rw_semaphore lock; /* Protects entries */
-	struct {
-		unsigned long spec;	/* pointer to spec plus flag bits */
-/* AUTO_OLD is used to mark and sweep MAC filters for the device address lists. */
-/* unused flag	1UL */
-#define EFX_EF10_FILTER_FLAG_AUTO_OLD	2UL
-#define EFX_EF10_FILTER_FLAGS		3UL
-		u64 handle;		/* firmware handle */
-	} *entry;
-/* Shadow of net_device address lists, guarded by mac_lock */
-	struct efx_ef10_dev_addr dev_uc_list[EFX_EF10_FILTER_DEV_UC_MAX];
-	struct efx_ef10_dev_addr dev_mc_list[EFX_EF10_FILTER_DEV_MC_MAX];
-	int dev_uc_count;
-	int dev_mc_count;
-	bool uc_promisc;
-	bool mc_promisc;
-/* Whether in multicast promiscuous mode when last changed */
-	bool mc_promisc_last;
-	bool mc_overflow; /* Too many MC addrs; should always imply mc_promisc */
-	bool vlan_filter;
-	struct list_head vlan_list;
-};
-
-/* An arbitrary search limit for the software hash table */
-#define EFX_EF10_FILTER_SEARCH_LIMIT 200
-
-static void efx_ef10_rx_free_indir_table(struct efx_nic *efx);
-static void efx_ef10_filter_table_remove(struct efx_nic *efx);
-static int efx_ef10_filter_add_vlan(struct efx_nic *efx, u16 vid);
-static void efx_ef10_filter_del_vlan_internal(struct efx_nic *efx,
-					      struct efx_ef10_filter_vlan *vlan);
-static void efx_ef10_filter_del_vlan(struct efx_nic *efx, u16 vid);
 static int efx_ef10_set_udp_tnl_ports(struct efx_nic *efx, bool unloading);
 
-static u32 efx_ef10_filter_get_unsafe_id(u32 filter_id)
-{
-	WARN_ON_ONCE(filter_id == EFX_EF10_FILTER_ID_INVALID);
-	return filter_id & (HUNT_FILTER_TBL_ROWS - 1);
-}
-
-static unsigned int efx_ef10_filter_get_unsafe_pri(u32 filter_id)
-{
-	return filter_id / (HUNT_FILTER_TBL_ROWS * 2);
-}
-
-static u32 efx_ef10_make_filter_id(unsigned int pri, u16 idx)
-{
-	return pri * HUNT_FILTER_TBL_ROWS * 2 + idx;
-}
-
 static int efx_ef10_get_warm_boot_count(struct efx_nic *efx)
{
 	efx_dword_t reg;
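The comment and helpers removed above explain why the driver keeps its own filter table: firmware identifies filters by 64-bit handles, which are too wide for the 32-bit (RX NFC) and 16-bit (RFS) userland interfaces, so the driver hands out IDs that pack a match priority and a table index instead. A minimal standalone sketch of that encode/decode round trip (the 8192 constant is HUNT_FILTER_TBL_ROWS from the removed code; function names here are mine, not driver API):

	#include <assert.h>
	#include <stdint.h>

	#define TBL_ROWS 8192u	/* HUNT_FILTER_TBL_ROWS: hardware table size, power of two */

	/* Like efx_ef10_make_filter_id(): each priority owns a block of 2*TBL_ROWS IDs */
	static uint32_t make_id(unsigned int pri, uint16_t idx)
	{
		return pri * TBL_ROWS * 2 + idx;
	}

	/* Like efx_ef10_filter_get_unsafe_id()/..._pri(): recover both halves */
	static uint16_t id_to_idx(uint32_t id)     { return id & (TBL_ROWS - 1); }
	static unsigned int id_to_pri(uint32_t id) { return id / (TBL_ROWS * 2); }

	int main(void)
	{
		uint32_t id = make_id(3, 1234);	/* idx must be < TBL_ROWS, as in the driver */

		assert(id_to_pri(id) == 3 && id_to_idx(id) == 1234);
		return 0;
	}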
@@ -185,24 +80,6 @@ static bool efx_ef10_is_vf(struct efx_nic *efx)
 	return efx->type->is_vf;
 }
 
-static int efx_ef10_get_pf_index(struct efx_nic *efx)
-{
-	MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_FUNCTION_INFO_OUT_LEN);
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
-	size_t outlen;
-	int rc;
-
-	rc = efx_mcdi_rpc(efx, MC_CMD_GET_FUNCTION_INFO, NULL, 0, outbuf,
-			  sizeof(outbuf), &outlen);
-	if (rc)
-		return rc;
-	if (outlen < sizeof(outbuf))
-		return -EIO;
-
-	nic_data->pf_index = MCDI_DWORD(outbuf, GET_FUNCTION_INFO_OUT_PF);
-	return 0;
-}
-
 #ifdef CONFIG_SFC_SRIOV
 static int efx_ef10_get_vf_index(struct efx_nic *efx)
 {
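The removed efx_ef10_get_pf_index() shows the MCDI RPC shape repeated throughout this file: issue the request, propagate the RPC error, then treat a short response as -EIO before parsing any field. A self-contained sketch of that pattern; mcdi_rpc() and RESP_LEN are stand-ins, not the driver's API:

	#include <errno.h>
	#include <stddef.h>
	#include <stdint.h>
	#include <string.h>

	#define RESP_LEN 8	/* hypothetical fixed response length */

	/* Stand-in transport: a real implementation would talk to the MC. */
	static int mcdi_rpc(void *resp, size_t resp_len, size_t *outlen)
	{
		memset(resp, 0, resp_len);
		*outlen = resp_len;	/* pretend firmware filled the whole buffer */
		return 0;
	}

	static int query_pf_index(unsigned int *pf_index)
	{
		uint8_t resp[RESP_LEN];
		size_t outlen;
		int rc = mcdi_rpc(resp, sizeof(resp), &outlen);

		if (rc)
			return rc;		/* transport/firmware error */
		if (outlen < sizeof(resp))
			return -EIO;		/* truncated response: don't parse it */
		memcpy(pf_index, resp, sizeof(*pf_index));	/* now safe to read a field */
		return 0;
	}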
@@ -273,24 +150,9 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx)
 		u8 vi_window_mode = MCDI_BYTE(outbuf,
 				GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE);
 
-		switch (vi_window_mode) {
-		case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K:
-			efx->vi_stride = 8192;
-			break;
-		case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K:
-			efx->vi_stride = 16384;
-			break;
-		case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K:
-			efx->vi_stride = 65536;
-			break;
-		default:
-			netif_err(efx, probe, efx->net_dev,
-				  "Unrecognised VI window mode %d\n",
-				  vi_window_mode);
-			return -EIO;
-		}
-		netif_dbg(efx, probe, efx->net_dev, "vi_stride = %u\n",
-			  efx->vi_stride);
+		rc = efx_mcdi_window_mode_to_stride(efx, vi_window_mode);
+		if (rc)
+			return rc;
 	} else {
 		/* keep default VI stride */
 		netif_dbg(efx, probe, efx->net_dev,
@@ -576,7 +438,7 @@ static int efx_ef10_add_vlan(struct efx_nic *efx, u16 vid)
 	if (efx->filter_state) {
 		mutex_lock(&efx->mac_lock);
 		down_write(&efx->filter_sem);
-		rc = efx_ef10_filter_add_vlan(efx, vlan->vid);
+		rc = efx_mcdi_filter_add_vlan(efx, vlan->vid);
 		up_write(&efx->filter_sem);
 		mutex_unlock(&efx->mac_lock);
 		if (rc)
@@ -605,7 +467,7 @@ static void efx_ef10_del_vlan_internal(struct efx_nic *efx,
 
 	if (efx->filter_state) {
 		down_write(&efx->filter_sem);
-		efx_ef10_filter_del_vlan(efx, vlan->vid);
+		efx_mcdi_filter_del_vlan(efx, vlan->vid);
 		up_write(&efx->filter_sem);
 	}
 
@@ -689,7 +551,7 @@ static int efx_ef10_probe(struct efx_nic *efx)
 	}
 	nic_data->warm_boot_count = rc;
 
-	efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+	efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
 
 	nic_data->vport_id = EVB_PORT_ID_ASSIGNED;
 
@@ -725,7 +587,7 @@ static int efx_ef10_probe(struct efx_nic *efx)
 	if (rc)
 		goto fail4;
 
-	rc = efx_ef10_get_pf_index(efx);
+	rc = efx_get_pf_index(efx, &nic_data->pf_index);
 	if (rc)
 		goto fail5;
 
@@ -831,22 +693,6 @@ fail1:
 	return rc;
 }
 
-static int efx_ef10_free_vis(struct efx_nic *efx)
-{
-	MCDI_DECLARE_BUF_ERR(outbuf);
-	size_t outlen;
-	int rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FREE_VIS, NULL, 0,
-				    outbuf, sizeof(outbuf), &outlen);
-
-	/* -EALREADY means nothing to free, so ignore */
-	if (rc == -EALREADY)
-		rc = 0;
-	if (rc)
-		efx_mcdi_display_error(efx, MC_CMD_FREE_VIS, 0, outbuf, outlen,
-				       rc);
-	return rc;
-}
-
 #ifdef EFX_USE_PIO
 
 static void efx_ef10_free_piobufs(struct efx_nic *efx)
@@ -1084,12 +930,12 @@ static void efx_ef10_remove(struct efx_nic *efx)
 
 	efx_mcdi_mon_remove(efx);
 
-	efx_ef10_rx_free_indir_table(efx);
+	efx_mcdi_rx_free_indir_table(efx);
 
 	if (nic_data->wc_membase)
 		iounmap(nic_data->wc_membase);
 
-	rc = efx_ef10_free_vis(efx);
+	rc = efx_mcdi_free_vis(efx);
 	WARN_ON(rc != 0);
 
 	if (!nic_data->must_restore_piobufs)
@@ -1260,28 +1106,10 @@ static int efx_ef10_probe_vf(struct efx_nic *efx __attribute__ ((unused)))
 static int efx_ef10_alloc_vis(struct efx_nic *efx,
 			      unsigned int min_vis, unsigned int max_vis)
 {
-	MCDI_DECLARE_BUF(inbuf, MC_CMD_ALLOC_VIS_IN_LEN);
-	MCDI_DECLARE_BUF(outbuf, MC_CMD_ALLOC_VIS_OUT_LEN);
 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
-	size_t outlen;
-	int rc;
-
-	MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MIN_VI_COUNT, min_vis);
-	MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MAX_VI_COUNT, max_vis);
-	rc = efx_mcdi_rpc(efx, MC_CMD_ALLOC_VIS, inbuf, sizeof(inbuf),
-			  outbuf, sizeof(outbuf), &outlen);
-	if (rc != 0)
-		return rc;
-
-	if (outlen < MC_CMD_ALLOC_VIS_OUT_LEN)
-		return -EIO;
-
-	netif_dbg(efx, drv, efx->net_dev, "base VI is A0x%03x\n",
-		  MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE));
-
-	nic_data->vi_base = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE);
-	nic_data->n_allocated_vis = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_COUNT);
-	return 0;
+	return efx_mcdi_alloc_vis(efx, min_vis, max_vis, &nic_data->vi_base,
+				  &nic_data->n_allocated_vis);
 }
 
 /* Note that the failure path of this function does not free
@@ -1363,7 +1191,7 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx)
 	}
 
 	/* In case the last attached driver failed to free VIs, do it now */
-	rc = efx_ef10_free_vis(efx);
+	rc = efx_mcdi_free_vis(efx);
 	if (rc != 0)
 		return rc;
 
@@ -1384,7 +1212,7 @@
 		efx->max_tx_channels =
 			nic_data->n_allocated_vis / EFX_TXQ_TYPES;
 
-		efx_ef10_free_vis(efx);
+		efx_mcdi_free_vis(efx);
 		return -EAGAIN;
 	}
 
@@ -1490,7 +1318,7 @@ static int efx_ef10_init_nic(struct efx_nic *efx)
 	return 0;
 }
 
-static void efx_ef10_reset_mc_allocations(struct efx_nic *efx)
+static void efx_ef10_table_reset_mc_allocations(struct efx_nic *efx)
 {
 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 #ifdef CONFIG_SFC_SRIOV
@@ -1503,7 +1331,7 @@
 	nic_data->must_restore_filters = true;
 	nic_data->must_restore_piobufs = true;
 	efx_ef10_forget_old_piobufs(efx);
-	efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+	efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
 
 	/* Driver-created vswitches and vports must be re-created */
 	nic_data->must_probe_vswitching = true;
@@ -1571,7 +1399,7 @@ static int efx_ef10_reset(struct efx_nic *efx, enum reset_type reset_type)
 	 */
 	if ((reset_type == RESET_TYPE_ALL ||
 	     reset_type == RESET_TYPE_MCDI_TIMEOUT) && !rc)
-		efx_ef10_reset_mc_allocations(efx);
+		efx_ef10_table_reset_mc_allocations(efx);
 	return rc;
 }
 
@@ -2187,7 +2015,7 @@ static void efx_ef10_mcdi_reboot_detected(struct efx_nic *efx)
 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 
 	/* All our allocations have been reset */
-	efx_ef10_reset_mc_allocations(efx);
+	efx_ef10_table_reset_mc_allocations(efx);
 
 	/* The datapath firmware might have been changed */
 	nic_data->must_check_datapath_caps = true;
@@ -2408,20 +2236,15 @@ static u32 efx_ef10_tso_versions(struct efx_nic *efx)
 
 static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
 {
-	MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
-						       EFX_BUF_SIZE));
 	bool csum_offload = tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD;
-	size_t entries = tx_queue->txd.buf.len / EFX_BUF_SIZE;
 	struct efx_channel *channel = tx_queue->channel;
 	struct efx_nic *efx = tx_queue->efx;
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
+	struct efx_ef10_nic_data *nic_data;
 	bool tso_v2 = false;
-	size_t inlen;
-	dma_addr_t dma_addr;
 	efx_qword_t *txd;
 	int rc;
-	int i;
-	BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0);
+
+	nic_data = efx->nic_data;
 
 	/* Only attempt to enable TX timestamping if we have the license for it,
 	 * otherwise TXQ init will fail
@@ -2448,51 +2271,9 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
 			       channel->channel);
 	}
 
-	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue->ptr_mask + 1);
-	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel);
-	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_LABEL, tx_queue->queue);
-	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_INSTANCE, tx_queue->queue);
-	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0);
-	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, nic_data->vport_id);
-
-	dma_addr = tx_queue->txd.buf.dma_addr;
-
-	netif_dbg(efx, hw, efx->net_dev, "pushing TXQ %d. %zu entries (%llx)\n",
-		  tx_queue->queue, entries, (u64)dma_addr);
-
-	for (i = 0; i < entries; ++i) {
-		MCDI_SET_ARRAY_QWORD(inbuf, INIT_TXQ_IN_DMA_ADDR, i, dma_addr);
-		dma_addr += EFX_BUF_SIZE;
-	}
-
-	inlen = MC_CMD_INIT_TXQ_IN_LEN(entries);
-
-	do {
-		MCDI_POPULATE_DWORD_4(inbuf, INIT_TXQ_IN_FLAGS,
-				/* This flag was removed from mcdi_pcol.h for
-				 * the non-_EXT version of INIT_TXQ.  However,
-				 * firmware still honours it.
-				 */
-				INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2,
-				INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload,
-				INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload,
-				INIT_TXQ_EXT_IN_FLAG_TIMESTAMP,
-						tx_queue->timestamping);
-
-		rc = efx_mcdi_rpc_quiet(efx, MC_CMD_INIT_TXQ, inbuf, inlen,
-					NULL, 0, NULL);
-		if (rc == -ENOSPC && tso_v2) {
-			/* Retry without TSOv2 if we're short on contexts. */
-			tso_v2 = false;
-			netif_warn(efx, probe, efx->net_dev,
-				   "TSOv2 context not available to segment in hardware. TCP performance may be reduced.\n");
-		} else if (rc) {
-			efx_mcdi_display_error(efx, MC_CMD_INIT_TXQ,
-					       MC_CMD_INIT_TXQ_EXT_IN_LEN,
-					       NULL, 0, rc);
-			goto fail;
-		}
-	} while (rc);
+	rc = efx_mcdi_tx_init(tx_queue, tso_v2);
+	if (rc)
+		goto fail;
 
 	/* A previous user of this TX queue might have set us up the
 	 * bomb by writing a descriptor to the TX push collector but
@@ -2530,35 +2311,6 @@ fail:
 		    tx_queue->queue);
 }
 
-static void efx_ef10_tx_fini(struct efx_tx_queue *tx_queue)
-{
-	MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_TXQ_IN_LEN);
-	MCDI_DECLARE_BUF_ERR(outbuf);
-	struct efx_nic *efx = tx_queue->efx;
-	size_t outlen;
-	int rc;
-
-	MCDI_SET_DWORD(inbuf, FINI_TXQ_IN_INSTANCE,
-		       tx_queue->queue);
-
-	rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_TXQ, inbuf, sizeof(inbuf),
-				outbuf, sizeof(outbuf), &outlen);
-
-	if (rc && rc != -EALREADY)
-		goto fail;
-
-	return;
-
-fail:
-	efx_mcdi_display_error(efx, MC_CMD_FINI_TXQ, MC_CMD_FINI_TXQ_IN_LEN,
-			       outbuf, outlen, rc);
-}
-
-static void efx_ef10_tx_remove(struct efx_tx_queue *tx_queue)
-{
-	efx_nic_free_buffer(tx_queue->efx, &tx_queue->txd.buf);
-}
-
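The do/while loop removed above (its logic now lives in efx_mcdi_tx_init()) retries TXQ init exactly once without TSOv2 when firmware reports -ENOSPC, trading a hardware offload for a working queue. The shape of that fallback as a generic, self-contained sketch; init_txq() here is a stand-in for the INIT_TXQ RPC:

	#include <errno.h>
	#include <stdbool.h>
	#include <stdio.h>

	/* Stand-in for the INIT_TXQ RPC; pretends TSOv2 contexts are exhausted. */
	static int init_txq(bool tso_v2)
	{
		return tso_v2 ? -ENOSPC : 0;
	}

	static int init_txq_with_fallback(bool want_tso_v2)
	{
		bool tso_v2 = want_tso_v2;
		int rc;

		do {
			rc = init_txq(tso_v2);
			if (rc == -ENOSPC && tso_v2) {
				tso_v2 = false;	/* retry once without the offload */
				fprintf(stderr, "TSOv2 unavailable; falling back\n");
			} else if (rc) {
				return rc;	/* any other error is fatal */
			}
		} while (rc);
		return 0;
	}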
 /* This writes to the TX_DESC_WPTR; write pointer for TX descriptor ring */
 static inline void efx_ef10_notify_tx_desc(struct efx_tx_queue *tx_queue)
 {
@@ -2637,527 +2389,6 @@ static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue)
 	}
 }
 
-#define RSS_MODE_HASH_ADDRS	(1 << RSS_MODE_HASH_SRC_ADDR_LBN |\
-				 1 << RSS_MODE_HASH_DST_ADDR_LBN)
-#define RSS_MODE_HASH_PORTS	(1 << RSS_MODE_HASH_SRC_PORT_LBN |\
-				 1 << RSS_MODE_HASH_DST_PORT_LBN)
-#define RSS_CONTEXT_FLAGS_DEFAULT	(1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV4_EN_LBN |\
-					 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV4_EN_LBN |\
-					 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV6_EN_LBN |\
-					 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV6_EN_LBN |\
-					 (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV4_RSS_MODE_LBN |\
-					 RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN |\
-					 RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV4_RSS_MODE_LBN |\
-					 (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV6_RSS_MODE_LBN |\
-					 RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN |\
-					 RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV6_RSS_MODE_LBN)
-
-static int efx_ef10_get_rss_flags(struct efx_nic *efx, u32 context, u32 *flags)
-{
-	/* Firmware had a bug (sfc bug 61952) where it would not actually
-	 * fill in the flags field in the response to MC_CMD_RSS_CONTEXT_GET_FLAGS.
-	 * This meant that it would always contain whatever was previously
-	 * in the MCDI buffer.  Fortunately, all firmware versions with
-	 * this bug have the same default flags value for a newly-allocated
-	 * RSS context, and the only time we want to get the flags is just
-	 * after allocating.  Moreover, the response has a 32-bit hole
-	 * where the context ID would be in the request, so we can use an
-	 * overlength buffer in the request and pre-fill the flags field
-	 * with what we believe the default to be.  Thus if the firmware
-	 * has the bug, it will leave our pre-filled value in the flags
-	 * field of the response, and we will get the right answer.
-	 *
-	 * However, this does mean that this function should NOT be used if
-	 * the RSS context flags might not be their defaults - it is ONLY
-	 * reliably correct for a newly-allocated RSS context.
-	 */
-	MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN);
-	MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN);
-	size_t outlen;
-	int rc;
-
-	/* Check we have a hole for the context ID */
-	BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_FLAGS_IN_LEN != MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_FLAGS_OFST);
-	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_IN_RSS_CONTEXT_ID, context);
-	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS,
-		       RSS_CONTEXT_FLAGS_DEFAULT);
-	rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_FLAGS, inbuf,
-			  sizeof(inbuf), outbuf, sizeof(outbuf), &outlen);
-	if (rc == 0) {
-		if (outlen < MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN)
-			rc = -EIO;
-		else
-			*flags = MCDI_DWORD(outbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS);
-	}
-	return rc;
-}
-
-/* Attempt to enable 4-tuple UDP hashing on the specified RSS context.
- * If we fail, we just leave the RSS context at its default hash settings,
- * which is safe but may slightly reduce performance.
- * Defaults are 4-tuple for TCP and 2-tuple for UDP and other-IP, so we
- * just need to set the UDP ports flags (for both IP versions).
- */
-static void efx_ef10_set_rss_flags(struct efx_nic *efx,
-				   struct efx_rss_context *ctx)
-{
-	MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN);
-	u32 flags;
-
-	BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_SET_FLAGS_OUT_LEN != 0);
-
-	if (efx_ef10_get_rss_flags(efx, ctx->context_id, &flags) != 0)
-		return;
-	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_RSS_CONTEXT_ID,
-		       ctx->context_id);
-	flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN;
-	flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN;
-	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_FLAGS, flags);
-	if (!efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_FLAGS, inbuf, sizeof(inbuf),
-			  NULL, 0, NULL))
-		/* Succeeded, so UDP 4-tuple is now enabled */
-		ctx->rx_hash_udp_4tuple = true;
-}
-
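The long comment in the removed efx_ef10_get_rss_flags() describes a neat workaround for firmware that never writes the flags word of the GET_FLAGS response: the request has a 32-bit hole exactly where the response carries the flags, so the driver sends an overlength request with that hole pre-filled with the assumed default, and a buggy firmware "returns" the pre-filled value. A toy model of why that works, with an invented two-word buffer layout (not the real MCDI format):

	#include <assert.h>
	#include <stdint.h>
	#include <string.h>

	#define DEFAULT_FLAGS 0xabcd1234u	/* assumed default for a fresh context */

	/* Buggy firmware: echoes the request buffer, updates only word 0,
	 * and never writes the flags word (word 1).
	 */
	static void buggy_firmware(const uint32_t *req, uint32_t *resp)
	{
		memcpy(resp, req, 2 * sizeof(uint32_t));
		resp[0] = 0;	/* status/ID written; flags word left untouched */
	}

	int main(void)
	{
		/* Overlength request: word 1 is the hole where the response's
		 * flags will live, pre-filled with the believed default.
		 */
		uint32_t req[2] = { 42 /* context ID */, DEFAULT_FLAGS };
		uint32_t resp[2];

		buggy_firmware(req, resp);
		assert(resp[1] == DEFAULT_FLAGS);	/* still the right answer */
		return 0;
	}

As the original comment stresses, this is only reliable immediately after allocation, while the context's flags are still known to be at their defaults.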
-static int efx_ef10_alloc_rss_context(struct efx_nic *efx, bool exclusive,
-				      struct efx_rss_context *ctx,
-				      unsigned *context_size)
-{
-	MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN);
-	MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN);
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
-	size_t outlen;
-	int rc;
-	u32 alloc_type = exclusive ?
-				MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE :
-				MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_SHARED;
-	unsigned rss_spread = exclusive ?
-				efx->rss_spread :
-				min(rounddown_pow_of_two(efx->rss_spread),
-				    EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE);
-
-	if (!exclusive && rss_spread == 1) {
-		ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
-		if (context_size)
-			*context_size = 1;
-		return 0;
-	}
-
-	if (nic_data->datapath_caps &
-	    1 << MC_CMD_GET_CAPABILITIES_OUT_RX_RSS_LIMITED_LBN)
-		return -EOPNOTSUPP;
-
-	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_UPSTREAM_PORT_ID,
-		       nic_data->vport_id);
-	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_TYPE, alloc_type);
-	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_NUM_QUEUES, rss_spread);
-
-	rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_ALLOC, inbuf, sizeof(inbuf),
-			  outbuf, sizeof(outbuf), &outlen);
-	if (rc != 0)
-		return rc;
-
-	if (outlen < MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN)
-		return -EIO;
-
-	ctx->context_id = MCDI_DWORD(outbuf, RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID);
-
-	if (context_size)
-		*context_size = rss_spread;
-
-	if (nic_data->datapath_caps &
-	    1 << MC_CMD_GET_CAPABILITIES_OUT_ADDITIONAL_RSS_MODES_LBN)
-		efx_ef10_set_rss_flags(efx, ctx);
-
-	return 0;
-}
-
-static int efx_ef10_free_rss_context(struct efx_nic *efx, u32 context)
-{
-	MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_FREE_IN_LEN);
-
-	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_FREE_IN_RSS_CONTEXT_ID,
-		       context);
-	return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_FREE, inbuf, sizeof(inbuf),
-			    NULL, 0, NULL);
-}
-
-static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context,
-				       const u32 *rx_indir_table, const u8 *key)
-{
-	MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_SET_TABLE_IN_LEN);
-	MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_SET_KEY_IN_LEN);
-	int i, rc;
-
-	MCDI_SET_DWORD(tablebuf, RSS_CONTEXT_SET_TABLE_IN_RSS_CONTEXT_ID,
-		       context);
-	BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) !=
-		     MC_CMD_RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE_LEN);
-
-	/* This iterates over the length of efx->rss_context.rx_indir_table, but
-	 * copies bytes from rx_indir_table.  That's because the latter is a
-	 * pointer rather than an array, but should have the same length.
-	 * The efx->rss_context.rx_hash_key loop below is similar.
-	 */
-	for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_indir_table); ++i)
-		MCDI_PTR(tablebuf,
-			 RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE)[i] =
-				(u8) rx_indir_table[i];
-
-	rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_TABLE, tablebuf,
-			  sizeof(tablebuf), NULL, 0, NULL);
-	if (rc != 0)
-		return rc;
-
-	MCDI_SET_DWORD(keybuf, RSS_CONTEXT_SET_KEY_IN_RSS_CONTEXT_ID,
-		       context);
-	BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_hash_key) !=
-		     MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN);
-	for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_hash_key); ++i)
-		MCDI_PTR(keybuf, RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY)[i] = key[i];
-
-	return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_KEY, keybuf,
-			    sizeof(keybuf), NULL, 0, NULL);
-}
-
-static void efx_ef10_rx_free_indir_table(struct efx_nic *efx)
-{
-	int rc;
-
-	if (efx->rss_context.context_id != EFX_EF10_RSS_CONTEXT_INVALID) {
-		rc = efx_ef10_free_rss_context(efx, efx->rss_context.context_id);
-		WARN_ON(rc != 0);
-	}
-	efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID;
-}
-
-static int efx_ef10_rx_push_shared_rss_config(struct efx_nic *efx,
-					      unsigned *context_size)
-{
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
-	int rc = efx_ef10_alloc_rss_context(efx, false, &efx->rss_context,
-					    context_size);
-
-	if (rc != 0)
-		return rc;
-
-	nic_data->rx_rss_context_exclusive = false;
-	efx_set_default_rx_indir_table(efx, &efx->rss_context);
-	return 0;
-}
-
-static int efx_ef10_rx_push_exclusive_rss_config(struct efx_nic *efx,
-						 const u32 *rx_indir_table,
-						 const u8 *key)
-{
-	u32 old_rx_rss_context = efx->rss_context.context_id;
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
-	int rc;
-
-	if (efx->rss_context.context_id == EFX_EF10_RSS_CONTEXT_INVALID ||
-	    !nic_data->rx_rss_context_exclusive) {
-		rc = efx_ef10_alloc_rss_context(efx, true, &efx->rss_context,
-						NULL);
-		if (rc == -EOPNOTSUPP)
-			return rc;
-		else if (rc != 0)
-			goto fail1;
-	}
-
-	rc = efx_ef10_populate_rss_table(efx, efx->rss_context.context_id,
-					 rx_indir_table, key);
-	if (rc != 0)
-		goto fail2;
-
-	if (efx->rss_context.context_id != old_rx_rss_context &&
-	    old_rx_rss_context != EFX_EF10_RSS_CONTEXT_INVALID)
-		WARN_ON(efx_ef10_free_rss_context(efx, old_rx_rss_context) != 0);
-	nic_data->rx_rss_context_exclusive = true;
-	if (rx_indir_table != efx->rss_context.rx_indir_table)
-		memcpy(efx->rss_context.rx_indir_table, rx_indir_table,
-		       sizeof(efx->rss_context.rx_indir_table));
-	if (key != efx->rss_context.rx_hash_key)
-		memcpy(efx->rss_context.rx_hash_key, key,
-		       efx->type->rx_hash_key_size);
-
-	return 0;
-
-fail2:
-	if (old_rx_rss_context != efx->rss_context.context_id) {
-		WARN_ON(efx_ef10_free_rss_context(efx, efx->rss_context.context_id) != 0);
-		efx->rss_context.context_id = old_rx_rss_context;
-	}
-fail1:
-	netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
-	return rc;
-}
-
-static int efx_ef10_rx_push_rss_context_config(struct efx_nic *efx,
-					       struct efx_rss_context *ctx,
-					       const u32 *rx_indir_table,
-					       const u8 *key)
-{
-	int rc;
-
-	WARN_ON(!mutex_is_locked(&efx->rss_lock));
-
-	if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) {
-		rc = efx_ef10_alloc_rss_context(efx, true, ctx, NULL);
-		if (rc)
-			return rc;
-	}
-
-	if (!rx_indir_table) /* Delete this context */
-		return efx_ef10_free_rss_context(efx, ctx->context_id);
-
-	rc = efx_ef10_populate_rss_table(efx, ctx->context_id,
-					 rx_indir_table, key);
-	if (rc)
-		return rc;
-
-	memcpy(ctx->rx_indir_table, rx_indir_table,
-	       sizeof(efx->rss_context.rx_indir_table));
-	memcpy(ctx->rx_hash_key, key, efx->type->rx_hash_key_size);
-
-	return 0;
-}
-
-static int efx_ef10_rx_pull_rss_context_config(struct efx_nic *efx,
-					       struct efx_rss_context *ctx)
-{
-	MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN);
-	MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN);
-	MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN);
-	size_t outlen;
-	int rc, i;
-
-	WARN_ON(!mutex_is_locked(&efx->rss_lock));
-
-	BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN !=
-		     MC_CMD_RSS_CONTEXT_GET_KEY_IN_LEN);
-
-	if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID)
-		return -ENOENT;
-
-	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_TABLE_IN_RSS_CONTEXT_ID,
-		       ctx->context_id);
-	BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_indir_table) !=
-		     MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE_LEN);
-	rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_TABLE, inbuf, sizeof(inbuf),
-			  tablebuf, sizeof(tablebuf), &outlen);
-	if (rc != 0)
-		return rc;
-
-	if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN))
-		return -EIO;
-
-	for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
-		ctx->rx_indir_table[i] = MCDI_PTR(tablebuf,
-				RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE)[i];
-
-	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_KEY_IN_RSS_CONTEXT_ID,
-		       ctx->context_id);
-	BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_hash_key) !=
-		     MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN);
-	rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_KEY, inbuf, sizeof(inbuf),
-			  keybuf, sizeof(keybuf), &outlen);
-	if (rc != 0)
-		return rc;
-
-	if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN))
-		return -EIO;
-
-	for (i = 0; i < ARRAY_SIZE(ctx->rx_hash_key); ++i)
-		ctx->rx_hash_key[i] = MCDI_PTR(
-				keybuf, RSS_CONTEXT_GET_KEY_OUT_TOEPLITZ_KEY)[i];
-
-	return 0;
-}
-
-static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
-{
-	int rc;
-
-	mutex_lock(&efx->rss_lock);
-	rc = efx_ef10_rx_pull_rss_context_config(efx, &efx->rss_context);
-	mutex_unlock(&efx->rss_lock);
-	return rc;
-}
-
-static void efx_ef10_rx_restore_rss_contexts(struct efx_nic *efx)
-{
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
-	struct efx_rss_context *ctx;
-	int rc;
-
-	WARN_ON(!mutex_is_locked(&efx->rss_lock));
-
-	if (!nic_data->must_restore_rss_contexts)
-		return;
-
-	list_for_each_entry(ctx, &efx->rss_context.list, list) {
-		/* previous NIC RSS context is gone */
-		ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
-		/* so try to allocate a new one */
-		rc = efx_ef10_rx_push_rss_context_config(efx, ctx,
-							 ctx->rx_indir_table,
-							 ctx->rx_hash_key);
-		if (rc)
-			netif_warn(efx, probe, efx->net_dev,
-				   "failed to restore RSS context %u, rc=%d"
-				   "; RSS filters may fail to be applied\n",
-				   ctx->user_id, rc);
-	}
-	nic_data->must_restore_rss_contexts = false;
-}
-
-static int efx_ef10_pf_rx_push_rss_config(struct efx_nic *efx, bool user,
-					  const u32 *rx_indir_table,
-					  const u8 *key)
-{
-	int rc;
-
-	if (efx->rss_spread == 1)
-		return 0;
-
-	if (!key)
-		key = efx->rss_context.rx_hash_key;
-
-	rc = efx_ef10_rx_push_exclusive_rss_config(efx, rx_indir_table, key);
-
-	if (rc == -ENOBUFS && !user) {
-		unsigned context_size;
-		bool mismatch = false;
-		size_t i;
-
-		for (i = 0;
-		     i < ARRAY_SIZE(efx->rss_context.rx_indir_table) && !mismatch;
-		     i++)
-			mismatch = rx_indir_table[i] !=
-				ethtool_rxfh_indir_default(i, efx->rss_spread);
-
-		rc = efx_ef10_rx_push_shared_rss_config(efx, &context_size);
-		if (rc == 0) {
-			if (context_size != efx->rss_spread)
-				netif_warn(efx, probe, efx->net_dev,
-					   "Could not allocate an exclusive RSS"
-					   " context; allocated a shared one of"
-					   " different size."
-					   " Wanted %u, got %u.\n",
-					   efx->rss_spread, context_size);
-			else if (mismatch)
-				netif_warn(efx, probe, efx->net_dev,
-					   "Could not allocate an exclusive RSS"
-					   " context; allocated a shared one but"
-					   " could not apply custom"
-					   " indirection.\n");
-			else
-				netif_info(efx, probe, efx->net_dev,
-					   "Could not allocate an exclusive RSS"
-					   " context; allocated a shared one.\n");
-		}
-	}
-	return rc;
-}
-
-static int efx_ef10_vf_rx_push_rss_config(struct efx_nic *efx, bool user,
-					  const u32 *rx_indir_table
-					  __attribute__ ((unused)),
-					  const u8 *key
-					  __attribute__ ((unused)))
-{
-	if (user)
-		return -EOPNOTSUPP;
-	if (efx->rss_context.context_id != EFX_EF10_RSS_CONTEXT_INVALID)
-		return 0;
-	return efx_ef10_rx_push_shared_rss_config(efx, NULL);
-}
-
-static int efx_ef10_rx_probe(struct efx_rx_queue *rx_queue)
-{
-	return efx_nic_alloc_buffer(rx_queue->efx, &rx_queue->rxd.buf,
-				    (rx_queue->ptr_mask + 1) *
-				    sizeof(efx_qword_t),
-				    GFP_KERNEL);
-}
-
-static void efx_ef10_rx_init(struct efx_rx_queue *rx_queue)
-{
-	MCDI_DECLARE_BUF(inbuf,
-			 MC_CMD_INIT_RXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
-						EFX_BUF_SIZE));
-	struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
-	size_t entries = rx_queue->rxd.buf.len / EFX_BUF_SIZE;
-	struct efx_nic *efx = rx_queue->efx;
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
-	size_t inlen;
-	dma_addr_t dma_addr;
-	int rc;
-	int i;
-	BUILD_BUG_ON(MC_CMD_INIT_RXQ_OUT_LEN != 0);
-
-	rx_queue->scatter_n = 0;
-	rx_queue->scatter_len = 0;
-
-	MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_SIZE, rx_queue->ptr_mask + 1);
-	MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_TARGET_EVQ, channel->channel);
-	MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_LABEL, efx_rx_queue_index(rx_queue));
-	MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_INSTANCE,
-		       efx_rx_queue_index(rx_queue));
-	MCDI_POPULATE_DWORD_2(inbuf, INIT_RXQ_IN_FLAGS,
-			      INIT_RXQ_IN_FLAG_PREFIX, 1,
-			      INIT_RXQ_IN_FLAG_TIMESTAMP, 1);
-	MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_OWNER_ID, 0);
-	MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_PORT_ID, nic_data->vport_id);
-
-	dma_addr = rx_queue->rxd.buf.dma_addr;
-
-	netif_dbg(efx, hw, efx->net_dev, "pushing RXQ %d. %zu entries (%llx)\n",
-		  efx_rx_queue_index(rx_queue), entries, (u64)dma_addr);
-
-	for (i = 0; i < entries; ++i) {
-		MCDI_SET_ARRAY_QWORD(inbuf, INIT_RXQ_IN_DMA_ADDR, i, dma_addr);
-		dma_addr += EFX_BUF_SIZE;
-	}
-
-	inlen = MC_CMD_INIT_RXQ_IN_LEN(entries);
-
-	rc = efx_mcdi_rpc(efx, MC_CMD_INIT_RXQ, inbuf, inlen,
-			  NULL, 0, NULL);
-	if (rc)
-		netdev_WARN(efx->net_dev, "failed to initialise RXQ %d\n",
-			    efx_rx_queue_index(rx_queue));
-}
-
-static void efx_ef10_rx_fini(struct efx_rx_queue *rx_queue)
-{
-	MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_RXQ_IN_LEN);
-	MCDI_DECLARE_BUF_ERR(outbuf);
-	struct efx_nic *efx = rx_queue->efx;
-	size_t outlen;
-	int rc;
-
-	MCDI_SET_DWORD(inbuf, FINI_RXQ_IN_INSTANCE,
-		       efx_rx_queue_index(rx_queue));
-
-	rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_RXQ, inbuf, sizeof(inbuf),
-				outbuf, sizeof(outbuf), &outlen);
-
-	if (rc && rc != -EALREADY)
-		goto fail;
-
-	return;
-
-fail:
-	efx_mcdi_display_error(efx, MC_CMD_FINI_RXQ, MC_CMD_FINI_RXQ_IN_LEN,
-			       outbuf, outlen, rc);
-}
-
-static void efx_ef10_rx_remove(struct efx_rx_queue *rx_queue)
-{
-	efx_nic_free_buffer(rx_queue->efx, &rx_queue->rxd.buf);
-}
-
 /* This creates an entry in the RX descriptor queue */
 static inline void efx_ef10_build_rx_desc(struct efx_rx_queue *rx_queue,
 					  unsigned int index)
@@ -3229,106 +2460,20 @@ efx_ef10_rx_defer_refill_complete(struct efx_nic *efx, unsigned long cookie,
 	/* nothing to do */
 }
 
-static int efx_ef10_ev_probe(struct efx_channel *channel)
-{
-	return efx_nic_alloc_buffer(channel->efx, &channel->eventq.buf,
-				    (channel->eventq_mask + 1) *
-				    sizeof(efx_qword_t),
-				    GFP_KERNEL);
-}
-
-static void efx_ef10_ev_fini(struct efx_channel *channel)
-{
-	MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_EVQ_IN_LEN);
-	MCDI_DECLARE_BUF_ERR(outbuf);
-	struct efx_nic *efx = channel->efx;
-	size_t outlen;
-	int rc;
-
-	MCDI_SET_DWORD(inbuf, FINI_EVQ_IN_INSTANCE, channel->channel);
-
-	rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_EVQ, inbuf, sizeof(inbuf),
-				outbuf, sizeof(outbuf), &outlen);
-
-	if (rc && rc != -EALREADY)
-		goto fail;
-
-	return;
-
-fail:
-	efx_mcdi_display_error(efx, MC_CMD_FINI_EVQ, MC_CMD_FINI_EVQ_IN_LEN,
-			       outbuf, outlen, rc);
-}
-
 static int efx_ef10_ev_init(struct efx_channel *channel)
 {
-	MCDI_DECLARE_BUF(inbuf,
-			 MC_CMD_INIT_EVQ_V2_IN_LEN(EFX_MAX_EVQ_SIZE * 8 /
-						   EFX_BUF_SIZE));
-	MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_V2_OUT_LEN);
-	size_t entries = channel->eventq.buf.len / EFX_BUF_SIZE;
 	struct efx_nic *efx = channel->efx;
 	struct efx_ef10_nic_data *nic_data;
-	size_t inlen, outlen;
 	unsigned int enabled, implemented;
-	dma_addr_t dma_addr;
+	bool use_v2, cut_thru;
 	int rc;
-	int i;
 
 	nic_data = efx->nic_data;
-
-	/* Fill event queue with all ones (i.e. empty events) */
-	memset(channel->eventq.buf.addr, 0xff, channel->eventq.buf.len);
-
-	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_SIZE, channel->eventq_mask + 1);
-	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_INSTANCE, channel->channel);
-	/* INIT_EVQ expects index in vector table, not absolute */
-	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_IRQ_NUM, channel->channel);
-	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_MODE,
-		       MC_CMD_INIT_EVQ_IN_TMR_MODE_DIS);
-	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_LOAD, 0);
-	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_RELOAD, 0);
-	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_MODE,
-		       MC_CMD_INIT_EVQ_IN_COUNT_MODE_DIS);
-	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_THRSHLD, 0);
-
-	if (nic_data->datapath_caps2 &
-	    1 << MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_LBN) {
-		/* Use the new generic approach to specifying event queue
-		 * configuration, requesting lower latency or higher throughput.
-		 * The options that actually get used appear in the output.
-		 */
-		MCDI_POPULATE_DWORD_2(inbuf, INIT_EVQ_V2_IN_FLAGS,
-				      INIT_EVQ_V2_IN_FLAG_INTERRUPTING, 1,
-				      INIT_EVQ_V2_IN_FLAG_TYPE,
-				      MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO);
-	} else {
-		bool cut_thru = !(nic_data->datapath_caps &
-			1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN);
-
-		MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS,
-				      INIT_EVQ_IN_FLAG_INTERRUPTING, 1,
-				      INIT_EVQ_IN_FLAG_RX_MERGE, 1,
-				      INIT_EVQ_IN_FLAG_TX_MERGE, 1,
-				      INIT_EVQ_IN_FLAG_CUT_THRU, cut_thru);
-	}
-
-	dma_addr = channel->eventq.buf.dma_addr;
-	for (i = 0; i < entries; ++i) {
-		MCDI_SET_ARRAY_QWORD(inbuf, INIT_EVQ_IN_DMA_ADDR, i, dma_addr);
-		dma_addr += EFX_BUF_SIZE;
-	}
-
-	inlen = MC_CMD_INIT_EVQ_IN_LEN(entries);
-
-	rc = efx_mcdi_rpc(efx, MC_CMD_INIT_EVQ, inbuf, inlen,
-			  outbuf, sizeof(outbuf), &outlen);
-
-	if (outlen >= MC_CMD_INIT_EVQ_V2_OUT_LEN)
-		netif_dbg(efx, drv, efx->net_dev,
-			  "Channel %d using event queue flags %08x\n",
-			  channel->channel,
-			  MCDI_DWORD(outbuf, INIT_EVQ_V2_OUT_FLAGS));
+	use_v2 = nic_data->datapath_caps2 &
+		 1 << MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_LBN;
+	cut_thru = !(nic_data->datapath_caps &
+		     1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN);
+	rc = efx_mcdi_ev_init(channel, cut_thru, use_v2);
 
 	/* IRQ return is ignored */
 	if (channel->channel || rc)
@@ -3386,15 +2531,10 @@ static int efx_ef10_ev_init(struct efx_channel *channel)
 	return 0;
 
 fail:
-	efx_ef10_ev_fini(channel);
+	efx_mcdi_ev_fini(channel);
 	return rc;
 }
 
-static void efx_ef10_ev_remove(struct efx_channel *channel)
-{
-	efx_nic_free_buffer(channel->efx, &channel->eventq.buf);
-}
-
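The removed efx_ef10_ev_init() first memsets the event ring to 0xff ("fill event queue with all ones (i.e. empty events)"): the driver treats an all-ones entry as "no event written yet", so it can distinguish hardware-written events from empty slots without a separate index from the NIC. A minimal sketch of that sentinel convention, assuming an invented 64-bit event format:

	#include <assert.h>
	#include <stdbool.h>
	#include <stdint.h>
	#include <string.h>

	#define EVQ_SIZE 512

	static uint64_t evq[EVQ_SIZE];

	static bool entry_is_empty(unsigned int idx)
	{
		return evq[idx] == ~0ULL;	/* all ones == never written */
	}

	int main(void)
	{
		memset(evq, 0xff, sizeof(evq));	/* "fill with empty events" */
		assert(entry_is_empty(0));
		evq[0] = 0x1234;		/* hardware writes an event */
		assert(!entry_is_empty(0));
		return 0;
	}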
 static void efx_ef10_handle_rx_wrong_queue(struct efx_rx_queue *rx_queue,
 					   unsigned int rx_queue_label)
 {
@@ -3976,9 +3116,9 @@ static int efx_ef10_fini_dmaq(struct efx_nic *efx)
 	if (efx->state != STATE_RECOVERY) {
 		efx_for_each_channel(channel, efx) {
 			efx_for_each_channel_rx_queue(rx_queue, channel)
-				efx_ef10_rx_fini(rx_queue);
+				efx_mcdi_rx_fini(rx_queue);
 			efx_for_each_channel_tx_queue(tx_queue, channel)
-				efx_ef10_tx_fini(tx_queue);
+				efx_mcdi_tx_fini(tx_queue);
 		}
 
 		wait_event_timeout(efx->flush_wq,
@@ -4000,1538 +3140,6 @@ static void efx_ef10_prepare_flr(struct efx_nic *efx)
 	atomic_set(&efx->active_queues, 0);
 }
 
-/* Decide whether a filter should be exclusive or else should allow
- * delivery to additional recipients.  Currently we decide that
- * filters for specific local unicast MAC and IP addresses are
- * exclusive.
- */
-static bool efx_ef10_filter_is_exclusive(const struct efx_filter_spec *spec)
-{
-	if (spec->match_flags & EFX_FILTER_MATCH_LOC_MAC &&
-	    !is_multicast_ether_addr(spec->loc_mac))
-		return true;
-
-	if ((spec->match_flags &
-	     (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) ==
-	    (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) {
-		if (spec->ether_type == htons(ETH_P_IP) &&
-		    !ipv4_is_multicast(spec->loc_host[0]))
-			return true;
-		if (spec->ether_type == htons(ETH_P_IPV6) &&
-		    ((const u8 *)spec->loc_host)[0] != 0xff)
-			return true;
-	}
-
-	return false;
-}
-
-static struct efx_filter_spec *
-efx_ef10_filter_entry_spec(const struct efx_ef10_filter_table *table,
-			   unsigned int filter_idx)
-{
-	return (struct efx_filter_spec *)(table->entry[filter_idx].spec &
-					  ~EFX_EF10_FILTER_FLAGS);
-}
-
-static unsigned int
-efx_ef10_filter_entry_flags(const struct efx_ef10_filter_table *table,
-			    unsigned int filter_idx)
-{
-	return table->entry[filter_idx].spec & EFX_EF10_FILTER_FLAGS;
-}
-
-static void
-efx_ef10_filter_set_entry(struct efx_ef10_filter_table *table,
-			  unsigned int filter_idx,
-			  const struct efx_filter_spec *spec,
-			  unsigned int flags)
-{
-	table->entry[filter_idx].spec = (unsigned long)spec | flags;
-}
-
-static void
-efx_ef10_filter_push_prep_set_match_fields(struct efx_nic *efx,
-					   const struct efx_filter_spec *spec,
-					   efx_dword_t *inbuf)
-{
-	enum efx_encap_type encap_type = efx_filter_get_encap_type(spec);
-	u32 match_fields = 0, uc_match, mc_match;
-
-	MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
-		       efx_ef10_filter_is_exclusive(spec) ?
-		       MC_CMD_FILTER_OP_IN_OP_INSERT :
-		       MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE);
-
-	/* Convert match flags and values.  Unlike almost
-	 * everything else in MCDI, these fields are in
-	 * network byte order.
-	 */
-#define COPY_VALUE(value, mcdi_field)					\
-	do {								\
-		match_fields |=						\
-			1 << MC_CMD_FILTER_OP_IN_MATCH_ ##		\
-			mcdi_field ## _LBN;				\
-		BUILD_BUG_ON(						\
-			MC_CMD_FILTER_OP_IN_ ## mcdi_field ## _LEN <	\
-			sizeof(value));					\
-		memcpy(MCDI_PTR(inbuf, FILTER_OP_IN_ ## mcdi_field),	\
-		       &value, sizeof(value));				\
-	} while (0)
-#define COPY_FIELD(gen_flag, gen_field, mcdi_field)			\
-	if (spec->match_flags & EFX_FILTER_MATCH_ ## gen_flag) {	\
-		COPY_VALUE(spec->gen_field, mcdi_field);		\
-	}
-	/* Handle encap filters first.  They will always be mismatch
-	 * (unknown UC or MC) filters
-	 */
-	if (encap_type) {
-		/* ether_type and outer_ip_proto need to be variables
-		 * because COPY_VALUE wants to memcpy them
-		 */
-		__be16 ether_type =
-			htons(encap_type & EFX_ENCAP_FLAG_IPV6 ?
-			      ETH_P_IPV6 : ETH_P_IP);
-		u8 vni_type = MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_GENEVE;
-		u8 outer_ip_proto;
-
-		switch (encap_type & EFX_ENCAP_TYPES_MASK) {
-		case EFX_ENCAP_TYPE_VXLAN:
-			vni_type = MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_VXLAN;
-			/* fallthrough */
-		case EFX_ENCAP_TYPE_GENEVE:
-			COPY_VALUE(ether_type, ETHER_TYPE);
-			outer_ip_proto = IPPROTO_UDP;
-			COPY_VALUE(outer_ip_proto, IP_PROTO);
-			/* We always need to set the type field, even
-			 * though we're not matching on the TNI.
-			 */
-			MCDI_POPULATE_DWORD_1(inbuf,
-					      FILTER_OP_EXT_IN_VNI_OR_VSID,
-					      FILTER_OP_EXT_IN_VNI_TYPE,
-					      vni_type);
-			break;
-		case EFX_ENCAP_TYPE_NVGRE:
-			COPY_VALUE(ether_type, ETHER_TYPE);
-			outer_ip_proto = IPPROTO_GRE;
-			COPY_VALUE(outer_ip_proto, IP_PROTO);
-			break;
-		default:
-			WARN_ON(1);
-		}
-
-		uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_LBN;
-		mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_LBN;
-	} else {
-		uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_UCAST_DST_LBN;
-		mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_MCAST_DST_LBN;
-	}
-
-	if (spec->match_flags & EFX_FILTER_MATCH_LOC_MAC_IG)
-		match_fields |=
-			is_multicast_ether_addr(spec->loc_mac) ?
-			1 << mc_match :
-			1 << uc_match;
-	COPY_FIELD(REM_HOST, rem_host, SRC_IP);
-	COPY_FIELD(LOC_HOST, loc_host, DST_IP);
-	COPY_FIELD(REM_MAC, rem_mac, SRC_MAC);
-	COPY_FIELD(REM_PORT, rem_port, SRC_PORT);
-	COPY_FIELD(LOC_MAC, loc_mac, DST_MAC);
-	COPY_FIELD(LOC_PORT, loc_port, DST_PORT);
-	COPY_FIELD(ETHER_TYPE, ether_type, ETHER_TYPE);
-	COPY_FIELD(INNER_VID, inner_vid, INNER_VLAN);
-	COPY_FIELD(OUTER_VID, outer_vid, OUTER_VLAN);
-	COPY_FIELD(IP_PROTO, ip_proto, IP_PROTO);
-#undef COPY_FIELD
-#undef COPY_VALUE
-	MCDI_SET_DWORD(inbuf, FILTER_OP_IN_MATCH_FIELDS,
-		       match_fields);
-}
-
-static void efx_ef10_filter_push_prep(struct efx_nic *efx,
-				      const struct efx_filter_spec *spec,
-				      efx_dword_t *inbuf, u64 handle,
-				      struct efx_rss_context *ctx,
-				      bool replacing)
-{
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
-	u32 flags = spec->flags;
-
-	memset(inbuf, 0, MC_CMD_FILTER_OP_EXT_IN_LEN);
-
-	/* If RSS filter, caller better have given us an RSS context */
-	if (flags & EFX_FILTER_FLAG_RX_RSS) {
-		/* We don't have the ability to return an error, so we'll just
-		 * log a warning and disable RSS for the filter.
-		 */
-		if (WARN_ON_ONCE(!ctx))
-			flags &= ~EFX_FILTER_FLAG_RX_RSS;
-		else if (WARN_ON_ONCE(ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID))
-			flags &= ~EFX_FILTER_FLAG_RX_RSS;
-	}
-
-	if (replacing) {
-		MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
-			       MC_CMD_FILTER_OP_IN_OP_REPLACE);
-		MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, handle);
-	} else {
-		efx_ef10_filter_push_prep_set_match_fields(efx, spec, inbuf);
-	}
-
-	MCDI_SET_DWORD(inbuf, FILTER_OP_IN_PORT_ID, nic_data->vport_id);
-	MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_DEST,
-		       spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP ?
-		       MC_CMD_FILTER_OP_IN_RX_DEST_DROP :
-		       MC_CMD_FILTER_OP_IN_RX_DEST_HOST);
-	MCDI_SET_DWORD(inbuf, FILTER_OP_IN_TX_DOMAIN, 0);
-	MCDI_SET_DWORD(inbuf, FILTER_OP_IN_TX_DEST,
-		       MC_CMD_FILTER_OP_IN_TX_DEST_DEFAULT);
-	MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_QUEUE,
-		       spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP ?
-		       0 : spec->dmaq_id);
-	MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_MODE,
-		       (flags & EFX_FILTER_FLAG_RX_RSS) ?
-		       MC_CMD_FILTER_OP_IN_RX_MODE_RSS :
-		       MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE);
-	if (flags & EFX_FILTER_FLAG_RX_RSS)
-		MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_CONTEXT, ctx->context_id);
-}
-
-static int efx_ef10_filter_push(struct efx_nic *efx,
-				const struct efx_filter_spec *spec, u64 *handle,
-				struct efx_rss_context *ctx, bool replacing)
-{
-	MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
-	MCDI_DECLARE_BUF(outbuf, MC_CMD_FILTER_OP_EXT_OUT_LEN);
-	size_t outlen;
-	int rc;
-
-	efx_ef10_filter_push_prep(efx, spec, inbuf, *handle, ctx, replacing);
-	rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf),
-				outbuf, sizeof(outbuf), &outlen);
-	if (rc && spec->priority != EFX_FILTER_PRI_HINT)
-		efx_mcdi_display_error(efx, MC_CMD_FILTER_OP, sizeof(inbuf),
-				       outbuf, outlen, rc);
-	if (rc == 0)
-		*handle = MCDI_QWORD(outbuf, FILTER_OP_OUT_HANDLE);
-	if (rc == -ENOSPC)
-		rc = -EBUSY; /* to match efx_farch_filter_insert() */
-	return rc;
-}
-
-static u32 efx_ef10_filter_mcdi_flags_from_spec(const struct efx_filter_spec *spec)
-{
-	enum efx_encap_type encap_type = efx_filter_get_encap_type(spec);
-	unsigned int match_flags = spec->match_flags;
-	unsigned int uc_match, mc_match;
-	u32 mcdi_flags = 0;
-
-#define MAP_FILTER_TO_MCDI_FLAG(gen_flag, mcdi_field, encap) {		\
-		unsigned int  old_match_flags = match_flags;		\
-		match_flags &= ~EFX_FILTER_MATCH_ ## gen_flag;		\
-		if (match_flags != old_match_flags)			\
-			mcdi_flags |=					\
-				(1 << ((encap) ?			\
-				       MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_ ## \
-				       mcdi_field ## _LBN :		\
-				       MC_CMD_FILTER_OP_EXT_IN_MATCH_ ##\
-				       mcdi_field ## _LBN));		\
-	}
-	/* inner or outer based on encap type */
-	MAP_FILTER_TO_MCDI_FLAG(REM_HOST, SRC_IP, encap_type);
-	MAP_FILTER_TO_MCDI_FLAG(LOC_HOST, DST_IP, encap_type);
-	MAP_FILTER_TO_MCDI_FLAG(REM_MAC, SRC_MAC, encap_type);
-	MAP_FILTER_TO_MCDI_FLAG(REM_PORT, SRC_PORT, encap_type);
-	MAP_FILTER_TO_MCDI_FLAG(LOC_MAC, DST_MAC, encap_type);
-	MAP_FILTER_TO_MCDI_FLAG(LOC_PORT, DST_PORT, encap_type);
-	MAP_FILTER_TO_MCDI_FLAG(ETHER_TYPE, ETHER_TYPE, encap_type);
-	MAP_FILTER_TO_MCDI_FLAG(IP_PROTO, IP_PROTO, encap_type);
-	/* always outer */
-	MAP_FILTER_TO_MCDI_FLAG(INNER_VID, INNER_VLAN, false);
-	MAP_FILTER_TO_MCDI_FLAG(OUTER_VID, OUTER_VLAN, false);
-#undef MAP_FILTER_TO_MCDI_FLAG
-
-	/* special handling for encap type, and mismatch */
-	if (encap_type) {
-		match_flags &= ~EFX_FILTER_MATCH_ENCAP_TYPE;
-		mcdi_flags |=
-			(1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_ETHER_TYPE_LBN);
-		mcdi_flags |= (1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_IP_PROTO_LBN);
-
-		uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_LBN;
-		mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_LBN;
-	} else {
-		uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_UCAST_DST_LBN;
-		mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_MCAST_DST_LBN;
-	}
-
-	if (match_flags & EFX_FILTER_MATCH_LOC_MAC_IG) {
-		match_flags &= ~EFX_FILTER_MATCH_LOC_MAC_IG;
-		mcdi_flags |=
-			is_multicast_ether_addr(spec->loc_mac) ?
-			1 << mc_match :
-			1 << uc_match;
-	}
-
-	/* Did we map them all? */
-	WARN_ON_ONCE(match_flags);
-
-	return mcdi_flags;
-}
-
-static int efx_ef10_filter_pri(struct efx_ef10_filter_table *table,
-			       const struct efx_filter_spec *spec)
-{
-	u32 mcdi_flags = efx_ef10_filter_mcdi_flags_from_spec(spec);
-	unsigned int match_pri;
-
-	for (match_pri = 0;
-	     match_pri < table->rx_match_count;
-	     match_pri++)
-		if (table->rx_match_mcdi_flags[match_pri] == mcdi_flags)
-			return match_pri;
-
-	return -EPROTONOSUPPORT;
-}
-
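efx_ef10_filter_insert_locked(), removed below, resolves collisions in the software filter table by open addressing: it probes (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1), remembers the first suitable slot, and gives up with -EBUSY after EFX_EF10_FILTER_SEARCH_LIMIT probes. The core of that bounded linear probe as a standalone sketch (constants copied from the removed code; the real loop also compares match tuples and handles multicast recipients):

	#include <errno.h>
	#include <stdint.h>
	#include <stdio.h>

	#define TBL_ROWS	8192u	/* power of two, as HUNT_FILTER_TBL_ROWS */
	#define SEARCH_LIMIT	200	/* as EFX_EF10_FILTER_SEARCH_LIMIT */

	static void *table[TBL_ROWS];

	/* Return a free slot index for @hash, or -EBUSY if the probe run is full. */
	static int find_slot(uint32_t hash)
	{
		unsigned int depth;

		for (depth = 1; depth < SEARCH_LIMIT; depth++) {
			unsigned int i = (hash + depth) & (TBL_ROWS - 1);

			if (!table[i])	/* first free slot wins */
				return (int)i;
		}
		return -EBUSY;	/* past the limit: table is effectively full here */
	}

	int main(void)
	{
		printf("slot for hash 0x123: %d\n", find_slot(0x123));
		return 0;
	}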
*/ - saved_spec = efx_ef10_filter_entry_spec(table, ins_index); - if (saved_spec) { - if (spec->priority == EFX_FILTER_PRI_AUTO && - saved_spec->priority >= EFX_FILTER_PRI_AUTO) { - /* Just make sure it won't be removed */ - if (saved_spec->priority > EFX_FILTER_PRI_AUTO) - saved_spec->flags |= EFX_FILTER_FLAG_RX_OVER_AUTO; - table->entry[ins_index].spec &= - ~EFX_EF10_FILTER_FLAG_AUTO_OLD; - rc = ins_index; - goto out_unlock; - } - replacing = true; - priv_flags = efx_ef10_filter_entry_flags(table, ins_index); - } else { - saved_spec = kmalloc(sizeof(*spec), GFP_ATOMIC); - if (!saved_spec) { - rc = -ENOMEM; - goto out_unlock; - } - *saved_spec = *spec; - priv_flags = 0; - } - efx_ef10_filter_set_entry(table, ins_index, saved_spec, priv_flags); - - /* Actually insert the filter on the HW */ - rc = efx_ef10_filter_push(efx, spec, &table->entry[ins_index].handle, - ctx, replacing); - - if (rc == -EINVAL && nic_data->must_realloc_vis) - /* The MC rebooted under us, causing it to reject our filter - * insertion as pointing to an invalid VI (spec->dmaq_id). - */ - rc = -EAGAIN; - - /* Finalise the software table entry */ - if (rc == 0) { - if (replacing) { - /* Update the fields that may differ */ - if (saved_spec->priority == EFX_FILTER_PRI_AUTO) - saved_spec->flags |= - EFX_FILTER_FLAG_RX_OVER_AUTO; - saved_spec->priority = spec->priority; - saved_spec->flags &= EFX_FILTER_FLAG_RX_OVER_AUTO; - saved_spec->flags |= spec->flags; - saved_spec->rss_context = spec->rss_context; - saved_spec->dmaq_id = spec->dmaq_id; - } - } else if (!replacing) { - kfree(saved_spec); - saved_spec = NULL; - } else { - /* We failed to replace, so the old filter is still present. - * Roll back the software table to reflect this. In fact the - * efx_ef10_filter_set_entry() call below will do the right - * thing, so nothing extra is needed here. - */ - } - efx_ef10_filter_set_entry(table, ins_index, saved_spec, priv_flags); - - /* Remove and finalise entries for lower-priority multicast - * recipients - */ - if (is_mc_recip) { - MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); - unsigned int depth, i; - - memset(inbuf, 0, sizeof(inbuf)); - - for (depth = 0; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) { - if (!test_bit(depth, mc_rem_map)) - continue; - - i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1); - saved_spec = efx_ef10_filter_entry_spec(table, i); - priv_flags = efx_ef10_filter_entry_flags(table, i); - - if (rc == 0) { - MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, - MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE); - MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, - table->entry[i].handle); - rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP, - inbuf, sizeof(inbuf), - NULL, 0, NULL); - } - - if (rc == 0) { - kfree(saved_spec); - saved_spec = NULL; - priv_flags = 0; - } - efx_ef10_filter_set_entry(table, i, saved_spec, - priv_flags); - } - } - - /* If successful, return the inserted filter ID */ - if (rc == 0) - rc = efx_ef10_make_filter_id(match_pri, ins_index); - -out_unlock: - if (rss_locked) - mutex_unlock(&efx->rss_lock); - up_write(&table->lock); - return rc; -} - -static s32 efx_ef10_filter_insert(struct efx_nic *efx, - struct efx_filter_spec *spec, - bool replace_equal) -{ - s32 ret; - - down_read(&efx->filter_sem); - ret = efx_ef10_filter_insert_locked(efx, spec, replace_equal); - up_read(&efx->filter_sem); - - return ret; -} - -static void efx_ef10_filter_update_rx_scatter(struct efx_nic *efx) -{ - /* no need to do anything here on EF10 */ -} - -/* Remove a filter. 
- * If !by_index, remove by ID - * If by_index, remove by index - * Filter ID may come from userland and must be range-checked. - * Caller must hold efx->filter_sem for read, and efx->filter_state->lock - * for write. - */ -static int efx_ef10_filter_remove_internal(struct efx_nic *efx, - unsigned int priority_mask, - u32 filter_id, bool by_index) -{ - unsigned int filter_idx = efx_ef10_filter_get_unsafe_id(filter_id); - struct efx_ef10_filter_table *table = efx->filter_state; - MCDI_DECLARE_BUF(inbuf, - MC_CMD_FILTER_OP_IN_HANDLE_OFST + - MC_CMD_FILTER_OP_IN_HANDLE_LEN); - struct efx_filter_spec *spec; - DEFINE_WAIT(wait); - int rc; - - spec = efx_ef10_filter_entry_spec(table, filter_idx); - if (!spec || - (!by_index && - efx_ef10_filter_pri(table, spec) != - efx_ef10_filter_get_unsafe_pri(filter_id))) - return -ENOENT; - - if (spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO && - priority_mask == (1U << EFX_FILTER_PRI_AUTO)) { - /* Just remove flags */ - spec->flags &= ~EFX_FILTER_FLAG_RX_OVER_AUTO; - table->entry[filter_idx].spec &= ~EFX_EF10_FILTER_FLAG_AUTO_OLD; - return 0; - } - - if (!(priority_mask & (1U << spec->priority))) - return -ENOENT; - - if (spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO) { - /* Reset to an automatic filter */ - - struct efx_filter_spec new_spec = *spec; - - new_spec.priority = EFX_FILTER_PRI_AUTO; - new_spec.flags = (EFX_FILTER_FLAG_RX | - (efx_rss_active(&efx->rss_context) ? - EFX_FILTER_FLAG_RX_RSS : 0)); - new_spec.dmaq_id = 0; - new_spec.rss_context = 0; - rc = efx_ef10_filter_push(efx, &new_spec, - &table->entry[filter_idx].handle, - &efx->rss_context, - true); - - if (rc == 0) - *spec = new_spec; - } else { - /* Really remove the filter */ - - MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, - efx_ef10_filter_is_exclusive(spec) ? 
- MC_CMD_FILTER_OP_IN_OP_REMOVE : - MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE); - MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, - table->entry[filter_idx].handle); - rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, - inbuf, sizeof(inbuf), NULL, 0, NULL); - - if ((rc == 0) || (rc == -ENOENT)) { - /* Filter removed OK or didn't actually exist */ - kfree(spec); - efx_ef10_filter_set_entry(table, filter_idx, NULL, 0); - } else { - efx_mcdi_display_error(efx, MC_CMD_FILTER_OP, - MC_CMD_FILTER_OP_EXT_IN_LEN, - NULL, 0, rc); - } - } - - return rc; -} - -static int efx_ef10_filter_remove_safe(struct efx_nic *efx, - enum efx_filter_priority priority, - u32 filter_id) -{ - struct efx_ef10_filter_table *table; - int rc; - - down_read(&efx->filter_sem); - table = efx->filter_state; - down_write(&table->lock); - rc = efx_ef10_filter_remove_internal(efx, 1U << priority, filter_id, - false); - up_write(&table->lock); - up_read(&efx->filter_sem); - return rc; -} - -/* Caller must hold efx->filter_sem for read */ -static void efx_ef10_filter_remove_unsafe(struct efx_nic *efx, - enum efx_filter_priority priority, - u32 filter_id) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - - if (filter_id == EFX_EF10_FILTER_ID_INVALID) - return; - - down_write(&table->lock); - efx_ef10_filter_remove_internal(efx, 1U << priority, filter_id, - true); - up_write(&table->lock); -} - -static int efx_ef10_filter_get_safe(struct efx_nic *efx, - enum efx_filter_priority priority, - u32 filter_id, struct efx_filter_spec *spec) -{ - unsigned int filter_idx = efx_ef10_filter_get_unsafe_id(filter_id); - const struct efx_filter_spec *saved_spec; - struct efx_ef10_filter_table *table; - int rc; - - down_read(&efx->filter_sem); - table = efx->filter_state; - down_read(&table->lock); - saved_spec = efx_ef10_filter_entry_spec(table, filter_idx); - if (saved_spec && saved_spec->priority == priority && - efx_ef10_filter_pri(table, saved_spec) == - efx_ef10_filter_get_unsafe_pri(filter_id)) { - *spec = *saved_spec; - rc = 0; - } else { - rc = -ENOENT; - } - up_read(&table->lock); - up_read(&efx->filter_sem); - return rc; -} - -static int efx_ef10_filter_clear_rx(struct efx_nic *efx, - enum efx_filter_priority priority) -{ - struct efx_ef10_filter_table *table; - unsigned int priority_mask; - unsigned int i; - int rc; - - priority_mask = (((1U << (priority + 1)) - 1) & - ~(1U << EFX_FILTER_PRI_AUTO)); - - down_read(&efx->filter_sem); - table = efx->filter_state; - down_write(&table->lock); - for (i = 0; i < HUNT_FILTER_TBL_ROWS; i++) { - rc = efx_ef10_filter_remove_internal(efx, priority_mask, - i, true); - if (rc && rc != -ENOENT) - break; - rc = 0; - } - - up_write(&table->lock); - up_read(&efx->filter_sem); - return rc; -} - -static u32 efx_ef10_filter_count_rx_used(struct efx_nic *efx, - enum efx_filter_priority priority) -{ - struct efx_ef10_filter_table *table; - unsigned int filter_idx; - s32 count = 0; - - down_read(&efx->filter_sem); - table = efx->filter_state; - down_read(&table->lock); - for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) { - if (table->entry[filter_idx].spec && - efx_ef10_filter_entry_spec(table, filter_idx)->priority == - priority) - ++count; - } - up_read(&table->lock); - up_read(&efx->filter_sem); - return count; -} - -static u32 efx_ef10_filter_get_rx_id_limit(struct efx_nic *efx) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - - return table->rx_match_count * HUNT_FILTER_TBL_ROWS * 2; -} - -static s32 efx_ef10_filter_get_rx_ids(struct efx_nic *efx, - enum 
efx_filter_priority priority, - u32 *buf, u32 size) -{ - struct efx_ef10_filter_table *table; - struct efx_filter_spec *spec; - unsigned int filter_idx; - s32 count = 0; - - down_read(&efx->filter_sem); - table = efx->filter_state; - down_read(&table->lock); - - for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) { - spec = efx_ef10_filter_entry_spec(table, filter_idx); - if (spec && spec->priority == priority) { - if (count == size) { - count = -EMSGSIZE; - break; - } - buf[count++] = - efx_ef10_make_filter_id( - efx_ef10_filter_pri(table, spec), - filter_idx); - } - } - up_read(&table->lock); - up_read(&efx->filter_sem); - return count; -} - -#ifdef CONFIG_RFS_ACCEL - -static bool efx_ef10_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id, - unsigned int filter_idx) -{ - struct efx_filter_spec *spec, saved_spec; - struct efx_ef10_filter_table *table; - struct efx_arfs_rule *rule = NULL; - bool ret = true, force = false; - u16 arfs_id; - - down_read(&efx->filter_sem); - table = efx->filter_state; - down_write(&table->lock); - spec = efx_ef10_filter_entry_spec(table, filter_idx); - - if (!spec || spec->priority != EFX_FILTER_PRI_HINT) - goto out_unlock; - - spin_lock_bh(&efx->rps_hash_lock); - if (!efx->rps_hash_table) { - /* In the absence of the table, we always return 0 to ARFS. */ - arfs_id = 0; - } else { - rule = efx_rps_hash_find(efx, spec); - if (!rule) - /* ARFS table doesn't know of this filter, so remove it */ - goto expire; - arfs_id = rule->arfs_id; - ret = efx_rps_check_rule(rule, filter_idx, &force); - if (force) - goto expire; - if (!ret) { - spin_unlock_bh(&efx->rps_hash_lock); - goto out_unlock; - } - } - if (!rps_may_expire_flow(efx->net_dev, spec->dmaq_id, flow_id, arfs_id)) - ret = false; - else if (rule) - rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING; -expire: - saved_spec = *spec; /* remove operation will kfree spec */ - spin_unlock_bh(&efx->rps_hash_lock); - /* At this point (since we dropped the lock), another thread might queue - * up a fresh insertion request (but the actual insertion will be held - * up by our possession of the filter table lock). In that case, it - * will set rule->filter_id to EFX_ARFS_FILTER_ID_PENDING, meaning that - * the rule is not removed by efx_rps_hash_del() below. - */ - if (ret) - ret = efx_ef10_filter_remove_internal(efx, 1U << spec->priority, - filter_idx, true) == 0; - /* While we can't safely dereference rule (we dropped the lock), we can - * still test it for NULL. 
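The comment here describes the delicate part of ARFS expiry: the remove call frees the spec, so anything needed afterwards must be copied out first, and the rule pointer may only be compared, never dereferenced, once the hash lock is dropped. A standalone C model of that ordering; flow_may_expire(), filter_remove() and rule_hash_del() are invented stand-ins for rps_may_expire_flow() and the ARFS table operations, not driver or kernel API:

	#include <stdbool.h>

	struct flow_spec { unsigned int priority, dmaq_id; };

	/* invented stand-ins, not kernel API */
	extern bool flow_may_expire(unsigned int queue, unsigned int flow_id);
	extern int  filter_remove(unsigned int filter_idx);  /* frees the spec */
	extern void rule_hash_del(const struct flow_spec *spec);

	static bool expire_one(struct flow_spec *spec, unsigned int filter_idx,
			       unsigned int flow_id)
	{
		struct flow_spec saved;

		if (!flow_may_expire(spec->dmaq_id, flow_id))
			return false;	/* flow still active: keep the filter */

		saved = *spec;		/* the remove below frees spec */
		if (filter_remove(filter_idx) != 0)
			return false;

		rule_hash_del(&saved);	/* cleanup uses the saved copy */
		return true;
	}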
- */ - if (ret && rule) { - /* Expiring, so remove entry from ARFS table */ - spin_lock_bh(&efx->rps_hash_lock); - efx_rps_hash_del(efx, &saved_spec); - spin_unlock_bh(&efx->rps_hash_lock); - } -out_unlock: - up_write(&table->lock); - up_read(&efx->filter_sem); - return ret; -} - -#endif /* CONFIG_RFS_ACCEL */ - -static int efx_ef10_filter_match_flags_from_mcdi(bool encap, u32 mcdi_flags) -{ - int match_flags = 0; - -#define MAP_FLAG(gen_flag, mcdi_field) do { \ - u32 old_mcdi_flags = mcdi_flags; \ - mcdi_flags &= ~(1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_ ## \ - mcdi_field ## _LBN); \ - if (mcdi_flags != old_mcdi_flags) \ - match_flags |= EFX_FILTER_MATCH_ ## gen_flag; \ - } while (0) - - if (encap) { - /* encap filters must specify encap type */ - match_flags |= EFX_FILTER_MATCH_ENCAP_TYPE; - /* and imply ethertype and ip proto */ - mcdi_flags &= - ~(1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_IP_PROTO_LBN); - mcdi_flags &= - ~(1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_ETHER_TYPE_LBN); - /* VLAN tags refer to the outer packet */ - MAP_FLAG(INNER_VID, INNER_VLAN); - MAP_FLAG(OUTER_VID, OUTER_VLAN); - /* everything else refers to the inner packet */ - MAP_FLAG(LOC_MAC_IG, IFRM_UNKNOWN_UCAST_DST); - MAP_FLAG(LOC_MAC_IG, IFRM_UNKNOWN_MCAST_DST); - MAP_FLAG(REM_HOST, IFRM_SRC_IP); - MAP_FLAG(LOC_HOST, IFRM_DST_IP); - MAP_FLAG(REM_MAC, IFRM_SRC_MAC); - MAP_FLAG(REM_PORT, IFRM_SRC_PORT); - MAP_FLAG(LOC_MAC, IFRM_DST_MAC); - MAP_FLAG(LOC_PORT, IFRM_DST_PORT); - MAP_FLAG(ETHER_TYPE, IFRM_ETHER_TYPE); - MAP_FLAG(IP_PROTO, IFRM_IP_PROTO); - } else { - MAP_FLAG(LOC_MAC_IG, UNKNOWN_UCAST_DST); - MAP_FLAG(LOC_MAC_IG, UNKNOWN_MCAST_DST); - MAP_FLAG(REM_HOST, SRC_IP); - MAP_FLAG(LOC_HOST, DST_IP); - MAP_FLAG(REM_MAC, SRC_MAC); - MAP_FLAG(REM_PORT, SRC_PORT); - MAP_FLAG(LOC_MAC, DST_MAC); - MAP_FLAG(LOC_PORT, DST_PORT); - MAP_FLAG(ETHER_TYPE, ETHER_TYPE); - MAP_FLAG(INNER_VID, INNER_VLAN); - MAP_FLAG(OUTER_VID, OUTER_VLAN); - MAP_FLAG(IP_PROTO, IP_PROTO); - } -#undef MAP_FLAG - - /* Did we map them all? */ - if (mcdi_flags) - return -EINVAL; - - return match_flags; -} - -static void efx_ef10_filter_cleanup_vlans(struct efx_nic *efx) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - struct efx_ef10_filter_vlan *vlan, *next_vlan; - - /* See comment in efx_ef10_filter_table_remove() */ - if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) - return; - - if (!table) - return; - - list_for_each_entry_safe(vlan, next_vlan, &table->vlan_list, list) - efx_ef10_filter_del_vlan_internal(efx, vlan); -} - -static bool efx_ef10_filter_match_supported(struct efx_ef10_filter_table *table, - bool encap, - enum efx_filter_match_flags match_flags) -{ - unsigned int match_pri; - int mf; - - for (match_pri = 0; - match_pri < table->rx_match_count; - match_pri++) { - mf = efx_ef10_filter_match_flags_from_mcdi(encap, - table->rx_match_mcdi_flags[match_pri]); - if (mf == match_flags) - return true; - } - - return false; -} - -static int -efx_ef10_filter_table_probe_matches(struct efx_nic *efx, - struct efx_ef10_filter_table *table, - bool encap) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_PARSER_DISP_INFO_IN_LEN); - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PARSER_DISP_INFO_OUT_LENMAX); - unsigned int pd_match_pri, pd_match_count; - size_t outlen; - int rc; - - /* Find out which RX filter types are supported, and their priorities */ - MCDI_SET_DWORD(inbuf, GET_PARSER_DISP_INFO_IN_OP, - encap ? 
- MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_ENCAP_RX_MATCHES : - MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_RX_MATCHES); - rc = efx_mcdi_rpc(efx, MC_CMD_GET_PARSER_DISP_INFO, - inbuf, sizeof(inbuf), outbuf, sizeof(outbuf), - &outlen); - if (rc) - return rc; - - pd_match_count = MCDI_VAR_ARRAY_LEN( - outlen, GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES); - - for (pd_match_pri = 0; pd_match_pri < pd_match_count; pd_match_pri++) { - u32 mcdi_flags = - MCDI_ARRAY_DWORD( - outbuf, - GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES, - pd_match_pri); - rc = efx_ef10_filter_match_flags_from_mcdi(encap, mcdi_flags); - if (rc < 0) { - netif_dbg(efx, probe, efx->net_dev, - "%s: fw flags %#x pri %u not supported in driver\n", - __func__, mcdi_flags, pd_match_pri); - } else { - netif_dbg(efx, probe, efx->net_dev, - "%s: fw flags %#x pri %u supported as driver flags %#x pri %u\n", - __func__, mcdi_flags, pd_match_pri, - rc, table->rx_match_count); - table->rx_match_mcdi_flags[table->rx_match_count] = mcdi_flags; - table->rx_match_count++; - } - } - - return 0; -} - -static int efx_ef10_filter_table_probe(struct efx_nic *efx) -{ - struct efx_ef10_nic_data *nic_data = efx->nic_data; - struct net_device *net_dev = efx->net_dev; - struct efx_ef10_filter_table *table; - struct efx_ef10_vlan *vlan; - int rc; - - if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) - return -EINVAL; - - if (efx->filter_state) /* already probed */ - return 0; - - table = kzalloc(sizeof(*table), GFP_KERNEL); - if (!table) - return -ENOMEM; - - table->rx_match_count = 0; - rc = efx_ef10_filter_table_probe_matches(efx, table, false); - if (rc) - goto fail; - if (nic_data->datapath_caps & - (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN)) - rc = efx_ef10_filter_table_probe_matches(efx, table, true); - if (rc) - goto fail; - if ((efx_supported_features(efx) & NETIF_F_HW_VLAN_CTAG_FILTER) && - !(efx_ef10_filter_match_supported(table, false, - (EFX_FILTER_MATCH_OUTER_VID | EFX_FILTER_MATCH_LOC_MAC)) && - efx_ef10_filter_match_supported(table, false, - (EFX_FILTER_MATCH_OUTER_VID | EFX_FILTER_MATCH_LOC_MAC_IG)))) { - netif_info(efx, probe, net_dev, - "VLAN filters are not supported in this firmware variant\n"); - net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; - efx->fixed_features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; - net_dev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; - } - - table->entry = vzalloc(array_size(HUNT_FILTER_TBL_ROWS, - sizeof(*table->entry))); - if (!table->entry) { - rc = -ENOMEM; - goto fail; - } - - table->mc_promisc_last = false; - table->vlan_filter = - !!(efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_FILTER); - INIT_LIST_HEAD(&table->vlan_list); - init_rwsem(&table->lock); - - efx->filter_state = table; - - list_for_each_entry(vlan, &nic_data->vlan_list, list) { - rc = efx_ef10_filter_add_vlan(efx, vlan->vid); - if (rc) - goto fail_add_vlan; - } - - return 0; - -fail_add_vlan: - efx_ef10_filter_cleanup_vlans(efx); - efx->filter_state = NULL; -fail: - kfree(table); - return rc; -} - -/* Caller must hold efx->filter_sem for read if race against - * efx_ef10_filter_table_remove() is possible - */ -static void efx_ef10_filter_table_restore(struct efx_nic *efx) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - struct efx_ef10_nic_data *nic_data = efx->nic_data; - unsigned int invalid_filters = 0, failed = 0; - struct efx_ef10_filter_vlan *vlan; - struct efx_filter_spec *spec; - struct efx_rss_context *ctx; - unsigned int filter_idx; - u32 mcdi_flags; - int match_pri; - int rc, i; - - 
WARN_ON(!rwsem_is_locked(&efx->filter_sem)); - - if (!nic_data->must_restore_filters) - return; - - if (!table) - return; - - down_write(&table->lock); - mutex_lock(&efx->rss_lock); - - for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) { - spec = efx_ef10_filter_entry_spec(table, filter_idx); - if (!spec) - continue; - - mcdi_flags = efx_ef10_filter_mcdi_flags_from_spec(spec); - match_pri = 0; - while (match_pri < table->rx_match_count && - table->rx_match_mcdi_flags[match_pri] != mcdi_flags) - ++match_pri; - if (match_pri >= table->rx_match_count) { - invalid_filters++; - goto not_restored; - } - if (spec->rss_context) - ctx = efx_find_rss_context_entry(efx, spec->rss_context); - else - ctx = &efx->rss_context; - if (spec->flags & EFX_FILTER_FLAG_RX_RSS) { - if (!ctx) { - netif_warn(efx, drv, efx->net_dev, - "Warning: unable to restore a filter with nonexistent RSS context %u.\n", - spec->rss_context); - invalid_filters++; - goto not_restored; - } - if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) { - netif_warn(efx, drv, efx->net_dev, - "Warning: unable to restore a filter with RSS context %u as it was not created.\n", - spec->rss_context); - invalid_filters++; - goto not_restored; - } - } - - rc = efx_ef10_filter_push(efx, spec, - &table->entry[filter_idx].handle, - ctx, false); - if (rc) - failed++; - - if (rc) { -not_restored: - list_for_each_entry(vlan, &table->vlan_list, list) - for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; ++i) - if (vlan->default_filters[i] == filter_idx) - vlan->default_filters[i] = - EFX_EF10_FILTER_ID_INVALID; - - kfree(spec); - efx_ef10_filter_set_entry(table, filter_idx, NULL, 0); - } - } - - mutex_unlock(&efx->rss_lock); - up_write(&table->lock); - - /* This can happen validly if the MC's capabilities have changed, so - * is not an error. - */ - if (invalid_filters) - netif_dbg(efx, drv, efx->net_dev, - "Did not restore %u filters that are now unsupported.\n", - invalid_filters); - - if (failed) - netif_err(efx, hw, efx->net_dev, - "unable to restore %u filters\n", failed); - else - nic_data->must_restore_filters = false; -} - -static void efx_ef10_filter_table_remove(struct efx_nic *efx) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); - struct efx_filter_spec *spec; - unsigned int filter_idx; - int rc; - - efx_ef10_filter_cleanup_vlans(efx); - efx->filter_state = NULL; - /* If we were called without locking, then it's not safe to free - * the table as others might be using it. So we just WARN, leak - * the memory, and potentially get an inconsistent filter table - * state. - * This should never actually happen. - */ - if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) - return; - - if (!table) - return; - - for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) { - spec = efx_ef10_filter_entry_spec(table, filter_idx); - if (!spec) - continue; - - MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, - efx_ef10_filter_is_exclusive(spec) ? 
- MC_CMD_FILTER_OP_IN_OP_REMOVE : - MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE); - MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, - table->entry[filter_idx].handle); - rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, inbuf, - sizeof(inbuf), NULL, 0, NULL); - if (rc) - netif_info(efx, drv, efx->net_dev, - "%s: filter %04x remove failed\n", - __func__, filter_idx); - kfree(spec); - } - - vfree(table->entry); - kfree(table); -} - -static void efx_ef10_filter_mark_one_old(struct efx_nic *efx, uint16_t *id) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - unsigned int filter_idx; - - efx_rwsem_assert_write_locked(&table->lock); - - if (*id != EFX_EF10_FILTER_ID_INVALID) { - filter_idx = efx_ef10_filter_get_unsafe_id(*id); - if (!table->entry[filter_idx].spec) - netif_dbg(efx, drv, efx->net_dev, - "marked null spec old %04x:%04x\n", *id, - filter_idx); - table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_AUTO_OLD; - *id = EFX_EF10_FILTER_ID_INVALID; - } -} - -/* Mark old per-VLAN filters that may need to be removed */ -static void _efx_ef10_filter_vlan_mark_old(struct efx_nic *efx, - struct efx_ef10_filter_vlan *vlan) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - unsigned int i; - - for (i = 0; i < table->dev_uc_count; i++) - efx_ef10_filter_mark_one_old(efx, &vlan->uc[i]); - for (i = 0; i < table->dev_mc_count; i++) - efx_ef10_filter_mark_one_old(efx, &vlan->mc[i]); - for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; i++) - efx_ef10_filter_mark_one_old(efx, &vlan->default_filters[i]); -} - -/* Mark old filters that may need to be removed. - * Caller must hold efx->filter_sem for read if race against - * efx_ef10_filter_table_remove() is possible - */ -static void efx_ef10_filter_mark_old(struct efx_nic *efx) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - struct efx_ef10_filter_vlan *vlan; - - down_write(&table->lock); - list_for_each_entry(vlan, &table->vlan_list, list) - _efx_ef10_filter_vlan_mark_old(efx, vlan); - up_write(&table->lock); -} - -static void efx_ef10_filter_uc_addr_list(struct efx_nic *efx) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - struct net_device *net_dev = efx->net_dev; - struct netdev_hw_addr *uc; - unsigned int i; - - table->uc_promisc = !!(net_dev->flags & IFF_PROMISC); - ether_addr_copy(table->dev_uc_list[0].addr, net_dev->dev_addr); - i = 1; - netdev_for_each_uc_addr(uc, net_dev) { - if (i >= EFX_EF10_FILTER_DEV_UC_MAX) { - table->uc_promisc = true; - break; - } - ether_addr_copy(table->dev_uc_list[i].addr, uc->addr); - i++; - } - - table->dev_uc_count = i; -} - -static void efx_ef10_filter_mc_addr_list(struct efx_nic *efx) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - struct net_device *net_dev = efx->net_dev; - struct netdev_hw_addr *mc; - unsigned int i; - - table->mc_overflow = false; - table->mc_promisc = !!(net_dev->flags & (IFF_PROMISC | IFF_ALLMULTI)); - - i = 0; - netdev_for_each_mc_addr(mc, net_dev) { - if (i >= EFX_EF10_FILTER_DEV_MC_MAX) { - table->mc_promisc = true; - table->mc_overflow = true; - break; - } - ether_addr_copy(table->dev_mc_list[i].addr, mc->addr); - i++; - } - - table->dev_mc_count = i; -} - -static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx, - struct efx_ef10_filter_vlan *vlan, - bool multicast, bool rollback) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - struct efx_ef10_dev_addr *addr_list; - enum efx_filter_flags filter_flags; - struct efx_filter_spec spec; - u8 baddr[ETH_ALEN]; - unsigned int i, j; - int addr_count; - u16 *ids; - 
int rc; - - if (multicast) { - addr_list = table->dev_mc_list; - addr_count = table->dev_mc_count; - ids = vlan->mc; - } else { - addr_list = table->dev_uc_list; - addr_count = table->dev_uc_count; - ids = vlan->uc; - } - - filter_flags = efx_rss_active(&efx->rss_context) ? EFX_FILTER_FLAG_RX_RSS : 0; - - /* Insert/renew filters */ - for (i = 0; i < addr_count; i++) { - EFX_WARN_ON_PARANOID(ids[i] != EFX_EF10_FILTER_ID_INVALID); - efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0); - efx_filter_set_eth_local(&spec, vlan->vid, addr_list[i].addr); - rc = efx_ef10_filter_insert_locked(efx, &spec, true); - if (rc < 0) { - if (rollback) { - netif_info(efx, drv, efx->net_dev, - "efx_ef10_filter_insert failed rc=%d\n", - rc); - /* Fall back to promiscuous */ - for (j = 0; j < i; j++) { - efx_ef10_filter_remove_unsafe( - efx, EFX_FILTER_PRI_AUTO, - ids[j]); - ids[j] = EFX_EF10_FILTER_ID_INVALID; - } - return rc; - } else { - /* keep invalid ID, and carry on */ - } - } else { - ids[i] = efx_ef10_filter_get_unsafe_id(rc); - } - } - - if (multicast && rollback) { - /* Also need an Ethernet broadcast filter */ - EFX_WARN_ON_PARANOID(vlan->default_filters[EFX_EF10_BCAST] != - EFX_EF10_FILTER_ID_INVALID); - efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0); - eth_broadcast_addr(baddr); - efx_filter_set_eth_local(&spec, vlan->vid, baddr); - rc = efx_ef10_filter_insert_locked(efx, &spec, true); - if (rc < 0) { - netif_warn(efx, drv, efx->net_dev, - "Broadcast filter insert failed rc=%d\n", rc); - /* Fall back to promiscuous */ - for (j = 0; j < i; j++) { - efx_ef10_filter_remove_unsafe( - efx, EFX_FILTER_PRI_AUTO, - ids[j]); - ids[j] = EFX_EF10_FILTER_ID_INVALID; - } - return rc; - } else { - vlan->default_filters[EFX_EF10_BCAST] = - efx_ef10_filter_get_unsafe_id(rc); - } - } - - return 0; -} - -static int efx_ef10_filter_insert_def(struct efx_nic *efx, - struct efx_ef10_filter_vlan *vlan, - enum efx_encap_type encap_type, - bool multicast, bool rollback) -{ - struct efx_ef10_nic_data *nic_data = efx->nic_data; - enum efx_filter_flags filter_flags; - struct efx_filter_spec spec; - u8 baddr[ETH_ALEN]; - int rc; - u16 *id; - - filter_flags = efx_rss_active(&efx->rss_context) ? EFX_FILTER_FLAG_RX_RSS : 0; - - efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0); - - if (multicast) - efx_filter_set_mc_def(&spec); - else - efx_filter_set_uc_def(&spec); - - if (encap_type) { - if (nic_data->datapath_caps & - (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN)) - efx_filter_set_encap_type(&spec, encap_type); - else - /* don't insert encap filters on non-supporting - * platforms. ID will be left as INVALID. - */ - return 0; - } - - if (vlan->vid != EFX_FILTER_VID_UNSPEC) - efx_filter_set_eth_local(&spec, vlan->vid, NULL); - - rc = efx_ef10_filter_insert_locked(efx, &spec, true); - if (rc < 0) { - const char *um = multicast ? "Multicast" : "Unicast"; - const char *encap_name = ""; - const char *encap_ipv = ""; - - if ((encap_type & EFX_ENCAP_TYPES_MASK) == - EFX_ENCAP_TYPE_VXLAN) - encap_name = "VXLAN "; - else if ((encap_type & EFX_ENCAP_TYPES_MASK) == - EFX_ENCAP_TYPE_NVGRE) - encap_name = "NVGRE "; - else if ((encap_type & EFX_ENCAP_TYPES_MASK) == - EFX_ENCAP_TYPE_GENEVE) - encap_name = "GENEVE "; - if (encap_type & EFX_ENCAP_FLAG_IPV6) - encap_ipv = "IPv6 "; - else if (encap_type) - encap_ipv = "IPv4 "; - - /* unprivileged functions can't insert mismatch filters - * for encapsulated or unicast traffic, so downgrade - * those warnings to debug. 
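Earlier in this hunk, efx_ef10_filter_insert_addr_list() uses an all-or-nothing pattern when rollback is requested: the first failed insertion unwinds every filter added so far and reports the error so the caller can fall back to promiscuous mode. The pattern in isolation, with hypothetical insert_one()/remove_one() helpers:

	#include <stddef.h>

	#define ID_INVALID 0xffffu

	extern int  insert_one(size_t i);	/* filter id, or negative error */
	extern void remove_one(unsigned int id);

	static int insert_list_rollback(unsigned int *ids, size_t n)
	{
		size_t i, j;
		int rc;

		for (i = 0; i < n; i++) {
			rc = insert_one(i);
			if (rc < 0) {
				for (j = 0; j < i; j++) {	/* unwind */
					remove_one(ids[j]);
					ids[j] = ID_INVALID;
				}
				return rc; /* caller falls back to promisc */
			}
			ids[i] = (unsigned int)rc;
		}
		return 0;
	}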
- */ - netif_cond_dbg(efx, drv, efx->net_dev, - rc == -EPERM && (encap_type || !multicast), warn, - "%s%s%s mismatch filter insert failed rc=%d\n", - encap_name, encap_ipv, um, rc); - } else if (multicast) { - /* mapping from encap types to default filter IDs (multicast) */ - static enum efx_ef10_default_filters map[] = { - [EFX_ENCAP_TYPE_NONE] = EFX_EF10_MCDEF, - [EFX_ENCAP_TYPE_VXLAN] = EFX_EF10_VXLAN4_MCDEF, - [EFX_ENCAP_TYPE_NVGRE] = EFX_EF10_NVGRE4_MCDEF, - [EFX_ENCAP_TYPE_GENEVE] = EFX_EF10_GENEVE4_MCDEF, - [EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6] = - EFX_EF10_VXLAN6_MCDEF, - [EFX_ENCAP_TYPE_NVGRE | EFX_ENCAP_FLAG_IPV6] = - EFX_EF10_NVGRE6_MCDEF, - [EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6] = - EFX_EF10_GENEVE6_MCDEF, - }; - - /* quick bounds check (BCAST result impossible) */ - BUILD_BUG_ON(EFX_EF10_BCAST != 0); - if (encap_type >= ARRAY_SIZE(map) || map[encap_type] == 0) { - WARN_ON(1); - return -EINVAL; - } - /* then follow map */ - id = &vlan->default_filters[map[encap_type]]; - - EFX_WARN_ON_PARANOID(*id != EFX_EF10_FILTER_ID_INVALID); - *id = efx_ef10_filter_get_unsafe_id(rc); - if (!nic_data->workaround_26807 && !encap_type) { - /* Also need an Ethernet broadcast filter */ - efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, - filter_flags, 0); - eth_broadcast_addr(baddr); - efx_filter_set_eth_local(&spec, vlan->vid, baddr); - rc = efx_ef10_filter_insert_locked(efx, &spec, true); - if (rc < 0) { - netif_warn(efx, drv, efx->net_dev, - "Broadcast filter insert failed rc=%d\n", - rc); - if (rollback) { - /* Roll back the mc_def filter */ - efx_ef10_filter_remove_unsafe( - efx, EFX_FILTER_PRI_AUTO, - *id); - *id = EFX_EF10_FILTER_ID_INVALID; - return rc; - } - } else { - EFX_WARN_ON_PARANOID( - vlan->default_filters[EFX_EF10_BCAST] != - EFX_EF10_FILTER_ID_INVALID); - vlan->default_filters[EFX_EF10_BCAST] = - efx_ef10_filter_get_unsafe_id(rc); - } - } - rc = 0; - } else { - /* mapping from encap types to default filter IDs (unicast) */ - static enum efx_ef10_default_filters map[] = { - [EFX_ENCAP_TYPE_NONE] = EFX_EF10_UCDEF, - [EFX_ENCAP_TYPE_VXLAN] = EFX_EF10_VXLAN4_UCDEF, - [EFX_ENCAP_TYPE_NVGRE] = EFX_EF10_NVGRE4_UCDEF, - [EFX_ENCAP_TYPE_GENEVE] = EFX_EF10_GENEVE4_UCDEF, - [EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6] = - EFX_EF10_VXLAN6_UCDEF, - [EFX_ENCAP_TYPE_NVGRE | EFX_ENCAP_FLAG_IPV6] = - EFX_EF10_NVGRE6_UCDEF, - [EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6] = - EFX_EF10_GENEVE6_UCDEF, - }; - - /* quick bounds check (BCAST result impossible) */ - BUILD_BUG_ON(EFX_EF10_BCAST != 0); - if (encap_type >= ARRAY_SIZE(map) || map[encap_type] == 0) { - WARN_ON(1); - return -EINVAL; - } - /* then follow map */ - id = &vlan->default_filters[map[encap_type]]; - EFX_WARN_ON_PARANOID(*id != EFX_EF10_FILTER_ID_INVALID); - *id = rc; - rc = 0; - } - return rc; -} - -/* Remove filters that weren't renewed. 
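The default-filter bookkeeping in efx_ef10_filter_insert_def() above is table-driven: the encapsulation type, with an IPv6 flag OR-ed in, indexes a sparse static map onto a slot array, and slot 0 doubles as a "no mapping" sentinel because the broadcast slot can never be a valid result. The same shape reduced to a runnable sketch with made-up slot and encap names:

	#include <stdio.h>

	enum slot  { SLOT_BCAST = 0, SLOT_MCDEF, SLOT_VXLAN4, SLOT_VXLAN6 };
	enum encap { ENCAP_NONE = 0, ENCAP_VXLAN = 1, ENCAP_FLAG_IPV6 = 4 };

	static int slot_for_encap(unsigned int encap_type)
	{
		/* sparse map; unlisted entries stay 0 (SLOT_BCAST, impossible) */
		static const enum slot map[] = {
			[ENCAP_NONE]			= SLOT_MCDEF,
			[ENCAP_VXLAN]			= SLOT_VXLAN4,
			[ENCAP_VXLAN | ENCAP_FLAG_IPV6]	= SLOT_VXLAN6,
		};

		if (encap_type >= sizeof(map) / sizeof(map[0]) ||
		    !map[encap_type])
			return -1;	/* out of range or unmapped */
		return map[encap_type];
	}

	int main(void)
	{
		printf("%d\n", slot_for_encap(ENCAP_VXLAN | ENCAP_FLAG_IPV6));
		return 0;	/* prints 3 (SLOT_VXLAN6) */
	}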
*/ -static void efx_ef10_filter_remove_old(struct efx_nic *efx) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - int remove_failed = 0; - int remove_noent = 0; - int rc; - int i; - - down_write(&table->lock); - for (i = 0; i < HUNT_FILTER_TBL_ROWS; i++) { - if (READ_ONCE(table->entry[i].spec) & - EFX_EF10_FILTER_FLAG_AUTO_OLD) { - rc = efx_ef10_filter_remove_internal(efx, - 1U << EFX_FILTER_PRI_AUTO, i, true); - if (rc == -ENOENT) - remove_noent++; - else if (rc) - remove_failed++; - } - } - up_write(&table->lock); - - if (remove_failed) - netif_info(efx, drv, efx->net_dev, - "%s: failed to remove %d filters\n", - __func__, remove_failed); - if (remove_noent) - netif_info(efx, drv, efx->net_dev, - "%s: failed to remove %d non-existent filters\n", - __func__, remove_noent); -} - static int efx_ef10_vport_set_mac_address(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; @@ -5545,7 +3153,7 @@ static int efx_ef10_vport_set_mac_address(struct efx_nic *efx) efx_device_detach_sync(efx); efx_net_stop(efx->net_dev); down_write(&efx->filter_sem); - efx_ef10_filter_table_remove(efx); + efx_mcdi_filter_table_remove(efx); up_write(&efx->filter_sem); rc = efx_ef10_vadaptor_free(efx, nic_data->vport_id); @@ -5577,7 +3185,7 @@ restore_vadaptor: goto reset_nic; restore_filters: down_write(&efx->filter_sem); - rc2 = efx_ef10_filter_table_probe(efx); + rc2 = efx_mcdi_filter_table_probe(efx); up_write(&efx->filter_sem); if (rc2) goto reset_nic; @@ -5598,256 +3206,6 @@ reset_nic: return rc ? rc : rc2; } -/* Caller must hold efx->filter_sem for read if race against - * efx_ef10_filter_table_remove() is possible - */ -static void efx_ef10_filter_vlan_sync_rx_mode(struct efx_nic *efx, - struct efx_ef10_filter_vlan *vlan) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - struct efx_ef10_nic_data *nic_data = efx->nic_data; - - /* Do not install unspecified VID if VLAN filtering is enabled. - * Do not install all specified VIDs if VLAN filtering is disabled. - */ - if ((vlan->vid == EFX_FILTER_VID_UNSPEC) == table->vlan_filter) - return; - - /* Insert/renew unicast filters */ - if (table->uc_promisc) { - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NONE, - false, false); - efx_ef10_filter_insert_addr_list(efx, vlan, false, false); - } else { - /* If any of the filters failed to insert, fall back to - * promiscuous mode - add in the uc_def filter. But keep - * our individual unicast filters. 
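The unicast policy this comment describes fits in a few lines: in promiscuous mode install the catch-all filter first and keep the individual filters anyway; otherwise try the individual list and add the catch-all only if that list could not be installed in full. A schematic version with hypothetical helpers:

	#include <stdbool.h>

	extern int  install_addr_list(void);	/* 0 when every filter went in */
	extern void install_catchall(void);	/* models the uc_def filter */

	static void sync_unicast(bool promisc)
	{
		if (promisc) {
			install_catchall();	/* catch everything... */
			install_addr_list();	/* ...keep per-address too */
		} else if (install_addr_list() != 0) {
			install_catchall();	/* partial failure: fall back */
		}
	}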
- */ - if (efx_ef10_filter_insert_addr_list(efx, vlan, false, false)) - efx_ef10_filter_insert_def(efx, vlan, - EFX_ENCAP_TYPE_NONE, - false, false); - } - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN, - false, false); - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN | - EFX_ENCAP_FLAG_IPV6, - false, false); - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE, - false, false); - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE | - EFX_ENCAP_FLAG_IPV6, - false, false); - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE, - false, false); - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE | - EFX_ENCAP_FLAG_IPV6, - false, false); - - /* Insert/renew multicast filters */ - /* If changing promiscuous state with cascaded multicast filters, remove - * old filters first, so that packets are dropped rather than duplicated - */ - if (nic_data->workaround_26807 && - table->mc_promisc_last != table->mc_promisc) - efx_ef10_filter_remove_old(efx); - if (table->mc_promisc) { - if (nic_data->workaround_26807) { - /* If we failed to insert promiscuous filters, rollback - * and fall back to individual multicast filters - */ - if (efx_ef10_filter_insert_def(efx, vlan, - EFX_ENCAP_TYPE_NONE, - true, true)) { - /* Changing promisc state, so remove old filters */ - efx_ef10_filter_remove_old(efx); - efx_ef10_filter_insert_addr_list(efx, vlan, - true, false); - } - } else { - /* If we failed to insert promiscuous filters, don't - * rollback. Regardless, also insert the mc_list, - * unless it's incomplete due to overflow - */ - efx_ef10_filter_insert_def(efx, vlan, - EFX_ENCAP_TYPE_NONE, - true, false); - if (!table->mc_overflow) - efx_ef10_filter_insert_addr_list(efx, vlan, - true, false); - } - } else { - /* If any filters failed to insert, rollback and fall back to - * promiscuous mode - mc_def filter and maybe broadcast. If - * that fails, roll back again and insert as many of our - * individual multicast filters as we can. 
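On firmware with cascaded multicast delivery (workaround 26807) the ordering above matters: stale filters come out before the promiscuous state flips, so packets in the window are dropped rather than delivered twice, and a failed promiscuous insert falls back to the individual list. A compressed model, helper names invented:

	#include <stdbool.h>

	extern void remove_old_filters(void);
	extern int  install_mc_promisc(void);	/* 0 on success */
	extern void install_mc_list(void);

	static void sync_multicast_cascaded(bool promisc, bool last_promisc)
	{
		/* drop, don't duplicate: clear old state before switching */
		if (promisc != last_promisc)
			remove_old_filters();

		if (promisc && install_mc_promisc() != 0) {
			remove_old_filters();	/* promisc failed: roll back */
			install_mc_list();	/* ...to individual filters */
		}
	}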
- */ - if (efx_ef10_filter_insert_addr_list(efx, vlan, true, true)) { - /* Changing promisc state, so remove old filters */ - if (nic_data->workaround_26807) - efx_ef10_filter_remove_old(efx); - if (efx_ef10_filter_insert_def(efx, vlan, - EFX_ENCAP_TYPE_NONE, - true, true)) - efx_ef10_filter_insert_addr_list(efx, vlan, - true, false); - } - } - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN, - true, false); - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN | - EFX_ENCAP_FLAG_IPV6, - true, false); - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE, - true, false); - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE | - EFX_ENCAP_FLAG_IPV6, - true, false); - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE, - true, false); - efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE | - EFX_ENCAP_FLAG_IPV6, - true, false); -} - -/* Caller must hold efx->filter_sem for read if race against - * efx_ef10_filter_table_remove() is possible - */ -static void efx_ef10_filter_sync_rx_mode(struct efx_nic *efx) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - struct net_device *net_dev = efx->net_dev; - struct efx_ef10_filter_vlan *vlan; - bool vlan_filter; - - if (!efx_dev_registered(efx)) - return; - - if (!table) - return; - - efx_ef10_filter_mark_old(efx); - - /* Copy/convert the address lists; add the primary station - * address and broadcast address - */ - netif_addr_lock_bh(net_dev); - efx_ef10_filter_uc_addr_list(efx); - efx_ef10_filter_mc_addr_list(efx); - netif_addr_unlock_bh(net_dev); - - /* If VLAN filtering changes, all old filters are finally removed. - * Do it in advance to avoid conflicts for unicast untagged and - * VLAN 0 tagged filters. - */ - vlan_filter = !!(net_dev->features & NETIF_F_HW_VLAN_CTAG_FILTER); - if (table->vlan_filter != vlan_filter) { - table->vlan_filter = vlan_filter; - efx_ef10_filter_remove_old(efx); - } - - list_for_each_entry(vlan, &table->vlan_list, list) - efx_ef10_filter_vlan_sync_rx_mode(efx, vlan); - - efx_ef10_filter_remove_old(efx); - table->mc_promisc_last = table->mc_promisc; -} - -static struct efx_ef10_filter_vlan *efx_ef10_filter_find_vlan(struct efx_nic *efx, u16 vid) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - struct efx_ef10_filter_vlan *vlan; - - WARN_ON(!rwsem_is_locked(&efx->filter_sem)); - - list_for_each_entry(vlan, &table->vlan_list, list) { - if (vlan->vid == vid) - return vlan; - } - - return NULL; -} - -static int efx_ef10_filter_add_vlan(struct efx_nic *efx, u16 vid) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - struct efx_ef10_filter_vlan *vlan; - unsigned int i; - - if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) - return -EINVAL; - - vlan = efx_ef10_filter_find_vlan(efx, vid); - if (WARN_ON(vlan)) { - netif_err(efx, drv, efx->net_dev, - "VLAN %u already added\n", vid); - return -EALREADY; - } - - vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); - if (!vlan) - return -ENOMEM; - - vlan->vid = vid; - - for (i = 0; i < ARRAY_SIZE(vlan->uc); i++) - vlan->uc[i] = EFX_EF10_FILTER_ID_INVALID; - for (i = 0; i < ARRAY_SIZE(vlan->mc); i++) - vlan->mc[i] = EFX_EF10_FILTER_ID_INVALID; - for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; i++) - vlan->default_filters[i] = EFX_EF10_FILTER_ID_INVALID; - - list_add_tail(&vlan->list, &table->vlan_list); - - if (efx_dev_registered(efx)) - efx_ef10_filter_vlan_sync_rx_mode(efx, vlan); - - return 0; -} - -static void efx_ef10_filter_del_vlan_internal(struct efx_nic *efx, - struct efx_ef10_filter_vlan 
*vlan) -{ - unsigned int i; - - /* See comment in efx_ef10_filter_table_remove() */ - if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) - return; - - list_del(&vlan->list); - - for (i = 0; i < ARRAY_SIZE(vlan->uc); i++) - efx_ef10_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO, - vlan->uc[i]); - for (i = 0; i < ARRAY_SIZE(vlan->mc); i++) - efx_ef10_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO, - vlan->mc[i]); - for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; i++) - if (vlan->default_filters[i] != EFX_EF10_FILTER_ID_INVALID) - efx_ef10_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO, - vlan->default_filters[i]); - - kfree(vlan); -} - -static void efx_ef10_filter_del_vlan(struct efx_nic *efx, u16 vid) -{ - struct efx_ef10_filter_vlan *vlan; - - /* See comment in efx_ef10_filter_table_remove() */ - if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) - return; - - vlan = efx_ef10_filter_find_vlan(efx, vid); - if (!vlan) { - netif_err(efx, drv, efx->net_dev, - "VLAN %u not found in filter state\n", vid); - return; - } - - efx_ef10_filter_del_vlan_internal(efx, vlan); -} - static int efx_ef10_set_mac_address(struct efx_nic *efx) { MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_SET_MAC_IN_LEN); @@ -5860,7 +3218,7 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) mutex_lock(&efx->mac_lock); down_write(&efx->filter_sem); - efx_ef10_filter_table_remove(efx); + efx_mcdi_filter_table_remove(efx); ether_addr_copy(MCDI_PTR(inbuf, VADAPTOR_SET_MAC_IN_MACADDR), efx->net_dev->dev_addr); @@ -5869,7 +3227,7 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) rc = efx_mcdi_rpc_quiet(efx, MC_CMD_VADAPTOR_SET_MAC, inbuf, sizeof(inbuf), NULL, 0, NULL); - efx_ef10_filter_table_probe(efx); + efx_mcdi_filter_table_probe(efx); up_write(&efx->filter_sem); mutex_unlock(&efx->mac_lock); @@ -5931,14 +3289,14 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) static int efx_ef10_mac_reconfigure(struct efx_nic *efx) { - efx_ef10_filter_sync_rx_mode(efx); + efx_mcdi_filter_sync_rx_mode(efx); return efx_mcdi_set_mac(efx); } static int efx_ef10_mac_reconfigure_vf(struct efx_nic *efx) { - efx_ef10_filter_sync_rx_mode(efx); + efx_mcdi_filter_sync_rx_mode(efx); return 0; } @@ -6650,36 +4008,36 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = { .irq_handle_legacy = efx_ef10_legacy_interrupt, .tx_probe = efx_ef10_tx_probe, .tx_init = efx_ef10_tx_init, - .tx_remove = efx_ef10_tx_remove, + .tx_remove = efx_mcdi_tx_remove, .tx_write = efx_ef10_tx_write, .tx_limit_len = efx_ef10_tx_limit_len, - .rx_push_rss_config = efx_ef10_vf_rx_push_rss_config, - .rx_pull_rss_config = efx_ef10_rx_pull_rss_config, - .rx_probe = efx_ef10_rx_probe, - .rx_init = efx_ef10_rx_init, - .rx_remove = efx_ef10_rx_remove, + .rx_push_rss_config = efx_mcdi_vf_rx_push_rss_config, + .rx_pull_rss_config = efx_mcdi_rx_pull_rss_config, + .rx_probe = efx_mcdi_rx_probe, + .rx_init = efx_mcdi_rx_init, + .rx_remove = efx_mcdi_rx_remove, .rx_write = efx_ef10_rx_write, .rx_defer_refill = efx_ef10_rx_defer_refill, - .ev_probe = efx_ef10_ev_probe, + .ev_probe = efx_mcdi_ev_probe, .ev_init = efx_ef10_ev_init, - .ev_fini = efx_ef10_ev_fini, - .ev_remove = efx_ef10_ev_remove, + .ev_fini = efx_mcdi_ev_fini, + .ev_remove = efx_mcdi_ev_remove, .ev_process = efx_ef10_ev_process, .ev_read_ack = efx_ef10_ev_read_ack, .ev_test_generate = efx_ef10_ev_test_generate, - .filter_table_probe = efx_ef10_filter_table_probe, - .filter_table_restore = efx_ef10_filter_table_restore, - .filter_table_remove = efx_ef10_filter_table_remove, - .filter_update_rx_scatter = 
efx_ef10_filter_update_rx_scatter, - .filter_insert = efx_ef10_filter_insert, - .filter_remove_safe = efx_ef10_filter_remove_safe, - .filter_get_safe = efx_ef10_filter_get_safe, - .filter_clear_rx = efx_ef10_filter_clear_rx, - .filter_count_rx_used = efx_ef10_filter_count_rx_used, - .filter_get_rx_id_limit = efx_ef10_filter_get_rx_id_limit, - .filter_get_rx_ids = efx_ef10_filter_get_rx_ids, + .filter_table_probe = efx_mcdi_filter_table_probe, + .filter_table_restore = efx_mcdi_filter_table_restore, + .filter_table_remove = efx_mcdi_filter_table_remove, + .filter_update_rx_scatter = efx_mcdi_update_rx_scatter, + .filter_insert = efx_mcdi_filter_insert, + .filter_remove_safe = efx_mcdi_filter_remove_safe, + .filter_get_safe = efx_mcdi_filter_get_safe, + .filter_clear_rx = efx_mcdi_filter_clear_rx, + .filter_count_rx_used = efx_mcdi_filter_count_rx_used, + .filter_get_rx_id_limit = efx_mcdi_filter_get_rx_id_limit, + .filter_get_rx_ids = efx_mcdi_filter_get_rx_ids, #ifdef CONFIG_RFS_ACCEL - .filter_rfs_expire_one = efx_ef10_filter_rfs_expire_one, + .filter_rfs_expire_one = efx_mcdi_filter_rfs_expire_one, #endif #ifdef CONFIG_SFC_MTD .mtd_probe = efx_port_dummy_op_int, @@ -6709,7 +4067,7 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = { .timer_period_max = 1 << ERF_DD_EVQ_IND_TIMER_VAL_WIDTH, .offload_features = EF10_OFFLOAD_FEATURES, .mcdi_max_ver = 2, - .max_rx_ip_filters = HUNT_FILTER_TBL_ROWS, + .max_rx_ip_filters = EFX_MCDI_FILTER_TBL_ROWS, .hwtstamp_filters = 1 << HWTSTAMP_FILTER_NONE | 1 << HWTSTAMP_FILTER_ALL, .rx_hash_key_size = 40, @@ -6759,39 +4117,39 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { .irq_handle_legacy = efx_ef10_legacy_interrupt, .tx_probe = efx_ef10_tx_probe, .tx_init = efx_ef10_tx_init, - .tx_remove = efx_ef10_tx_remove, + .tx_remove = efx_mcdi_tx_remove, .tx_write = efx_ef10_tx_write, .tx_limit_len = efx_ef10_tx_limit_len, - .rx_push_rss_config = efx_ef10_pf_rx_push_rss_config, - .rx_pull_rss_config = efx_ef10_rx_pull_rss_config, - .rx_push_rss_context_config = efx_ef10_rx_push_rss_context_config, - .rx_pull_rss_context_config = efx_ef10_rx_pull_rss_context_config, - .rx_restore_rss_contexts = efx_ef10_rx_restore_rss_contexts, - .rx_probe = efx_ef10_rx_probe, - .rx_init = efx_ef10_rx_init, - .rx_remove = efx_ef10_rx_remove, + .rx_push_rss_config = efx_mcdi_pf_rx_push_rss_config, + .rx_pull_rss_config = efx_mcdi_rx_pull_rss_config, + .rx_push_rss_context_config = efx_mcdi_rx_push_rss_context_config, + .rx_pull_rss_context_config = efx_mcdi_rx_pull_rss_context_config, + .rx_restore_rss_contexts = efx_mcdi_rx_restore_rss_contexts, + .rx_probe = efx_mcdi_rx_probe, + .rx_init = efx_mcdi_rx_init, + .rx_remove = efx_mcdi_rx_remove, .rx_write = efx_ef10_rx_write, .rx_defer_refill = efx_ef10_rx_defer_refill, - .ev_probe = efx_ef10_ev_probe, + .ev_probe = efx_mcdi_ev_probe, .ev_init = efx_ef10_ev_init, - .ev_fini = efx_ef10_ev_fini, - .ev_remove = efx_ef10_ev_remove, + .ev_fini = efx_mcdi_ev_fini, + .ev_remove = efx_mcdi_ev_remove, .ev_process = efx_ef10_ev_process, .ev_read_ack = efx_ef10_ev_read_ack, .ev_test_generate = efx_ef10_ev_test_generate, - .filter_table_probe = efx_ef10_filter_table_probe, - .filter_table_restore = efx_ef10_filter_table_restore, - .filter_table_remove = efx_ef10_filter_table_remove, - .filter_update_rx_scatter = efx_ef10_filter_update_rx_scatter, - .filter_insert = efx_ef10_filter_insert, - .filter_remove_safe = efx_ef10_filter_remove_safe, - .filter_get_safe = efx_ef10_filter_get_safe, - .filter_clear_rx = efx_ef10_filter_clear_rx, - 
.filter_count_rx_used = efx_ef10_filter_count_rx_used, - .filter_get_rx_id_limit = efx_ef10_filter_get_rx_id_limit, - .filter_get_rx_ids = efx_ef10_filter_get_rx_ids, + .filter_table_probe = efx_mcdi_filter_table_probe, + .filter_table_restore = efx_mcdi_filter_table_restore, + .filter_table_remove = efx_mcdi_filter_table_remove, + .filter_update_rx_scatter = efx_mcdi_update_rx_scatter, + .filter_insert = efx_mcdi_filter_insert, + .filter_remove_safe = efx_mcdi_filter_remove_safe, + .filter_get_safe = efx_mcdi_filter_get_safe, + .filter_clear_rx = efx_mcdi_filter_clear_rx, + .filter_count_rx_used = efx_mcdi_filter_count_rx_used, + .filter_get_rx_id_limit = efx_mcdi_filter_get_rx_id_limit, + .filter_get_rx_ids = efx_mcdi_filter_get_rx_ids, #ifdef CONFIG_RFS_ACCEL - .filter_rfs_expire_one = efx_ef10_filter_rfs_expire_one, + .filter_rfs_expire_one = efx_mcdi_filter_rfs_expire_one, #endif #ifdef CONFIG_SFC_MTD .mtd_probe = efx_ef10_mtd_probe, @@ -6844,7 +4202,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { .timer_period_max = 1 << ERF_DD_EVQ_IND_TIMER_VAL_WIDTH, .offload_features = EF10_OFFLOAD_FEATURES, .mcdi_max_ver = 2, - .max_rx_ip_filters = HUNT_FILTER_TBL_ROWS, + .max_rx_ip_filters = EFX_MCDI_FILTER_TBL_ROWS, .hwtstamp_filters = 1 << HWTSTAMP_FILTER_NONE | 1 << HWTSTAMP_FILTER_ALL, .rx_hash_key_size = 40, diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c index 52bd43f45761..14393767ef9f 100644 --- a/drivers/net/ethernet/sfc/ef10_sriov.c +++ b/drivers/net/ethernet/sfc/ef10_sriov.c @@ -522,10 +522,9 @@ int efx_ef10_sriov_set_vf_mac(struct efx_nic *efx, int vf_i, u8 *mac) if (!is_zero_ether_addr(mac)) { rc = efx_ef10_vport_add_mac(efx, vf->vport_id, mac); - if (rc) { - eth_zero_addr(vf->mac); + if (rc) goto fail; - } + if (vf->efx) ether_addr_copy(vf->efx->net_dev->dev_addr, mac); } diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 6891df471538..4481f21a1f43 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -23,6 +23,10 @@ #include <net/gre.h> #include <net/udp_tunnel.h> #include "efx.h" +#include "efx_common.h" +#include "efx_channels.h" +#include "rx_common.h" +#include "tx_common.h" #include "nic.h" #include "io.h" #include "selftest.h" @@ -39,56 +43,6 @@ ************************************************************************** */ -/* Loopback mode names (see LOOPBACK_MODE()) */ -const unsigned int efx_loopback_mode_max = LOOPBACK_MAX; -const char *const efx_loopback_mode_names[] = { - [LOOPBACK_NONE] = "NONE", - [LOOPBACK_DATA] = "DATAPATH", - [LOOPBACK_GMAC] = "GMAC", - [LOOPBACK_XGMII] = "XGMII", - [LOOPBACK_XGXS] = "XGXS", - [LOOPBACK_XAUI] = "XAUI", - [LOOPBACK_GMII] = "GMII", - [LOOPBACK_SGMII] = "SGMII", - [LOOPBACK_XGBR] = "XGBR", - [LOOPBACK_XFI] = "XFI", - [LOOPBACK_XAUI_FAR] = "XAUI_FAR", - [LOOPBACK_GMII_FAR] = "GMII_FAR", - [LOOPBACK_SGMII_FAR] = "SGMII_FAR", - [LOOPBACK_XFI_FAR] = "XFI_FAR", - [LOOPBACK_GPHY] = "GPHY", - [LOOPBACK_PHYXS] = "PHYXS", - [LOOPBACK_PCS] = "PCS", - [LOOPBACK_PMAPMD] = "PMA/PMD", - [LOOPBACK_XPORT] = "XPORT", - [LOOPBACK_XGMII_WS] = "XGMII_WS", - [LOOPBACK_XAUI_WS] = "XAUI_WS", - [LOOPBACK_XAUI_WS_FAR] = "XAUI_WS_FAR", - [LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR", - [LOOPBACK_GMII_WS] = "GMII_WS", - [LOOPBACK_XFI_WS] = "XFI_WS", - [LOOPBACK_XFI_WS_FAR] = "XFI_WS_FAR", - [LOOPBACK_PHYXS_WS] = "PHYXS_WS", -}; - -const unsigned int efx_reset_type_max = RESET_TYPE_MAX; -const char *const efx_reset_type_names[] = { - [RESET_TYPE_INVISIBLE] 
= "INVISIBLE", - [RESET_TYPE_ALL] = "ALL", - [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL", - [RESET_TYPE_WORLD] = "WORLD", - [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE", - [RESET_TYPE_DATAPATH] = "DATAPATH", - [RESET_TYPE_MC_BIST] = "MC_BIST", - [RESET_TYPE_DISABLE] = "DISABLE", - [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", - [RESET_TYPE_INT_ERROR] = "INT_ERROR", - [RESET_TYPE_DMA_ERROR] = "DMA_ERROR", - [RESET_TYPE_TX_SKIP] = "TX_SKIP", - [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", - [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)", -}; - /* UDP tunnel type names */ static const char *const efx_udp_tunnel_type_names[] = { [TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN] = "vxlan", @@ -104,18 +58,6 @@ void efx_get_udp_tunnel_type_name(u16 type, char *buf, size_t buflen) snprintf(buf, buflen, "type %d", type); } -/* Reset workqueue. If any NIC has a hardware failure then a reset will be - * queued onto this work queue. This is not a per-nic work queue, because - * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised. - */ -static struct workqueue_struct *reset_workqueue; - -/* How often and how many times to poll for a reset while waiting for a - * BIST that another function started to complete. - */ -#define BIST_WAIT_DELAY_MS 100 -#define BIST_WAIT_DELAY_COUNT 100 - /************************************************************************** * * Configurable values @@ -135,21 +77,6 @@ module_param(efx_separate_tx_channels, bool, 0444); MODULE_PARM_DESC(efx_separate_tx_channels, "Use separate channels for TX and RX"); -/* This is the weight assigned to each of the (per-channel) virtual - * NAPI devices. - */ -static int napi_weight = 64; - -/* This is the time (in jiffies) between invocations of the hardware - * monitor. - * On Falcon-based NICs, this will: - * - Check the on-board hardware monitor; - * - Poll the link state and reconfigure the hardware as necessary. - * On Siena-based NICs for power systems with EEH support, this will give EEH a - * chance to start. - */ -static unsigned int efx_monitor_interval = 1 * HZ; - /* Initial interrupt moderation settings. They can be modified after * module load with ethtool. * @@ -169,38 +96,10 @@ static unsigned int rx_irq_mod_usec = 60; */ static unsigned int tx_irq_mod_usec = 150; -/* This is the first interrupt mode to try out of: - * 0 => MSI-X - * 1 => MSI - * 2 => legacy - */ -static unsigned int interrupt_mode; - -/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS), - * i.e. the number of CPUs among which we may distribute simultaneous - * interrupt handling. - * - * Cards without MSI-X will only target one CPU via legacy or MSI interrupt. - * The default (0) means to assign an interrupt to each core. 
- */ -static unsigned int rss_cpus; -module_param(rss_cpus, uint, 0444); -MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling"); - static bool phy_flash_cfg; module_param(phy_flash_cfg, bool, 0644); MODULE_PARM_DESC(phy_flash_cfg, "Set PHYs into reflash mode initially"); -static unsigned irq_adapt_low_thresh = 8000; -module_param(irq_adapt_low_thresh, uint, 0644); -MODULE_PARM_DESC(irq_adapt_low_thresh, - "Threshold score for reducing IRQ moderation"); - -static unsigned irq_adapt_high_thresh = 16000; -module_param(irq_adapt_high_thresh, uint, 0644); -MODULE_PARM_DESC(irq_adapt_high_thresh, - "Threshold score for increasing IRQ moderation"); - static unsigned debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP | NETIF_MSG_RX_ERR | @@ -214,18 +113,8 @@ MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value"); * *************************************************************************/ -static int efx_soft_enable_interrupts(struct efx_nic *efx); -static void efx_soft_disable_interrupts(struct efx_nic *efx); -static void efx_remove_channel(struct efx_channel *channel); -static void efx_remove_channels(struct efx_nic *efx); static const struct efx_channel_type efx_default_channel_type; static void efx_remove_port(struct efx_nic *efx); -static void efx_init_napi_channel(struct efx_channel *channel); -static void efx_fini_napi(struct efx_nic *efx); -static void efx_fini_napi_channel(struct efx_channel *channel); -static void efx_fini_struct(struct efx_nic *efx); -static void efx_start_all(struct efx_nic *efx); -static void efx_stop_all(struct efx_nic *efx); static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog); static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp); static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs, @@ -239,776 +128,12 @@ static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs, ASSERT_RTNL(); \ } while (0) -static int efx_check_disabled(struct efx_nic *efx) -{ - if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) { - netif_err(efx, drv, efx->net_dev, - "device is disabled due to earlier errors\n"); - return -EIO; - } - return 0; -} - -/************************************************************************** - * - * Event queue processing - * - *************************************************************************/ - -/* Process channel's event queue - * - * This function is responsible for processing the event queue of a - * single channel. The caller must guarantee that this function will - * never be concurrently called more than once on the same channel, - * though different channels may be being processed concurrently. 
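That serialisation guarantee is exactly what NAPI provides, and the poll handler further down relies on its other half, the budget contract: consume at most budget events, and only re-arm interrupts when the quota was not exhausted. Schematically, with hypothetical helpers in place of the driver's event processing:

	extern int  process_events(void *channel, int budget); /* events used */
	extern bool complete_done(void *napi, int done); /* cf. napi_complete_done() */
	extern void irq_rearm(void *channel);

	static int poll_sketch(void *napi, void *channel, int budget)
	{
		int spent = process_events(channel, budget);

		/* budget exhausted: NAPI polls again, leave interrupts off */
		if (spent < budget && complete_done(napi, spent))
			irq_rearm(channel);

		return spent;
	}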
- */ -static int efx_process_channel(struct efx_channel *channel, int budget) -{ - struct efx_tx_queue *tx_queue; - struct list_head rx_list; - int spent; - - if (unlikely(!channel->enabled)) - return 0; - - /* Prepare the batch receive list */ - EFX_WARN_ON_PARANOID(channel->rx_list != NULL); - INIT_LIST_HEAD(&rx_list); - channel->rx_list = &rx_list; - - efx_for_each_channel_tx_queue(tx_queue, channel) { - tx_queue->pkts_compl = 0; - tx_queue->bytes_compl = 0; - } - - spent = efx_nic_process_eventq(channel, budget); - if (spent && efx_channel_has_rx_queue(channel)) { - struct efx_rx_queue *rx_queue = - efx_channel_get_rx_queue(channel); - - efx_rx_flush_packet(channel); - efx_fast_push_rx_descriptors(rx_queue, true); - } - - /* Update BQL */ - efx_for_each_channel_tx_queue(tx_queue, channel) { - if (tx_queue->bytes_compl) { - netdev_tx_completed_queue(tx_queue->core_txq, - tx_queue->pkts_compl, tx_queue->bytes_compl); - } - } - - /* Receive any packets we queued up */ - netif_receive_skb_list(channel->rx_list); - channel->rx_list = NULL; - - return spent; -} - -/* NAPI poll handler - * - * NAPI guarantees serialisation of polls of the same device, which - * provides the guarantee required by efx_process_channel(). - */ -static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel) -{ - int step = efx->irq_mod_step_us; - - if (channel->irq_mod_score < irq_adapt_low_thresh) { - if (channel->irq_moderation_us > step) { - channel->irq_moderation_us -= step; - efx->type->push_irq_moderation(channel); - } - } else if (channel->irq_mod_score > irq_adapt_high_thresh) { - if (channel->irq_moderation_us < - efx->irq_rx_moderation_us) { - channel->irq_moderation_us += step; - efx->type->push_irq_moderation(channel); - } - } - - channel->irq_count = 0; - channel->irq_mod_score = 0; -} - -static int efx_poll(struct napi_struct *napi, int budget) -{ - struct efx_channel *channel = - container_of(napi, struct efx_channel, napi_str); - struct efx_nic *efx = channel->efx; - int spent; - - netif_vdbg(efx, intr, efx->net_dev, - "channel %d NAPI poll executing on CPU %d\n", - channel->channel, raw_smp_processor_id()); - - spent = efx_process_channel(channel, budget); - - xdp_do_flush_map(); - - if (spent < budget) { - if (efx_channel_has_rx_queue(channel) && - efx->irq_rx_adaptive && - unlikely(++channel->irq_count == 1000)) { - efx_update_irq_mod(efx, channel); - } - -#ifdef CONFIG_RFS_ACCEL - /* Perhaps expire some ARFS filters */ - mod_delayed_work(system_wq, &channel->filter_work, 0); -#endif - - /* There is no race here; although napi_disable() will - * only wait for napi_complete(), this isn't a problem - * since efx_nic_eventq_read_ack() will have no effect if - * interrupts have already been disabled. - */ - if (napi_complete_done(napi, spent)) - efx_nic_eventq_read_ack(channel); - } - - return spent; -} - -/* Create event queue - * Event queue memory allocations are done only once. If the channel - * is reset, the memory buffer will be reused; this guards against - * errors during channel reset and also simplifies interrupt handling. - */ -static int efx_probe_eventq(struct efx_channel *channel) -{ - struct efx_nic *efx = channel->efx; - unsigned long entries; - - netif_dbg(efx, probe, efx->net_dev, - "chan %d create event queue\n", channel->channel); - - /* Build an event queue with room for one event per tx and rx buffer, - * plus some extra for link state events and MCDI completions. 
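The sizing rule stated here (one event per RX and TX buffer, 128 slack entries for link-state and MCDI events, rounded up to a power of two so the ring wraps with a simple mask) can be checked standalone:

	#include <stdio.h>

	static unsigned long roundup_pow_of_two(unsigned long x)
	{
		unsigned long r = 1;

		while (r < x)
			r <<= 1;
		return r;
	}

	int main(void)
	{
		unsigned long rxq = 1024, txq = 1024;
		unsigned long entries = roundup_pow_of_two(rxq + txq + 128);

		/* 2176 rounds up to 4096; the event queue mask is entries - 1 */
		printf("entries=%lu mask=%#lx\n", entries, entries - 1);
		return 0;
	}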
*/ - entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128); - EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE); - channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1; - - return efx_nic_probe_eventq(channel); -} - -/* Prepare channel's event queue */ -static int efx_init_eventq(struct efx_channel *channel) -{ - struct efx_nic *efx = channel->efx; - int rc; - - EFX_WARN_ON_PARANOID(channel->eventq_init); - - netif_dbg(efx, drv, efx->net_dev, - "chan %d init event queue\n", channel->channel); - - rc = efx_nic_init_eventq(channel); - if (rc == 0) { - efx->type->push_irq_moderation(channel); - channel->eventq_read_ptr = 0; - channel->eventq_init = true; - } - return rc; -} - -/* Enable event queue processing and NAPI */ -void efx_start_eventq(struct efx_channel *channel) -{ - netif_dbg(channel->efx, ifup, channel->efx->net_dev, - "chan %d start event queue\n", channel->channel); - - /* Make sure the NAPI handler sees the enabled flag set */ - channel->enabled = true; - smp_wmb(); - - napi_enable(&channel->napi_str); - efx_nic_eventq_read_ack(channel); -} - -/* Disable event queue processing and NAPI */ -void efx_stop_eventq(struct efx_channel *channel) -{ - if (!channel->enabled) - return; - - napi_disable(&channel->napi_str); - channel->enabled = false; -} - -static void efx_fini_eventq(struct efx_channel *channel) -{ - if (!channel->eventq_init) - return; - - netif_dbg(channel->efx, drv, channel->efx->net_dev, - "chan %d fini event queue\n", channel->channel); - - efx_nic_fini_eventq(channel); - channel->eventq_init = false; -} - -static void efx_remove_eventq(struct efx_channel *channel) -{ - netif_dbg(channel->efx, drv, channel->efx->net_dev, - "chan %d remove event queue\n", channel->channel); - - efx_nic_remove_eventq(channel); -} - -/************************************************************************** - * - * Channel handling - * - *************************************************************************/ - -/* Allocate and initialise a channel structure. */ -static struct efx_channel * -efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel) -{ - struct efx_channel *channel; - struct efx_rx_queue *rx_queue; - struct efx_tx_queue *tx_queue; - int j; - - channel = kzalloc(sizeof(*channel), GFP_KERNEL); - if (!channel) - return NULL; - - channel->efx = efx; - channel->channel = i; - channel->type = &efx_default_channel_type; - - for (j = 0; j < EFX_TXQ_TYPES; j++) { - tx_queue = &channel->tx_queue[j]; - tx_queue->efx = efx; - tx_queue->queue = i * EFX_TXQ_TYPES + j; - tx_queue->channel = channel; - } - -#ifdef CONFIG_RFS_ACCEL - INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire); -#endif - - rx_queue = &channel->rx_queue; - rx_queue->efx = efx; - timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); - - return channel; -} - -/* Allocate and initialise a channel structure, copying parameters - * (but not resources) from an old channel structure. 
- */ -static struct efx_channel * -efx_copy_channel(const struct efx_channel *old_channel) -{ - struct efx_channel *channel; - struct efx_rx_queue *rx_queue; - struct efx_tx_queue *tx_queue; - int j; - - channel = kmalloc(sizeof(*channel), GFP_KERNEL); - if (!channel) - return NULL; - - *channel = *old_channel; - - channel->napi_dev = NULL; - INIT_HLIST_NODE(&channel->napi_str.napi_hash_node); - channel->napi_str.napi_id = 0; - channel->napi_str.state = 0; - memset(&channel->eventq, 0, sizeof(channel->eventq)); - - for (j = 0; j < EFX_TXQ_TYPES; j++) { - tx_queue = &channel->tx_queue[j]; - if (tx_queue->channel) - tx_queue->channel = channel; - tx_queue->buffer = NULL; - memset(&tx_queue->txd, 0, sizeof(tx_queue->txd)); - } - - rx_queue = &channel->rx_queue; - rx_queue->buffer = NULL; - memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd)); - timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); -#ifdef CONFIG_RFS_ACCEL - INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire); -#endif - - return channel; -} - -static int efx_probe_channel(struct efx_channel *channel) -{ - struct efx_tx_queue *tx_queue; - struct efx_rx_queue *rx_queue; - int rc; - - netif_dbg(channel->efx, probe, channel->efx->net_dev, - "creating channel %d\n", channel->channel); - - rc = channel->type->pre_probe(channel); - if (rc) - goto fail; - - rc = efx_probe_eventq(channel); - if (rc) - goto fail; - - efx_for_each_channel_tx_queue(tx_queue, channel) { - rc = efx_probe_tx_queue(tx_queue); - if (rc) - goto fail; - } - - efx_for_each_channel_rx_queue(rx_queue, channel) { - rc = efx_probe_rx_queue(rx_queue); - if (rc) - goto fail; - } - - channel->rx_list = NULL; - - return 0; - -fail: - efx_remove_channel(channel); - return rc; -} - -static void -efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len) -{ - struct efx_nic *efx = channel->efx; - const char *type; - int number; - - number = channel->channel; - - if (number >= efx->xdp_channel_offset && - !WARN_ON_ONCE(!efx->n_xdp_channels)) { - type = "-xdp"; - number -= efx->xdp_channel_offset; - } else if (efx->tx_channel_offset == 0) { - type = ""; - } else if (number < efx->tx_channel_offset) { - type = "-rx"; - } else { - type = "-tx"; - number -= efx->tx_channel_offset; - } - snprintf(buf, len, "%s%s-%d", efx->name, type, number); -} - -static void efx_set_channel_names(struct efx_nic *efx) -{ - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) - channel->type->get_name(channel, - efx->msi_context[channel->channel].name, - sizeof(efx->msi_context[0].name)); -} - -static int efx_probe_channels(struct efx_nic *efx) -{ - struct efx_channel *channel; - int rc; - - /* Restart special buffer allocation */ - efx->next_buffer_table = 0; - - /* Probe channels in reverse, so that any 'extra' channels - * use the start of the buffer table. This allows the traffic - * channels to be resized without moving them or wasting the - * entries before them. 
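Probing in reverse is a placement trick: buffer-table allocation is first-come-first-served, so walking channels from the highest index down parks the "extra" channels at the start of the table and leaves the tail free for traffic channels to be resized in place. Schematically (probe_channel() is hypothetical):

	extern int probe_channel(int index); /* allocation order = placement */

	static int probe_all_reverse(int n_channels)
	{
		int i, rc;

		/* highest index first: extra channels land at table start */
		for (i = n_channels - 1; i >= 0; i--) {
			rc = probe_channel(i);
			if (rc)
				return rc;	/* caller unwinds the rest */
		}
		return 0;
	}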
- */ - efx_for_each_channel_rev(channel, efx) { - rc = efx_probe_channel(channel); - if (rc) { - netif_err(efx, probe, efx->net_dev, - "failed to create channel %d\n", - channel->channel); - goto fail; - } - } - efx_set_channel_names(efx); - - return 0; - -fail: - efx_remove_channels(efx); - return rc; -} - -/* Channels are shut down and reinitialised whilst the NIC is running - * to propagate configuration changes (mtu, checksum offload), or - * to clear hardware error conditions - */ -static void efx_start_datapath(struct efx_nic *efx) -{ - netdev_features_t old_features = efx->net_dev->features; - bool old_rx_scatter = efx->rx_scatter; - struct efx_tx_queue *tx_queue; - struct efx_rx_queue *rx_queue; - struct efx_channel *channel; - size_t rx_buf_len; - - /* Calculate the rx buffer allocation parameters required to - * support the current MTU, including padding for header - * alignment and overruns. - */ - efx->rx_dma_len = (efx->rx_prefix_size + - EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + - efx->type->rx_buffer_padding); - rx_buf_len = (sizeof(struct efx_rx_page_state) + XDP_PACKET_HEADROOM + - efx->rx_ip_align + efx->rx_dma_len); - if (rx_buf_len <= PAGE_SIZE) { - efx->rx_scatter = efx->type->always_rx_scatter; - efx->rx_buffer_order = 0; - } else if (efx->type->can_rx_scatter) { - BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES); - BUILD_BUG_ON(sizeof(struct efx_rx_page_state) + - 2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE, - EFX_RX_BUF_ALIGNMENT) > - PAGE_SIZE); - efx->rx_scatter = true; - efx->rx_dma_len = EFX_RX_USR_BUF_SIZE; - efx->rx_buffer_order = 0; - } else { - efx->rx_scatter = false; - efx->rx_buffer_order = get_order(rx_buf_len); - } - - efx_rx_config_page_split(efx); - if (efx->rx_buffer_order) - netif_dbg(efx, drv, efx->net_dev, - "RX buf len=%u; page order=%u batch=%u\n", - efx->rx_dma_len, efx->rx_buffer_order, - efx->rx_pages_per_batch); - else - netif_dbg(efx, drv, efx->net_dev, - "RX buf len=%u step=%u bpp=%u; page batch=%u\n", - efx->rx_dma_len, efx->rx_page_buf_step, - efx->rx_bufs_per_page, efx->rx_pages_per_batch); - - /* Restore previously fixed features in hw_features and remove - * features which are fixed now - */ - efx->net_dev->hw_features |= efx->net_dev->features; - efx->net_dev->hw_features &= ~efx->fixed_features; - efx->net_dev->features |= efx->fixed_features; - if (efx->net_dev->features != old_features) - netdev_features_change(efx->net_dev); - - /* RX filters may also have scatter-enabled flags */ - if (efx->rx_scatter != old_rx_scatter) - efx->type->filter_update_rx_scatter(efx); - - /* We must keep at least one descriptor in a TX ring empty. - * We could avoid this when the queue size does not exactly - * match the hardware ring size, but it's not that important. - * Therefore we stop the queue when one more skb might fill - * the ring completely. We wake it when half way back to - * empty. 
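A worked instance of that stop/wake rule, before the driver code below sets the two thresholds (a sketch only: the 1024-entry ring and the 18-descriptor worst case are assumed figures; the real per-skb maximum comes from efx_tx_max_skb_descs() and varies by NIC):

static void tx_thresholds_example(void)
{
        unsigned int txq_entries = 1024;   /* assumed ring size */
        unsigned int max_skb_descs = 18;   /* assumed worst case per skb */
        /* Stop when one more worst-case skb could overfill the ring;
         * wake again at half way back to empty. */
        unsigned int stop = txq_entries - max_skb_descs;   /* 1006 */
        unsigned int wake = stop / 2;                      /* 503 */
        (void)stop; (void)wake;
}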
- */ - efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx); - efx->txq_wake_thresh = efx->txq_stop_thresh / 2; - - /* Initialise the channels */ - efx_for_each_channel(channel, efx) { - efx_for_each_channel_tx_queue(tx_queue, channel) { - efx_init_tx_queue(tx_queue); - atomic_inc(&efx->active_queues); - } - - efx_for_each_channel_rx_queue(rx_queue, channel) { - efx_init_rx_queue(rx_queue); - atomic_inc(&efx->active_queues); - efx_stop_eventq(channel); - efx_fast_push_rx_descriptors(rx_queue, false); - efx_start_eventq(channel); - } - - WARN_ON(channel->rx_pkt_n_frags); - } - - efx_ptp_start_datapath(efx); - - if (netif_device_present(efx->net_dev)) - netif_tx_wake_all_queues(efx->net_dev); -} - -static void efx_stop_datapath(struct efx_nic *efx) -{ - struct efx_channel *channel; - struct efx_tx_queue *tx_queue; - struct efx_rx_queue *rx_queue; - int rc; - - EFX_ASSERT_RESET_SERIALISED(efx); - BUG_ON(efx->port_enabled); - - efx_ptp_stop_datapath(efx); - - /* Stop RX refill */ - efx_for_each_channel(channel, efx) { - efx_for_each_channel_rx_queue(rx_queue, channel) - rx_queue->refill_enabled = false; - } - - efx_for_each_channel(channel, efx) { - /* RX packet processing is pipelined, so wait for the - * NAPI handler to complete. At least event queue 0 - * might be kept active by non-data events, so don't - * use napi_synchronize() but actually disable NAPI - * temporarily. - */ - if (efx_channel_has_rx_queue(channel)) { - efx_stop_eventq(channel); - efx_start_eventq(channel); - } - } - - rc = efx->type->fini_dmaq(efx); - if (rc) { - netif_err(efx, drv, efx->net_dev, "failed to flush queues\n"); - } else { - netif_dbg(efx, drv, efx->net_dev, - "successfully flushed all queues\n"); - } - - efx_for_each_channel(channel, efx) { - efx_for_each_channel_rx_queue(rx_queue, channel) - efx_fini_rx_queue(rx_queue); - efx_for_each_possible_channel_tx_queue(tx_queue, channel) - efx_fini_tx_queue(tx_queue); - } - efx->xdp_rxq_info_failed = false; -} - -static void efx_remove_channel(struct efx_channel *channel) -{ - struct efx_tx_queue *tx_queue; - struct efx_rx_queue *rx_queue; - - netif_dbg(channel->efx, drv, channel->efx->net_dev, - "destroy chan %d\n", channel->channel); - - efx_for_each_channel_rx_queue(rx_queue, channel) - efx_remove_rx_queue(rx_queue); - efx_for_each_possible_channel_tx_queue(tx_queue, channel) - efx_remove_tx_queue(tx_queue); - efx_remove_eventq(channel); - channel->type->post_remove(channel); -} - -static void efx_remove_channels(struct efx_nic *efx) -{ - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) - efx_remove_channel(channel); - - kfree(efx->xdp_tx_queues); -} - -int -efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) -{ - struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel; - u32 old_rxq_entries, old_txq_entries; - unsigned i, next_buffer_table = 0; - int rc, rc2; - - rc = efx_check_disabled(efx); - if (rc) - return rc; - - /* Not all channels should be reallocated. We must avoid - * reallocating their buffer table entries. 
- */ - efx_for_each_channel(channel, efx) { - struct efx_rx_queue *rx_queue; - struct efx_tx_queue *tx_queue; - - if (channel->type->copy) - continue; - next_buffer_table = max(next_buffer_table, - channel->eventq.index + - channel->eventq.entries); - efx_for_each_channel_rx_queue(rx_queue, channel) - next_buffer_table = max(next_buffer_table, - rx_queue->rxd.index + - rx_queue->rxd.entries); - efx_for_each_channel_tx_queue(tx_queue, channel) - next_buffer_table = max(next_buffer_table, - tx_queue->txd.index + - tx_queue->txd.entries); - } - - efx_device_detach_sync(efx); - efx_stop_all(efx); - efx_soft_disable_interrupts(efx); - - /* Clone channels (where possible) */ - memset(other_channel, 0, sizeof(other_channel)); - for (i = 0; i < efx->n_channels; i++) { - channel = efx->channel[i]; - if (channel->type->copy) - channel = channel->type->copy(channel); - if (!channel) { - rc = -ENOMEM; - goto out; - } - other_channel[i] = channel; - } - - /* Swap entry counts and channel pointers */ - old_rxq_entries = efx->rxq_entries; - old_txq_entries = efx->txq_entries; - efx->rxq_entries = rxq_entries; - efx->txq_entries = txq_entries; - for (i = 0; i < efx->n_channels; i++) { - channel = efx->channel[i]; - efx->channel[i] = other_channel[i]; - other_channel[i] = channel; - } - - /* Restart buffer table allocation */ - efx->next_buffer_table = next_buffer_table; - - for (i = 0; i < efx->n_channels; i++) { - channel = efx->channel[i]; - if (!channel->type->copy) - continue; - rc = efx_probe_channel(channel); - if (rc) - goto rollback; - efx_init_napi_channel(efx->channel[i]); - } - -out: - /* Destroy unused channel structures */ - for (i = 0; i < efx->n_channels; i++) { - channel = other_channel[i]; - if (channel && channel->type->copy) { - efx_fini_napi_channel(channel); - efx_remove_channel(channel); - kfree(channel); - } - } - - rc2 = efx_soft_enable_interrupts(efx); - if (rc2) { - rc = rc ? rc : rc2; - netif_err(efx, drv, efx->net_dev, - "unable to restart interrupts on channel reallocation\n"); - efx_schedule_reset(efx, RESET_TYPE_DISABLE); - } else { - efx_start_all(efx); - efx_device_attach_if_not_resetting(efx); - } - return rc; - -rollback: - /* Swap back */ - efx->rxq_entries = old_rxq_entries; - efx->txq_entries = old_txq_entries; - for (i = 0; i < efx->n_channels; i++) { - channel = efx->channel[i]; - efx->channel[i] = other_channel[i]; - other_channel[i] = channel; - } - goto out; -} - -void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue) -{ - mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10)); -} - -static bool efx_default_channel_want_txqs(struct efx_channel *channel) -{ - return channel->channel - channel->efx->tx_channel_offset < - channel->efx->n_tx_channels; -} - -static const struct efx_channel_type efx_default_channel_type = { - .pre_probe = efx_channel_dummy_op_int, - .post_remove = efx_channel_dummy_op_void, - .get_name = efx_get_channel_name, - .copy = efx_copy_channel, - .want_txqs = efx_default_channel_want_txqs, - .keep_eventq = false, - .want_pio = true, -}; - -int efx_channel_dummy_op_int(struct efx_channel *channel) -{ - return 0; -} - -void efx_channel_dummy_op_void(struct efx_channel *channel) -{ -} - /************************************************************************** * * Port handling * **************************************************************************/ -/* This ensures that the kernel is kept informed (via - * netif_carrier_on/off) of the link status, and also maintains the - * link status's stop on the port's TX queue. 
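One detail worth calling out in efx_default_channel_want_txqs() above: assuming the offset is an unsigned type (as the sketch below does), the subtraction wraps for channels numbered below tx_channel_offset, so the single comparison also rejects RX-only channels. With assumed numbers:

/* Assumed layout: channels 0-3 are RX-only, 4-7 carry TX queues. */
static int wants_txq_example(int ch)
{
        unsigned int tx_channel_offset = 4, n_tx_channels = 4;
        /* For ch = 2, (unsigned)(2 - 4) is huge, so the test is false;
         * for ch = 5 it evaluates 1 < 4, which is true. */
        return (ch - tx_channel_offset) < n_tx_channels;
}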
- */ -void efx_link_status_changed(struct efx_nic *efx) -{ - struct efx_link_state *link_state = &efx->link_state; - - /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure - * that no events are triggered between unregister_netdev() and the - * driver unloading. A more general condition is that NETDEV_CHANGE - * can only be generated between NETDEV_UP and NETDEV_DOWN */ - if (!netif_running(efx->net_dev)) - return; - - if (link_state->up != netif_carrier_ok(efx->net_dev)) { - efx->n_link_state_changes++; - - if (link_state->up) - netif_carrier_on(efx->net_dev); - else - netif_carrier_off(efx->net_dev); - } - - /* Status message for kernel log */ - if (link_state->up) - netif_info(efx, link, efx->net_dev, - "link up at %uMbps %s-duplex (MTU %d)\n", - link_state->speed, link_state->fd ? "full" : "half", - efx->net_dev->mtu); - else - netif_info(efx, link, efx->net_dev, "link down\n"); -} - -void efx_link_set_advertising(struct efx_nic *efx, - const unsigned long *advertising) -{ - memcpy(efx->link_advertising, advertising, - sizeof(__ETHTOOL_DECLARE_LINK_MODE_MASK())); - - efx->link_advertising[0] |= ADVERTISED_Autoneg; - if (advertising[0] & ADVERTISED_Pause) - efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX); - else - efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX); - if (advertising[0] & ADVERTISED_Asym_Pause) - efx->wanted_fc ^= EFX_FC_TX; -} - /* Equivalent to efx_link_set_advertising with all-zeroes, except does not * force the Autoneg bit on. */ @@ -1035,73 +160,6 @@ void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc) static void efx_fini_port(struct efx_nic *efx); -/* We assume that efx->type->reconfigure_mac will always try to sync RX - * filters and therefore needs to read-lock the filter table against freeing - */ -void efx_mac_reconfigure(struct efx_nic *efx) -{ - down_read(&efx->filter_sem); - efx->type->reconfigure_mac(efx); - up_read(&efx->filter_sem); -} - -/* Push loopback/power/transmit disable settings to the PHY, and reconfigure - * the MAC appropriately. All other PHY configuration changes are pushed - * through phy_op->set_settings(), and pushed asynchronously to the MAC - * through efx_monitor(). - * - * Callers must hold the mac_lock - */ -int __efx_reconfigure_port(struct efx_nic *efx) -{ - enum efx_phy_mode phy_mode; - int rc; - - WARN_ON(!mutex_is_locked(&efx->mac_lock)); - - /* Disable PHY transmit in mac level loopbacks */ - phy_mode = efx->phy_mode; - if (LOOPBACK_INTERNAL(efx)) - efx->phy_mode |= PHY_MODE_TX_DISABLED; - else - efx->phy_mode &= ~PHY_MODE_TX_DISABLED; - - rc = efx->type->reconfigure_port(efx); - - if (rc) - efx->phy_mode = phy_mode; - - return rc; -} - -/* Reinitialise the MAC to pick up new PHY settings, even if the port is - * disabled. */ -int efx_reconfigure_port(struct efx_nic *efx) -{ - int rc; - - EFX_ASSERT_RESET_SERIALISED(efx); - - mutex_lock(&efx->mac_lock); - rc = __efx_reconfigure_port(efx); - mutex_unlock(&efx->mac_lock); - - return rc; -} - -/* Asynchronous work item for changing MAC promiscuity and multicast - * hash. Avoid a drain/rx_ingress enable by reconfiguring the current - * MAC directly. 
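The Pause/Asym_Pause handling in efx_link_set_advertising() above follows the usual 802.3 convention: Pause requests symmetric flow control, and Asym_Pause then toggles the TX direction. Spelled out as a sketch (FC_TX/FC_RX are stand-ins for EFX_FC_TX/EFX_FC_RX):

/* Pause  Asym_Pause   wanted flow control
 *   0        0        none
 *   0        1        FC_TX            (0 ^ FC_TX)
 *   1        0        FC_TX | FC_RX
 *   1        1        FC_RX            ((FC_TX | FC_RX) ^ FC_TX)
 */
static unsigned int wanted_fc_example(int pause, int asym)
{
        enum { FC_TX = 1, FC_RX = 2 };
        unsigned int wanted = pause ? (FC_TX | FC_RX) : 0;
        if (asym)
                wanted ^= FC_TX;   /* asymmetric: flip the TX direction */
        return wanted;
}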
*/ -static void efx_mac_work(struct work_struct *data) -{ - struct efx_nic *efx = container_of(data, struct efx_nic, mac_work); - - mutex_lock(&efx->mac_lock); - if (efx->port_enabled) - efx_mac_reconfigure(efx); - mutex_unlock(&efx->mac_lock); -} - static int efx_probe_port(struct efx_nic *efx) { int rc; @@ -1155,44 +213,6 @@ fail1: return rc; } -static void efx_start_port(struct efx_nic *efx) -{ - netif_dbg(efx, ifup, efx->net_dev, "start port\n"); - BUG_ON(efx->port_enabled); - - mutex_lock(&efx->mac_lock); - efx->port_enabled = true; - - /* Ensure MAC ingress/egress is enabled */ - efx_mac_reconfigure(efx); - - mutex_unlock(&efx->mac_lock); -} - -/* Cancel work for MAC reconfiguration, periodic hardware monitoring - * and the async self-test, wait for them to finish and prevent them - * being scheduled again. This doesn't cover online resets, which - * should only be cancelled when removing the device. - */ -static void efx_stop_port(struct efx_nic *efx) -{ - netif_dbg(efx, ifdown, efx->net_dev, "stop port\n"); - - EFX_ASSERT_RESET_SERIALISED(efx); - - mutex_lock(&efx->mac_lock); - efx->port_enabled = false; - mutex_unlock(&efx->mac_lock); - - /* Serialise against efx_set_multicast_list() */ - netif_addr_lock_bh(efx->net_dev); - netif_addr_unlock_bh(efx->net_dev); - - cancel_delayed_work_sync(&efx->monitor_work); - efx_selftest_async_cancel(efx); - cancel_work_sync(&efx->mac_work); -} - static void efx_fini_port(struct efx_nic *efx) { netif_dbg(efx, drv, efx->net_dev, "shut down port\n"); @@ -1291,582 +311,6 @@ static void efx_dissociate(struct efx_nic *efx) } } -/* This configures the PCI device to enable I/O and DMA. */ -static int efx_init_io(struct efx_nic *efx) -{ - struct pci_dev *pci_dev = efx->pci_dev; - dma_addr_t dma_mask = efx->type->max_dma_mask; - unsigned int mem_map_size = efx->type->mem_map_size(efx); - int rc, bar; - - netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n"); - - bar = efx->type->mem_bar(efx); - - rc = pci_enable_device(pci_dev); - if (rc) { - netif_err(efx, probe, efx->net_dev, - "failed to enable PCI device\n"); - goto fail1; - } - - pci_set_master(pci_dev); - - /* Set the PCI DMA mask. Try all possibilities from our genuine mask - * down to 32 bits, because some architectures will allow 40 bit - * masks even though they reject 46 bit masks. 
- */ - while (dma_mask > 0x7fffffffUL) { - rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask); - if (rc == 0) - break; - dma_mask >>= 1; - } - if (rc) { - netif_err(efx, probe, efx->net_dev, - "could not find a suitable DMA mask\n"); - goto fail2; - } - netif_dbg(efx, probe, efx->net_dev, - "using DMA mask %llx\n", (unsigned long long) dma_mask); - - efx->membase_phys = pci_resource_start(efx->pci_dev, bar); - rc = pci_request_region(pci_dev, bar, "sfc"); - if (rc) { - netif_err(efx, probe, efx->net_dev, - "request for memory BAR failed\n"); - rc = -EIO; - goto fail3; - } - efx->membase = ioremap(efx->membase_phys, mem_map_size); - if (!efx->membase) { - netif_err(efx, probe, efx->net_dev, - "could not map memory BAR at %llx+%x\n", - (unsigned long long)efx->membase_phys, mem_map_size); - rc = -ENOMEM; - goto fail4; - } - netif_dbg(efx, probe, efx->net_dev, - "memory BAR at %llx+%x (virtual %p)\n", - (unsigned long long)efx->membase_phys, mem_map_size, - efx->membase); - - return 0; - - fail4: - pci_release_region(efx->pci_dev, bar); - fail3: - efx->membase_phys = 0; - fail2: - pci_disable_device(efx->pci_dev); - fail1: - return rc; -} - -static void efx_fini_io(struct efx_nic *efx) -{ - int bar; - - netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n"); - - if (efx->membase) { - iounmap(efx->membase); - efx->membase = NULL; - } - - if (efx->membase_phys) { - bar = efx->type->mem_bar(efx); - pci_release_region(efx->pci_dev, bar); - efx->membase_phys = 0; - } - - /* Don't disable bus-mastering if VFs are assigned */ - if (!pci_vfs_assigned(efx->pci_dev)) - pci_disable_device(efx->pci_dev); -} - -void efx_set_default_rx_indir_table(struct efx_nic *efx, - struct efx_rss_context *ctx) -{ - size_t i; - - for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++) - ctx->rx_indir_table[i] = - ethtool_rxfh_indir_default(i, efx->rss_spread); -} - -static unsigned int efx_wanted_parallelism(struct efx_nic *efx) -{ - cpumask_var_t thread_mask; - unsigned int count; - int cpu; - - if (rss_cpus) { - count = rss_cpus; - } else { - if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) { - netif_warn(efx, probe, efx->net_dev, - "RSS disabled due to allocation failure\n"); - return 1; - } - - count = 0; - for_each_online_cpu(cpu) { - if (!cpumask_test_cpu(cpu, thread_mask)) { - ++count; - cpumask_or(thread_mask, thread_mask, - topology_sibling_cpumask(cpu)); - } - } - - free_cpumask_var(thread_mask); - } - - if (count > EFX_MAX_RX_QUEUES) { - netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn, - "Reducing number of rx queues from %u to %u.\n", - count, EFX_MAX_RX_QUEUES); - count = EFX_MAX_RX_QUEUES; - } - - /* If RSS is requested for the PF *and* VFs then we can't write RSS - * table entries that are inaccessible to VFs - */ -#ifdef CONFIG_SFC_SRIOV - if (efx->type->sriov_wanted) { - if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 && - count > efx_vf_size(efx)) { - netif_warn(efx, probe, efx->net_dev, - "Reducing number of RSS channels from %u to %u for " - "VF support. 
Increase vf-msix-limit to use more " - "channels on the PF.\n", - count, efx_vf_size(efx)); - count = efx_vf_size(efx); - } - } -#endif - - return count; -} - -static int efx_allocate_msix_channels(struct efx_nic *efx, - unsigned int max_channels, - unsigned int extra_channels, - unsigned int parallelism) -{ - unsigned int n_channels = parallelism; - int vec_count; - int n_xdp_tx; - int n_xdp_ev; - - if (efx_separate_tx_channels) - n_channels *= 2; - n_channels += extra_channels; - - /* To allow XDP transmit to happen from arbitrary NAPI contexts - * we allocate a TX queue per CPU. We share event queues across - * multiple tx queues, assuming tx and ev queues are both - * maximum size. - */ - - n_xdp_tx = num_possible_cpus(); - n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES); - - vec_count = pci_msix_vec_count(efx->pci_dev); - if (vec_count < 0) - return vec_count; - - max_channels = min_t(unsigned int, vec_count, max_channels); - - /* Check resources. - * We need a channel per event queue, plus a VI per tx queue. - * This may be more pessimistic than it needs to be. - */ - if (n_channels + n_xdp_ev > max_channels) { - netif_err(efx, drv, efx->net_dev, - "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n", - n_xdp_ev, n_channels, max_channels); - efx->n_xdp_channels = 0; - efx->xdp_tx_per_channel = 0; - efx->xdp_tx_queue_count = 0; - } else { - efx->n_xdp_channels = n_xdp_ev; - efx->xdp_tx_per_channel = EFX_TXQ_TYPES; - efx->xdp_tx_queue_count = n_xdp_tx; - n_channels += n_xdp_ev; - netif_dbg(efx, drv, efx->net_dev, - "Allocating %d TX and %d event queues for XDP\n", - n_xdp_tx, n_xdp_ev); - } - - if (vec_count < n_channels) { - netif_err(efx, drv, efx->net_dev, - "WARNING: Insufficient MSI-X vectors available (%d < %u).\n", - vec_count, n_channels); - netif_err(efx, drv, efx->net_dev, - "WARNING: Performance may be reduced.\n"); - n_channels = vec_count; - } - - n_channels = min(n_channels, max_channels); - - efx->n_channels = n_channels; - - /* Ignore XDP tx channels when creating rx channels. */ - n_channels -= efx->n_xdp_channels; - - if (efx_separate_tx_channels) { - efx->n_tx_channels = - min(max(n_channels / 2, 1U), - efx->max_tx_channels); - efx->tx_channel_offset = - n_channels - efx->n_tx_channels; - efx->n_rx_channels = - max(n_channels - - efx->n_tx_channels, 1U); - } else { - efx->n_tx_channels = min(n_channels, efx->max_tx_channels); - efx->tx_channel_offset = 0; - efx->n_rx_channels = n_channels; - } - - efx->n_rx_channels = min(efx->n_rx_channels, parallelism); - efx->n_tx_channels = min(efx->n_tx_channels, parallelism); - - efx->xdp_channel_offset = n_channels; - - netif_dbg(efx, drv, efx->net_dev, - "Allocating %u RX channels\n", - efx->n_rx_channels); - - return efx->n_channels; -} - -/* Probe the number and type of interrupts we are able to obtain, and - * the resulting numbers of channels and RX queues. 
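The XDP queue sizing in efx_allocate_msix_channels() above, in numbers (a sketch; the 16 CPUs and EFX_TXQ_TYPES == 4 are assumed figures): one XDP TX queue per possible CPU, packed onto as few event queues as the per-channel queue limit allows.

static void xdp_sizing_example(void)
{
        unsigned int n_cpus = 16, txq_types = 4;  /* assumed values */
        unsigned int n_xdp_tx = n_cpus;           /* one TX queue per CPU */
        /* DIV_ROUND_UP(16, 4) == 4 event queues */
        unsigned int n_xdp_ev = (n_xdp_tx + txq_types - 1) / txq_types;
        (void)n_xdp_ev;
}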
- */ -static int efx_probe_interrupts(struct efx_nic *efx) -{ - unsigned int extra_channels = 0; - unsigned int rss_spread; - unsigned int i, j; - int rc; - - for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) - if (efx->extra_channel_type[i]) - ++extra_channels; - - if (efx->interrupt_mode == EFX_INT_MODE_MSIX) { - unsigned int parallelism = efx_wanted_parallelism(efx); - struct msix_entry xentries[EFX_MAX_CHANNELS]; - unsigned int n_channels; - - rc = efx_allocate_msix_channels(efx, efx->max_channels, - extra_channels, parallelism); - if (rc >= 0) { - n_channels = rc; - for (i = 0; i < n_channels; i++) - xentries[i].entry = i; - rc = pci_enable_msix_range(efx->pci_dev, xentries, 1, - n_channels); - } - if (rc < 0) { - /* Fall back to single channel MSI */ - netif_err(efx, drv, efx->net_dev, - "could not enable MSI-X\n"); - if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI) - efx->interrupt_mode = EFX_INT_MODE_MSI; - else - return rc; - } else if (rc < n_channels) { - netif_err(efx, drv, efx->net_dev, - "WARNING: Insufficient MSI-X vectors" - " available (%d < %u).\n", rc, n_channels); - netif_err(efx, drv, efx->net_dev, - "WARNING: Performance may be reduced.\n"); - n_channels = rc; - } - - if (rc > 0) { - for (i = 0; i < efx->n_channels; i++) - efx_get_channel(efx, i)->irq = - xentries[i].vector; - } - } - - /* Try single interrupt MSI */ - if (efx->interrupt_mode == EFX_INT_MODE_MSI) { - efx->n_channels = 1; - efx->n_rx_channels = 1; - efx->n_tx_channels = 1; - efx->n_xdp_channels = 0; - efx->xdp_channel_offset = efx->n_channels; - rc = pci_enable_msi(efx->pci_dev); - if (rc == 0) { - efx_get_channel(efx, 0)->irq = efx->pci_dev->irq; - } else { - netif_err(efx, drv, efx->net_dev, - "could not enable MSI\n"); - if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY) - efx->interrupt_mode = EFX_INT_MODE_LEGACY; - else - return rc; - } - } - - /* Assume legacy interrupts */ - if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) { - efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0); - efx->n_rx_channels = 1; - efx->n_tx_channels = 1; - efx->n_xdp_channels = 0; - efx->xdp_channel_offset = efx->n_channels; - efx->legacy_irq = efx->pci_dev->irq; - } - - /* Assign extra channels if possible, before XDP channels */ - efx->n_extra_tx_channels = 0; - j = efx->xdp_channel_offset; - for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) { - if (!efx->extra_channel_type[i]) - continue; - if (j <= efx->tx_channel_offset + efx->n_tx_channels) { - efx->extra_channel_type[i]->handle_no_channel(efx); - } else { - --j; - efx_get_channel(efx, j)->type = - efx->extra_channel_type[i]; - if (efx_channel_has_tx_queues(efx_get_channel(efx, j))) - efx->n_extra_tx_channels++; - } - } - - rss_spread = efx->n_rx_channels; - /* RSS might be usable on VFs even if it is disabled on the PF */ -#ifdef CONFIG_SFC_SRIOV - if (efx->type->sriov_wanted) { - efx->rss_spread = ((rss_spread > 1 || - !efx->type->sriov_wanted(efx)) ? 
- rss_spread : efx_vf_size(efx)); - return 0; - } -#endif - efx->rss_spread = rss_spread; - - return 0; -} - -#if defined(CONFIG_SMP) -static void efx_set_interrupt_affinity(struct efx_nic *efx) -{ - struct efx_channel *channel; - unsigned int cpu; - - efx_for_each_channel(channel, efx) { - cpu = cpumask_local_spread(channel->channel, - pcibus_to_node(efx->pci_dev->bus)); - irq_set_affinity_hint(channel->irq, cpumask_of(cpu)); - } -} - -static void efx_clear_interrupt_affinity(struct efx_nic *efx) -{ - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) - irq_set_affinity_hint(channel->irq, NULL); -} -#else -static void -efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused))) -{ -} - -static void -efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused))) -{ -} -#endif /* CONFIG_SMP */ - -static int efx_soft_enable_interrupts(struct efx_nic *efx) -{ - struct efx_channel *channel, *end_channel; - int rc; - - BUG_ON(efx->state == STATE_DISABLED); - - efx->irq_soft_enabled = true; - smp_wmb(); - - efx_for_each_channel(channel, efx) { - if (!channel->type->keep_eventq) { - rc = efx_init_eventq(channel); - if (rc) - goto fail; - } - efx_start_eventq(channel); - } - - efx_mcdi_mode_event(efx); - - return 0; -fail: - end_channel = channel; - efx_for_each_channel(channel, efx) { - if (channel == end_channel) - break; - efx_stop_eventq(channel); - if (!channel->type->keep_eventq) - efx_fini_eventq(channel); - } - - return rc; -} - -static void efx_soft_disable_interrupts(struct efx_nic *efx) -{ - struct efx_channel *channel; - - if (efx->state == STATE_DISABLED) - return; - - efx_mcdi_mode_poll(efx); - - efx->irq_soft_enabled = false; - smp_wmb(); - - if (efx->legacy_irq) - synchronize_irq(efx->legacy_irq); - - efx_for_each_channel(channel, efx) { - if (channel->irq) - synchronize_irq(channel->irq); - - efx_stop_eventq(channel); - if (!channel->type->keep_eventq) - efx_fini_eventq(channel); - } - - /* Flush the asynchronous MCDI request queue */ - efx_mcdi_flush_async(efx); -} - -static int efx_enable_interrupts(struct efx_nic *efx) -{ - struct efx_channel *channel, *end_channel; - int rc; - - BUG_ON(efx->state == STATE_DISABLED); - - if (efx->eeh_disabled_legacy_irq) { - enable_irq(efx->legacy_irq); - efx->eeh_disabled_legacy_irq = false; - } - - efx->type->irq_enable_master(efx); - - efx_for_each_channel(channel, efx) { - if (channel->type->keep_eventq) { - rc = efx_init_eventq(channel); - if (rc) - goto fail; - } - } - - rc = efx_soft_enable_interrupts(efx); - if (rc) - goto fail; - - return 0; - -fail: - end_channel = channel; - efx_for_each_channel(channel, efx) { - if (channel == end_channel) - break; - if (channel->type->keep_eventq) - efx_fini_eventq(channel); - } - - efx->type->irq_disable_non_ev(efx); - - return rc; -} - -static void efx_disable_interrupts(struct efx_nic *efx) -{ - struct efx_channel *channel; - - efx_soft_disable_interrupts(efx); - - efx_for_each_channel(channel, efx) { - if (channel->type->keep_eventq) - efx_fini_eventq(channel); - } - - efx->type->irq_disable_non_ev(efx); -} - -static void efx_remove_interrupts(struct efx_nic *efx) -{ - struct efx_channel *channel; - - /* Remove MSI/MSI-X interrupts */ - efx_for_each_channel(channel, efx) - channel->irq = 0; - pci_disable_msi(efx->pci_dev); - pci_disable_msix(efx->pci_dev); - - /* Remove legacy interrupt */ - efx->legacy_irq = 0; -} - -static int efx_set_channels(struct efx_nic *efx) -{ - struct efx_channel *channel; - struct efx_tx_queue *tx_queue; - int 
xdp_queue_number; - - efx->tx_channel_offset = - efx_separate_tx_channels ? - efx->n_channels - efx->n_tx_channels : 0; - - if (efx->xdp_tx_queue_count) { - EFX_WARN_ON_PARANOID(efx->xdp_tx_queues); - - /* Allocate array for XDP TX queue lookup. */ - efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count, - sizeof(*efx->xdp_tx_queues), - GFP_KERNEL); - if (!efx->xdp_tx_queues) - return -ENOMEM; - } - - /* We need to mark which channels really have RX and TX - * queues, and adjust the TX queue numbers if we have separate - * RX-only and TX-only channels. - */ - xdp_queue_number = 0; - efx_for_each_channel(channel, efx) { - if (channel->channel < efx->n_rx_channels) - channel->rx_queue.core_index = channel->channel; - else - channel->rx_queue.core_index = -1; - - efx_for_each_channel_tx_queue(tx_queue, channel) { - tx_queue->queue -= (efx->tx_channel_offset * - EFX_TXQ_TYPES); - - if (efx_channel_is_xdp_tx(channel) && - xdp_queue_number < efx->xdp_tx_queue_count) { - efx->xdp_tx_queues[xdp_queue_number] = tx_queue; - xdp_queue_number++; - } - } - } - return 0; -} - static int efx_probe_nic(struct efx_nic *efx) { int rc; @@ -1939,70 +383,6 @@ static void efx_remove_nic(struct efx_nic *efx) efx->type->remove(efx); } -static int efx_probe_filters(struct efx_nic *efx) -{ - int rc; - - init_rwsem(&efx->filter_sem); - mutex_lock(&efx->mac_lock); - down_write(&efx->filter_sem); - rc = efx->type->filter_table_probe(efx); - if (rc) - goto out_unlock; - -#ifdef CONFIG_RFS_ACCEL - if (efx->type->offload_features & NETIF_F_NTUPLE) { - struct efx_channel *channel; - int i, success = 1; - - efx_for_each_channel(channel, efx) { - channel->rps_flow_id = - kcalloc(efx->type->max_rx_ip_filters, - sizeof(*channel->rps_flow_id), - GFP_KERNEL); - if (!channel->rps_flow_id) - success = 0; - else - for (i = 0; - i < efx->type->max_rx_ip_filters; - ++i) - channel->rps_flow_id[i] = - RPS_FLOW_ID_INVALID; - channel->rfs_expire_index = 0; - channel->rfs_filter_count = 0; - } - - if (!success) { - efx_for_each_channel(channel, efx) - kfree(channel->rps_flow_id); - efx->type->filter_table_remove(efx); - rc = -ENOMEM; - goto out_unlock; - } - } -#endif -out_unlock: - up_write(&efx->filter_sem); - mutex_unlock(&efx->mac_lock); - return rc; -} - -static void efx_remove_filters(struct efx_nic *efx) -{ -#ifdef CONFIG_RFS_ACCEL - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) { - cancel_delayed_work_sync(&channel->filter_work); - kfree(channel->rps_flow_id); - } -#endif - down_write(&efx->filter_sem); - efx->type->filter_table_remove(efx); - up_write(&efx->filter_sem); -} - - /************************************************************************** * * NIC startup/shutdown @@ -2067,81 +447,6 @@ static int efx_probe_all(struct efx_nic *efx) return rc; } -/* If the interface is supposed to be running but is not, start - * the hardware and software data path, regular activity for the port - * (MAC statistics, link polling, etc.) and schedule the port to be - * reconfigured. Interrupts must already be enabled. This function - * is safe to call multiple times, so long as the NIC is not disabled. - * Requires the RTNL lock. - */ -static void efx_start_all(struct efx_nic *efx) -{ - EFX_ASSERT_RESET_SERIALISED(efx); - BUG_ON(efx->state == STATE_DISABLED); - - /* Check that it is appropriate to restart the interface. 
All - * of these flags are safe to read under just the rtnl lock */ - if (efx->port_enabled || !netif_running(efx->net_dev) || - efx->reset_pending) - return; - - efx_start_port(efx); - efx_start_datapath(efx); - - /* Start the hardware monitor if there is one */ - if (efx->type->monitor != NULL) - queue_delayed_work(efx->workqueue, &efx->monitor_work, - efx_monitor_interval); - - /* Link state detection is normally event-driven; we have - * to poll now because we could have missed a change - */ - mutex_lock(&efx->mac_lock); - if (efx->phy_op->poll(efx)) - efx_link_status_changed(efx); - mutex_unlock(&efx->mac_lock); - - efx->type->start_stats(efx); - efx->type->pull_stats(efx); - spin_lock_bh(&efx->stats_lock); - efx->type->update_stats(efx, NULL, NULL); - spin_unlock_bh(&efx->stats_lock); -} - -/* Quiesce the hardware and software data path, and regular activity - * for the port without bringing the link down. Safe to call multiple - * times with the NIC in almost any state, but interrupts should be - * enabled. Requires the RTNL lock. - */ -static void efx_stop_all(struct efx_nic *efx) -{ - EFX_ASSERT_RESET_SERIALISED(efx); - - /* port_enabled can be read safely under the rtnl lock */ - if (!efx->port_enabled) - return; - - /* update stats before we go down so we can accurately count - * rx_nodesc_drops - */ - efx->type->pull_stats(efx); - spin_lock_bh(&efx->stats_lock); - efx->type->update_stats(efx, NULL, NULL); - spin_unlock_bh(&efx->stats_lock); - efx->type->stop_stats(efx); - efx_stop_port(efx); - - /* Stop the kernel transmit interface. This is only valid if - * the device is stopped or detached; otherwise the watchdog - * may fire immediately. - */ - WARN_ON(netif_running(efx->net_dev) && - netif_device_present(efx->net_dev)); - netif_tx_disable(efx->net_dev); - - efx_stop_datapath(efx); -} - static void efx_remove_all(struct efx_nic *efx) { rtnl_lock(); @@ -2237,36 +542,6 @@ void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs, /************************************************************************** * - * Hardware monitor - * - **************************************************************************/ - -/* Run periodically off the general workqueue */ -static void efx_monitor(struct work_struct *data) -{ - struct efx_nic *efx = container_of(data, struct efx_nic, - monitor_work.work); - - netif_vdbg(efx, timer, efx->net_dev, - "hardware monitor executing on CPU %d\n", - raw_smp_processor_id()); - BUG_ON(efx->type->monitor == NULL); - - /* If the mac_lock is already held then it is likely a port - * reconfiguration is already in place, which will likely do - * most of the work of monitor() anyway. 
*/ - if (mutex_trylock(&efx->mac_lock)) { - if (efx->port_enabled) - efx->type->monitor(efx); - mutex_unlock(&efx->mac_lock); - } - - queue_delayed_work(efx->workqueue, &efx->monitor_work, - efx_monitor_interval); -} - -/************************************************************************** - * * ioctls * *************************************************************************/ @@ -2294,45 +569,6 @@ static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd) /************************************************************************** * - * NAPI interface - * - **************************************************************************/ - -static void efx_init_napi_channel(struct efx_channel *channel) -{ - struct efx_nic *efx = channel->efx; - - channel->napi_dev = efx->net_dev; - netif_napi_add(channel->napi_dev, &channel->napi_str, - efx_poll, napi_weight); -} - -static void efx_init_napi(struct efx_nic *efx) -{ - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) - efx_init_napi_channel(channel); -} - -static void efx_fini_napi_channel(struct efx_channel *channel) -{ - if (channel->napi_dev) - netif_napi_del(&channel->napi_str); - - channel->napi_dev = NULL; -} - -static void efx_fini_napi(struct efx_nic *efx) -{ - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) - efx_fini_napi_channel(channel); -} - -/************************************************************************** - * * Kernel net device interface * *************************************************************************/ @@ -2382,19 +618,8 @@ int efx_net_stop(struct net_device *net_dev) return 0; } -/* Context: process, dev_base_lock or RTNL held, non-blocking. */ -static void efx_net_stats(struct net_device *net_dev, - struct rtnl_link_stats64 *stats) -{ - struct efx_nic *efx = netdev_priv(net_dev); - - spin_lock_bh(&efx->stats_lock); - efx->type->update_stats(efx, NULL, stats); - spin_unlock_bh(&efx->stats_lock); -} - /* Context: netif_tx_lock held, BHs disabled. */ -static void efx_watchdog(struct net_device *net_dev) +static void efx_watchdog(struct net_device *net_dev, unsigned int txqueue) { struct efx_nic *efx = netdev_priv(net_dev); @@ -2405,51 +630,6 @@ static void efx_watchdog(struct net_device *net_dev) efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG); } -static unsigned int efx_xdp_max_mtu(struct efx_nic *efx) -{ - /* The maximum MTU that we can fit in a single page, allowing for - * framing, overhead and XDP headroom. - */ - int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) + - efx->rx_prefix_size + efx->type->rx_buffer_padding + - efx->rx_ip_align + XDP_PACKET_HEADROOM; - - return PAGE_SIZE - overhead; -} - -/* Context: process, rtnl_lock() held. 
*/ -static int efx_change_mtu(struct net_device *net_dev, int new_mtu) -{ - struct efx_nic *efx = netdev_priv(net_dev); - int rc; - - rc = efx_check_disabled(efx); - if (rc) - return rc; - - if (rtnl_dereference(efx->xdp_prog) && - new_mtu > efx_xdp_max_mtu(efx)) { - netif_err(efx, drv, efx->net_dev, - "Requested MTU of %d too big for XDP (max: %d)\n", - new_mtu, efx_xdp_max_mtu(efx)); - return -EINVAL; - } - - netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); - - efx_device_detach_sync(efx); - efx_stop_all(efx); - - mutex_lock(&efx->mac_lock); - net_dev->mtu = new_mtu; - efx_mac_reconfigure(efx); - mutex_unlock(&efx->mac_lock); - - efx_start_all(efx); - efx_device_attach_if_not_resetting(efx); - return 0; -} - static int efx_set_mac_address(struct net_device *net_dev, void *data) { struct efx_nic *efx = netdev_priv(net_dev); @@ -2726,28 +906,6 @@ show_phy_type(struct device *dev, struct device_attribute *attr, char *buf) } static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL); -#ifdef CONFIG_SFC_MCDI_LOGGING -static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct efx_nic *efx = dev_get_drvdata(dev); - struct efx_mcdi_iface *mcdi = efx_mcdi(efx); - - return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled); -} -static ssize_t set_mcdi_log(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct efx_nic *efx = dev_get_drvdata(dev); - struct efx_mcdi_iface *mcdi = efx_mcdi(efx); - bool enable = count > 0 && *buf != '0'; - - mcdi->logging_enabled = enable; - return count; -} -static DEVICE_ATTR(mcdi_logging, 0644, show_mcdi_log, set_mcdi_log); -#endif - static int efx_register_netdev(struct efx_nic *efx) { struct net_device *net_dev = efx->net_dev; @@ -2807,21 +965,11 @@ static int efx_register_netdev(struct efx_nic *efx) "failed to init net dev attributes\n"); goto fail_registered; } -#ifdef CONFIG_SFC_MCDI_LOGGING - rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); - if (rc) { - netif_err(efx, drv, efx->net_dev, - "failed to init net dev attributes\n"); - goto fail_attr_mcdi_logging; - } -#endif + + efx_init_mcdi_logging(efx); return 0; -#ifdef CONFIG_SFC_MCDI_LOGGING -fail_attr_mcdi_logging: - device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type); -#endif fail_registered: rtnl_lock(); efx_dissociate(efx); @@ -2842,9 +990,7 @@ static void efx_unregister_netdev(struct efx_nic *efx) if (efx_dev_registered(efx)) { strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name)); -#ifdef CONFIG_SFC_MCDI_LOGGING - device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); -#endif + efx_fini_mcdi_logging(efx); device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type); unregister_netdev(efx->net_dev); } @@ -2852,292 +998,6 @@ static void efx_unregister_netdev(struct efx_nic *efx) /************************************************************************** * - * Device reset and suspend - * - **************************************************************************/ - -/* Tears down the entire software state and most of the hardware state - * before reset. 
*/ -void efx_reset_down(struct efx_nic *efx, enum reset_type method) -{ - EFX_ASSERT_RESET_SERIALISED(efx); - - if (method == RESET_TYPE_MCDI_TIMEOUT) - efx->type->prepare_flr(efx); - - efx_stop_all(efx); - efx_disable_interrupts(efx); - - mutex_lock(&efx->mac_lock); - down_write(&efx->filter_sem); - mutex_lock(&efx->rss_lock); - if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && - method != RESET_TYPE_DATAPATH) - efx->phy_op->fini(efx); - efx->type->fini(efx); -} - -/* This function will always ensure that the locks acquired in - * efx_reset_down() are released. A failure return code indicates - * that we were unable to reinitialise the hardware, and the - * driver should be disabled. If ok is false, then the rx and tx - * engines are not restarted, pending a RESET_DISABLE. */ -int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) -{ - int rc; - - EFX_ASSERT_RESET_SERIALISED(efx); - - if (method == RESET_TYPE_MCDI_TIMEOUT) - efx->type->finish_flr(efx); - - /* Ensure that SRAM is initialised even if we're disabling the device */ - rc = efx->type->init(efx); - if (rc) { - netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n"); - goto fail; - } - - if (!ok) - goto fail; - - if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && - method != RESET_TYPE_DATAPATH) { - rc = efx->phy_op->init(efx); - if (rc) - goto fail; - rc = efx->phy_op->reconfigure(efx); - if (rc && rc != -EPERM) - netif_err(efx, drv, efx->net_dev, - "could not restore PHY settings\n"); - } - - rc = efx_enable_interrupts(efx); - if (rc) - goto fail; - -#ifdef CONFIG_SFC_SRIOV - rc = efx->type->vswitching_restore(efx); - if (rc) /* not fatal; the PF will still work fine */ - netif_warn(efx, probe, efx->net_dev, - "failed to restore vswitching rc=%d;" - " VFs may not function\n", rc); -#endif - - if (efx->type->rx_restore_rss_contexts) - efx->type->rx_restore_rss_contexts(efx); - mutex_unlock(&efx->rss_lock); - efx->type->filter_table_restore(efx); - up_write(&efx->filter_sem); - if (efx->type->sriov_reset) - efx->type->sriov_reset(efx); - - mutex_unlock(&efx->mac_lock); - - efx_start_all(efx); - - if (efx->type->udp_tnl_push_ports) - efx->type->udp_tnl_push_ports(efx); - - return 0; - -fail: - efx->port_initialized = false; - - mutex_unlock(&efx->rss_lock); - up_write(&efx->filter_sem); - mutex_unlock(&efx->mac_lock); - - return rc; -} - -/* Reset the NIC using the specified method. Note that the reset may - * fail, in which case the card will be left in an unusable state. - * - * Caller must hold the rtnl_lock. - */ -int efx_reset(struct efx_nic *efx, enum reset_type method) -{ - int rc, rc2; - bool disabled; - - netif_info(efx, drv, efx->net_dev, "resetting (%s)\n", - RESET_TYPE(method)); - - efx_device_detach_sync(efx); - efx_reset_down(efx, method); - - rc = efx->type->reset(efx, method); - if (rc) { - netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n"); - goto out; - } - - /* Clear flags for the scopes we covered. We assume the NIC and - * driver are now quiescent so that there is no race here. - */ - if (method < RESET_TYPE_MAX_METHOD) - efx->reset_pending &= -(1 << (method + 1)); - else /* it doesn't fit into the well-ordered scope hierarchy */ - __clear_bit(method, &efx->reset_pending); - - /* Reinitialise bus-mastering, which may have been turned off before - * the reset was scheduled. This is still appropriate, even in the - * RESET_TYPE_DISABLE since this driver generally assumes the hardware - * can respond to requests. 
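The reset_pending update in efx_reset() above relies on two's-complement arithmetic: -(1 << (method + 1)) has zeros in bits 0..method and ones everywhere above, so a single AND clears every reset scope the completed method already covered. For example (pending value assumed for illustration):

static unsigned long clear_covered_example(void)
{
        unsigned long pending = 0x2d;  /* methods 0, 2, 3 and 5 pending */
        int method = 2;                /* the reset that just completed */

        /* -(1UL << 3) is ...11111000 binary: zeros at bits 0..2, ones
         * above, so the AND clears every scope method 2 covered:
         * 0x2d -> 0x28 (only methods 3 and 5 remain pending). */
        pending &= -(1UL << (method + 1));
        return pending;
}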
*/ - pci_set_master(efx->pci_dev); - -out: - /* Leave device stopped if necessary */ - disabled = rc || - method == RESET_TYPE_DISABLE || - method == RESET_TYPE_RECOVER_OR_DISABLE; - rc2 = efx_reset_up(efx, method, !disabled); - if (rc2) { - disabled = true; - if (!rc) - rc = rc2; - } - - if (disabled) { - dev_close(efx->net_dev); - netif_err(efx, drv, efx->net_dev, "has been disabled\n"); - efx->state = STATE_DISABLED; - } else { - netif_dbg(efx, drv, efx->net_dev, "reset complete\n"); - efx_device_attach_if_not_resetting(efx); - } - return rc; -} - -/* Try recovery mechanisms. - * For now only EEH is supported. - * Returns 0 if the recovery mechanisms are unsuccessful. - * Returns a non-zero value otherwise. - */ -int efx_try_recovery(struct efx_nic *efx) -{ -#ifdef CONFIG_EEH - /* A PCI error can occur and not be seen by EEH because nothing - * happens on the PCI bus. In this case the driver may fail and - * schedule a 'recover or reset', leading to this recovery handler. - * Manually call the eeh failure check function. - */ - struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev); - if (eeh_dev_check_failure(eehdev)) { - /* The EEH mechanisms will handle the error and reset the - * device if necessary. - */ - return 1; - } -#endif - return 0; -} - -static void efx_wait_for_bist_end(struct efx_nic *efx) -{ - int i; - - for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) { - if (efx_mcdi_poll_reboot(efx)) - goto out; - msleep(BIST_WAIT_DELAY_MS); - } - - netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n"); -out: - /* Either way unset the BIST flag. If we found no reboot we probably - * won't recover, but we should try. - */ - efx->mc_bist_for_other_fn = false; -} - -/* The worker thread exists so that code that cannot sleep can - * schedule a reset for later. - */ -static void efx_reset_work(struct work_struct *data) -{ - struct efx_nic *efx = container_of(data, struct efx_nic, reset_work); - unsigned long pending; - enum reset_type method; - - pending = READ_ONCE(efx->reset_pending); - method = fls(pending) - 1; - - if (method == RESET_TYPE_MC_BIST) - efx_wait_for_bist_end(efx); - - if ((method == RESET_TYPE_RECOVER_OR_DISABLE || - method == RESET_TYPE_RECOVER_OR_ALL) && - efx_try_recovery(efx)) - return; - - if (!pending) - return; - - rtnl_lock(); - - /* We checked the state in efx_schedule_reset() but it may - * have changed by now. Now that we have the RTNL lock, - * it cannot change again. 
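Relatedly, efx_reset_work() above services the widest-scope request first: fls(pending) - 1 picks the highest set bit. A sketch using the GCC/Clang builtin as a userspace stand-in for the kernel's fls():

static int pick_method_example(void)
{
        unsigned long pending = 0x5;   /* methods 0 and 2 queued (assumed) */

        /* fls(0x5) == 3, so method 2, the widest pending scope, runs
         * first; efx_reset() then clears bits 0..2 with the
         * -(1 << (method + 1)) mask shown earlier. */
        return (int)(8 * sizeof(pending)) - __builtin_clzl(pending) - 1;
}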
- */ - if (efx->state == STATE_READY) - (void)efx_reset(efx, method); - - rtnl_unlock(); -} - -void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) -{ - enum reset_type method; - - if (efx->state == STATE_RECOVERY) { - netif_dbg(efx, drv, efx->net_dev, - "recovering: skip scheduling %s reset\n", - RESET_TYPE(type)); - return; - } - - switch (type) { - case RESET_TYPE_INVISIBLE: - case RESET_TYPE_ALL: - case RESET_TYPE_RECOVER_OR_ALL: - case RESET_TYPE_WORLD: - case RESET_TYPE_DISABLE: - case RESET_TYPE_RECOVER_OR_DISABLE: - case RESET_TYPE_DATAPATH: - case RESET_TYPE_MC_BIST: - case RESET_TYPE_MCDI_TIMEOUT: - method = type; - netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", - RESET_TYPE(method)); - break; - default: - method = efx->type->map_reset_reason(type); - netif_dbg(efx, drv, efx->net_dev, - "scheduling %s reset for %s\n", - RESET_TYPE(method), RESET_TYPE(type)); - break; - } - - set_bit(method, &efx->reset_pending); - smp_mb(); /* ensure we change reset_pending before checking state */ - - /* If we're not READY then just leave the flags set as the cue - * to abort probing or reschedule the reset later. - */ - if (READ_ONCE(efx->state) != STATE_READY) - return; - - /* efx_process_channel() will no longer read events once a - * reset is scheduled. So switch back to poll'd MCDI completions. */ - efx_mcdi_mode_poll(efx); - - queue_work(reset_workqueue, &efx->reset_work); -} - -/************************************************************************** - * * List of NICs we support * **************************************************************************/ @@ -3169,139 +1029,10 @@ static const struct pci_device_id efx_pci_table[] = { /************************************************************************** * - * Dummy PHY/MAC operations - * - * Can be used for some unimplemented operations - * Needed so all function pointers are valid and do not have to be tested - * before use - * - **************************************************************************/ -int efx_port_dummy_op_int(struct efx_nic *efx) -{ - return 0; -} -void efx_port_dummy_op_void(struct efx_nic *efx) {} - -static bool efx_port_dummy_op_poll(struct efx_nic *efx) -{ - return false; -} - -static const struct efx_phy_operations efx_dummy_phy_operations = { - .init = efx_port_dummy_op_int, - .reconfigure = efx_port_dummy_op_int, - .poll = efx_port_dummy_op_poll, - .fini = efx_port_dummy_op_void, -}; - -/************************************************************************** - * * Data housekeeping * **************************************************************************/ -/* This zeroes out and then fills in the invariants in a struct - * efx_nic (including all sub-structures). - */ -static int efx_init_struct(struct efx_nic *efx, - struct pci_dev *pci_dev, struct net_device *net_dev) -{ - int rc = -ENOMEM, i; - - /* Initialise common structures */ - INIT_LIST_HEAD(&efx->node); - INIT_LIST_HEAD(&efx->secondary_list); - spin_lock_init(&efx->biu_lock); -#ifdef CONFIG_SFC_MTD - INIT_LIST_HEAD(&efx->mtd_list); -#endif - INIT_WORK(&efx->reset_work, efx_reset_work); - INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor); - INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work); - efx->pci_dev = pci_dev; - efx->msg_enable = debug; - efx->state = STATE_UNINIT; - strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name)); - - efx->net_dev = net_dev; - efx->rx_prefix_size = efx->type->rx_prefix_size; - efx->rx_ip_align = - NET_IP_ALIGN ? 
(efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0; - efx->rx_packet_hash_offset = - efx->type->rx_hash_offset - efx->type->rx_prefix_size; - efx->rx_packet_ts_offset = - efx->type->rx_ts_offset - efx->type->rx_prefix_size; - INIT_LIST_HEAD(&efx->rss_context.list); - mutex_init(&efx->rss_lock); - spin_lock_init(&efx->stats_lock); - efx->vi_stride = EFX_DEFAULT_VI_STRIDE; - efx->num_mac_stats = MC_CMD_MAC_NSTATS; - BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END); - mutex_init(&efx->mac_lock); -#ifdef CONFIG_RFS_ACCEL - mutex_init(&efx->rps_mutex); - spin_lock_init(&efx->rps_hash_lock); - /* Failure to allocate is not fatal, but may degrade ARFS performance */ - efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE, - sizeof(*efx->rps_hash_table), GFP_KERNEL); -#endif - efx->phy_op = &efx_dummy_phy_operations; - efx->mdio.dev = net_dev; - INIT_WORK(&efx->mac_work, efx_mac_work); - init_waitqueue_head(&efx->flush_wq); - - for (i = 0; i < EFX_MAX_CHANNELS; i++) { - efx->channel[i] = efx_alloc_channel(efx, i, NULL); - if (!efx->channel[i]) - goto fail; - efx->msi_context[i].efx = efx; - efx->msi_context[i].index = i; - } - - /* Higher numbered interrupt modes are less capable! */ - if (WARN_ON_ONCE(efx->type->max_interrupt_mode > - efx->type->min_interrupt_mode)) { - rc = -EIO; - goto fail; - } - efx->interrupt_mode = max(efx->type->max_interrupt_mode, - interrupt_mode); - efx->interrupt_mode = min(efx->type->min_interrupt_mode, - interrupt_mode); - - /* Would be good to use the net_dev name, but we're too early */ - snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s", - pci_name(pci_dev)); - efx->workqueue = create_singlethread_workqueue(efx->workqueue_name); - if (!efx->workqueue) - goto fail; - - return 0; - -fail: - efx_fini_struct(efx); - return rc; -} - -static void efx_fini_struct(struct efx_nic *efx) -{ - int i; - -#ifdef CONFIG_RFS_ACCEL - kfree(efx->rps_hash_table); -#endif - - for (i = 0; i < EFX_MAX_CHANNELS; i++) - kfree(efx->channel[i]); - - kfree(efx->vpd_sn); - - if (efx->workqueue) { - destroy_workqueue(efx->workqueue); - efx->workqueue = NULL; - } -} - void efx_update_sw_stats(struct efx_nic *efx, u64 *stats) { u64 n_rx_nodesc_trunc = 0; @@ -3313,197 +1044,6 @@ void efx_update_sw_stats(struct efx_nic *efx, u64 *stats) stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops); } -bool efx_filter_spec_equal(const struct efx_filter_spec *left, - const struct efx_filter_spec *right) -{ - if ((left->match_flags ^ right->match_flags) | - ((left->flags ^ right->flags) & - (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))) - return false; - - return memcmp(&left->outer_vid, &right->outer_vid, - sizeof(struct efx_filter_spec) - - offsetof(struct efx_filter_spec, outer_vid)) == 0; -} - -u32 efx_filter_spec_hash(const struct efx_filter_spec *spec) -{ - BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3); - return jhash2((const u32 *)&spec->outer_vid, - (sizeof(struct efx_filter_spec) - - offsetof(struct efx_filter_spec, outer_vid)) / 4, - 0); -} - -#ifdef CONFIG_RFS_ACCEL -bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, - bool *force) -{ - if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) { - /* ARFS is currently updating this entry, leave it */ - return false; - } - if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) { - /* ARFS tried and failed to update this, so it's probably out - * of date. Remove the filter and the ARFS rule entry. 
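efx_filter_spec_equal() and efx_filter_spec_hash() above both exploit the struct layout: everything from outer_vid to the end of the spec is compared or hashed as one flat byte range, so match fields appended after outer_vid are picked up automatically. A toy stand-in for the layout trick (the struct here is illustrative, not the real efx_filter_spec):

#include <stddef.h>
#include <string.h>

struct spec_sketch {               /* toy layout, not the real struct */
        unsigned int match_flags;
        unsigned int flags;        /* compared separately above */
        unsigned short outer_vid;  /* start of the flat tail */
        unsigned char dst_mac[6];
};

static int tail_equal(const struct spec_sketch *a, const struct spec_sketch *b)
{
        /* One memcmp covers outer_vid and everything after it.  This
         * assumes specs are zero-initialised (as the driver does), so
         * padding bytes compare equal too. */
        return memcmp(&a->outer_vid, &b->outer_vid,
                      sizeof(*a) - offsetof(struct spec_sketch, outer_vid)) == 0;
}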
- */ - rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING; - *force = true; - return true; - } else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */ - /* ARFS has moved on, so old filter is not needed. Since we did - * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will - * not be removed by efx_rps_hash_del() subsequently. - */ - *force = true; - return true; - } - /* Remove it iff ARFS wants to. */ - return true; -} - -static -struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx, - const struct efx_filter_spec *spec) -{ - u32 hash = efx_filter_spec_hash(spec); - - lockdep_assert_held(&efx->rps_hash_lock); - if (!efx->rps_hash_table) - return NULL; - return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE]; -} - -struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, - const struct efx_filter_spec *spec) -{ - struct efx_arfs_rule *rule; - struct hlist_head *head; - struct hlist_node *node; - - head = efx_rps_hash_bucket(efx, spec); - if (!head) - return NULL; - hlist_for_each(node, head) { - rule = container_of(node, struct efx_arfs_rule, node); - if (efx_filter_spec_equal(spec, &rule->spec)) - return rule; - } - return NULL; -} - -struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, - const struct efx_filter_spec *spec, - bool *new) -{ - struct efx_arfs_rule *rule; - struct hlist_head *head; - struct hlist_node *node; - - head = efx_rps_hash_bucket(efx, spec); - if (!head) - return NULL; - hlist_for_each(node, head) { - rule = container_of(node, struct efx_arfs_rule, node); - if (efx_filter_spec_equal(spec, &rule->spec)) { - *new = false; - return rule; - } - } - rule = kmalloc(sizeof(*rule), GFP_ATOMIC); - *new = true; - if (rule) { - memcpy(&rule->spec, spec, sizeof(rule->spec)); - hlist_add_head(&rule->node, head); - } - return rule; -} - -void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec) -{ - struct efx_arfs_rule *rule; - struct hlist_head *head; - struct hlist_node *node; - - head = efx_rps_hash_bucket(efx, spec); - if (WARN_ON(!head)) - return; - hlist_for_each(node, head) { - rule = container_of(node, struct efx_arfs_rule, node); - if (efx_filter_spec_equal(spec, &rule->spec)) { - /* Someone already reused the entry. We know that if - * this check doesn't fire (i.e. filter_id == REMOVING) - * then the REMOVING mark was put there by our caller, - * because caller is holding a lock on filter table and - * only holders of that lock set REMOVING. - */ - if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING) - return; - hlist_del(node); - kfree(rule); - return; - } - } - /* We didn't find it. */ - WARN_ON(1); -} -#endif - -/* RSS contexts. We're using linked lists and crappy O(n) algorithms, because - * (a) this is an infrequent control-plane operation and (b) n is small (max 64) - */ -struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx) -{ - struct list_head *head = &efx->rss_context.list; - struct efx_rss_context *ctx, *new; - u32 id = 1; /* Don't use zero, that refers to the master RSS context */ - - WARN_ON(!mutex_is_locked(&efx->rss_lock)); - - /* Search for first gap in the numbering */ - list_for_each_entry(ctx, head, list) { - if (ctx->user_id != id) - break; - id++; - /* Check for wrap. If this happens, we have nearly 2^32 - * allocated RSS contexts, which seems unlikely. 
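The ID allocation in efx_alloc_rss_context_entry() above walks the sorted context list and takes the first gap in the numbering, so IDs stay small and are reused after deletion. In miniature (an array standing in for the kernel list):

static unsigned int first_gap_example(void)
{
        unsigned int ids[] = {1, 2, 4};  /* existing user_ids, sorted */
        unsigned int n = 3, i, id = 1;   /* 0 is the master RSS context */

        for (i = 0; i < n && ids[i] == id; i++)
                id++;                    /* stops at the gap: id == 3 */
        return id;
}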
- */ - if (WARN_ON_ONCE(!id)) - return NULL; - } - - /* Create the new entry */ - new = kmalloc(sizeof(struct efx_rss_context), GFP_KERNEL); - if (!new) - return NULL; - new->context_id = EFX_EF10_RSS_CONTEXT_INVALID; - new->rx_hash_udp_4tuple = false; - - /* Insert the new entry into the gap */ - new->user_id = id; - list_add_tail(&new->list, &ctx->list); - return new; -} - -struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id) -{ - struct list_head *head = &efx->rss_context.list; - struct efx_rss_context *ctx; - - WARN_ON(!mutex_is_locked(&efx->rss_lock)); - - list_for_each_entry(ctx, head, list) - if (ctx->user_id == id) - return ctx; - return NULL; -} - -void efx_free_rss_context_entry(struct efx_rss_context *ctx) -{ - list_del(&ctx->list); - kfree(ctx); -} - /************************************************************************** * * PCI interface @@ -3519,7 +1059,7 @@ static void efx_pci_remove_main(struct efx_nic *efx) * are not READY. */ BUG_ON(efx->state == STATE_READY); - cancel_work_sync(&efx->reset_work); + efx_flush_reset_workqueue(efx); efx_disable_interrupts(efx); efx_clear_interrupt_affinity(efx); @@ -3559,7 +1099,7 @@ static void efx_pci_remove(struct pci_dev *pci_dev) efx_pci_remove_main(efx); - efx_fini_io(efx); + efx_fini_io(efx, efx->type->mem_bar(efx)); netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n"); efx_fini_struct(efx); @@ -3782,7 +1322,8 @@ static int efx_pci_probe(struct pci_dev *pci_dev, efx_probe_vpd_strings(efx); /* Set up basic I/O (BAR mappings etc) */ - rc = efx_init_io(efx); + rc = efx_init_io(efx, efx->type->mem_bar(efx), efx->type->max_dma_mask, + efx->type->mem_map_size(efx)); if (rc) goto fail2; @@ -3826,7 +1367,7 @@ static int efx_pci_probe(struct pci_dev *pci_dev, return 0; fail3: - efx_fini_io(efx); + efx_fini_io(efx, efx->type->mem_bar(efx)); fail2: efx_fini_struct(efx); fail1: @@ -3904,7 +1445,7 @@ static int efx_pm_thaw(struct device *dev) rtnl_unlock(); /* Reschedule any quenched resets scheduled during efx_pm_freeze() */ - queue_work(reset_workqueue, &efx->reset_work); + efx_queue_reset_work(efx); return 0; @@ -4083,10 +1624,6 @@ static struct pci_driver efx_pci_driver = { * *************************************************************************/ -module_param(interrupt_mode, uint, 0444); -MODULE_PARM_DESC(interrupt_mode, - "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)"); - static int __init efx_init_module(void) { int rc; @@ -4103,11 +1640,9 @@ static int __init efx_init_module(void) goto err_sriov; #endif - reset_workqueue = create_singlethread_workqueue("sfc_reset"); - if (!reset_workqueue) { - rc = -ENOMEM; + rc = efx_create_reset_workqueue(); + if (rc) goto err_reset; - } rc = pci_register_driver(&efx_pci_driver); if (rc < 0) @@ -4116,7 +1651,7 @@ static int __init efx_init_module(void) return 0; err_pci: - destroy_workqueue(reset_workqueue); + efx_destroy_reset_workqueue(); err_reset: #ifdef CONFIG_SFC_SRIOV efx_fini_sriov(); @@ -4132,7 +1667,7 @@ static void __exit efx_exit_module(void) printk(KERN_INFO "Solarflare NET driver unloading\n"); pci_unregister_driver(&efx_pci_driver); - destroy_workqueue(reset_workqueue); + efx_destroy_reset_workqueue(); #ifdef CONFIG_SFC_SRIOV efx_fini_sriov(); #endif diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index 2dd8d5002315..f1bdb04efbe4 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -15,31 +15,17 @@ int efx_net_open(struct net_device *net_dev); int efx_net_stop(struct net_device *net_dev); /* 
TX */ -int efx_probe_tx_queue(struct efx_tx_queue *tx_queue); -void efx_remove_tx_queue(struct efx_tx_queue *tx_queue); -void efx_init_tx_queue(struct efx_tx_queue *tx_queue); void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue); -void efx_fini_tx_queue(struct efx_tx_queue *tx_queue); netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev); netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb); void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type, void *type_data); -unsigned int efx_tx_max_skb_descs(struct efx_nic *efx); extern unsigned int efx_piobuf_size; extern bool efx_separate_tx_channels; /* RX */ -void efx_set_default_rx_indir_table(struct efx_nic *efx, - struct efx_rss_context *ctx); -void efx_rx_config_page_split(struct efx_nic *efx); -int efx_probe_rx_queue(struct efx_rx_queue *rx_queue); -void efx_remove_rx_queue(struct efx_rx_queue *rx_queue); -void efx_init_rx_queue(struct efx_rx_queue *rx_queue); -void efx_fini_rx_queue(struct efx_rx_queue *rx_queue); -void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic); -void efx_rx_slow_fill(struct timer_list *t); void __efx_rx_packet(struct efx_channel *channel); void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, unsigned int n_frags, unsigned int len, u16 flags); @@ -48,7 +34,6 @@ static inline void efx_rx_flush_packet(struct efx_channel *channel) if (channel->rx_pkt_n_frags) __efx_rx_packet(channel); } -void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue); #define EFX_MAX_DMAQ_SIZE 4096UL #define EFX_DEFAULT_DMAQ_SIZE 1024UL @@ -80,8 +65,6 @@ static inline bool efx_rss_enabled(struct efx_nic *efx) /* Filters */ -void efx_mac_reconfigure(struct efx_nic *efx); - /** * efx_filter_insert_filter - add or replace a filter * @efx: NIC in which to insert the filter @@ -186,58 +169,17 @@ static inline void efx_filter_rfs_expire(struct work_struct *data) static inline void efx_filter_rfs_expire(struct work_struct *data) {} #define efx_filter_rfs_enabled() 0 #endif -bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec); - -bool efx_filter_spec_equal(const struct efx_filter_spec *left, - const struct efx_filter_spec *right); -u32 efx_filter_spec_hash(const struct efx_filter_spec *spec); - -#ifdef CONFIG_RFS_ACCEL -bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, - bool *force); - -struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, - const struct efx_filter_spec *spec); - -/* @new is written to indicate if entry was newly added (true) or if an old - * entry was found and returned (false). 
- */ -struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, - const struct efx_filter_spec *spec, - bool *new); - -void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec); -#endif /* RSS contexts */ -struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx); -struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id); -void efx_free_rss_context_entry(struct efx_rss_context *ctx); static inline bool efx_rss_active(struct efx_rss_context *ctx) { - return ctx->context_id != EFX_EF10_RSS_CONTEXT_INVALID; + return ctx->context_id != EFX_MCDI_RSS_CONTEXT_INVALID; } -/* Channels */ -int efx_channel_dummy_op_int(struct efx_channel *channel); -void efx_channel_dummy_op_void(struct efx_channel *channel); -int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries); - -/* Ports */ -int efx_reconfigure_port(struct efx_nic *efx); -int __efx_reconfigure_port(struct efx_nic *efx); - /* Ethtool support */ extern const struct ethtool_ops efx_ethtool_ops; -/* Reset handling */ -int efx_reset(struct efx_nic *efx, enum reset_type method); -void efx_reset_down(struct efx_nic *efx, enum reset_type method); -int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok); -int efx_try_recovery(struct efx_nic *efx); - /* Global */ -void efx_schedule_reset(struct efx_nic *efx, enum reset_type type); unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs); unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks); int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs, @@ -245,8 +187,6 @@ int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs, bool rx_may_override_tx); void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs, unsigned int *rx_usecs, bool *rx_adaptive); -void efx_stop_eventq(struct efx_channel *channel); -void efx_start_eventq(struct efx_channel *channel); /* Dummy PHY ops for PHY drivers */ int efx_port_dummy_op_int(struct efx_nic *efx); @@ -293,9 +233,6 @@ static inline void efx_schedule_channel_irq(struct efx_channel *channel) efx_schedule_channel(channel); } -void efx_link_status_changed(struct efx_nic *efx); -void efx_link_set_advertising(struct efx_nic *efx, - const unsigned long *advertising); void efx_link_clear_advertising(struct efx_nic *efx); void efx_link_set_wanted_fc(struct efx_nic *efx, u8); diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c new file mode 100644 index 000000000000..aeb5e8aa2f2a --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -0,0 +1,1234 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
+ */ + +#include "net_driver.h" +#include <linux/module.h> +#include "efx_channels.h" +#include "efx.h" +#include "efx_common.h" +#include "tx_common.h" +#include "rx_common.h" +#include "nic.h" +#include "sriov.h" + +/* This is the first interrupt mode to try out of: + * 0 => MSI-X + * 1 => MSI + * 2 => legacy + */ +static unsigned int interrupt_mode; +module_param(interrupt_mode, uint, 0444); +MODULE_PARM_DESC(interrupt_mode, + "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)"); + +/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS), + * i.e. the number of CPUs among which we may distribute simultaneous + * interrupt handling. + * + * Cards without MSI-X will only target one CPU via legacy or MSI interrupt. + * The default (0) means to assign an interrupt to each core. + */ +static unsigned int rss_cpus; +module_param(rss_cpus, uint, 0444); +MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling"); + +static unsigned int irq_adapt_low_thresh = 8000; +module_param(irq_adapt_low_thresh, uint, 0644); +MODULE_PARM_DESC(irq_adapt_low_thresh, + "Threshold score for reducing IRQ moderation"); + +static unsigned int irq_adapt_high_thresh = 16000; +module_param(irq_adapt_high_thresh, uint, 0644); +MODULE_PARM_DESC(irq_adapt_high_thresh, + "Threshold score for increasing IRQ moderation"); + +/* This is the weight assigned to each of the (per-channel) virtual + * NAPI devices. + */ +static int napi_weight = 64; + +/*************** + * Housekeeping + ***************/ + +int efx_channel_dummy_op_int(struct efx_channel *channel) +{ + return 0; +} + +void efx_channel_dummy_op_void(struct efx_channel *channel) +{ +} + +static const struct efx_channel_type efx_default_channel_type = { + .pre_probe = efx_channel_dummy_op_int, + .post_remove = efx_channel_dummy_op_void, + .get_name = efx_get_channel_name, + .copy = efx_copy_channel, + .want_txqs = efx_default_channel_want_txqs, + .keep_eventq = false, + .want_pio = true, +}; + +/************* + * INTERRUPTS + *************/ + +static unsigned int efx_wanted_parallelism(struct efx_nic *efx) +{ + cpumask_var_t thread_mask; + unsigned int count; + int cpu; + + if (rss_cpus) { + count = rss_cpus; + } else { + if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) { + netif_warn(efx, probe, efx->net_dev, + "RSS disabled due to allocation failure\n"); + return 1; + } + + count = 0; + for_each_online_cpu(cpu) { + if (!cpumask_test_cpu(cpu, thread_mask)) { + ++count; + cpumask_or(thread_mask, thread_mask, + topology_sibling_cpumask(cpu)); + } + } + + free_cpumask_var(thread_mask); + } + + if (count > EFX_MAX_RX_QUEUES) { + netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn, + "Reducing number of rx queues from %u to %u.\n", + count, EFX_MAX_RX_QUEUES); + count = EFX_MAX_RX_QUEUES; + } + + /* If RSS is requested for the PF *and* VFs then we can't write RSS + * table entries that are inaccessible to VFs + */ +#ifdef CONFIG_SFC_SRIOV + if (efx->type->sriov_wanted) { + if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 && + count > efx_vf_size(efx)) { + netif_warn(efx, probe, efx->net_dev, + "Reducing number of RSS channels from %u to %u for " + "VF support. 
Increase vf-msix-limit to use more " + "channels on the PF.\n", + count, efx_vf_size(efx)); + count = efx_vf_size(efx); + } + } +#endif + + return count; +} + +static int efx_allocate_msix_channels(struct efx_nic *efx, + unsigned int max_channels, + unsigned int extra_channels, + unsigned int parallelism) +{ + unsigned int n_channels = parallelism; + int vec_count; + int n_xdp_tx; + int n_xdp_ev; + + if (efx_separate_tx_channels) + n_channels *= 2; + n_channels += extra_channels; + + /* To allow XDP transmit to happen from arbitrary NAPI contexts + * we allocate a TX queue per CPU. We share event queues across + * multiple tx queues, assuming tx and ev queues are both + * maximum size. + */ + + n_xdp_tx = num_possible_cpus(); + n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES); + + vec_count = pci_msix_vec_count(efx->pci_dev); + if (vec_count < 0) + return vec_count; + + max_channels = min_t(unsigned int, vec_count, max_channels); + + /* Check resources. + * We need a channel per event queue, plus a VI per tx queue. + * This may be more pessimistic than it needs to be. + */ + if (n_channels + n_xdp_ev > max_channels) { + netif_err(efx, drv, efx->net_dev, + "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n", + n_xdp_ev, n_channels, max_channels); + efx->n_xdp_channels = 0; + efx->xdp_tx_per_channel = 0; + efx->xdp_tx_queue_count = 0; + } else { + efx->n_xdp_channels = n_xdp_ev; + efx->xdp_tx_per_channel = EFX_TXQ_TYPES; + efx->xdp_tx_queue_count = n_xdp_tx; + n_channels += n_xdp_ev; + netif_dbg(efx, drv, efx->net_dev, + "Allocating %d TX and %d event queues for XDP\n", + n_xdp_tx, n_xdp_ev); + } + + if (vec_count < n_channels) { + netif_err(efx, drv, efx->net_dev, + "WARNING: Insufficient MSI-X vectors available (%d < %u).\n", + vec_count, n_channels); + netif_err(efx, drv, efx->net_dev, + "WARNING: Performance may be reduced.\n"); + n_channels = vec_count; + } + + n_channels = min(n_channels, max_channels); + + efx->n_channels = n_channels; + + /* Ignore XDP tx channels when creating rx channels. */ + n_channels -= efx->n_xdp_channels; + + if (efx_separate_tx_channels) { + efx->n_tx_channels = + min(max(n_channels / 2, 1U), + efx->max_tx_channels); + efx->tx_channel_offset = + n_channels - efx->n_tx_channels; + efx->n_rx_channels = + max(n_channels - + efx->n_tx_channels, 1U); + } else { + efx->n_tx_channels = min(n_channels, efx->max_tx_channels); + efx->tx_channel_offset = 0; + efx->n_rx_channels = n_channels; + } + + efx->n_rx_channels = min(efx->n_rx_channels, parallelism); + efx->n_tx_channels = min(efx->n_tx_channels, parallelism); + + efx->xdp_channel_offset = n_channels; + + netif_dbg(efx, drv, efx->net_dev, + "Allocating %u RX channels\n", + efx->n_rx_channels); + + return efx->n_channels; +} + +/* Probe the number and type of interrupts we are able to obtain, and + * the resulting numbers of channels and RX queues. 
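+ * (Worked example with assumed numbers, for illustration only: with 16
+ * possible CPUs and EFX_TXQ_TYPES taken to be 4, the XDP sizing in
+ * efx_allocate_msix_channels() above requests n_xdp_tx = 16 TX queues
+ * served by n_xdp_ev = DIV_ROUND_UP(16, 4) = 4 extra event queues,
+ * which must fit alongside the ordinary channels within the MSI-X
+ * vector budget.)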
+ */ +int efx_probe_interrupts(struct efx_nic *efx) +{ + unsigned int extra_channels = 0; + unsigned int rss_spread; + unsigned int i, j; + int rc; + + for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) + if (efx->extra_channel_type[i]) + ++extra_channels; + + if (efx->interrupt_mode == EFX_INT_MODE_MSIX) { + unsigned int parallelism = efx_wanted_parallelism(efx); + struct msix_entry xentries[EFX_MAX_CHANNELS]; + unsigned int n_channels; + + rc = efx_allocate_msix_channels(efx, efx->max_channels, + extra_channels, parallelism); + if (rc >= 0) { + n_channels = rc; + for (i = 0; i < n_channels; i++) + xentries[i].entry = i; + rc = pci_enable_msix_range(efx->pci_dev, xentries, 1, + n_channels); + } + if (rc < 0) { + /* Fall back to single channel MSI */ + netif_err(efx, drv, efx->net_dev, + "could not enable MSI-X\n"); + if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI) + efx->interrupt_mode = EFX_INT_MODE_MSI; + else + return rc; + } else if (rc < n_channels) { + netif_err(efx, drv, efx->net_dev, + "WARNING: Insufficient MSI-X vectors" + " available (%d < %u).\n", rc, n_channels); + netif_err(efx, drv, efx->net_dev, + "WARNING: Performance may be reduced.\n"); + n_channels = rc; + } + + if (rc > 0) { + for (i = 0; i < efx->n_channels; i++) + efx_get_channel(efx, i)->irq = + xentries[i].vector; + } + } + + /* Try single interrupt MSI */ + if (efx->interrupt_mode == EFX_INT_MODE_MSI) { + efx->n_channels = 1; + efx->n_rx_channels = 1; + efx->n_tx_channels = 1; + efx->n_xdp_channels = 0; + efx->xdp_channel_offset = efx->n_channels; + rc = pci_enable_msi(efx->pci_dev); + if (rc == 0) { + efx_get_channel(efx, 0)->irq = efx->pci_dev->irq; + } else { + netif_err(efx, drv, efx->net_dev, + "could not enable MSI\n"); + if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY) + efx->interrupt_mode = EFX_INT_MODE_LEGACY; + else + return rc; + } + } + + /* Assume legacy interrupts */ + if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) { + efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0); + efx->n_rx_channels = 1; + efx->n_tx_channels = 1; + efx->n_xdp_channels = 0; + efx->xdp_channel_offset = efx->n_channels; + efx->legacy_irq = efx->pci_dev->irq; + } + + /* Assign extra channels if possible, before XDP channels */ + efx->n_extra_tx_channels = 0; + j = efx->xdp_channel_offset; + for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) { + if (!efx->extra_channel_type[i]) + continue; + if (j <= efx->tx_channel_offset + efx->n_tx_channels) { + efx->extra_channel_type[i]->handle_no_channel(efx); + } else { + --j; + efx_get_channel(efx, j)->type = + efx->extra_channel_type[i]; + if (efx_channel_has_tx_queues(efx_get_channel(efx, j))) + efx->n_extra_tx_channels++; + } + } + + rss_spread = efx->n_rx_channels; + /* RSS might be usable on VFs even if it is disabled on the PF */ +#ifdef CONFIG_SFC_SRIOV + if (efx->type->sriov_wanted) { + efx->rss_spread = ((rss_spread > 1 || + !efx->type->sriov_wanted(efx)) ? 
+ rss_spread : efx_vf_size(efx)); + return 0; + } +#endif + efx->rss_spread = rss_spread; + + return 0; +} + +#if defined(CONFIG_SMP) +void efx_set_interrupt_affinity(struct efx_nic *efx) +{ + struct efx_channel *channel; + unsigned int cpu; + + efx_for_each_channel(channel, efx) { + cpu = cpumask_local_spread(channel->channel, + pcibus_to_node(efx->pci_dev->bus)); + irq_set_affinity_hint(channel->irq, cpumask_of(cpu)); + } +} + +void efx_clear_interrupt_affinity(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + irq_set_affinity_hint(channel->irq, NULL); +} +#else +void +efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused))) +{ +} + +void +efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused))) +{ +} +#endif /* CONFIG_SMP */ + +void efx_remove_interrupts(struct efx_nic *efx) +{ + struct efx_channel *channel; + + /* Remove MSI/MSI-X interrupts */ + efx_for_each_channel(channel, efx) + channel->irq = 0; + pci_disable_msi(efx->pci_dev); + pci_disable_msix(efx->pci_dev); + + /* Remove legacy interrupt */ + efx->legacy_irq = 0; +} + +/*************** + * EVENT QUEUES + ***************/ + +/* Create event queue + * Event queue memory allocations are done only once. If the channel + * is reset, the memory buffer will be reused; this guards against + * errors during channel reset and also simplifies interrupt handling. + */ +int efx_probe_eventq(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + unsigned long entries; + + netif_dbg(efx, probe, efx->net_dev, + "chan %d create event queue\n", channel->channel); + + /* Build an event queue with room for one event per tx and rx buffer, + * plus some extra for link state events and MCDI completions. + */ + entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128); + EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE); + channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1; + + return efx_nic_probe_eventq(channel); +} + +/* Prepare channel's event queue */ +int efx_init_eventq(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + int rc; + + EFX_WARN_ON_PARANOID(channel->eventq_init); + + netif_dbg(efx, drv, efx->net_dev, + "chan %d init event queue\n", channel->channel); + + rc = efx_nic_init_eventq(channel); + if (rc == 0) { + efx->type->push_irq_moderation(channel); + channel->eventq_read_ptr = 0; + channel->eventq_init = true; + } + return rc; +} + +/* Enable event queue processing and NAPI */ +void efx_start_eventq(struct efx_channel *channel) +{ + netif_dbg(channel->efx, ifup, channel->efx->net_dev, + "chan %d start event queue\n", channel->channel); + + /* Make sure the NAPI handler sees the enabled flag set */ + channel->enabled = true; + smp_wmb(); + + napi_enable(&channel->napi_str); + efx_nic_eventq_read_ack(channel); +} + +/* Disable event queue processing and NAPI */ +void efx_stop_eventq(struct efx_channel *channel) +{ + if (!channel->enabled) + return; + + napi_disable(&channel->napi_str); + channel->enabled = false; +} + +void efx_fini_eventq(struct efx_channel *channel) +{ + if (!channel->eventq_init) + return; + + netif_dbg(channel->efx, drv, channel->efx->net_dev, + "chan %d fini event queue\n", channel->channel); + + efx_nic_fini_eventq(channel); + channel->eventq_init = false; +} + +void efx_remove_eventq(struct efx_channel *channel) +{ + netif_dbg(channel->efx, drv, channel->efx->net_dev, + "chan %d remove event queue\n", channel->channel); + + efx_nic_remove_eventq(channel); +} + 
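(A minimal standalone sketch of the event-queue sizing performed by efx_probe_eventq() above, under stated assumptions: the driver's default 1024-entry RX and TX rings, and EFX_MIN_EVQ_SIZE taken as 512. The helper and names below are local to this sketch, not driver code.)

#include <stdio.h>

/* Local stand-in for the kernel's roundup_pow_of_two() for the
 * small values used here.
 */
static unsigned long roundup_pow2(unsigned long x)
{
	unsigned long r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

int main(void)
{
	unsigned long rxq_entries = 1024;	/* assumed default ring sizes */
	unsigned long txq_entries = 1024;
	unsigned long min_evq = 512;		/* assumed EFX_MIN_EVQ_SIZE */
	unsigned long entries;

	/* Room for one event per RX and TX buffer, plus 128 spare
	 * slots for link-state events and MCDI completions.
	 */
	entries = roundup_pow2(rxq_entries + txq_entries + 128);
	if (entries < min_evq)
		entries = min_evq;
	printf("evq entries=%lu eventq_mask=%#lx\n", entries, entries - 1);
	return 0;
}

(With these defaults the sum 2176 rounds up to 4096 entries, giving an eventq_mask of 0xfff; the driver additionally warns if the result would exceed EFX_MAX_EVQ_SIZE.)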
+/************************************************************************** + * + * Channel handling + * + *************************************************************************/ + +/* Allocate and initialise a channel structure. */ +struct efx_channel * +efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel) +{ + struct efx_rx_queue *rx_queue; + struct efx_tx_queue *tx_queue; + struct efx_channel *channel; + int j; + + channel = kzalloc(sizeof(*channel), GFP_KERNEL); + if (!channel) + return NULL; + + channel->efx = efx; + channel->channel = i; + channel->type = &efx_default_channel_type; + + for (j = 0; j < EFX_TXQ_TYPES; j++) { + tx_queue = &channel->tx_queue[j]; + tx_queue->efx = efx; + tx_queue->queue = i * EFX_TXQ_TYPES + j; + tx_queue->channel = channel; + } + +#ifdef CONFIG_RFS_ACCEL + INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire); +#endif + + rx_queue = &channel->rx_queue; + rx_queue->efx = efx; + timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); + + return channel; +} + +int efx_init_channels(struct efx_nic *efx) +{ + unsigned int i; + + for (i = 0; i < EFX_MAX_CHANNELS; i++) { + efx->channel[i] = efx_alloc_channel(efx, i, NULL); + if (!efx->channel[i]) + return -ENOMEM; + efx->msi_context[i].efx = efx; + efx->msi_context[i].index = i; + } + + /* Higher numbered interrupt modes are less capable! */ + if (WARN_ON_ONCE(efx->type->max_interrupt_mode > + efx->type->min_interrupt_mode)) { + return -EIO; + } + efx->interrupt_mode = max(efx->type->max_interrupt_mode, + interrupt_mode); + efx->interrupt_mode = min(efx->type->min_interrupt_mode, + interrupt_mode); + + return 0; +} + +void efx_fini_channels(struct efx_nic *efx) +{ + unsigned int i; + + for (i = 0; i < EFX_MAX_CHANNELS; i++) + if (efx->channel[i]) { + kfree(efx->channel[i]); + efx->channel[i] = NULL; + } +} + +/* Allocate and initialise a channel structure, copying parameters + * (but not resources) from an old channel structure. 
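+ * ("Resources" here means the DMA rings, buffers and NAPI state:
+ * efx_copy_channel() below clears those pointers, and they are
+ * re-created for the new structure by efx_probe_channel() and
+ * efx_init_napi_channel().)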
+ */ +struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel) +{ + struct efx_rx_queue *rx_queue; + struct efx_tx_queue *tx_queue; + struct efx_channel *channel; + int j; + + channel = kmalloc(sizeof(*channel), GFP_KERNEL); + if (!channel) + return NULL; + + *channel = *old_channel; + + channel->napi_dev = NULL; + INIT_HLIST_NODE(&channel->napi_str.napi_hash_node); + channel->napi_str.napi_id = 0; + channel->napi_str.state = 0; + memset(&channel->eventq, 0, sizeof(channel->eventq)); + + for (j = 0; j < EFX_TXQ_TYPES; j++) { + tx_queue = &channel->tx_queue[j]; + if (tx_queue->channel) + tx_queue->channel = channel; + tx_queue->buffer = NULL; + memset(&tx_queue->txd, 0, sizeof(tx_queue->txd)); + } + + rx_queue = &channel->rx_queue; + rx_queue->buffer = NULL; + memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd)); + timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); +#ifdef CONFIG_RFS_ACCEL + INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire); +#endif + + return channel; +} + +static int efx_probe_channel(struct efx_channel *channel) +{ + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + int rc; + + netif_dbg(channel->efx, probe, channel->efx->net_dev, + "creating channel %d\n", channel->channel); + + rc = channel->type->pre_probe(channel); + if (rc) + goto fail; + + rc = efx_probe_eventq(channel); + if (rc) + goto fail; + + efx_for_each_channel_tx_queue(tx_queue, channel) { + rc = efx_probe_tx_queue(tx_queue); + if (rc) + goto fail; + } + + efx_for_each_channel_rx_queue(rx_queue, channel) { + rc = efx_probe_rx_queue(rx_queue); + if (rc) + goto fail; + } + + channel->rx_list = NULL; + + return 0; + +fail: + efx_remove_channel(channel); + return rc; +} + +void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len) +{ + struct efx_nic *efx = channel->efx; + const char *type; + int number; + + number = channel->channel; + + if (number >= efx->xdp_channel_offset && + !WARN_ON_ONCE(!efx->n_xdp_channels)) { + type = "-xdp"; + number -= efx->xdp_channel_offset; + } else if (efx->tx_channel_offset == 0) { + type = ""; + } else if (number < efx->tx_channel_offset) { + type = "-rx"; + } else { + type = "-tx"; + number -= efx->tx_channel_offset; + } + snprintf(buf, len, "%s%s-%d", efx->name, type, number); +} + +void efx_set_channel_names(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + channel->type->get_name(channel, + efx->msi_context[channel->channel].name, + sizeof(efx->msi_context[0].name)); +} + +int efx_probe_channels(struct efx_nic *efx) +{ + struct efx_channel *channel; + int rc; + + /* Restart special buffer allocation */ + efx->next_buffer_table = 0; + + /* Probe channels in reverse, so that any 'extra' channels + * use the start of the buffer table. This allows the traffic + * channels to be resized without moving them or wasting the + * entries before them. 
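+ * (For instance, with one PTP "extra" channel and four traffic
+ * channels, the PTP channel is probed first and occupies the lowest
+ * buffer-table entries; the traffic channels that follow can then
+ * grow or shrink on a later reallocation without disturbing it.)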
+ */ + efx_for_each_channel_rev(channel, efx) { + rc = efx_probe_channel(channel); + if (rc) { + netif_err(efx, probe, efx->net_dev, + "failed to create channel %d\n", + channel->channel); + goto fail; + } + } + efx_set_channel_names(efx); + + return 0; + +fail: + efx_remove_channels(efx); + return rc; +} + +void efx_remove_channel(struct efx_channel *channel) +{ + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + + netif_dbg(channel->efx, drv, channel->efx->net_dev, + "destroy chan %d\n", channel->channel); + + efx_for_each_channel_rx_queue(rx_queue, channel) + efx_remove_rx_queue(rx_queue); + efx_for_each_possible_channel_tx_queue(tx_queue, channel) + efx_remove_tx_queue(tx_queue); + efx_remove_eventq(channel); + channel->type->post_remove(channel); +} + +void efx_remove_channels(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + efx_remove_channel(channel); + + kfree(efx->xdp_tx_queues); +} + +int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) +{ + struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel; + unsigned int i, next_buffer_table = 0; + u32 old_rxq_entries, old_txq_entries; + int rc, rc2; + + rc = efx_check_disabled(efx); + if (rc) + return rc; + + /* Not all channels should be reallocated. We must avoid + * reallocating their buffer table entries. + */ + efx_for_each_channel(channel, efx) { + struct efx_rx_queue *rx_queue; + struct efx_tx_queue *tx_queue; + + if (channel->type->copy) + continue; + next_buffer_table = max(next_buffer_table, + channel->eventq.index + + channel->eventq.entries); + efx_for_each_channel_rx_queue(rx_queue, channel) + next_buffer_table = max(next_buffer_table, + rx_queue->rxd.index + + rx_queue->rxd.entries); + efx_for_each_channel_tx_queue(tx_queue, channel) + next_buffer_table = max(next_buffer_table, + tx_queue->txd.index + + tx_queue->txd.entries); + } + + efx_device_detach_sync(efx); + efx_stop_all(efx); + efx_soft_disable_interrupts(efx); + + /* Clone channels (where possible) */ + memset(other_channel, 0, sizeof(other_channel)); + for (i = 0; i < efx->n_channels; i++) { + channel = efx->channel[i]; + if (channel->type->copy) + channel = channel->type->copy(channel); + if (!channel) { + rc = -ENOMEM; + goto out; + } + other_channel[i] = channel; + } + + /* Swap entry counts and channel pointers */ + old_rxq_entries = efx->rxq_entries; + old_txq_entries = efx->txq_entries; + efx->rxq_entries = rxq_entries; + efx->txq_entries = txq_entries; + for (i = 0; i < efx->n_channels; i++) { + channel = efx->channel[i]; + efx->channel[i] = other_channel[i]; + other_channel[i] = channel; + } + + /* Restart buffer table allocation */ + efx->next_buffer_table = next_buffer_table; + + for (i = 0; i < efx->n_channels; i++) { + channel = efx->channel[i]; + if (!channel->type->copy) + continue; + rc = efx_probe_channel(channel); + if (rc) + goto rollback; + efx_init_napi_channel(efx->channel[i]); + } + +out: + /* Destroy unused channel structures */ + for (i = 0; i < efx->n_channels; i++) { + channel = other_channel[i]; + if (channel && channel->type->copy) { + efx_fini_napi_channel(channel); + efx_remove_channel(channel); + kfree(channel); + } + } + + rc2 = efx_soft_enable_interrupts(efx); + if (rc2) { + rc = rc ? 
rc : rc2; + netif_err(efx, drv, efx->net_dev, + "unable to restart interrupts on channel reallocation\n"); + efx_schedule_reset(efx, RESET_TYPE_DISABLE); + } else { + efx_start_all(efx); + efx_device_attach_if_not_resetting(efx); + } + return rc; + +rollback: + /* Swap back */ + efx->rxq_entries = old_rxq_entries; + efx->txq_entries = old_txq_entries; + for (i = 0; i < efx->n_channels; i++) { + channel = efx->channel[i]; + efx->channel[i] = other_channel[i]; + other_channel[i] = channel; + } + goto out; +} + +int efx_set_channels(struct efx_nic *efx) +{ + struct efx_channel *channel; + struct efx_tx_queue *tx_queue; + int xdp_queue_number; + + efx->tx_channel_offset = + efx_separate_tx_channels ? + efx->n_channels - efx->n_tx_channels : 0; + + if (efx->xdp_tx_queue_count) { + EFX_WARN_ON_PARANOID(efx->xdp_tx_queues); + + /* Allocate array for XDP TX queue lookup. */ + efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count, + sizeof(*efx->xdp_tx_queues), + GFP_KERNEL); + if (!efx->xdp_tx_queues) + return -ENOMEM; + } + + /* We need to mark which channels really have RX and TX + * queues, and adjust the TX queue numbers if we have separate + * RX-only and TX-only channels. + */ + xdp_queue_number = 0; + efx_for_each_channel(channel, efx) { + if (channel->channel < efx->n_rx_channels) + channel->rx_queue.core_index = channel->channel; + else + channel->rx_queue.core_index = -1; + + efx_for_each_channel_tx_queue(tx_queue, channel) { + tx_queue->queue -= (efx->tx_channel_offset * + EFX_TXQ_TYPES); + + if (efx_channel_is_xdp_tx(channel) && + xdp_queue_number < efx->xdp_tx_queue_count) { + efx->xdp_tx_queues[xdp_queue_number] = tx_queue; + xdp_queue_number++; + } + } + } + return 0; +} + +bool efx_default_channel_want_txqs(struct efx_channel *channel) +{ + return channel->channel - channel->efx->tx_channel_offset < + channel->efx->n_tx_channels; +} + +/************* + * START/STOP + *************/ + +int efx_soft_enable_interrupts(struct efx_nic *efx) +{ + struct efx_channel *channel, *end_channel; + int rc; + + BUG_ON(efx->state == STATE_DISABLED); + + efx->irq_soft_enabled = true; + smp_wmb(); + + efx_for_each_channel(channel, efx) { + if (!channel->type->keep_eventq) { + rc = efx_init_eventq(channel); + if (rc) + goto fail; + } + efx_start_eventq(channel); + } + + efx_mcdi_mode_event(efx); + + return 0; +fail: + end_channel = channel; + efx_for_each_channel(channel, efx) { + if (channel == end_channel) + break; + efx_stop_eventq(channel); + if (!channel->type->keep_eventq) + efx_fini_eventq(channel); + } + + return rc; +} + +void efx_soft_disable_interrupts(struct efx_nic *efx) +{ + struct efx_channel *channel; + + if (efx->state == STATE_DISABLED) + return; + + efx_mcdi_mode_poll(efx); + + efx->irq_soft_enabled = false; + smp_wmb(); + + if (efx->legacy_irq) + synchronize_irq(efx->legacy_irq); + + efx_for_each_channel(channel, efx) { + if (channel->irq) + synchronize_irq(channel->irq); + + efx_stop_eventq(channel); + if (!channel->type->keep_eventq) + efx_fini_eventq(channel); + } + + /* Flush the asynchronous MCDI request queue */ + efx_mcdi_flush_async(efx); +} + +int efx_enable_interrupts(struct efx_nic *efx) +{ + struct efx_channel *channel, *end_channel; + int rc; + + /* TODO: Is this really a bug? 
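+ * (STATE_DISABLED is only entered after a failed reset permanently
+ * disables the device, so reaching this point in that state would
+ * indicate a driver logic error rather than a hardware one.)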
*/ + BUG_ON(efx->state == STATE_DISABLED); + + if (efx->eeh_disabled_legacy_irq) { + enable_irq(efx->legacy_irq); + efx->eeh_disabled_legacy_irq = false; + } + + efx->type->irq_enable_master(efx); + + efx_for_each_channel(channel, efx) { + if (channel->type->keep_eventq) { + rc = efx_init_eventq(channel); + if (rc) + goto fail; + } + } + + rc = efx_soft_enable_interrupts(efx); + if (rc) + goto fail; + + return 0; + +fail: + end_channel = channel; + efx_for_each_channel(channel, efx) { + if (channel == end_channel) + break; + if (channel->type->keep_eventq) + efx_fini_eventq(channel); + } + + efx->type->irq_disable_non_ev(efx); + + return rc; +} + +void efx_disable_interrupts(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_soft_disable_interrupts(efx); + + efx_for_each_channel(channel, efx) { + if (channel->type->keep_eventq) + efx_fini_eventq(channel); + } + + efx->type->irq_disable_non_ev(efx); +} + +void efx_start_channels(struct efx_nic *efx) +{ + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) { + efx_for_each_channel_tx_queue(tx_queue, channel) { + efx_init_tx_queue(tx_queue); + atomic_inc(&efx->active_queues); + } + + efx_for_each_channel_rx_queue(rx_queue, channel) { + efx_init_rx_queue(rx_queue); + atomic_inc(&efx->active_queues); + efx_stop_eventq(channel); + efx_fast_push_rx_descriptors(rx_queue, false); + efx_start_eventq(channel); + } + + WARN_ON(channel->rx_pkt_n_frags); + } +} + +void efx_stop_channels(struct efx_nic *efx) +{ + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + struct efx_channel *channel; + int rc = 0; + + /* Stop RX refill */ + efx_for_each_channel(channel, efx) { + efx_for_each_channel_rx_queue(rx_queue, channel) + rx_queue->refill_enabled = false; + } + + efx_for_each_channel(channel, efx) { + /* RX packet processing is pipelined, so wait for the + * NAPI handler to complete. At least event queue 0 + * might be kept active by non-data events, so don't + * use napi_synchronize() but actually disable NAPI + * temporarily. + */ + if (efx_channel_has_rx_queue(channel)) { + efx_stop_eventq(channel); + efx_start_eventq(channel); + } + } + + if (efx->type->fini_dmaq) + rc = efx->type->fini_dmaq(efx); + + if (rc) { + netif_err(efx, drv, efx->net_dev, "failed to flush queues\n"); + } else { + netif_dbg(efx, drv, efx->net_dev, + "successfully flushed all queues\n"); + } + + efx_for_each_channel(channel, efx) { + efx_for_each_channel_rx_queue(rx_queue, channel) + efx_fini_rx_queue(rx_queue); + efx_for_each_possible_channel_tx_queue(tx_queue, channel) + efx_fini_tx_queue(tx_queue); + } +} + +/************************************************************************** + * + * NAPI interface + * + *************************************************************************/ + +/* Process channel's event queue + * + * This function is responsible for processing the event queue of a + * single channel. The caller must guarantee that this function will + * never be concurrently called more than once on the same channel, + * though different channels may be being processed concurrently. 
+ */ +static int efx_process_channel(struct efx_channel *channel, int budget) +{ + struct efx_tx_queue *tx_queue; + struct list_head rx_list; + int spent; + + if (unlikely(!channel->enabled)) + return 0; + + /* Prepare the batch receive list */ + EFX_WARN_ON_PARANOID(channel->rx_list != NULL); + INIT_LIST_HEAD(&rx_list); + channel->rx_list = &rx_list; + + efx_for_each_channel_tx_queue(tx_queue, channel) { + tx_queue->pkts_compl = 0; + tx_queue->bytes_compl = 0; + } + + spent = efx_nic_process_eventq(channel, budget); + if (spent && efx_channel_has_rx_queue(channel)) { + struct efx_rx_queue *rx_queue = + efx_channel_get_rx_queue(channel); + + efx_rx_flush_packet(channel); + efx_fast_push_rx_descriptors(rx_queue, true); + } + + /* Update BQL */ + efx_for_each_channel_tx_queue(tx_queue, channel) { + if (tx_queue->bytes_compl) { + netdev_tx_completed_queue(tx_queue->core_txq, + tx_queue->pkts_compl, + tx_queue->bytes_compl); + } + } + + /* Receive any packets we queued up */ + netif_receive_skb_list(channel->rx_list); + channel->rx_list = NULL; + + return spent; +} + +static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel) +{ + int step = efx->irq_mod_step_us; + + if (channel->irq_mod_score < irq_adapt_low_thresh) { + if (channel->irq_moderation_us > step) { + channel->irq_moderation_us -= step; + efx->type->push_irq_moderation(channel); + } + } else if (channel->irq_mod_score > irq_adapt_high_thresh) { + if (channel->irq_moderation_us < + efx->irq_rx_moderation_us) { + channel->irq_moderation_us += step; + efx->type->push_irq_moderation(channel); + } + } + + channel->irq_count = 0; + channel->irq_mod_score = 0; +} + +/* NAPI poll handler + * + * NAPI guarantees serialisation of polls of the same device, which + * provides the guarantee required by efx_process_channel(). + */ +static int efx_poll(struct napi_struct *napi, int budget) +{ + struct efx_channel *channel = + container_of(napi, struct efx_channel, napi_str); + struct efx_nic *efx = channel->efx; + int spent; + + netif_vdbg(efx, intr, efx->net_dev, + "channel %d NAPI poll executing on CPU %d\n", + channel->channel, raw_smp_processor_id()); + + spent = efx_process_channel(channel, budget); + + xdp_do_flush_map(); + + if (spent < budget) { + if (efx_channel_has_rx_queue(channel) && + efx->irq_rx_adaptive && + unlikely(++channel->irq_count == 1000)) { + efx_update_irq_mod(efx, channel); + } + +#ifdef CONFIG_RFS_ACCEL + /* Perhaps expire some ARFS filters */ + mod_delayed_work(system_wq, &channel->filter_work, 0); +#endif + + /* There is no race here; although napi_disable() will + * only wait for napi_complete(), this isn't a problem + * since efx_nic_eventq_read_ack() will have no effect if + * interrupts have already been disabled. 
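+ * (On the adaptive moderation above: the NIC is only re-programmed
+ * once per 1000 interrupts; with the default thresholds, a channel
+ * scoring below 8000 has irq_moderation_us shortened by one
+ * irq_mod_step_us, and one scoring above 16000 has it lengthened,
+ * capped at the configured irq_rx_moderation_us.)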
+ */ + if (napi_complete_done(napi, spent)) + efx_nic_eventq_read_ack(channel); + } + + return spent; +} + +void efx_init_napi_channel(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + + channel->napi_dev = efx->net_dev; + netif_napi_add(channel->napi_dev, &channel->napi_str, + efx_poll, napi_weight); +} + +void efx_init_napi(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + efx_init_napi_channel(channel); +} + +void efx_fini_napi_channel(struct efx_channel *channel) +{ + if (channel->napi_dev) + netif_napi_del(&channel->napi_str); + + channel->napi_dev = NULL; +} + +void efx_fini_napi(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + efx_fini_napi_channel(channel); +} diff --git a/drivers/net/ethernet/sfc/efx_channels.h b/drivers/net/ethernet/sfc/efx_channels.h new file mode 100644 index 000000000000..8d7b8c4142d7 --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_channels.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_CHANNELS_H +#define EFX_CHANNELS_H + +int efx_probe_interrupts(struct efx_nic *efx); +void efx_remove_interrupts(struct efx_nic *efx); +int efx_soft_enable_interrupts(struct efx_nic *efx); +void efx_soft_disable_interrupts(struct efx_nic *efx); +int efx_enable_interrupts(struct efx_nic *efx); +void efx_disable_interrupts(struct efx_nic *efx); + +void efx_set_interrupt_affinity(struct efx_nic *efx); +void efx_clear_interrupt_affinity(struct efx_nic *efx); + +int efx_probe_eventq(struct efx_channel *channel); +int efx_init_eventq(struct efx_channel *channel); +void efx_start_eventq(struct efx_channel *channel); +void efx_stop_eventq(struct efx_channel *channel); +void efx_fini_eventq(struct efx_channel *channel); +void efx_remove_eventq(struct efx_channel *channel); + +struct efx_channel * +efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel); +int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries); +void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len); +void efx_set_channel_names(struct efx_nic *efx); +int efx_init_channels(struct efx_nic *efx); +int efx_probe_channels(struct efx_nic *efx); +int efx_set_channels(struct efx_nic *efx); +bool efx_default_channel_want_txqs(struct efx_channel *channel); +void efx_remove_channel(struct efx_channel *channel); +void efx_remove_channels(struct efx_nic *efx); +void efx_fini_channels(struct efx_nic *efx); +struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel); +void efx_start_channels(struct efx_nic *efx); +void efx_stop_channels(struct efx_nic *efx); + +void efx_init_napi_channel(struct efx_channel *channel); +void efx_init_napi(struct efx_nic *efx); +void efx_fini_napi_channel(struct efx_channel *channel); +void efx_fini_napi(struct efx_nic *efx); + +int efx_channel_dummy_op_int(struct efx_channel *channel); +void efx_channel_dummy_op_void(struct efx_channel *channel); + +#endif diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c new file mode 
100644 index 000000000000..b0d76bc19673 --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_common.c @@ -0,0 +1,1102 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "net_driver.h" +#include <linux/module.h> +#include <linux/netdevice.h> +#include "efx_common.h" +#include "efx_channels.h" +#include "efx.h" +#include "mcdi.h" +#include "selftest.h" +#include "rx_common.h" +#include "tx_common.h" +#include "nic.h" +#include "io.h" +#include "mcdi_pcol.h" + +static unsigned int debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE | + NETIF_MSG_LINK | NETIF_MSG_IFDOWN | + NETIF_MSG_IFUP | NETIF_MSG_RX_ERR | + NETIF_MSG_TX_ERR | NETIF_MSG_HW); +module_param(debug, uint, 0); +MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value"); + +/* This is the time (in jiffies) between invocations of the hardware + * monitor. + * On Falcon-based NICs, this will: + * - Check the on-board hardware monitor; + * - Poll the link state and reconfigure the hardware as necessary. + * On Siena-based NICs for power systems with EEH support, this will give EEH a + * chance to start. + */ +static unsigned int efx_monitor_interval = 1 * HZ; + +/* How often and how many times to poll for a reset while waiting for a + * BIST that another function started to complete. + */ +#define BIST_WAIT_DELAY_MS 100 +#define BIST_WAIT_DELAY_COUNT 100 + +/* Default stats update time */ +#define STATS_PERIOD_MS_DEFAULT 1000 + +const unsigned int efx_reset_type_max = RESET_TYPE_MAX; +const char *const efx_reset_type_names[] = { + [RESET_TYPE_INVISIBLE] = "INVISIBLE", + [RESET_TYPE_ALL] = "ALL", + [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL", + [RESET_TYPE_WORLD] = "WORLD", + [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE", + [RESET_TYPE_DATAPATH] = "DATAPATH", + [RESET_TYPE_MC_BIST] = "MC_BIST", + [RESET_TYPE_DISABLE] = "DISABLE", + [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", + [RESET_TYPE_INT_ERROR] = "INT_ERROR", + [RESET_TYPE_DMA_ERROR] = "DMA_ERROR", + [RESET_TYPE_TX_SKIP] = "TX_SKIP", + [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", + [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)", +}; + +#define RESET_TYPE(type) \ + STRING_TABLE_LOOKUP(type, efx_reset_type) + +/* Loopback mode names (see LOOPBACK_MODE()) */ +const unsigned int efx_loopback_mode_max = LOOPBACK_MAX; +const char *const efx_loopback_mode_names[] = { + [LOOPBACK_NONE] = "NONE", + [LOOPBACK_DATA] = "DATAPATH", + [LOOPBACK_GMAC] = "GMAC", + [LOOPBACK_XGMII] = "XGMII", + [LOOPBACK_XGXS] = "XGXS", + [LOOPBACK_XAUI] = "XAUI", + [LOOPBACK_GMII] = "GMII", + [LOOPBACK_SGMII] = "SGMII", + [LOOPBACK_XGBR] = "XGBR", + [LOOPBACK_XFI] = "XFI", + [LOOPBACK_XAUI_FAR] = "XAUI_FAR", + [LOOPBACK_GMII_FAR] = "GMII_FAR", + [LOOPBACK_SGMII_FAR] = "SGMII_FAR", + [LOOPBACK_XFI_FAR] = "XFI_FAR", + [LOOPBACK_GPHY] = "GPHY", + [LOOPBACK_PHYXS] = "PHYXS", + [LOOPBACK_PCS] = "PCS", + [LOOPBACK_PMAPMD] = "PMA/PMD", + [LOOPBACK_XPORT] = "XPORT", + [LOOPBACK_XGMII_WS] = "XGMII_WS", + [LOOPBACK_XAUI_WS] = "XAUI_WS", + [LOOPBACK_XAUI_WS_FAR] = "XAUI_WS_FAR", + [LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR", + [LOOPBACK_GMII_WS] = "GMII_WS", + [LOOPBACK_XFI_WS] = "XFI_WS", + 
[LOOPBACK_XFI_WS_FAR] = "XFI_WS_FAR", + [LOOPBACK_PHYXS_WS] = "PHYXS_WS", +}; + +/* Reset workqueue. If any NIC has a hardware failure then a reset will be + * queued onto this work queue. This is not a per-nic work queue, because + * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised. + */ +static struct workqueue_struct *reset_workqueue; + +int efx_create_reset_workqueue(void) +{ + reset_workqueue = create_singlethread_workqueue("sfc_reset"); + if (!reset_workqueue) { + printk(KERN_ERR "Failed to create reset workqueue\n"); + return -ENOMEM; + } + + return 0; +} + +void efx_queue_reset_work(struct efx_nic *efx) +{ + queue_work(reset_workqueue, &efx->reset_work); +} + +void efx_flush_reset_workqueue(struct efx_nic *efx) +{ + cancel_work_sync(&efx->reset_work); +} + +void efx_destroy_reset_workqueue(void) +{ + if (reset_workqueue) { + destroy_workqueue(reset_workqueue); + reset_workqueue = NULL; + } +} + +/* We assume that efx->type->reconfigure_mac will always try to sync RX + * filters and therefore needs to read-lock the filter table against freeing + */ +void efx_mac_reconfigure(struct efx_nic *efx) +{ + if (efx->type->reconfigure_mac) { + down_read(&efx->filter_sem); + efx->type->reconfigure_mac(efx); + up_read(&efx->filter_sem); + } +} + +/* Asynchronous work item for changing MAC promiscuity and multicast + * hash. Avoid a drain/rx_ingress enable by reconfiguring the current + * MAC directly. + */ +static void efx_mac_work(struct work_struct *data) +{ + struct efx_nic *efx = container_of(data, struct efx_nic, mac_work); + + mutex_lock(&efx->mac_lock); + if (efx->port_enabled) + efx_mac_reconfigure(efx); + mutex_unlock(&efx->mac_lock); +} + +/* This ensures that the kernel is kept informed (via + * netif_carrier_on/off) of the link status, and also maintains the + * link status's stop on the port's TX queue. + */ +void efx_link_status_changed(struct efx_nic *efx) +{ + struct efx_link_state *link_state = &efx->link_state; + + /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure + * that no events are triggered between unregister_netdev() and the + * driver unloading. A more general condition is that NETDEV_CHANGE + * can only be generated between NETDEV_UP and NETDEV_DOWN + */ + if (!netif_running(efx->net_dev)) + return; + + if (link_state->up != netif_carrier_ok(efx->net_dev)) { + efx->n_link_state_changes++; + + if (link_state->up) + netif_carrier_on(efx->net_dev); + else + netif_carrier_off(efx->net_dev); + } + + /* Status message for kernel log */ + if (link_state->up) + netif_info(efx, link, efx->net_dev, + "link up at %uMbps %s-duplex (MTU %d)\n", + link_state->speed, link_state->fd ? "full" : "half", + efx->net_dev->mtu); + else + netif_info(efx, link, efx->net_dev, "link down\n"); +} + +unsigned int efx_xdp_max_mtu(struct efx_nic *efx) +{ + /* The maximum MTU that we can fit in a single page, allowing for + * framing, overhead and XDP headroom. + */ + int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) + + efx->rx_prefix_size + efx->type->rx_buffer_padding + + efx->rx_ip_align + XDP_PACKET_HEADROOM; + + return PAGE_SIZE - overhead; +} + +/* Context: process, rtnl_lock() held. 
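+ * (Rough scale, with assumed values: on 4 KiB pages with
+ * XDP_PACKET_HEADROOM of 256 bytes, efx_xdp_max_mtu() above leaves an
+ * MTU ceiling of roughly 3.7 KiB; the exact figure depends on the
+ * NIC-specific RX prefix size and buffer padding.)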
*/ +int efx_change_mtu(struct net_device *net_dev, int new_mtu) +{ + struct efx_nic *efx = netdev_priv(net_dev); + int rc; + + rc = efx_check_disabled(efx); + if (rc) + return rc; + + if (rtnl_dereference(efx->xdp_prog) && + new_mtu > efx_xdp_max_mtu(efx)) { + netif_err(efx, drv, efx->net_dev, + "Requested MTU of %d too big for XDP (max: %d)\n", + new_mtu, efx_xdp_max_mtu(efx)); + return -EINVAL; + } + + netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); + + efx_device_detach_sync(efx); + efx_stop_all(efx); + + mutex_lock(&efx->mac_lock); + net_dev->mtu = new_mtu; + efx_mac_reconfigure(efx); + mutex_unlock(&efx->mac_lock); + + efx_start_all(efx); + efx_device_attach_if_not_resetting(efx); + return 0; +} + +/************************************************************************** + * + * Hardware monitor + * + **************************************************************************/ + +/* Run periodically off the general workqueue */ +static void efx_monitor(struct work_struct *data) +{ + struct efx_nic *efx = container_of(data, struct efx_nic, + monitor_work.work); + + netif_vdbg(efx, timer, efx->net_dev, + "hardware monitor executing on CPU %d\n", + raw_smp_processor_id()); + BUG_ON(efx->type->monitor == NULL); + + /* If the mac_lock is already held then it is likely a port + * reconfiguration is already in place, which will likely do + * most of the work of monitor() anyway. + */ + if (mutex_trylock(&efx->mac_lock)) { + if (efx->port_enabled && efx->type->monitor) + efx->type->monitor(efx); + mutex_unlock(&efx->mac_lock); + } + + efx_start_monitor(efx); +} + +void efx_start_monitor(struct efx_nic *efx) +{ + if (efx->type->monitor) + queue_delayed_work(efx->workqueue, &efx->monitor_work, + efx_monitor_interval); +} + +/************************************************************************** + * + * Event queue processing + * + *************************************************************************/ + +/* Channels are shutdown and reinitialised whilst the NIC is running + * to propagate configuration changes (mtu, checksum offload), or + * to clear hardware error conditions + */ +static void efx_start_datapath(struct efx_nic *efx) +{ + netdev_features_t old_features = efx->net_dev->features; + bool old_rx_scatter = efx->rx_scatter; + size_t rx_buf_len; + + /* Calculate the rx buffer allocation parameters required to + * support the current MTU, including padding for header + * alignment and overruns. 
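+ * (e.g. a standard 1500-byte MTU fits comfortably in one page, so
+ * scatter stays off unless the NIC type always scatters; a 9000-byte
+ * jumbo MTU exceeds a 4 KiB page, so the driver either enables
+ * scatter with fixed EFX_RX_USR_BUF_SIZE buffers or, failing that,
+ * falls back to higher-order page allocations.)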
+ */ + efx->rx_dma_len = (efx->rx_prefix_size + + EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + + efx->type->rx_buffer_padding); + rx_buf_len = (sizeof(struct efx_rx_page_state) + XDP_PACKET_HEADROOM + + efx->rx_ip_align + efx->rx_dma_len); + if (rx_buf_len <= PAGE_SIZE) { + efx->rx_scatter = efx->type->always_rx_scatter; + efx->rx_buffer_order = 0; + } else if (efx->type->can_rx_scatter) { + BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES); + BUILD_BUG_ON(sizeof(struct efx_rx_page_state) + + 2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE, + EFX_RX_BUF_ALIGNMENT) > + PAGE_SIZE); + efx->rx_scatter = true; + efx->rx_dma_len = EFX_RX_USR_BUF_SIZE; + efx->rx_buffer_order = 0; + } else { + efx->rx_scatter = false; + efx->rx_buffer_order = get_order(rx_buf_len); + } + + efx_rx_config_page_split(efx); + if (efx->rx_buffer_order) + netif_dbg(efx, drv, efx->net_dev, + "RX buf len=%u; page order=%u batch=%u\n", + efx->rx_dma_len, efx->rx_buffer_order, + efx->rx_pages_per_batch); + else + netif_dbg(efx, drv, efx->net_dev, + "RX buf len=%u step=%u bpp=%u; page batch=%u\n", + efx->rx_dma_len, efx->rx_page_buf_step, + efx->rx_bufs_per_page, efx->rx_pages_per_batch); + + /* Restore previously fixed features in hw_features and remove + * features which are fixed now + */ + efx->net_dev->hw_features |= efx->net_dev->features; + efx->net_dev->hw_features &= ~efx->fixed_features; + efx->net_dev->features |= efx->fixed_features; + if (efx->net_dev->features != old_features) + netdev_features_change(efx->net_dev); + + /* RX filters may also have scatter-enabled flags */ + if ((efx->rx_scatter != old_rx_scatter) && + efx->type->filter_update_rx_scatter) + efx->type->filter_update_rx_scatter(efx); + + /* We must keep at least one descriptor in a TX ring empty. + * We could avoid this when the queue size does not exactly + * match the hardware ring size, but it's not that important. + * Therefore we stop the queue when one more skb might fill + * the ring completely. We wake it when half way back to + * empty. + */ + efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx); + efx->txq_wake_thresh = efx->txq_stop_thresh / 2; + + /* Initialise the channels */ + efx_start_channels(efx); + + efx_ptp_start_datapath(efx); + + if (netif_device_present(efx->net_dev)) + netif_tx_wake_all_queues(efx->net_dev); +} + +static void efx_stop_datapath(struct efx_nic *efx) +{ + EFX_ASSERT_RESET_SERIALISED(efx); + BUG_ON(efx->port_enabled); + + efx_ptp_stop_datapath(efx); + + efx_stop_channels(efx); +} + +/************************************************************************** + * + * Port handling + * + **************************************************************************/ + +static void efx_start_port(struct efx_nic *efx) +{ + netif_dbg(efx, ifup, efx->net_dev, "start port\n"); + BUG_ON(efx->port_enabled); + + mutex_lock(&efx->mac_lock); + efx->port_enabled = true; + + /* Ensure MAC ingress/egress is enabled */ + efx_mac_reconfigure(efx); + + mutex_unlock(&efx->mac_lock); +} + +/* Cancel work for MAC reconfiguration, periodic hardware monitoring + * and the async self-test, wait for them to finish and prevent them + * being scheduled again. This doesn't cover online resets, which + * should only be cancelled when removing the device. 
+ */ +static void efx_stop_port(struct efx_nic *efx) +{ + netif_dbg(efx, ifdown, efx->net_dev, "stop port\n"); + + EFX_ASSERT_RESET_SERIALISED(efx); + + mutex_lock(&efx->mac_lock); + efx->port_enabled = false; + mutex_unlock(&efx->mac_lock); + + /* Serialise against efx_set_multicast_list() */ + netif_addr_lock_bh(efx->net_dev); + netif_addr_unlock_bh(efx->net_dev); + + cancel_delayed_work_sync(&efx->monitor_work); + efx_selftest_async_cancel(efx); + cancel_work_sync(&efx->mac_work); +} + +/* If the interface is supposed to be running but is not, start + * the hardware and software data path, regular activity for the port + * (MAC statistics, link polling, etc.) and schedule the port to be + * reconfigured. Interrupts must already be enabled. This function + * is safe to call multiple times, so long as the NIC is not disabled. + * Requires the RTNL lock. + */ +void efx_start_all(struct efx_nic *efx) +{ + EFX_ASSERT_RESET_SERIALISED(efx); + BUG_ON(efx->state == STATE_DISABLED); + + /* Check that it is appropriate to restart the interface. All + * of these flags are safe to read under just the rtnl lock + */ + if (efx->port_enabled || !netif_running(efx->net_dev) || + efx->reset_pending) + return; + + efx_start_port(efx); + efx_start_datapath(efx); + + /* Start the hardware monitor if there is one */ + efx_start_monitor(efx); + + /* Link state detection is normally event-driven; we have + * to poll now because we could have missed a change + */ + mutex_lock(&efx->mac_lock); + if (efx->phy_op->poll(efx)) + efx_link_status_changed(efx); + mutex_unlock(&efx->mac_lock); + + if (efx->type->start_stats) { + efx->type->start_stats(efx); + efx->type->pull_stats(efx); + spin_lock_bh(&efx->stats_lock); + efx->type->update_stats(efx, NULL, NULL); + spin_unlock_bh(&efx->stats_lock); + } +} + +/* Quiesce the hardware and software data path, and regular activity + * for the port without bringing the link down. Safe to call multiple + * times with the NIC in almost any state, but interrupts should be + * enabled. Requires the RTNL lock. + */ +void efx_stop_all(struct efx_nic *efx) +{ + EFX_ASSERT_RESET_SERIALISED(efx); + + /* port_enabled can be read safely under the rtnl lock */ + if (!efx->port_enabled) + return; + + if (efx->type->update_stats) { + /* update stats before we go down so we can accurately count + * rx_nodesc_drops + */ + efx->type->pull_stats(efx); + spin_lock_bh(&efx->stats_lock); + efx->type->update_stats(efx, NULL, NULL); + spin_unlock_bh(&efx->stats_lock); + efx->type->stop_stats(efx); + } + + efx_stop_port(efx); + + /* Stop the kernel transmit interface. This is only valid if + * the device is stopped or detached; otherwise the watchdog + * may fire immediately. + */ + WARN_ON(netif_running(efx->net_dev) && + netif_device_present(efx->net_dev)); + netif_tx_disable(efx->net_dev); + + efx_stop_datapath(efx); +} + +/* Context: process, dev_base_lock or RTNL held, non-blocking. */ +void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + spin_lock_bh(&efx->stats_lock); + efx->type->update_stats(efx, NULL, stats); + spin_unlock_bh(&efx->stats_lock); +} + +/* Push loopback/power/transmit disable settings to the PHY, and reconfigure + * the MAC appropriately. All other PHY configuration changes are pushed + * through phy_op->set_settings(), and pushed asynchronously to the MAC + * through efx_monitor(). 
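+ * (e.g. entering a MAC-level loopback sets PHY_MODE_TX_DISABLED below,
+ * so the PHY stops driving the line while the MAC loops frames back
+ * internally; leaving loopback clears the flag again.)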
+ * + * Callers must hold the mac_lock + */ +int __efx_reconfigure_port(struct efx_nic *efx) +{ + enum efx_phy_mode phy_mode; + int rc = 0; + + WARN_ON(!mutex_is_locked(&efx->mac_lock)); + + /* Disable PHY transmit in mac level loopbacks */ + phy_mode = efx->phy_mode; + if (LOOPBACK_INTERNAL(efx)) + efx->phy_mode |= PHY_MODE_TX_DISABLED; + else + efx->phy_mode &= ~PHY_MODE_TX_DISABLED; + + if (efx->type->reconfigure_port) + rc = efx->type->reconfigure_port(efx); + + if (rc) + efx->phy_mode = phy_mode; + + return rc; +} + +/* Reinitialise the MAC to pick up new PHY settings, even if the port is + * disabled. + */ +int efx_reconfigure_port(struct efx_nic *efx) +{ + int rc; + + EFX_ASSERT_RESET_SERIALISED(efx); + + mutex_lock(&efx->mac_lock); + rc = __efx_reconfigure_port(efx); + mutex_unlock(&efx->mac_lock); + + return rc; +} + +/************************************************************************** + * + * Device reset and suspend + * + **************************************************************************/ + +static void efx_wait_for_bist_end(struct efx_nic *efx) +{ + int i; + + for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) { + if (efx_mcdi_poll_reboot(efx)) + goto out; + msleep(BIST_WAIT_DELAY_MS); + } + + netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n"); +out: + /* Either way unset the BIST flag. If we found no reboot we probably + * won't recover, but we should try. + */ + efx->mc_bist_for_other_fn = false; +} + +/* Try recovery mechanisms. + * For now only EEH is supported. + * Returns 0 if the recovery mechanisms are unsuccessful. + * Returns a non-zero value otherwise. + */ +int efx_try_recovery(struct efx_nic *efx) +{ +#ifdef CONFIG_EEH + /* A PCI error can occur and not be seen by EEH because nothing + * happens on the PCI bus. In this case the driver may fail and + * schedule a 'recover or reset', leading to this recovery handler. + * Manually call the eeh failure check function. + */ + struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev); + if (eeh_dev_check_failure(eehdev)) { + /* The EEH mechanisms will handle the error and reset the + * device if necessary. + */ + return 1; + } +#endif + return 0; +} + +/* Tears down the entire software state and most of the hardware state + * before reset. + */ +void efx_reset_down(struct efx_nic *efx, enum reset_type method) +{ + EFX_ASSERT_RESET_SERIALISED(efx); + + if (method == RESET_TYPE_MCDI_TIMEOUT) + efx->type->prepare_flr(efx); + + efx_stop_all(efx); + efx_disable_interrupts(efx); + + mutex_lock(&efx->mac_lock); + down_write(&efx->filter_sem); + mutex_lock(&efx->rss_lock); + if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && + method != RESET_TYPE_DATAPATH) + efx->phy_op->fini(efx); + efx->type->fini(efx); +} + +/* This function will always ensure that the locks acquired in + * efx_reset_down() are released. A failure return code indicates + * that we were unable to reinitialise the hardware, and the + * driver should be disabled. If ok is false, then the rx and tx + * engines are not restarted, pending a RESET_DISABLE. 
+ */ +int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) +{ + int rc; + + EFX_ASSERT_RESET_SERIALISED(efx); + + if (method == RESET_TYPE_MCDI_TIMEOUT) + efx->type->finish_flr(efx); + + /* Ensure that SRAM is initialised even if we're disabling the device */ + rc = efx->type->init(efx); + if (rc) { + netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n"); + goto fail; + } + + if (!ok) + goto fail; + + if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && + method != RESET_TYPE_DATAPATH) { + rc = efx->phy_op->init(efx); + if (rc) + goto fail; + rc = efx->phy_op->reconfigure(efx); + if (rc && rc != -EPERM) + netif_err(efx, drv, efx->net_dev, + "could not restore PHY settings\n"); + } + + rc = efx_enable_interrupts(efx); + if (rc) + goto fail; + +#ifdef CONFIG_SFC_SRIOV + rc = efx->type->vswitching_restore(efx); + if (rc) /* not fatal; the PF will still work fine */ + netif_warn(efx, probe, efx->net_dev, + "failed to restore vswitching rc=%d;" + " VFs may not function\n", rc); +#endif + + if (efx->type->rx_restore_rss_contexts) + efx->type->rx_restore_rss_contexts(efx); + mutex_unlock(&efx->rss_lock); + efx->type->filter_table_restore(efx); + up_write(&efx->filter_sem); + if (efx->type->sriov_reset) + efx->type->sriov_reset(efx); + + mutex_unlock(&efx->mac_lock); + + efx_start_all(efx); + + if (efx->type->udp_tnl_push_ports) + efx->type->udp_tnl_push_ports(efx); + + return 0; + +fail: + efx->port_initialized = false; + + mutex_unlock(&efx->rss_lock); + up_write(&efx->filter_sem); + mutex_unlock(&efx->mac_lock); + + return rc; +} + +/* Reset the NIC using the specified method. Note that the reset may + * fail, in which case the card will be left in an unusable state. + * + * Caller must hold the rtnl_lock. + */ +int efx_reset(struct efx_nic *efx, enum reset_type method) +{ + bool disabled; + int rc, rc2; + + netif_info(efx, drv, efx->net_dev, "resetting (%s)\n", + RESET_TYPE(method)); + + efx_device_detach_sync(efx); + efx_reset_down(efx, method); + + rc = efx->type->reset(efx, method); + if (rc) { + netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n"); + goto out; + } + + /* Clear flags for the scopes we covered. We assume the NIC and + * driver are now quiescent so that there is no race here. + */ + if (method < RESET_TYPE_MAX_METHOD) + efx->reset_pending &= -(1 << (method + 1)); + else /* it doesn't fit into the well-ordered scope hierarchy */ + __clear_bit(method, &efx->reset_pending); + + /* Reinitialise bus-mastering, which may have been turned off before + * the reset was scheduled. This is still appropriate, even in the + * RESET_TYPE_DISABLE since this driver generally assumes the hardware + * can respond to requests. + */ + pci_set_master(efx->pci_dev); + +out: + /* Leave device stopped if necessary */ + disabled = rc || + method == RESET_TYPE_DISABLE || + method == RESET_TYPE_RECOVER_OR_DISABLE; + rc2 = efx_reset_up(efx, method, !disabled); + if (rc2) { + disabled = true; + if (!rc) + rc = rc2; + } + + if (disabled) { + dev_close(efx->net_dev); + netif_err(efx, drv, efx->net_dev, "has been disabled\n"); + efx->state = STATE_DISABLED; + } else { + netif_dbg(efx, drv, efx->net_dev, "reset complete\n"); + efx_device_attach_if_not_resetting(efx); + } + return rc; +} + +/* The worker thread exists so that code that cannot sleep can + * schedule a reset for later. 
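+ * + * A sketch of the typical flow: an interrupt or event handler calls + * efx_schedule_reset(), which sets a bit in efx->reset_pending and queues + * this work item; the work item then takes the RTNL lock and runs + * efx_reset() in process context.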
+ */ +static void efx_reset_work(struct work_struct *data) +{ + struct efx_nic *efx = container_of(data, struct efx_nic, reset_work); + unsigned long pending; + enum reset_type method; + + pending = READ_ONCE(efx->reset_pending); + method = fls(pending) - 1; + + if (method == RESET_TYPE_MC_BIST) + efx_wait_for_bist_end(efx); + + if ((method == RESET_TYPE_RECOVER_OR_DISABLE || + method == RESET_TYPE_RECOVER_OR_ALL) && + efx_try_recovery(efx)) + return; + + if (!pending) + return; + + rtnl_lock(); + + /* We checked the state in efx_schedule_reset() but it may + * have changed by now. Now that we have the RTNL lock, + * it cannot change again. + */ + if (efx->state == STATE_READY) + (void)efx_reset(efx, method); + + rtnl_unlock(); +} + +void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) +{ + enum reset_type method; + + if (efx->state == STATE_RECOVERY) { + netif_dbg(efx, drv, efx->net_dev, + "recovering: skip scheduling %s reset\n", + RESET_TYPE(type)); + return; + } + + switch (type) { + case RESET_TYPE_INVISIBLE: + case RESET_TYPE_ALL: + case RESET_TYPE_RECOVER_OR_ALL: + case RESET_TYPE_WORLD: + case RESET_TYPE_DISABLE: + case RESET_TYPE_RECOVER_OR_DISABLE: + case RESET_TYPE_DATAPATH: + case RESET_TYPE_MC_BIST: + case RESET_TYPE_MCDI_TIMEOUT: + method = type; + netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", + RESET_TYPE(method)); + break; + default: + method = efx->type->map_reset_reason(type); + netif_dbg(efx, drv, efx->net_dev, + "scheduling %s reset for %s\n", + RESET_TYPE(method), RESET_TYPE(type)); + break; + } + + set_bit(method, &efx->reset_pending); + smp_mb(); /* ensure we change reset_pending before checking state */ + + /* If we're not READY then just leave the flags set as the cue + * to abort probing or reschedule the reset later. + */ + if (READ_ONCE(efx->state) != STATE_READY) + return; + + /* efx_process_channel() will no longer read events once a + * reset is scheduled. So switch back to poll'd MCDI completions. + */ + efx_mcdi_mode_poll(efx); + + efx_queue_reset_work(efx); +} + +/************************************************************************** + * + * Dummy PHY/MAC operations + * + * Can be used for some unimplemented operations + * Needed so all function pointers are valid and do not have to be tested + * before use + * + **************************************************************************/ +int efx_port_dummy_op_int(struct efx_nic *efx) +{ + return 0; +} +void efx_port_dummy_op_void(struct efx_nic *efx) {} + +static bool efx_port_dummy_op_poll(struct efx_nic *efx) +{ + return false; +} + +static const struct efx_phy_operations efx_dummy_phy_operations = { + .init = efx_port_dummy_op_int, + .reconfigure = efx_port_dummy_op_int, + .poll = efx_port_dummy_op_poll, + .fini = efx_port_dummy_op_void, +}; + +/************************************************************************** + * + * Data housekeeping + * + **************************************************************************/ + +/* This zeroes out and then fills in the invariants in a struct + * efx_nic (including all sub-structures). 
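+ * On failure it unwinds via efx_fini_struct(), so callers need no + * partial-failure cleanup of their own.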
+ */ +int efx_init_struct(struct efx_nic *efx, + struct pci_dev *pci_dev, struct net_device *net_dev) +{ + int rc = -ENOMEM; + + /* Initialise common structures */ + INIT_LIST_HEAD(&efx->node); + INIT_LIST_HEAD(&efx->secondary_list); + spin_lock_init(&efx->biu_lock); +#ifdef CONFIG_SFC_MTD + INIT_LIST_HEAD(&efx->mtd_list); +#endif + INIT_WORK(&efx->reset_work, efx_reset_work); + INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor); + efx_selftest_async_init(efx); + efx->pci_dev = pci_dev; + efx->msg_enable = debug; + efx->state = STATE_UNINIT; + strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name)); + + efx->net_dev = net_dev; + efx->rx_prefix_size = efx->type->rx_prefix_size; + efx->rx_ip_align = + NET_IP_ALIGN ? (efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0; + efx->rx_packet_hash_offset = + efx->type->rx_hash_offset - efx->type->rx_prefix_size; + efx->rx_packet_ts_offset = + efx->type->rx_ts_offset - efx->type->rx_prefix_size; + INIT_LIST_HEAD(&efx->rss_context.list); + mutex_init(&efx->rss_lock); + spin_lock_init(&efx->stats_lock); + efx->vi_stride = EFX_DEFAULT_VI_STRIDE; + efx->num_mac_stats = MC_CMD_MAC_NSTATS; + BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END); + mutex_init(&efx->mac_lock); +#ifdef CONFIG_RFS_ACCEL + mutex_init(&efx->rps_mutex); + spin_lock_init(&efx->rps_hash_lock); + /* Failure to allocate is not fatal, but may degrade ARFS performance */ + efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE, + sizeof(*efx->rps_hash_table), GFP_KERNEL); +#endif + efx->phy_op = &efx_dummy_phy_operations; + efx->mdio.dev = net_dev; + INIT_WORK(&efx->mac_work, efx_mac_work); + init_waitqueue_head(&efx->flush_wq); + + rc = efx_init_channels(efx); + if (rc) + goto fail; + + /* Would be good to use the net_dev name, but we're too early */ + snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s", + pci_name(pci_dev)); + efx->workqueue = create_singlethread_workqueue(efx->workqueue_name); + if (!efx->workqueue) { + rc = -ENOMEM; + goto fail; + } + + return 0; + +fail: + efx_fini_struct(efx); + return rc; +} + +void efx_fini_struct(struct efx_nic *efx) +{ +#ifdef CONFIG_RFS_ACCEL + kfree(efx->rps_hash_table); +#endif + + efx_fini_channels(efx); + + kfree(efx->vpd_sn); + + if (efx->workqueue) { + destroy_workqueue(efx->workqueue); + efx->workqueue = NULL; + } +} + +/* This configures the PCI device to enable I/O and DMA. */ +int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask, + unsigned int mem_map_size) +{ + struct pci_dev *pci_dev = efx->pci_dev; + int rc; + + netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n"); + + rc = pci_enable_device(pci_dev); + if (rc) { + netif_err(efx, probe, efx->net_dev, + "failed to enable PCI device\n"); + goto fail1; + } + + pci_set_master(pci_dev); + + /* Set the PCI DMA mask. Try all possibilities from our + * genuine mask down to 32 bits, because some architectures + * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit + * masks even though they reject 46 bit masks. + */ + while (dma_mask > 0x7fffffffUL) { + rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask); + if (rc == 0) + break; + dma_mask >>= 1; + } + if (rc) { + netif_err(efx, probe, efx->net_dev, + "could not find a suitable DMA mask\n"); + goto fail2; + } + netif_dbg(efx, probe, efx->net_dev, + "using DMA mask %llx\n", (unsigned long long)dma_mask); + + efx->membase_phys = pci_resource_start(efx->pci_dev, bar); + if (!efx->membase_phys) { + netif_err(efx, probe, efx->net_dev, + "ERROR: No BAR%d mapping from the BIOS. 
" + "Try pci=realloc on the kernel command line\n", bar); + rc = -ENODEV; + goto fail3; + } + + rc = pci_request_region(pci_dev, bar, "sfc"); + if (rc) { + netif_err(efx, probe, efx->net_dev, + "request for memory BAR failed\n"); + rc = -EIO; + goto fail3; + } + + efx->membase = ioremap(efx->membase_phys, mem_map_size); + if (!efx->membase) { + netif_err(efx, probe, efx->net_dev, + "could not map memory BAR at %llx+%x\n", + (unsigned long long)efx->membase_phys, mem_map_size); + rc = -ENOMEM; + goto fail4; + } + netif_dbg(efx, probe, efx->net_dev, + "memory BAR at %llx+%x (virtual %p)\n", + (unsigned long long)efx->membase_phys, mem_map_size, + efx->membase); + + return 0; + +fail4: + pci_release_region(efx->pci_dev, bar); +fail3: + efx->membase_phys = 0; +fail2: + pci_disable_device(efx->pci_dev); +fail1: + return rc; +} + +void efx_fini_io(struct efx_nic *efx, int bar) +{ + netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n"); + + if (efx->membase) { + iounmap(efx->membase); + efx->membase = NULL; + } + + if (efx->membase_phys) { + pci_release_region(efx->pci_dev, bar); + efx->membase_phys = 0; + } + + /* Don't disable bus-mastering if VFs are assigned */ + if (!pci_vfs_assigned(efx->pci_dev)) + pci_disable_device(efx->pci_dev); +} + +#ifdef CONFIG_SFC_MCDI_LOGGING +static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct efx_nic *efx = dev_get_drvdata(dev); + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + + return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled); +} + +static ssize_t set_mcdi_log(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct efx_nic *efx = dev_get_drvdata(dev); + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + bool enable = count > 0 && *buf != '0'; + + mcdi->logging_enabled = enable; + return count; +} + +static DEVICE_ATTR(mcdi_logging, 0644, show_mcdi_log, set_mcdi_log); + +void efx_init_mcdi_logging(struct efx_nic *efx) +{ + int rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); + + if (rc) { + netif_warn(efx, drv, efx->net_dev, + "failed to init net dev attributes\n"); + } +} + +void efx_fini_mcdi_logging(struct efx_nic *efx) +{ + device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); +} +#endif diff --git a/drivers/net/ethernet/sfc/efx_common.h b/drivers/net/ethernet/sfc/efx_common.h new file mode 100644 index 000000000000..fa2fc681e7f9 --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_common.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
+ */ + +#ifndef EFX_COMMON_H +#define EFX_COMMON_H + +int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask, + unsigned int mem_map_size); +void efx_fini_io(struct efx_nic *efx, int bar); +int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev, + struct net_device *net_dev); +void efx_fini_struct(struct efx_nic *efx); + +void efx_start_all(struct efx_nic *efx); +void efx_stop_all(struct efx_nic *efx); + +void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats); + +int efx_create_reset_workqueue(void); +void efx_queue_reset_work(struct efx_nic *efx); +void efx_flush_reset_workqueue(struct efx_nic *efx); +void efx_destroy_reset_workqueue(void); + +void efx_start_monitor(struct efx_nic *efx); + +int __efx_reconfigure_port(struct efx_nic *efx); +int efx_reconfigure_port(struct efx_nic *efx); + +#define EFX_ASSERT_RESET_SERIALISED(efx) \ + do { \ + if ((efx->state == STATE_READY) || \ + (efx->state == STATE_RECOVERY) || \ + (efx->state == STATE_DISABLED)) \ + ASSERT_RTNL(); \ + } while (0) + +int efx_try_recovery(struct efx_nic *efx); +void efx_reset_down(struct efx_nic *efx, enum reset_type method); +int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok); +int efx_reset(struct efx_nic *efx, enum reset_type method); +void efx_schedule_reset(struct efx_nic *efx, enum reset_type type); + +static inline int efx_check_disabled(struct efx_nic *efx) +{ + if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) { + netif_err(efx, drv, efx->net_dev, + "device is disabled due to earlier errors\n"); + return -EIO; + } + return 0; +} + +#ifdef CONFIG_SFC_MCDI_LOGGING +void efx_init_mcdi_logging(struct efx_nic *efx); +void efx_fini_mcdi_logging(struct efx_nic *efx); +#else +static inline void efx_init_mcdi_logging(struct efx_nic *efx) {} +static inline void efx_fini_mcdi_logging(struct efx_nic *efx) {} +#endif + +void efx_mac_reconfigure(struct efx_nic *efx); +void efx_link_status_changed(struct efx_nic *efx); +unsigned int efx_xdp_max_mtu(struct efx_nic *efx); +int efx_change_mtu(struct net_device *net_dev, int new_mtu); + +#endif diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index b31032da4bcb..993b5769525b 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -13,92 +13,13 @@ #include "workarounds.h" #include "selftest.h" #include "efx.h" +#include "efx_channels.h" +#include "rx_common.h" +#include "tx_common.h" +#include "ethtool_common.h" #include "filter.h" #include "nic.h" -struct efx_sw_stat_desc { - const char *name; - enum { - EFX_ETHTOOL_STAT_SOURCE_nic, - EFX_ETHTOOL_STAT_SOURCE_channel, - EFX_ETHTOOL_STAT_SOURCE_tx_queue - } source; - unsigned offset; - u64(*get_stat) (void *field); /* Reader function */ -}; - -/* Initialiser for a struct efx_sw_stat_desc with type-checking */ -#define EFX_ETHTOOL_STAT(stat_name, source_name, field, field_type, \ - get_stat_function) { \ - .name = #stat_name, \ - .source = EFX_ETHTOOL_STAT_SOURCE_##source_name, \ - .offset = ((((field_type *) 0) == \ - &((struct efx_##source_name *)0)->field) ? 
\ - offsetof(struct efx_##source_name, field) : \ - offsetof(struct efx_##source_name, field)), \ - .get_stat = get_stat_function, \ -} - -static u64 efx_get_uint_stat(void *field) -{ - return *(unsigned int *)field; -} - -static u64 efx_get_atomic_stat(void *field) -{ - return atomic_read((atomic_t *) field); -} - -#define EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(field) \ - EFX_ETHTOOL_STAT(field, nic, field, \ - atomic_t, efx_get_atomic_stat) - -#define EFX_ETHTOOL_UINT_CHANNEL_STAT(field) \ - EFX_ETHTOOL_STAT(field, channel, n_##field, \ - unsigned int, efx_get_uint_stat) -#define EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(field) \ - EFX_ETHTOOL_STAT(field, channel, field, \ - unsigned int, efx_get_uint_stat) - -#define EFX_ETHTOOL_UINT_TXQ_STAT(field) \ - EFX_ETHTOOL_STAT(tx_##field, tx_queue, field, \ - unsigned int, efx_get_uint_stat) - -static const struct efx_sw_stat_desc efx_sw_stat_desc[] = { - EFX_ETHTOOL_UINT_TXQ_STAT(merge_events), - EFX_ETHTOOL_UINT_TXQ_STAT(tso_bursts), - EFX_ETHTOOL_UINT_TXQ_STAT(tso_long_headers), - EFX_ETHTOOL_UINT_TXQ_STAT(tso_packets), - EFX_ETHTOOL_UINT_TXQ_STAT(tso_fallbacks), - EFX_ETHTOOL_UINT_TXQ_STAT(pushes), - EFX_ETHTOOL_UINT_TXQ_STAT(pio_packets), - EFX_ETHTOOL_UINT_TXQ_STAT(cb_packets), - EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(rx_reset), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_ip_hdr_chksum_err), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_tcp_udp_chksum_err), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_ip_hdr_chksum_err), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_tcp_udp_chksum_err), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_eth_crc_err), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_events), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_packets), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_drops), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_bad_drops), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_tx), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_redirect), -#ifdef CONFIG_RFS_ACCEL - EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(rfs_filter_count), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_succeeded), - EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_failed), -#endif -}; - -#define EFX_ETHTOOL_SW_STAT_COUNT ARRAY_SIZE(efx_sw_stat_desc) - #define EFX_ETHTOOL_EEPROM_MAGIC 0xEFAB /************************************************************************** @@ -185,18 +106,6 @@ efx_ethtool_set_link_ksettings(struct net_device *net_dev, return rc; } -static void efx_ethtool_get_drvinfo(struct net_device *net_dev, - struct ethtool_drvinfo *info) -{ - struct efx_nic *efx = netdev_priv(net_dev); - - strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); - strlcpy(info->version, EFX_DRIVER_VERSION, sizeof(info->version)); - efx_mcdi_print_fwver(efx, info->fw_version, - sizeof(info->fw_version)); - strlcpy(info->bus_info, pci_name(efx->pci_dev), sizeof(info->bus_info)); -} - static int efx_ethtool_get_regs_len(struct net_device *net_dev) { return efx_nic_get_regs_len(netdev_priv(net_dev)); @@ -211,341 +120,6 @@ static void efx_ethtool_get_regs(struct net_device *net_dev, efx_nic_get_regs(efx, buf); } -static u32 efx_ethtool_get_msglevel(struct net_device *net_dev) -{ - struct efx_nic *efx = netdev_priv(net_dev); - return efx->msg_enable; -} - -static void efx_ethtool_set_msglevel(struct net_device *net_dev, u32 msg_enable) -{ - struct efx_nic *efx = netdev_priv(net_dev); - efx->msg_enable = 
msg_enable; -} - -/** - * efx_fill_test - fill in an individual self-test entry - * @test_index: Index of the test - * @strings: Ethtool strings, or %NULL - * @data: Ethtool test results, or %NULL - * @test: Pointer to test result (used only if data != %NULL) - * @unit_format: Unit name format (e.g. "chan\%d") - * @unit_id: Unit id (e.g. 0 for "chan0") - * @test_format: Test name format (e.g. "loopback.\%s.tx.sent") - * @test_id: Test id (e.g. "PHYXS" for "loopback.PHYXS.tx_sent") - * - * Fill in an individual self-test entry. - */ -static void efx_fill_test(unsigned int test_index, u8 *strings, u64 *data, - int *test, const char *unit_format, int unit_id, - const char *test_format, const char *test_id) -{ - char unit_str[ETH_GSTRING_LEN], test_str[ETH_GSTRING_LEN]; - - /* Fill data value, if applicable */ - if (data) - data[test_index] = *test; - - /* Fill string, if applicable */ - if (strings) { - if (strchr(unit_format, '%')) - snprintf(unit_str, sizeof(unit_str), - unit_format, unit_id); - else - strcpy(unit_str, unit_format); - snprintf(test_str, sizeof(test_str), test_format, test_id); - snprintf(strings + test_index * ETH_GSTRING_LEN, - ETH_GSTRING_LEN, - "%-6s %-24s", unit_str, test_str); - } -} - -#define EFX_CHANNEL_NAME(_channel) "chan%d", _channel->channel -#define EFX_TX_QUEUE_NAME(_tx_queue) "txq%d", _tx_queue->queue -#define EFX_RX_QUEUE_NAME(_rx_queue) "rxq%d", _rx_queue->queue -#define EFX_LOOPBACK_NAME(_mode, _counter) \ - "loopback.%s." _counter, STRING_TABLE_LOOKUP(_mode, efx_loopback_mode) - -/** - * efx_fill_loopback_test - fill in a block of loopback self-test entries - * @efx: Efx NIC - * @lb_tests: Efx loopback self-test results structure - * @mode: Loopback test mode - * @test_index: Starting index of the test - * @strings: Ethtool strings, or %NULL - * @data: Ethtool test results, or %NULL - * - * Fill in a block of loopback self-test entries. Return new test - * index. - */ -static int efx_fill_loopback_test(struct efx_nic *efx, - struct efx_loopback_self_tests *lb_tests, - enum efx_loopback_mode mode, - unsigned int test_index, - u8 *strings, u64 *data) -{ - struct efx_channel *channel = - efx_get_channel(efx, efx->tx_channel_offset); - struct efx_tx_queue *tx_queue; - - efx_for_each_channel_tx_queue(tx_queue, channel) { - efx_fill_test(test_index++, strings, data, - &lb_tests->tx_sent[tx_queue->queue], - EFX_TX_QUEUE_NAME(tx_queue), - EFX_LOOPBACK_NAME(mode, "tx_sent")); - efx_fill_test(test_index++, strings, data, - &lb_tests->tx_done[tx_queue->queue], - EFX_TX_QUEUE_NAME(tx_queue), - EFX_LOOPBACK_NAME(mode, "tx_done")); - } - efx_fill_test(test_index++, strings, data, - &lb_tests->rx_good, - "rx", 0, - EFX_LOOPBACK_NAME(mode, "rx_good")); - efx_fill_test(test_index++, strings, data, - &lb_tests->rx_bad, - "rx", 0, - EFX_LOOPBACK_NAME(mode, "rx_bad")); - - return test_index; -} - -/** - * efx_ethtool_fill_self_tests - get self-test details - * @efx: Efx NIC - * @tests: Efx self-test results structure, or %NULL - * @strings: Ethtool strings, or %NULL - * @data: Ethtool test results, or %NULL - * - * Get self-test number of strings, strings, and/or test results. - * Return number of strings (== number of test results). - * - * The reason for merging these three functions is to make sure that - * they can never be inconsistent. 
- */ -static int efx_ethtool_fill_self_tests(struct efx_nic *efx, - struct efx_self_tests *tests, - u8 *strings, u64 *data) -{ - struct efx_channel *channel; - unsigned int n = 0, i; - enum efx_loopback_mode mode; - - efx_fill_test(n++, strings, data, &tests->phy_alive, - "phy", 0, "alive", NULL); - efx_fill_test(n++, strings, data, &tests->nvram, - "core", 0, "nvram", NULL); - efx_fill_test(n++, strings, data, &tests->interrupt, - "core", 0, "interrupt", NULL); - - /* Event queues */ - efx_for_each_channel(channel, efx) { - efx_fill_test(n++, strings, data, - &tests->eventq_dma[channel->channel], - EFX_CHANNEL_NAME(channel), - "eventq.dma", NULL); - efx_fill_test(n++, strings, data, - &tests->eventq_int[channel->channel], - EFX_CHANNEL_NAME(channel), - "eventq.int", NULL); - } - - efx_fill_test(n++, strings, data, &tests->memory, - "core", 0, "memory", NULL); - efx_fill_test(n++, strings, data, &tests->registers, - "core", 0, "registers", NULL); - - if (efx->phy_op->run_tests != NULL) { - EFX_WARN_ON_PARANOID(efx->phy_op->test_name == NULL); - - for (i = 0; true; ++i) { - const char *name; - - EFX_WARN_ON_PARANOID(i >= EFX_MAX_PHY_TESTS); - name = efx->phy_op->test_name(efx, i); - if (name == NULL) - break; - - efx_fill_test(n++, strings, data, &tests->phy_ext[i], - "phy", 0, name, NULL); - } - } - - /* Loopback tests */ - for (mode = LOOPBACK_NONE; mode <= LOOPBACK_TEST_MAX; mode++) { - if (!(efx->loopback_modes & (1 << mode))) - continue; - n = efx_fill_loopback_test(efx, - &tests->loopback[mode], mode, n, - strings, data); - } - - return n; -} - -static size_t efx_describe_per_queue_stats(struct efx_nic *efx, u8 *strings) -{ - size_t n_stats = 0; - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) { - if (efx_channel_has_tx_queues(channel)) { - n_stats++; - if (strings != NULL) { - snprintf(strings, ETH_GSTRING_LEN, - "tx-%u.tx_packets", - channel->tx_queue[0].queue / - EFX_TXQ_TYPES); - - strings += ETH_GSTRING_LEN; - } - } - } - efx_for_each_channel(channel, efx) { - if (efx_channel_has_rx_queue(channel)) { - n_stats++; - if (strings != NULL) { - snprintf(strings, ETH_GSTRING_LEN, - "rx-%d.rx_packets", channel->channel); - strings += ETH_GSTRING_LEN; - } - } - } - if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) { - unsigned short xdp; - - for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) { - n_stats++; - if (strings) { - snprintf(strings, ETH_GSTRING_LEN, - "tx-xdp-cpu-%hu.tx_packets", xdp); - strings += ETH_GSTRING_LEN; - } - } - } - - return n_stats; -} - -static int efx_ethtool_get_sset_count(struct net_device *net_dev, - int string_set) -{ - struct efx_nic *efx = netdev_priv(net_dev); - - switch (string_set) { - case ETH_SS_STATS: - return efx->type->describe_stats(efx, NULL) + - EFX_ETHTOOL_SW_STAT_COUNT + - efx_describe_per_queue_stats(efx, NULL) + - efx_ptp_describe_stats(efx, NULL); - case ETH_SS_TEST: - return efx_ethtool_fill_self_tests(efx, NULL, NULL, NULL); - default: - return -EINVAL; - } -} - -static void efx_ethtool_get_strings(struct net_device *net_dev, - u32 string_set, u8 *strings) -{ - struct efx_nic *efx = netdev_priv(net_dev); - int i; - - switch (string_set) { - case ETH_SS_STATS: - strings += (efx->type->describe_stats(efx, strings) * - ETH_GSTRING_LEN); - for (i = 0; i < EFX_ETHTOOL_SW_STAT_COUNT; i++) - strlcpy(strings + i * ETH_GSTRING_LEN, - efx_sw_stat_desc[i].name, ETH_GSTRING_LEN); - strings += EFX_ETHTOOL_SW_STAT_COUNT * ETH_GSTRING_LEN; - strings += (efx_describe_per_queue_stats(efx, strings) * - ETH_GSTRING_LEN); - 
efx_ptp_describe_stats(efx, strings); - break; - case ETH_SS_TEST: - efx_ethtool_fill_self_tests(efx, NULL, strings, NULL); - break; - default: - /* No other string sets */ - break; - } -} - -static void efx_ethtool_get_stats(struct net_device *net_dev, - struct ethtool_stats *stats, - u64 *data) -{ - struct efx_nic *efx = netdev_priv(net_dev); - const struct efx_sw_stat_desc *stat; - struct efx_channel *channel; - struct efx_tx_queue *tx_queue; - struct efx_rx_queue *rx_queue; - int i; - - spin_lock_bh(&efx->stats_lock); - - /* Get NIC statistics */ - data += efx->type->update_stats(efx, data, NULL); - - /* Get software statistics */ - for (i = 0; i < EFX_ETHTOOL_SW_STAT_COUNT; i++) { - stat = &efx_sw_stat_desc[i]; - switch (stat->source) { - case EFX_ETHTOOL_STAT_SOURCE_nic: - data[i] = stat->get_stat((void *)efx + stat->offset); - break; - case EFX_ETHTOOL_STAT_SOURCE_channel: - data[i] = 0; - efx_for_each_channel(channel, efx) - data[i] += stat->get_stat((void *)channel + - stat->offset); - break; - case EFX_ETHTOOL_STAT_SOURCE_tx_queue: - data[i] = 0; - efx_for_each_channel(channel, efx) { - efx_for_each_channel_tx_queue(tx_queue, channel) - data[i] += - stat->get_stat((void *)tx_queue - + stat->offset); - } - break; - } - } - data += EFX_ETHTOOL_SW_STAT_COUNT; - - spin_unlock_bh(&efx->stats_lock); - - efx_for_each_channel(channel, efx) { - if (efx_channel_has_tx_queues(channel)) { - *data = 0; - efx_for_each_channel_tx_queue(tx_queue, channel) { - *data += tx_queue->tx_packets; - } - data++; - } - } - efx_for_each_channel(channel, efx) { - if (efx_channel_has_rx_queue(channel)) { - *data = 0; - efx_for_each_channel_rx_queue(rx_queue, channel) { - *data += rx_queue->rx_packets; - } - data++; - } - } - if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) { - int xdp; - - for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) { - data[0] = efx->xdp_tx_queues[xdp]->tx_packets; - data++; - } - } - - efx_ptp_update_stats(efx, data); -} - static void efx_ethtool_self_test(struct net_device *net_dev, struct ethtool_test *test, u64 *data) { @@ -787,16 +361,6 @@ out: return rc; } -static void efx_ethtool_get_pauseparam(struct net_device *net_dev, - struct ethtool_pauseparam *pause) -{ - struct efx_nic *efx = netdev_priv(net_dev); - - pause->rx_pause = !!(efx->wanted_fc & EFX_FC_RX); - pause->tx_pause = !!(efx->wanted_fc & EFX_FC_TX); - pause->autoneg = !!(efx->wanted_fc & EFX_FC_AUTO); -} - static void efx_ethtool_get_wol(struct net_device *net_dev, struct ethtool_wolinfo *wol) { @@ -1456,7 +1020,7 @@ static int efx_ethtool_set_rxfh_context(struct net_device *net_dev, rc = -ENOMEM; goto out_unlock; } - ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID; + ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; /* Initialise indir table and key to defaults */ efx_set_default_rx_indir_table(efx, ctx); netdev_rss_key_fill(ctx->rx_hash_key, sizeof(ctx->rx_hash_key)); diff --git a/drivers/net/ethernet/sfc/ethtool_common.c b/drivers/net/ethernet/sfc/ethtool_common.c new file mode 100644 index 000000000000..b8d281ab6c7a --- /dev/null +++ b/drivers/net/ethernet/sfc/ethtool_common.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2019 Solarflare Communications Inc. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ +#include <linux/module.h> +#include <linux/netdevice.h> +#include "net_driver.h" +#include "mcdi.h" +#include "nic.h" +#include "selftest.h" +#include "ethtool_common.h" + +struct efx_sw_stat_desc { + const char *name; + enum { + EFX_ETHTOOL_STAT_SOURCE_nic, + EFX_ETHTOOL_STAT_SOURCE_channel, + EFX_ETHTOOL_STAT_SOURCE_tx_queue + } source; + unsigned int offset; + u64 (*get_stat)(void *field); /* Reader function */ +}; + +/* Initialiser for a struct efx_sw_stat_desc with type-checking */ +#define EFX_ETHTOOL_STAT(stat_name, source_name, field, field_type, \ + get_stat_function) { \ + .name = #stat_name, \ + .source = EFX_ETHTOOL_STAT_SOURCE_##source_name, \ + .offset = ((((field_type *) 0) == \ + &((struct efx_##source_name *)0)->field) ? \ + offsetof(struct efx_##source_name, field) : \ + offsetof(struct efx_##source_name, field)), \ + .get_stat = get_stat_function, \ +} + +static u64 efx_get_uint_stat(void *field) +{ + return *(unsigned int *)field; +} + +static u64 efx_get_atomic_stat(void *field) +{ + return atomic_read((atomic_t *) field); +} + +#define EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(field) \ + EFX_ETHTOOL_STAT(field, nic, field, \ + atomic_t, efx_get_atomic_stat) + +#define EFX_ETHTOOL_UINT_CHANNEL_STAT(field) \ + EFX_ETHTOOL_STAT(field, channel, n_##field, \ + unsigned int, efx_get_uint_stat) +#define EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(field) \ + EFX_ETHTOOL_STAT(field, channel, field, \ + unsigned int, efx_get_uint_stat) + +#define EFX_ETHTOOL_UINT_TXQ_STAT(field) \ + EFX_ETHTOOL_STAT(tx_##field, tx_queue, field, \ + unsigned int, efx_get_uint_stat) + +static const struct efx_sw_stat_desc efx_sw_stat_desc[] = { + EFX_ETHTOOL_UINT_TXQ_STAT(merge_events), + EFX_ETHTOOL_UINT_TXQ_STAT(tso_bursts), + EFX_ETHTOOL_UINT_TXQ_STAT(tso_long_headers), + EFX_ETHTOOL_UINT_TXQ_STAT(tso_packets), + EFX_ETHTOOL_UINT_TXQ_STAT(tso_fallbacks), + EFX_ETHTOOL_UINT_TXQ_STAT(pushes), + EFX_ETHTOOL_UINT_TXQ_STAT(pio_packets), + EFX_ETHTOOL_UINT_TXQ_STAT(cb_packets), + EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(rx_reset), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_ip_hdr_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_tcp_udp_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_ip_hdr_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_tcp_udp_chksum_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_eth_crc_err), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_events), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_packets), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_drops), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_bad_drops), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_tx), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_redirect), +#ifdef CONFIG_RFS_ACCEL + EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(rfs_filter_count), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_succeeded), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_failed), +#endif +}; + +#define EFX_ETHTOOL_SW_STAT_COUNT ARRAY_SIZE(efx_sw_stat_desc) + +void efx_ethtool_get_drvinfo(struct net_device *net_dev, + struct ethtool_drvinfo *info) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + strlcpy(info->driver, KBUILD_MODNAME, 
sizeof(info->driver)); + strlcpy(info->version, EFX_DRIVER_VERSION, sizeof(info->version)); + efx_mcdi_print_fwver(efx, info->fw_version, + sizeof(info->fw_version)); + strlcpy(info->bus_info, pci_name(efx->pci_dev), sizeof(info->bus_info)); +} + +u32 efx_ethtool_get_msglevel(struct net_device *net_dev) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + return efx->msg_enable; +} + +void efx_ethtool_set_msglevel(struct net_device *net_dev, u32 msg_enable) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + efx->msg_enable = msg_enable; +} + +void efx_ethtool_get_pauseparam(struct net_device *net_dev, + struct ethtool_pauseparam *pause) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + pause->rx_pause = !!(efx->wanted_fc & EFX_FC_RX); + pause->tx_pause = !!(efx->wanted_fc & EFX_FC_TX); + pause->autoneg = !!(efx->wanted_fc & EFX_FC_AUTO); +} + +/** + * efx_fill_test - fill in an individual self-test entry + * @test_index: Index of the test + * @strings: Ethtool strings, or %NULL + * @data: Ethtool test results, or %NULL + * @test: Pointer to test result (used only if data != %NULL) + * @unit_format: Unit name format (e.g. "chan\%d") + * @unit_id: Unit id (e.g. 0 for "chan0") + * @test_format: Test name format (e.g. "loopback.\%s.tx_sent") + * @test_id: Test id (e.g. "PHYXS" for "loopback.PHYXS.tx_sent") + * + * Fill in an individual self-test entry. + */ +static void efx_fill_test(unsigned int test_index, u8 *strings, u64 *data, + int *test, const char *unit_format, int unit_id, + const char *test_format, const char *test_id) +{ + char unit_str[ETH_GSTRING_LEN], test_str[ETH_GSTRING_LEN]; + + /* Fill data value, if applicable */ + if (data) + data[test_index] = *test; + + /* Fill string, if applicable */ + if (strings) { + if (strchr(unit_format, '%')) + snprintf(unit_str, sizeof(unit_str), + unit_format, unit_id); + else + strcpy(unit_str, unit_format); + snprintf(test_str, sizeof(test_str), test_format, test_id); + snprintf(strings + test_index * ETH_GSTRING_LEN, + ETH_GSTRING_LEN, + "%-6s %-24s", unit_str, test_str); + } +} + +#define EFX_CHANNEL_NAME(_channel) "chan%d", _channel->channel +#define EFX_TX_QUEUE_NAME(_tx_queue) "txq%d", _tx_queue->queue +#define EFX_RX_QUEUE_NAME(_rx_queue) "rxq%d", _rx_queue->queue +#define EFX_LOOPBACK_NAME(_mode, _counter) \ + "loopback.%s." _counter, STRING_TABLE_LOOKUP(_mode, efx_loopback_mode) + +/** + * efx_fill_loopback_test - fill in a block of loopback self-test entries + * @efx: Efx NIC + * @lb_tests: Efx loopback self-test results structure + * @mode: Loopback test mode + * @test_index: Starting index of the test + * @strings: Ethtool strings, or %NULL + * @data: Ethtool test results, or %NULL + * + * Fill in a block of loopback self-test entries. Return new test + * index. 
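+ * + * For example, mode = LOOPBACK_PHYXS with a single TX queue produces tx_sent + * and tx_done entries named after that queue (e.g. "txq0 loopback.PHYXS.tx_sent"), + * followed by the shared "loopback.PHYXS.rx_good" and "loopback.PHYXS.rx_bad" + * results.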
+ */ +static int efx_fill_loopback_test(struct efx_nic *efx, + struct efx_loopback_self_tests *lb_tests, + enum efx_loopback_mode mode, + unsigned int test_index, + u8 *strings, u64 *data) +{ + struct efx_channel *channel = + efx_get_channel(efx, efx->tx_channel_offset); + struct efx_tx_queue *tx_queue; + + efx_for_each_channel_tx_queue(tx_queue, channel) { + efx_fill_test(test_index++, strings, data, + &lb_tests->tx_sent[tx_queue->queue], + EFX_TX_QUEUE_NAME(tx_queue), + EFX_LOOPBACK_NAME(mode, "tx_sent")); + efx_fill_test(test_index++, strings, data, + &lb_tests->tx_done[tx_queue->queue], + EFX_TX_QUEUE_NAME(tx_queue), + EFX_LOOPBACK_NAME(mode, "tx_done")); + } + efx_fill_test(test_index++, strings, data, + &lb_tests->rx_good, + "rx", 0, + EFX_LOOPBACK_NAME(mode, "rx_good")); + efx_fill_test(test_index++, strings, data, + &lb_tests->rx_bad, + "rx", 0, + EFX_LOOPBACK_NAME(mode, "rx_bad")); + + return test_index; +} + +/** + * efx_ethtool_fill_self_tests - get self-test details + * @efx: Efx NIC + * @tests: Efx self-test results structure, or %NULL + * @strings: Ethtool strings, or %NULL + * @data: Ethtool test results, or %NULL + * + * Get self-test number of strings, strings, and/or test results. + * Return number of strings (== number of test results). + * + * The reason for merging these three functions is to make sure that + * they can never be inconsistent. + */ +int efx_ethtool_fill_self_tests(struct efx_nic *efx, + struct efx_self_tests *tests, + u8 *strings, u64 *data) +{ + struct efx_channel *channel; + unsigned int n = 0, i; + enum efx_loopback_mode mode; + + efx_fill_test(n++, strings, data, &tests->phy_alive, + "phy", 0, "alive", NULL); + efx_fill_test(n++, strings, data, &tests->nvram, + "core", 0, "nvram", NULL); + efx_fill_test(n++, strings, data, &tests->interrupt, + "core", 0, "interrupt", NULL); + + /* Event queues */ + efx_for_each_channel(channel, efx) { + efx_fill_test(n++, strings, data, + &tests->eventq_dma[channel->channel], + EFX_CHANNEL_NAME(channel), + "eventq.dma", NULL); + efx_fill_test(n++, strings, data, + &tests->eventq_int[channel->channel], + EFX_CHANNEL_NAME(channel), + "eventq.int", NULL); + } + + efx_fill_test(n++, strings, data, &tests->memory, + "core", 0, "memory", NULL); + efx_fill_test(n++, strings, data, &tests->registers, + "core", 0, "registers", NULL); + + if (efx->phy_op->run_tests != NULL) { + EFX_WARN_ON_PARANOID(efx->phy_op->test_name == NULL); + + for (i = 0; true; ++i) { + const char *name; + + EFX_WARN_ON_PARANOID(i >= EFX_MAX_PHY_TESTS); + name = efx->phy_op->test_name(efx, i); + if (name == NULL) + break; + + efx_fill_test(n++, strings, data, &tests->phy_ext[i], + "phy", 0, name, NULL); + } + } + + /* Loopback tests */ + for (mode = LOOPBACK_NONE; mode <= LOOPBACK_TEST_MAX; mode++) { + if (!(efx->loopback_modes & (1 << mode))) + continue; + n = efx_fill_loopback_test(efx, + &tests->loopback[mode], mode, n, + strings, data); + } + + return n; +} + +static size_t efx_describe_per_queue_stats(struct efx_nic *efx, u8 *strings) +{ + size_t n_stats = 0; + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) { + if (efx_channel_has_tx_queues(channel)) { + n_stats++; + if (strings != NULL) { + snprintf(strings, ETH_GSTRING_LEN, + "tx-%u.tx_packets", + channel->tx_queue[0].queue / + EFX_TXQ_TYPES); + + strings += ETH_GSTRING_LEN; + } + } + } + efx_for_each_channel(channel, efx) { + if (efx_channel_has_rx_queue(channel)) { + n_stats++; + if (strings != NULL) { + snprintf(strings, ETH_GSTRING_LEN, + "rx-%d.rx_packets", 
channel->channel); + strings += ETH_GSTRING_LEN; + } + } + } + if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) { + unsigned short xdp; + + for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) { + n_stats++; + if (strings) { + snprintf(strings, ETH_GSTRING_LEN, + "tx-xdp-cpu-%hu.tx_packets", xdp); + strings += ETH_GSTRING_LEN; + } + } + } + + return n_stats; +} + +int efx_ethtool_get_sset_count(struct net_device *net_dev, int string_set) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + switch (string_set) { + case ETH_SS_STATS: + return efx->type->describe_stats(efx, NULL) + + EFX_ETHTOOL_SW_STAT_COUNT + + efx_describe_per_queue_stats(efx, NULL) + + efx_ptp_describe_stats(efx, NULL); + case ETH_SS_TEST: + return efx_ethtool_fill_self_tests(efx, NULL, NULL, NULL); + default: + return -EINVAL; + } +} + +void efx_ethtool_get_strings(struct net_device *net_dev, + u32 string_set, u8 *strings) +{ + struct efx_nic *efx = netdev_priv(net_dev); + int i; + + switch (string_set) { + case ETH_SS_STATS: + strings += (efx->type->describe_stats(efx, strings) * + ETH_GSTRING_LEN); + for (i = 0; i < EFX_ETHTOOL_SW_STAT_COUNT; i++) + strlcpy(strings + i * ETH_GSTRING_LEN, + efx_sw_stat_desc[i].name, ETH_GSTRING_LEN); + strings += EFX_ETHTOOL_SW_STAT_COUNT * ETH_GSTRING_LEN; + strings += (efx_describe_per_queue_stats(efx, strings) * + ETH_GSTRING_LEN); + efx_ptp_describe_stats(efx, strings); + break; + case ETH_SS_TEST: + efx_ethtool_fill_self_tests(efx, NULL, strings, NULL); + break; + default: + /* No other string sets */ + break; + } +} + +void efx_ethtool_get_stats(struct net_device *net_dev, + struct ethtool_stats *stats, + u64 *data) +{ + struct efx_nic *efx = netdev_priv(net_dev); + const struct efx_sw_stat_desc *stat; + struct efx_channel *channel; + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + int i; + + spin_lock_bh(&efx->stats_lock); + + /* Get NIC statistics */ + data += efx->type->update_stats(efx, data, NULL); + + /* Get software statistics */ + for (i = 0; i < EFX_ETHTOOL_SW_STAT_COUNT; i++) { + stat = &efx_sw_stat_desc[i]; + switch (stat->source) { + case EFX_ETHTOOL_STAT_SOURCE_nic: + data[i] = stat->get_stat((void *)efx + stat->offset); + break; + case EFX_ETHTOOL_STAT_SOURCE_channel: + data[i] = 0; + efx_for_each_channel(channel, efx) + data[i] += stat->get_stat((void *)channel + + stat->offset); + break; + case EFX_ETHTOOL_STAT_SOURCE_tx_queue: + data[i] = 0; + efx_for_each_channel(channel, efx) { + efx_for_each_channel_tx_queue(tx_queue, channel) + data[i] += + stat->get_stat((void *)tx_queue + + stat->offset); + } + break; + } + } + data += EFX_ETHTOOL_SW_STAT_COUNT; + + spin_unlock_bh(&efx->stats_lock); + + efx_for_each_channel(channel, efx) { + if (efx_channel_has_tx_queues(channel)) { + *data = 0; + efx_for_each_channel_tx_queue(tx_queue, channel) { + *data += tx_queue->tx_packets; + } + data++; + } + } + efx_for_each_channel(channel, efx) { + if (efx_channel_has_rx_queue(channel)) { + *data = 0; + efx_for_each_channel_rx_queue(rx_queue, channel) { + *data += rx_queue->rx_packets; + } + data++; + } + } + if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) { + int xdp; + + for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) { + data[0] = efx->xdp_tx_queues[xdp]->tx_packets; + data++; + } + } + + efx_ptp_update_stats(efx, data); +} diff --git a/drivers/net/ethernet/sfc/ethtool_common.h b/drivers/net/ethernet/sfc/ethtool_common.h new file mode 100644 index 000000000000..fa624313f330 --- /dev/null +++ b/drivers/net/ethernet/sfc/ethtool_common.h @@ -0,0 +1,30 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2019 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_ETHTOOL_COMMON_H +#define EFX_ETHTOOL_COMMON_H + +void efx_ethtool_get_drvinfo(struct net_device *net_dev, + struct ethtool_drvinfo *info); +u32 efx_ethtool_get_msglevel(struct net_device *net_dev); +void efx_ethtool_set_msglevel(struct net_device *net_dev, u32 msg_enable); +void efx_ethtool_get_pauseparam(struct net_device *net_dev, + struct ethtool_pauseparam *pause); +int efx_ethtool_fill_self_tests(struct efx_nic *efx, + struct efx_self_tests *tests, + u8 *strings, u64 *data); +int efx_ethtool_get_sset_count(struct net_device *net_dev, int string_set); +void efx_ethtool_get_strings(struct net_device *net_dev, u32 string_set, + u8 *strings); +void efx_ethtool_get_stats(struct net_device *net_dev, + struct ethtool_stats *stats __attribute__ ((unused)), + u64 *data); + +#endif diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index 53ae9faeb4c3..42bcd34fc508 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -2108,7 +2108,7 @@ static void ef4_net_stats(struct net_device *net_dev, } /* Context: netif_tx_lock held, BHs disabled. */ -static void ef4_watchdog(struct net_device *net_dev) +static void ef4_watchdog(struct net_device *net_dev, unsigned int txqueue) { struct ef4_nic *efx = netdev_priv(net_dev); diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index eedd32e2bfcb..dbbb898adddb 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -15,6 +15,7 @@ #include "net_driver.h" #include "bitfield.h" #include "efx.h" +#include "rx_common.h" #include "nic.h" #include "farch_regs.h" #include "sriov.h" diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h index 9081f84a2604..54a45010b576 100644 --- a/drivers/net/ethernet/sfc/mcdi.h +++ b/drivers/net/ethernet/sfc/mcdi.h @@ -346,11 +346,8 @@ int efx_mcdi_flush_rxqs(struct efx_nic *efx); int efx_mcdi_port_probe(struct efx_nic *efx); void efx_mcdi_port_remove(struct efx_nic *efx); int efx_mcdi_port_reconfigure(struct efx_nic *efx); -int efx_mcdi_port_get_number(struct efx_nic *efx); u32 efx_mcdi_phy_get_caps(struct efx_nic *efx); void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev); -int efx_mcdi_set_mac(struct efx_nic *efx); -#define EFX_MC_STATS_GENERATION_INVALID ((__force __le64)(-1)) void efx_mcdi_mac_start_stats(struct efx_nic *efx); void efx_mcdi_mac_stop_stats(struct efx_nic *efx); void efx_mcdi_mac_pull_stats(struct efx_nic *efx); diff --git a/drivers/net/ethernet/sfc/mcdi_filters.c b/drivers/net/ethernet/sfc/mcdi_filters.c new file mode 100644 index 000000000000..4310ae5bd898 --- /dev/null +++ b/drivers/net/ethernet/sfc/mcdi_filters.c @@ -0,0 +1,2270 @@ +#include "mcdi_filters.h" +#include "mcdi.h" +#include "nic.h" +#include "rx_common.h" + +/* The maximum size of a shared RSS context */ +/* TODO: this should really be from the mcdi protocol export */ +#define EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE 64UL + +#define EFX_EF10_FILTER_ID_INVALID 0xffff + +/* An arbitrary search 
limit for the software hash table */ +#define EFX_EF10_FILTER_SEARCH_LIMIT 200 + +static struct efx_filter_spec * +efx_mcdi_filter_entry_spec(const struct efx_mcdi_filter_table *table, + unsigned int filter_idx) +{ + return (struct efx_filter_spec *)(table->entry[filter_idx].spec & + ~EFX_EF10_FILTER_FLAGS); +} + +static unsigned int +efx_mcdi_filter_entry_flags(const struct efx_mcdi_filter_table *table, + unsigned int filter_idx) +{ + return table->entry[filter_idx].spec & EFX_EF10_FILTER_FLAGS; +} + +static u32 efx_mcdi_filter_get_unsafe_id(u32 filter_id) +{ + WARN_ON_ONCE(filter_id == EFX_EF10_FILTER_ID_INVALID); + return filter_id & (EFX_MCDI_FILTER_TBL_ROWS - 1); +} + +static unsigned int efx_mcdi_filter_get_unsafe_pri(u32 filter_id) +{ + return filter_id / (EFX_MCDI_FILTER_TBL_ROWS * 2); +} + +static u32 efx_mcdi_filter_make_filter_id(unsigned int pri, u16 idx) +{ + return pri * EFX_MCDI_FILTER_TBL_ROWS * 2 + idx; +} + +/* + * Decide whether a filter should be exclusive or else should allow + * delivery to additional recipients. Currently we decide that + * filters for specific local unicast MAC and IP addresses are + * exclusive. + */ +static bool efx_mcdi_filter_is_exclusive(const struct efx_filter_spec *spec) +{ + if (spec->match_flags & EFX_FILTER_MATCH_LOC_MAC && + !is_multicast_ether_addr(spec->loc_mac)) + return true; + + if ((spec->match_flags & + (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) == + (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) { + if (spec->ether_type == htons(ETH_P_IP) && + !ipv4_is_multicast(spec->loc_host[0])) + return true; + if (spec->ether_type == htons(ETH_P_IPV6) && + ((const u8 *)spec->loc_host)[0] != 0xff) + return true; + } + + return false; +} + +static void +efx_mcdi_filter_set_entry(struct efx_mcdi_filter_table *table, + unsigned int filter_idx, + const struct efx_filter_spec *spec, + unsigned int flags) +{ + table->entry[filter_idx].spec = (unsigned long)spec | flags; +} + +static void +efx_mcdi_filter_push_prep_set_match_fields(struct efx_nic *efx, + const struct efx_filter_spec *spec, + efx_dword_t *inbuf) +{ + enum efx_encap_type encap_type = efx_filter_get_encap_type(spec); + u32 match_fields = 0, uc_match, mc_match; + + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, + efx_mcdi_filter_is_exclusive(spec) ? + MC_CMD_FILTER_OP_IN_OP_INSERT : + MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE); + + /* + * Convert match flags and values. Unlike almost + * everything else in MCDI, these fields are in + * network byte order. + */ +#define COPY_VALUE(value, mcdi_field) \ + do { \ + match_fields |= \ + 1 << MC_CMD_FILTER_OP_IN_MATCH_ ## \ + mcdi_field ## _LBN; \ + BUILD_BUG_ON( \ + MC_CMD_FILTER_OP_IN_ ## mcdi_field ## _LEN < \ + sizeof(value)); \ + memcpy(MCDI_PTR(inbuf, FILTER_OP_IN_ ## mcdi_field), \ + &value, sizeof(value)); \ + } while (0) +#define COPY_FIELD(gen_flag, gen_field, mcdi_field) \ + if (spec->match_flags & EFX_FILTER_MATCH_ ## gen_flag) { \ + COPY_VALUE(spec->gen_field, mcdi_field); \ + } + /* + * Handle encap filters first. They will always be mismatch + * (unknown UC or MC) filters + */ + if (encap_type) { + /* + * ether_type and outer_ip_proto need to be variables + * because COPY_VALUE wants to memcpy them + */ + __be16 ether_type = + htons(encap_type & EFX_ENCAP_FLAG_IPV6 ? 
+ ETH_P_IPV6 : ETH_P_IP); + u8 vni_type = MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_GENEVE; + u8 outer_ip_proto; + + switch (encap_type & EFX_ENCAP_TYPES_MASK) { + case EFX_ENCAP_TYPE_VXLAN: + vni_type = MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_VXLAN; + /* fallthrough */ + case EFX_ENCAP_TYPE_GENEVE: + COPY_VALUE(ether_type, ETHER_TYPE); + outer_ip_proto = IPPROTO_UDP; + COPY_VALUE(outer_ip_proto, IP_PROTO); + /* + * We always need to set the type field, even + * though we're not matching on the TNI. + */ + MCDI_POPULATE_DWORD_1(inbuf, + FILTER_OP_EXT_IN_VNI_OR_VSID, + FILTER_OP_EXT_IN_VNI_TYPE, + vni_type); + break; + case EFX_ENCAP_TYPE_NVGRE: + COPY_VALUE(ether_type, ETHER_TYPE); + outer_ip_proto = IPPROTO_GRE; + COPY_VALUE(outer_ip_proto, IP_PROTO); + break; + default: + WARN_ON(1); + } + + uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_LBN; + mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_LBN; + } else { + uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_UCAST_DST_LBN; + mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_MCAST_DST_LBN; + } + + if (spec->match_flags & EFX_FILTER_MATCH_LOC_MAC_IG) + match_fields |= + is_multicast_ether_addr(spec->loc_mac) ? + 1 << mc_match : + 1 << uc_match; + COPY_FIELD(REM_HOST, rem_host, SRC_IP); + COPY_FIELD(LOC_HOST, loc_host, DST_IP); + COPY_FIELD(REM_MAC, rem_mac, SRC_MAC); + COPY_FIELD(REM_PORT, rem_port, SRC_PORT); + COPY_FIELD(LOC_MAC, loc_mac, DST_MAC); + COPY_FIELD(LOC_PORT, loc_port, DST_PORT); + COPY_FIELD(ETHER_TYPE, ether_type, ETHER_TYPE); + COPY_FIELD(INNER_VID, inner_vid, INNER_VLAN); + COPY_FIELD(OUTER_VID, outer_vid, OUTER_VLAN); + COPY_FIELD(IP_PROTO, ip_proto, IP_PROTO); +#undef COPY_FIELD +#undef COPY_VALUE + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_MATCH_FIELDS, + match_fields); +} + +static void efx_mcdi_filter_push_prep(struct efx_nic *efx, + const struct efx_filter_spec *spec, + efx_dword_t *inbuf, u64 handle, + struct efx_rss_context *ctx, + bool replacing) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + u32 flags = spec->flags; + + memset(inbuf, 0, MC_CMD_FILTER_OP_EXT_IN_LEN); + + /* If RSS filter, caller better have given us an RSS context */ + if (flags & EFX_FILTER_FLAG_RX_RSS) { + /* + * We don't have the ability to return an error, so we'll just + * log a warning and disable RSS for the filter. + */ + if (WARN_ON_ONCE(!ctx)) + flags &= ~EFX_FILTER_FLAG_RX_RSS; + else if (WARN_ON_ONCE(ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID)) + flags &= ~EFX_FILTER_FLAG_RX_RSS; + } + + if (replacing) { + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, + MC_CMD_FILTER_OP_IN_OP_REPLACE); + MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, handle); + } else { + efx_mcdi_filter_push_prep_set_match_fields(efx, spec, inbuf); + } + + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_PORT_ID, nic_data->vport_id); + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_DEST, + spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP ? + MC_CMD_FILTER_OP_IN_RX_DEST_DROP : + MC_CMD_FILTER_OP_IN_RX_DEST_HOST); + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_TX_DOMAIN, 0); + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_TX_DEST, + MC_CMD_FILTER_OP_IN_TX_DEST_DEFAULT); + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_QUEUE, + spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP ? + 0 : spec->dmaq_id); + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_MODE, + (flags & EFX_FILTER_FLAG_RX_RSS) ? 
+ MC_CMD_FILTER_OP_IN_RX_MODE_RSS : + MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE); + if (flags & EFX_FILTER_FLAG_RX_RSS) + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_CONTEXT, ctx->context_id); +} + +static int efx_mcdi_filter_push(struct efx_nic *efx, + const struct efx_filter_spec *spec, u64 *handle, + struct efx_rss_context *ctx, bool replacing) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_FILTER_OP_EXT_OUT_LEN); + size_t outlen; + int rc; + + efx_mcdi_filter_push_prep(efx, spec, inbuf, *handle, ctx, replacing); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc && spec->priority != EFX_FILTER_PRI_HINT) + efx_mcdi_display_error(efx, MC_CMD_FILTER_OP, sizeof(inbuf), + outbuf, outlen, rc); + if (rc == 0) + *handle = MCDI_QWORD(outbuf, FILTER_OP_OUT_HANDLE); + if (rc == -ENOSPC) + rc = -EBUSY; /* to match efx_farch_filter_insert() */ + return rc; +} + +static u32 efx_mcdi_filter_mcdi_flags_from_spec(const struct efx_filter_spec *spec) +{ + enum efx_encap_type encap_type = efx_filter_get_encap_type(spec); + unsigned int match_flags = spec->match_flags; + unsigned int uc_match, mc_match; + u32 mcdi_flags = 0; + +#define MAP_FILTER_TO_MCDI_FLAG(gen_flag, mcdi_field, encap) { \ + unsigned int old_match_flags = match_flags; \ + match_flags &= ~EFX_FILTER_MATCH_ ## gen_flag; \ + if (match_flags != old_match_flags) \ + mcdi_flags |= \ + (1 << ((encap) ? \ + MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_ ## \ + mcdi_field ## _LBN : \ + MC_CMD_FILTER_OP_EXT_IN_MATCH_ ##\ + mcdi_field ## _LBN)); \ + } + /* inner or outer based on encap type */ + MAP_FILTER_TO_MCDI_FLAG(REM_HOST, SRC_IP, encap_type); + MAP_FILTER_TO_MCDI_FLAG(LOC_HOST, DST_IP, encap_type); + MAP_FILTER_TO_MCDI_FLAG(REM_MAC, SRC_MAC, encap_type); + MAP_FILTER_TO_MCDI_FLAG(REM_PORT, SRC_PORT, encap_type); + MAP_FILTER_TO_MCDI_FLAG(LOC_MAC, DST_MAC, encap_type); + MAP_FILTER_TO_MCDI_FLAG(LOC_PORT, DST_PORT, encap_type); + MAP_FILTER_TO_MCDI_FLAG(ETHER_TYPE, ETHER_TYPE, encap_type); + MAP_FILTER_TO_MCDI_FLAG(IP_PROTO, IP_PROTO, encap_type); + /* always outer */ + MAP_FILTER_TO_MCDI_FLAG(INNER_VID, INNER_VLAN, false); + MAP_FILTER_TO_MCDI_FLAG(OUTER_VID, OUTER_VLAN, false); +#undef MAP_FILTER_TO_MCDI_FLAG + + /* special handling for encap type, and mismatch */ + if (encap_type) { + match_flags &= ~EFX_FILTER_MATCH_ENCAP_TYPE; + mcdi_flags |= + (1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_ETHER_TYPE_LBN); + mcdi_flags |= (1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_IP_PROTO_LBN); + + uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_LBN; + mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_LBN; + } else { + uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_UCAST_DST_LBN; + mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_MCAST_DST_LBN; + } + + if (match_flags & EFX_FILTER_MATCH_LOC_MAC_IG) { + match_flags &= ~EFX_FILTER_MATCH_LOC_MAC_IG; + mcdi_flags |= + is_multicast_ether_addr(spec->loc_mac) ? + 1 << mc_match : + 1 << uc_match; + } + + /* Did we map them all? 
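Any bit still set in match_flags at this point is a match type with no MCDI equivalent; the WARN_ON_ONCE below makes such a mistranslation loud instead of letting it pass silently.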
*/ + WARN_ON_ONCE(match_flags); + + return mcdi_flags; +} + +static int efx_mcdi_filter_pri(struct efx_mcdi_filter_table *table, + const struct efx_filter_spec *spec) +{ + u32 mcdi_flags = efx_mcdi_filter_mcdi_flags_from_spec(spec); + unsigned int match_pri; + + for (match_pri = 0; + match_pri < table->rx_match_count; + match_pri++) + if (table->rx_match_mcdi_flags[match_pri] == mcdi_flags) + return match_pri; + + return -EPROTONOSUPPORT; +} + +static s32 efx_mcdi_filter_insert_locked(struct efx_nic *efx, + struct efx_filter_spec *spec, + bool replace_equal) +{ + DECLARE_BITMAP(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + struct efx_mcdi_filter_table *table; + struct efx_filter_spec *saved_spec; + struct efx_rss_context *ctx = NULL; + unsigned int match_pri, hash; + unsigned int priv_flags; + bool rss_locked = false; + bool replacing = false; + unsigned int depth, i; + int ins_index = -1; + DEFINE_WAIT(wait); + bool is_mc_recip; + s32 rc; + + WARN_ON(!rwsem_is_locked(&efx->filter_sem)); + table = efx->filter_state; + down_write(&table->lock); + + /* For now, only support RX filters */ + if ((spec->flags & (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)) != + EFX_FILTER_FLAG_RX) { + rc = -EINVAL; + goto out_unlock; + } + + rc = efx_mcdi_filter_pri(table, spec); + if (rc < 0) + goto out_unlock; + match_pri = rc; + + hash = efx_filter_spec_hash(spec); + is_mc_recip = efx_filter_is_mc_recipient(spec); + if (is_mc_recip) + bitmap_zero(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT); + + if (spec->flags & EFX_FILTER_FLAG_RX_RSS) { + mutex_lock(&efx->rss_lock); + rss_locked = true; + if (spec->rss_context) + ctx = efx_find_rss_context_entry(efx, spec->rss_context); + else + ctx = &efx->rss_context; + if (!ctx) { + rc = -ENOENT; + goto out_unlock; + } + if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) { + rc = -EOPNOTSUPP; + goto out_unlock; + } + } + + /* Find any existing filters with the same match tuple or + * else a free slot to insert at. + */ + for (depth = 1; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) { + i = (hash + depth) & (EFX_MCDI_FILTER_TBL_ROWS - 1); + saved_spec = efx_mcdi_filter_entry_spec(table, i); + + if (!saved_spec) { + if (ins_index < 0) + ins_index = i; + } else if (efx_filter_spec_equal(spec, saved_spec)) { + if (spec->priority < saved_spec->priority && + spec->priority != EFX_FILTER_PRI_AUTO) { + rc = -EPERM; + goto out_unlock; + } + if (!is_mc_recip) { + /* This is the only one */ + if (spec->priority == + saved_spec->priority && + !replace_equal) { + rc = -EEXIST; + goto out_unlock; + } + ins_index = i; + break; + } else if (spec->priority > + saved_spec->priority || + (spec->priority == + saved_spec->priority && + replace_equal)) { + if (ins_index < 0) + ins_index = i; + else + __set_bit(depth, mc_rem_map); + } + } + } + + /* Once we reach the maximum search depth, use the first suitable + * slot, or return -EBUSY if there was none + */ + if (ins_index < 0) { + rc = -EBUSY; + goto out_unlock; + } + + /* Create a software table entry if necessary. 
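If a matching entry already exists we may only need to update its flags or replace it in place; otherwise allocate a private copy of the spec, so a failed hardware insert can be rolled back without touching the caller's copy.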
*/ + saved_spec = efx_mcdi_filter_entry_spec(table, ins_index); + if (saved_spec) { + if (spec->priority == EFX_FILTER_PRI_AUTO && + saved_spec->priority >= EFX_FILTER_PRI_AUTO) { + /* Just make sure it won't be removed */ + if (saved_spec->priority > EFX_FILTER_PRI_AUTO) + saved_spec->flags |= EFX_FILTER_FLAG_RX_OVER_AUTO; + table->entry[ins_index].spec &= + ~EFX_EF10_FILTER_FLAG_AUTO_OLD; + rc = ins_index; + goto out_unlock; + } + replacing = true; + priv_flags = efx_mcdi_filter_entry_flags(table, ins_index); + } else { + saved_spec = kmalloc(sizeof(*spec), GFP_ATOMIC); + if (!saved_spec) { + rc = -ENOMEM; + goto out_unlock; + } + *saved_spec = *spec; + priv_flags = 0; + } + efx_mcdi_filter_set_entry(table, ins_index, saved_spec, priv_flags); + + /* Actually insert the filter on the HW */ + rc = efx_mcdi_filter_push(efx, spec, &table->entry[ins_index].handle, + ctx, replacing); + + if (rc == -EINVAL && nic_data->must_realloc_vis) + /* The MC rebooted under us, causing it to reject our filter + * insertion as pointing to an invalid VI (spec->dmaq_id). + */ + rc = -EAGAIN; + + /* Finalise the software table entry */ + if (rc == 0) { + if (replacing) { + /* Update the fields that may differ */ + if (saved_spec->priority == EFX_FILTER_PRI_AUTO) + saved_spec->flags |= + EFX_FILTER_FLAG_RX_OVER_AUTO; + saved_spec->priority = spec->priority; + saved_spec->flags &= EFX_FILTER_FLAG_RX_OVER_AUTO; + saved_spec->flags |= spec->flags; + saved_spec->rss_context = spec->rss_context; + saved_spec->dmaq_id = spec->dmaq_id; + } + } else if (!replacing) { + kfree(saved_spec); + saved_spec = NULL; + } else { + /* We failed to replace, so the old filter is still present. + * Roll back the software table to reflect this. In fact the + * efx_mcdi_filter_set_entry() call below will do the right + * thing, so nothing extra is needed here. + */ + } + efx_mcdi_filter_set_entry(table, ins_index, saved_spec, priv_flags); + + /* Remove and finalise entries for lower-priority multicast + * recipients + */ + if (is_mc_recip) { + MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); + unsigned int depth, i; + + memset(inbuf, 0, sizeof(inbuf)); + + for (depth = 0; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) { + if (!test_bit(depth, mc_rem_map)) + continue; + + i = (hash + depth) & (EFX_MCDI_FILTER_TBL_ROWS - 1); + saved_spec = efx_mcdi_filter_entry_spec(table, i); + priv_flags = efx_mcdi_filter_entry_flags(table, i); + + if (rc == 0) { + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, + MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE); + MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, + table->entry[i].handle); + rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP, + inbuf, sizeof(inbuf), + NULL, 0, NULL); + } + + if (rc == 0) { + kfree(saved_spec); + saved_spec = NULL; + priv_flags = 0; + } + efx_mcdi_filter_set_entry(table, i, saved_spec, + priv_flags); + } + } + + /* If successful, return the inserted filter ID */ + if (rc == 0) + rc = efx_mcdi_filter_make_filter_id(match_pri, ins_index); + +out_unlock: + if (rss_locked) + mutex_unlock(&efx->rss_lock); + up_write(&table->lock); + return rc; +} + +s32 efx_mcdi_filter_insert(struct efx_nic *efx, struct efx_filter_spec *spec, + bool replace_equal) +{ + s32 ret; + + down_read(&efx->filter_sem); + ret = efx_mcdi_filter_insert_locked(efx, spec, replace_equal); + up_read(&efx->filter_sem); + + return ret; +} + +/* + * Remove a filter. + * If !by_index, remove by ID + * If by_index, remove by index + * Filter ID may come from userland and must be range-checked. 
+ * Caller must hold efx->filter_sem for read, and efx->filter_state->lock + * for write. + */ +static int efx_mcdi_filter_remove_internal(struct efx_nic *efx, + unsigned int priority_mask, + u32 filter_id, bool by_index) +{ + unsigned int filter_idx = efx_mcdi_filter_get_unsafe_id(filter_id); + struct efx_mcdi_filter_table *table = efx->filter_state; + MCDI_DECLARE_BUF(inbuf, + MC_CMD_FILTER_OP_IN_HANDLE_OFST + + MC_CMD_FILTER_OP_IN_HANDLE_LEN); + struct efx_filter_spec *spec; + DEFINE_WAIT(wait); + int rc; + + spec = efx_mcdi_filter_entry_spec(table, filter_idx); + if (!spec || + (!by_index && + efx_mcdi_filter_pri(table, spec) != + efx_mcdi_filter_get_unsafe_pri(filter_id))) + return -ENOENT; + + if (spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO && + priority_mask == (1U << EFX_FILTER_PRI_AUTO)) { + /* Just remove flags */ + spec->flags &= ~EFX_FILTER_FLAG_RX_OVER_AUTO; + table->entry[filter_idx].spec &= ~EFX_EF10_FILTER_FLAG_AUTO_OLD; + return 0; + } + + if (!(priority_mask & (1U << spec->priority))) + return -ENOENT; + + if (spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO) { + /* Reset to an automatic filter */ + + struct efx_filter_spec new_spec = *spec; + + new_spec.priority = EFX_FILTER_PRI_AUTO; + new_spec.flags = (EFX_FILTER_FLAG_RX | + (efx_rss_active(&efx->rss_context) ? + EFX_FILTER_FLAG_RX_RSS : 0)); + new_spec.dmaq_id = 0; + new_spec.rss_context = 0; + rc = efx_mcdi_filter_push(efx, &new_spec, + &table->entry[filter_idx].handle, + &efx->rss_context, + true); + + if (rc == 0) + *spec = new_spec; + } else { + /* Really remove the filter */ + + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, + efx_mcdi_filter_is_exclusive(spec) ? + MC_CMD_FILTER_OP_IN_OP_REMOVE : + MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE); + MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, + table->entry[filter_idx].handle); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, + inbuf, sizeof(inbuf), NULL, 0, NULL); + + if ((rc == 0) || (rc == -ENOENT)) { + /* Filter removed OK or didn't actually exist */ + kfree(spec); + efx_mcdi_filter_set_entry(table, filter_idx, NULL, 0); + } else { + efx_mcdi_display_error(efx, MC_CMD_FILTER_OP, + MC_CMD_FILTER_OP_EXT_IN_LEN, + NULL, 0, rc); + } + } + + return rc; +} + +/* Remove filters that weren't renewed. 
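+ * Anything still carrying EFX_EF10_FILTER_FLAG_AUTO_OLD was marked by
+ * efx_mcdi_filter_mark_old() but not renewed by the subsequent address-list
+ * sync, so it no longer matches a current device address.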
*/ +static void efx_mcdi_filter_remove_old(struct efx_nic *efx) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + int remove_failed = 0; + int remove_noent = 0; + int rc; + int i; + + down_write(&table->lock); + for (i = 0; i < EFX_MCDI_FILTER_TBL_ROWS; i++) { + if (READ_ONCE(table->entry[i].spec) & + EFX_EF10_FILTER_FLAG_AUTO_OLD) { + rc = efx_mcdi_filter_remove_internal(efx, + 1U << EFX_FILTER_PRI_AUTO, i, true); + if (rc == -ENOENT) + remove_noent++; + else if (rc) + remove_failed++; + } + } + up_write(&table->lock); + + if (remove_failed) + netif_info(efx, drv, efx->net_dev, + "%s: failed to remove %d filters\n", + __func__, remove_failed); + if (remove_noent) + netif_info(efx, drv, efx->net_dev, + "%s: failed to remove %d non-existent filters\n", + __func__, remove_noent); +} + +int efx_mcdi_filter_remove_safe(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 filter_id) +{ + struct efx_mcdi_filter_table *table; + int rc; + + down_read(&efx->filter_sem); + table = efx->filter_state; + down_write(&table->lock); + rc = efx_mcdi_filter_remove_internal(efx, 1U << priority, filter_id, + false); + up_write(&table->lock); + up_read(&efx->filter_sem); + return rc; +} + +/* Caller must hold efx->filter_sem for read */ +static void efx_mcdi_filter_remove_unsafe(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 filter_id) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + + if (filter_id == EFX_EF10_FILTER_ID_INVALID) + return; + + down_write(&table->lock); + efx_mcdi_filter_remove_internal(efx, 1U << priority, filter_id, + true); + up_write(&table->lock); +} + +int efx_mcdi_filter_get_safe(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 filter_id, struct efx_filter_spec *spec) +{ + unsigned int filter_idx = efx_mcdi_filter_get_unsafe_id(filter_id); + const struct efx_filter_spec *saved_spec; + struct efx_mcdi_filter_table *table; + int rc; + + down_read(&efx->filter_sem); + table = efx->filter_state; + down_read(&table->lock); + saved_spec = efx_mcdi_filter_entry_spec(table, filter_idx); + if (saved_spec && saved_spec->priority == priority && + efx_mcdi_filter_pri(table, saved_spec) == + efx_mcdi_filter_get_unsafe_pri(filter_id)) { + *spec = *saved_spec; + rc = 0; + } else { + rc = -ENOENT; + } + up_read(&table->lock); + up_read(&efx->filter_sem); + return rc; +} + +static int efx_mcdi_filter_insert_addr_list(struct efx_nic *efx, + struct efx_mcdi_filter_vlan *vlan, + bool multicast, bool rollback) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + struct efx_mcdi_dev_addr *addr_list; + enum efx_filter_flags filter_flags; + struct efx_filter_spec spec; + u8 baddr[ETH_ALEN]; + unsigned int i, j; + int addr_count; + u16 *ids; + int rc; + + if (multicast) { + addr_list = table->dev_mc_list; + addr_count = table->dev_mc_count; + ids = vlan->mc; + } else { + addr_list = table->dev_uc_list; + addr_count = table->dev_uc_count; + ids = vlan->uc; + } + + filter_flags = efx_rss_active(&efx->rss_context) ? 
EFX_FILTER_FLAG_RX_RSS : 0; + + /* Insert/renew filters */ + for (i = 0; i < addr_count; i++) { + EFX_WARN_ON_PARANOID(ids[i] != EFX_EF10_FILTER_ID_INVALID); + efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0); + efx_filter_set_eth_local(&spec, vlan->vid, addr_list[i].addr); + rc = efx_mcdi_filter_insert_locked(efx, &spec, true); + if (rc < 0) { + if (rollback) { + netif_info(efx, drv, efx->net_dev, + "efx_mcdi_filter_insert failed rc=%d\n", + rc); + /* Fall back to promiscuous */ + for (j = 0; j < i; j++) { + efx_mcdi_filter_remove_unsafe( + efx, EFX_FILTER_PRI_AUTO, + ids[j]); + ids[j] = EFX_EF10_FILTER_ID_INVALID; + } + return rc; + } else { + /* keep invalid ID, and carry on */ + } + } else { + ids[i] = efx_mcdi_filter_get_unsafe_id(rc); + } + } + + if (multicast && rollback) { + /* Also need an Ethernet broadcast filter */ + EFX_WARN_ON_PARANOID(vlan->default_filters[EFX_EF10_BCAST] != + EFX_EF10_FILTER_ID_INVALID); + efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0); + eth_broadcast_addr(baddr); + efx_filter_set_eth_local(&spec, vlan->vid, baddr); + rc = efx_mcdi_filter_insert_locked(efx, &spec, true); + if (rc < 0) { + netif_warn(efx, drv, efx->net_dev, + "Broadcast filter insert failed rc=%d\n", rc); + /* Fall back to promiscuous */ + for (j = 0; j < i; j++) { + efx_mcdi_filter_remove_unsafe( + efx, EFX_FILTER_PRI_AUTO, + ids[j]); + ids[j] = EFX_EF10_FILTER_ID_INVALID; + } + return rc; + } else { + vlan->default_filters[EFX_EF10_BCAST] = + efx_mcdi_filter_get_unsafe_id(rc); + } + } + + return 0; +} + +static int efx_mcdi_filter_insert_def(struct efx_nic *efx, + struct efx_mcdi_filter_vlan *vlan, + enum efx_encap_type encap_type, + bool multicast, bool rollback) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + enum efx_filter_flags filter_flags; + struct efx_filter_spec spec; + u8 baddr[ETH_ALEN]; + int rc; + u16 *id; + + filter_flags = efx_rss_active(&efx->rss_context) ? EFX_FILTER_FLAG_RX_RSS : 0; + + efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0); + + if (multicast) + efx_filter_set_mc_def(&spec); + else + efx_filter_set_uc_def(&spec); + + if (encap_type) { + if (nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN)) + efx_filter_set_encap_type(&spec, encap_type); + else + /* + * don't insert encap filters on non-supporting + * platforms. ID will be left as INVALID. + */ + return 0; + } + + if (vlan->vid != EFX_FILTER_VID_UNSPEC) + efx_filter_set_eth_local(&spec, vlan->vid, NULL); + + rc = efx_mcdi_filter_insert_locked(efx, &spec, true); + if (rc < 0) { + const char *um = multicast ? "Multicast" : "Unicast"; + const char *encap_name = ""; + const char *encap_ipv = ""; + + if ((encap_type & EFX_ENCAP_TYPES_MASK) == + EFX_ENCAP_TYPE_VXLAN) + encap_name = "VXLAN "; + else if ((encap_type & EFX_ENCAP_TYPES_MASK) == + EFX_ENCAP_TYPE_NVGRE) + encap_name = "NVGRE "; + else if ((encap_type & EFX_ENCAP_TYPES_MASK) == + EFX_ENCAP_TYPE_GENEVE) + encap_name = "GENEVE "; + if (encap_type & EFX_ENCAP_FLAG_IPV6) + encap_ipv = "IPv6 "; + else if (encap_type) + encap_ipv = "IPv4 "; + + /* + * unprivileged functions can't insert mismatch filters + * for encapsulated or unicast traffic, so downgrade + * those warnings to debug. 
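+ * (an -EPERM for those cases is therefore logged at debug level by the
+ * netif_cond_dbg() below, while any other failure is still a warning)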
+ */ + netif_cond_dbg(efx, drv, efx->net_dev, + rc == -EPERM && (encap_type || !multicast), warn, + "%s%s%s mismatch filter insert failed rc=%d\n", + encap_name, encap_ipv, um, rc); + } else if (multicast) { + /* mapping from encap types to default filter IDs (multicast) */ + static enum efx_mcdi_filter_default_filters map[] = { + [EFX_ENCAP_TYPE_NONE] = EFX_EF10_MCDEF, + [EFX_ENCAP_TYPE_VXLAN] = EFX_EF10_VXLAN4_MCDEF, + [EFX_ENCAP_TYPE_NVGRE] = EFX_EF10_NVGRE4_MCDEF, + [EFX_ENCAP_TYPE_GENEVE] = EFX_EF10_GENEVE4_MCDEF, + [EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6] = + EFX_EF10_VXLAN6_MCDEF, + [EFX_ENCAP_TYPE_NVGRE | EFX_ENCAP_FLAG_IPV6] = + EFX_EF10_NVGRE6_MCDEF, + [EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6] = + EFX_EF10_GENEVE6_MCDEF, + }; + + /* quick bounds check (BCAST result impossible) */ + BUILD_BUG_ON(EFX_EF10_BCAST != 0); + if (encap_type >= ARRAY_SIZE(map) || map[encap_type] == 0) { + WARN_ON(1); + return -EINVAL; + } + /* then follow map */ + id = &vlan->default_filters[map[encap_type]]; + + EFX_WARN_ON_PARANOID(*id != EFX_EF10_FILTER_ID_INVALID); + *id = efx_mcdi_filter_get_unsafe_id(rc); + if (!nic_data->workaround_26807 && !encap_type) { + /* Also need an Ethernet broadcast filter */ + efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, + filter_flags, 0); + eth_broadcast_addr(baddr); + efx_filter_set_eth_local(&spec, vlan->vid, baddr); + rc = efx_mcdi_filter_insert_locked(efx, &spec, true); + if (rc < 0) { + netif_warn(efx, drv, efx->net_dev, + "Broadcast filter insert failed rc=%d\n", + rc); + if (rollback) { + /* Roll back the mc_def filter */ + efx_mcdi_filter_remove_unsafe( + efx, EFX_FILTER_PRI_AUTO, + *id); + *id = EFX_EF10_FILTER_ID_INVALID; + return rc; + } + } else { + EFX_WARN_ON_PARANOID( + vlan->default_filters[EFX_EF10_BCAST] != + EFX_EF10_FILTER_ID_INVALID); + vlan->default_filters[EFX_EF10_BCAST] = + efx_mcdi_filter_get_unsafe_id(rc); + } + } + rc = 0; + } else { + /* mapping from encap types to default filter IDs (unicast) */ + static enum efx_mcdi_filter_default_filters map[] = { + [EFX_ENCAP_TYPE_NONE] = EFX_EF10_UCDEF, + [EFX_ENCAP_TYPE_VXLAN] = EFX_EF10_VXLAN4_UCDEF, + [EFX_ENCAP_TYPE_NVGRE] = EFX_EF10_NVGRE4_UCDEF, + [EFX_ENCAP_TYPE_GENEVE] = EFX_EF10_GENEVE4_UCDEF, + [EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6] = + EFX_EF10_VXLAN6_UCDEF, + [EFX_ENCAP_TYPE_NVGRE | EFX_ENCAP_FLAG_IPV6] = + EFX_EF10_NVGRE6_UCDEF, + [EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6] = + EFX_EF10_GENEVE6_UCDEF, + }; + + /* quick bounds check (BCAST result impossible) */ + BUILD_BUG_ON(EFX_EF10_BCAST != 0); + if (encap_type >= ARRAY_SIZE(map) || map[encap_type] == 0) { + WARN_ON(1); + return -EINVAL; + } + /* then follow map */ + id = &vlan->default_filters[map[encap_type]]; + EFX_WARN_ON_PARANOID(*id != EFX_EF10_FILTER_ID_INVALID); + *id = rc; + rc = 0; + } + return rc; +} + +/* + * Caller must hold efx->filter_sem for read if race against + * efx_mcdi_filter_table_remove() is possible + */ +static void efx_mcdi_filter_vlan_sync_rx_mode(struct efx_nic *efx, + struct efx_mcdi_filter_vlan *vlan) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + /* + * Do not install unspecified VID if VLAN filtering is enabled. + * Do not install all specified VIDs if VLAN filtering is disabled. 
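+ * With VLAN filtering disabled only the EFX_FILTER_VID_UNSPEC entry
+ * installs filters; with it enabled, only the specific-VID entries do.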
+ */ + if ((vlan->vid == EFX_FILTER_VID_UNSPEC) == table->vlan_filter) + return; + + /* Insert/renew unicast filters */ + if (table->uc_promisc) { + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NONE, + false, false); + efx_mcdi_filter_insert_addr_list(efx, vlan, false, false); + } else { + /* + * If any of the filters failed to insert, fall back to + * promiscuous mode - add in the uc_def filter. But keep + * our individual unicast filters. + */ + if (efx_mcdi_filter_insert_addr_list(efx, vlan, false, false)) + efx_mcdi_filter_insert_def(efx, vlan, + EFX_ENCAP_TYPE_NONE, + false, false); + } + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN, + false, false); + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN | + EFX_ENCAP_FLAG_IPV6, + false, false); + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE, + false, false); + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE | + EFX_ENCAP_FLAG_IPV6, + false, false); + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE, + false, false); + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE | + EFX_ENCAP_FLAG_IPV6, + false, false); + + /* + * Insert/renew multicast filters + * + * If changing promiscuous state with cascaded multicast filters, remove + * old filters first, so that packets are dropped rather than duplicated + */ + if (nic_data->workaround_26807 && + table->mc_promisc_last != table->mc_promisc) + efx_mcdi_filter_remove_old(efx); + if (table->mc_promisc) { + if (nic_data->workaround_26807) { + /* + * If we failed to insert promiscuous filters, rollback + * and fall back to individual multicast filters + */ + if (efx_mcdi_filter_insert_def(efx, vlan, + EFX_ENCAP_TYPE_NONE, + true, true)) { + /* Changing promisc state, so remove old filters */ + efx_mcdi_filter_remove_old(efx); + efx_mcdi_filter_insert_addr_list(efx, vlan, + true, false); + } + } else { + /* + * If we failed to insert promiscuous filters, don't + * rollback. Regardless, also insert the mc_list, + * unless it's incomplete due to overflow + */ + efx_mcdi_filter_insert_def(efx, vlan, + EFX_ENCAP_TYPE_NONE, + true, false); + if (!table->mc_overflow) + efx_mcdi_filter_insert_addr_list(efx, vlan, + true, false); + } + } else { + /* + * If any filters failed to insert, rollback and fall back to + * promiscuous mode - mc_def filter and maybe broadcast. If + * that fails, roll back again and insert as many of our + * individual multicast filters as we can. 
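+ * (rollback=true in the first call below makes the mc_list insertion
+ * all-or-nothing, which is what allows this clean fallback chain)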
+ */ + if (efx_mcdi_filter_insert_addr_list(efx, vlan, true, true)) { + /* Changing promisc state, so remove old filters */ + if (nic_data->workaround_26807) + efx_mcdi_filter_remove_old(efx); + if (efx_mcdi_filter_insert_def(efx, vlan, + EFX_ENCAP_TYPE_NONE, + true, true)) + efx_mcdi_filter_insert_addr_list(efx, vlan, + true, false); + } + } + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN, + true, false); + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN | + EFX_ENCAP_FLAG_IPV6, + true, false); + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE, + true, false); + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE | + EFX_ENCAP_FLAG_IPV6, + true, false); + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE, + true, false); + efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE | + EFX_ENCAP_FLAG_IPV6, + true, false); +} + +int efx_mcdi_filter_clear_rx(struct efx_nic *efx, + enum efx_filter_priority priority) +{ + struct efx_mcdi_filter_table *table; + unsigned int priority_mask; + unsigned int i; + int rc; + + priority_mask = (((1U << (priority + 1)) - 1) & + ~(1U << EFX_FILTER_PRI_AUTO)); + + down_read(&efx->filter_sem); + table = efx->filter_state; + down_write(&table->lock); + for (i = 0; i < EFX_MCDI_FILTER_TBL_ROWS; i++) { + rc = efx_mcdi_filter_remove_internal(efx, priority_mask, + i, true); + if (rc && rc != -ENOENT) + break; + rc = 0; + } + + up_write(&table->lock); + up_read(&efx->filter_sem); + return rc; +} + +u32 efx_mcdi_filter_count_rx_used(struct efx_nic *efx, + enum efx_filter_priority priority) +{ + struct efx_mcdi_filter_table *table; + unsigned int filter_idx; + s32 count = 0; + + down_read(&efx->filter_sem); + table = efx->filter_state; + down_read(&table->lock); + for (filter_idx = 0; filter_idx < EFX_MCDI_FILTER_TBL_ROWS; filter_idx++) { + if (table->entry[filter_idx].spec && + efx_mcdi_filter_entry_spec(table, filter_idx)->priority == + priority) + ++count; + } + up_read(&table->lock); + up_read(&efx->filter_sem); + return count; +} + +u32 efx_mcdi_filter_get_rx_id_limit(struct efx_nic *efx) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + + return table->rx_match_count * EFX_MCDI_FILTER_TBL_ROWS * 2; +} + +s32 efx_mcdi_filter_get_rx_ids(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 *buf, u32 size) +{ + struct efx_mcdi_filter_table *table; + struct efx_filter_spec *spec; + unsigned int filter_idx; + s32 count = 0; + + down_read(&efx->filter_sem); + table = efx->filter_state; + down_read(&table->lock); + + for (filter_idx = 0; filter_idx < EFX_MCDI_FILTER_TBL_ROWS; filter_idx++) { + spec = efx_mcdi_filter_entry_spec(table, filter_idx); + if (spec && spec->priority == priority) { + if (count == size) { + count = -EMSGSIZE; + break; + } + buf[count++] = + efx_mcdi_filter_make_filter_id( + efx_mcdi_filter_pri(table, spec), + filter_idx); + } + } + up_read(&table->lock); + up_read(&efx->filter_sem); + return count; +} + +static int efx_mcdi_filter_match_flags_from_mcdi(bool encap, u32 mcdi_flags) +{ + int match_flags = 0; + +#define MAP_FLAG(gen_flag, mcdi_field) do { \ + u32 old_mcdi_flags = mcdi_flags; \ + mcdi_flags &= ~(1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_ ## \ + mcdi_field ## _LBN); \ + if (mcdi_flags != old_mcdi_flags) \ + match_flags |= EFX_FILTER_MATCH_ ## gen_flag; \ + } while (0) + + if (encap) { + /* encap filters must specify encap type */ + match_flags |= EFX_FILTER_MATCH_ENCAP_TYPE; + /* and imply ethertype and ip proto */ + mcdi_flags &= + ~(1 << 
MC_CMD_FILTER_OP_EXT_IN_MATCH_IP_PROTO_LBN); + mcdi_flags &= + ~(1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_ETHER_TYPE_LBN); + /* VLAN tags refer to the outer packet */ + MAP_FLAG(INNER_VID, INNER_VLAN); + MAP_FLAG(OUTER_VID, OUTER_VLAN); + /* everything else refers to the inner packet */ + MAP_FLAG(LOC_MAC_IG, IFRM_UNKNOWN_UCAST_DST); + MAP_FLAG(LOC_MAC_IG, IFRM_UNKNOWN_MCAST_DST); + MAP_FLAG(REM_HOST, IFRM_SRC_IP); + MAP_FLAG(LOC_HOST, IFRM_DST_IP); + MAP_FLAG(REM_MAC, IFRM_SRC_MAC); + MAP_FLAG(REM_PORT, IFRM_SRC_PORT); + MAP_FLAG(LOC_MAC, IFRM_DST_MAC); + MAP_FLAG(LOC_PORT, IFRM_DST_PORT); + MAP_FLAG(ETHER_TYPE, IFRM_ETHER_TYPE); + MAP_FLAG(IP_PROTO, IFRM_IP_PROTO); + } else { + MAP_FLAG(LOC_MAC_IG, UNKNOWN_UCAST_DST); + MAP_FLAG(LOC_MAC_IG, UNKNOWN_MCAST_DST); + MAP_FLAG(REM_HOST, SRC_IP); + MAP_FLAG(LOC_HOST, DST_IP); + MAP_FLAG(REM_MAC, SRC_MAC); + MAP_FLAG(REM_PORT, SRC_PORT); + MAP_FLAG(LOC_MAC, DST_MAC); + MAP_FLAG(LOC_PORT, DST_PORT); + MAP_FLAG(ETHER_TYPE, ETHER_TYPE); + MAP_FLAG(INNER_VID, INNER_VLAN); + MAP_FLAG(OUTER_VID, OUTER_VLAN); + MAP_FLAG(IP_PROTO, IP_PROTO); + } +#undef MAP_FLAG + + /* Did we map them all? */ + if (mcdi_flags) + return -EINVAL; + + return match_flags; +} + +bool efx_mcdi_filter_match_supported(struct efx_mcdi_filter_table *table, + bool encap, + enum efx_filter_match_flags match_flags) +{ + unsigned int match_pri; + int mf; + + for (match_pri = 0; + match_pri < table->rx_match_count; + match_pri++) { + mf = efx_mcdi_filter_match_flags_from_mcdi(encap, + table->rx_match_mcdi_flags[match_pri]); + if (mf == match_flags) + return true; + } + + return false; +} + +static int +efx_mcdi_filter_table_probe_matches(struct efx_nic *efx, + struct efx_mcdi_filter_table *table, + bool encap) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_PARSER_DISP_INFO_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PARSER_DISP_INFO_OUT_LENMAX); + unsigned int pd_match_pri, pd_match_count; + size_t outlen; + int rc; + + /* Find out which RX filter types are supported, and their priorities */ + MCDI_SET_DWORD(inbuf, GET_PARSER_DISP_INFO_IN_OP, + encap ? 
+ MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_ENCAP_RX_MATCHES : + MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_RX_MATCHES); + rc = efx_mcdi_rpc(efx, MC_CMD_GET_PARSER_DISP_INFO, + inbuf, sizeof(inbuf), outbuf, sizeof(outbuf), + &outlen); + if (rc) + return rc; + + pd_match_count = MCDI_VAR_ARRAY_LEN( + outlen, GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES); + + for (pd_match_pri = 0; pd_match_pri < pd_match_count; pd_match_pri++) { + u32 mcdi_flags = + MCDI_ARRAY_DWORD( + outbuf, + GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES, + pd_match_pri); + rc = efx_mcdi_filter_match_flags_from_mcdi(encap, mcdi_flags); + if (rc < 0) { + netif_dbg(efx, probe, efx->net_dev, + "%s: fw flags %#x pri %u not supported in driver\n", + __func__, mcdi_flags, pd_match_pri); + } else { + netif_dbg(efx, probe, efx->net_dev, + "%s: fw flags %#x pri %u supported as driver flags %#x pri %u\n", + __func__, mcdi_flags, pd_match_pri, + rc, table->rx_match_count); + table->rx_match_mcdi_flags[table->rx_match_count] = mcdi_flags; + table->rx_match_count++; + } + } + + return 0; +} + +int efx_mcdi_filter_table_probe(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + struct net_device *net_dev = efx->net_dev; + struct efx_mcdi_filter_table *table; + struct efx_mcdi_filter_vlan *vlan; + int rc; + + if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) + return -EINVAL; + + if (efx->filter_state) /* already probed */ + return 0; + + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + table->rx_match_count = 0; + rc = efx_mcdi_filter_table_probe_matches(efx, table, false); + if (rc) + goto fail; + if (nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN)) + rc = efx_mcdi_filter_table_probe_matches(efx, table, true); + if (rc) + goto fail; + if ((efx_supported_features(efx) & NETIF_F_HW_VLAN_CTAG_FILTER) && + !(efx_mcdi_filter_match_supported(table, false, + (EFX_FILTER_MATCH_OUTER_VID | EFX_FILTER_MATCH_LOC_MAC)) && + efx_mcdi_filter_match_supported(table, false, + (EFX_FILTER_MATCH_OUTER_VID | EFX_FILTER_MATCH_LOC_MAC_IG)))) { + netif_info(efx, probe, net_dev, + "VLAN filters are not supported in this firmware variant\n"); + net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + efx->fixed_features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + net_dev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + } + + table->entry = vzalloc(array_size(EFX_MCDI_FILTER_TBL_ROWS, + sizeof(*table->entry))); + if (!table->entry) { + rc = -ENOMEM; + goto fail; + } + + table->mc_promisc_last = false; + table->vlan_filter = + !!(efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_FILTER); + INIT_LIST_HEAD(&table->vlan_list); + init_rwsem(&table->lock); + + efx->filter_state = table; + + list_for_each_entry(vlan, &nic_data->vlan_list, list) { + rc = efx_mcdi_filter_add_vlan(efx, vlan->vid); + if (rc) + goto fail_add_vlan; + } + + return 0; + +fail_add_vlan: + efx_mcdi_filter_cleanup_vlans(efx); + efx->filter_state = NULL; +fail: + kfree(table); + return rc; +} + +/* + * Caller must hold efx->filter_sem for read if race against + * efx_mcdi_filter_table_remove() is possible + */ +void efx_mcdi_filter_table_restore(struct efx_nic *efx) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + struct efx_ef10_nic_data *nic_data = efx->nic_data; + unsigned int invalid_filters = 0, failed = 0; + struct efx_mcdi_filter_vlan *vlan; + struct efx_filter_spec *spec; + struct efx_rss_context *ctx; + unsigned int filter_idx; + u32 mcdi_flags; + int match_pri; + int rc, i; + + 
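+ /* Re-push every software-table entry to the MC after it has rebooted.
+ * Entries whose match class or RSS context no longer exists cannot be
+ * restored and are instead dropped from the software table below.
+ */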
WARN_ON(!rwsem_is_locked(&efx->filter_sem)); + + if (!nic_data->must_restore_filters) + return; + + if (!table) + return; + + down_write(&table->lock); + mutex_lock(&efx->rss_lock); + + for (filter_idx = 0; filter_idx < EFX_MCDI_FILTER_TBL_ROWS; filter_idx++) { + spec = efx_mcdi_filter_entry_spec(table, filter_idx); + if (!spec) + continue; + + mcdi_flags = efx_mcdi_filter_mcdi_flags_from_spec(spec); + match_pri = 0; + while (match_pri < table->rx_match_count && + table->rx_match_mcdi_flags[match_pri] != mcdi_flags) + ++match_pri; + if (match_pri >= table->rx_match_count) { + invalid_filters++; + goto not_restored; + } + if (spec->rss_context) + ctx = efx_find_rss_context_entry(efx, spec->rss_context); + else + ctx = &efx->rss_context; + if (spec->flags & EFX_FILTER_FLAG_RX_RSS) { + if (!ctx) { + netif_warn(efx, drv, efx->net_dev, + "Warning: unable to restore a filter with nonexistent RSS context %u.\n", + spec->rss_context); + invalid_filters++; + goto not_restored; + } + if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) { + netif_warn(efx, drv, efx->net_dev, + "Warning: unable to restore a filter with RSS context %u as it was not created.\n", + spec->rss_context); + invalid_filters++; + goto not_restored; + } + } + + rc = efx_mcdi_filter_push(efx, spec, + &table->entry[filter_idx].handle, + ctx, false); + if (rc) + failed++; + + if (rc) { +not_restored: + list_for_each_entry(vlan, &table->vlan_list, list) + for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; ++i) + if (vlan->default_filters[i] == filter_idx) + vlan->default_filters[i] = + EFX_EF10_FILTER_ID_INVALID; + + kfree(spec); + efx_mcdi_filter_set_entry(table, filter_idx, NULL, 0); + } + } + + mutex_unlock(&efx->rss_lock); + up_write(&table->lock); + + /* + * This can happen validly if the MC's capabilities have changed, so + * is not an error. + */ + if (invalid_filters) + netif_dbg(efx, drv, efx->net_dev, + "Did not restore %u filters that are now unsupported.\n", + invalid_filters); + + if (failed) + netif_err(efx, hw, efx->net_dev, + "unable to restore %u filters\n", failed); + else + nic_data->must_restore_filters = false; +} + +void efx_mcdi_filter_table_remove(struct efx_nic *efx) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); + struct efx_filter_spec *spec; + unsigned int filter_idx; + int rc; + + efx_mcdi_filter_cleanup_vlans(efx); + efx->filter_state = NULL; + /* + * If we were called without locking, then it's not safe to free + * the table as others might be using it. So we just WARN, leak + * the memory, and potentially get an inconsistent filter table + * state. + * This should never actually happen. + */ + if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) + return; + + if (!table) + return; + + for (filter_idx = 0; filter_idx < EFX_MCDI_FILTER_TBL_ROWS; filter_idx++) { + spec = efx_mcdi_filter_entry_spec(table, filter_idx); + if (!spec) + continue; + + MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, + efx_mcdi_filter_is_exclusive(spec) ? 
+ MC_CMD_FILTER_OP_IN_OP_REMOVE : + MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE); + MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, + table->entry[filter_idx].handle); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, inbuf, + sizeof(inbuf), NULL, 0, NULL); + if (rc) + netif_info(efx, drv, efx->net_dev, + "%s: filter %04x remove failed\n", + __func__, filter_idx); + kfree(spec); + } + + vfree(table->entry); + kfree(table); +} + +static void efx_mcdi_filter_mark_one_old(struct efx_nic *efx, uint16_t *id) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + unsigned int filter_idx; + + efx_rwsem_assert_write_locked(&table->lock); + + if (*id != EFX_EF10_FILTER_ID_INVALID) { + filter_idx = efx_mcdi_filter_get_unsafe_id(*id); + if (!table->entry[filter_idx].spec) + netif_dbg(efx, drv, efx->net_dev, + "marked null spec old %04x:%04x\n", *id, + filter_idx); + table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_AUTO_OLD; + *id = EFX_EF10_FILTER_ID_INVALID; + } +} + +/* Mark old per-VLAN filters that may need to be removed */ +static void _efx_mcdi_filter_vlan_mark_old(struct efx_nic *efx, + struct efx_mcdi_filter_vlan *vlan) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + unsigned int i; + + for (i = 0; i < table->dev_uc_count; i++) + efx_mcdi_filter_mark_one_old(efx, &vlan->uc[i]); + for (i = 0; i < table->dev_mc_count; i++) + efx_mcdi_filter_mark_one_old(efx, &vlan->mc[i]); + for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; i++) + efx_mcdi_filter_mark_one_old(efx, &vlan->default_filters[i]); +} + +/* + * Mark old filters that may need to be removed. + * Caller must hold efx->filter_sem for read if race against + * efx_mcdi_filter_table_remove() is possible + */ +static void efx_mcdi_filter_mark_old(struct efx_nic *efx) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + struct efx_mcdi_filter_vlan *vlan; + + down_write(&table->lock); + list_for_each_entry(vlan, &table->vlan_list, list) + _efx_mcdi_filter_vlan_mark_old(efx, vlan); + up_write(&table->lock); +} + +int efx_mcdi_filter_add_vlan(struct efx_nic *efx, u16 vid) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + struct efx_mcdi_filter_vlan *vlan; + unsigned int i; + + if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) + return -EINVAL; + + vlan = efx_mcdi_filter_find_vlan(efx, vid); + if (WARN_ON(vlan)) { + netif_err(efx, drv, efx->net_dev, + "VLAN %u already added\n", vid); + return -EALREADY; + } + + vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); + if (!vlan) + return -ENOMEM; + + vlan->vid = vid; + + for (i = 0; i < ARRAY_SIZE(vlan->uc); i++) + vlan->uc[i] = EFX_EF10_FILTER_ID_INVALID; + for (i = 0; i < ARRAY_SIZE(vlan->mc); i++) + vlan->mc[i] = EFX_EF10_FILTER_ID_INVALID; + for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; i++) + vlan->default_filters[i] = EFX_EF10_FILTER_ID_INVALID; + + list_add_tail(&vlan->list, &table->vlan_list); + + if (efx_dev_registered(efx)) + efx_mcdi_filter_vlan_sync_rx_mode(efx, vlan); + + return 0; +} + +static void efx_mcdi_filter_del_vlan_internal(struct efx_nic *efx, + struct efx_mcdi_filter_vlan *vlan) +{ + unsigned int i; + + /* See comment in efx_mcdi_filter_table_remove() */ + if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) + return; + + list_del(&vlan->list); + + for (i = 0; i < ARRAY_SIZE(vlan->uc); i++) + efx_mcdi_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO, + vlan->uc[i]); + for (i = 0; i < ARRAY_SIZE(vlan->mc); i++) + efx_mcdi_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO, + vlan->mc[i]); + for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; i++) + if 
(vlan->default_filters[i] != EFX_EF10_FILTER_ID_INVALID) + efx_mcdi_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO, + vlan->default_filters[i]); + + kfree(vlan); +} + +void efx_mcdi_filter_del_vlan(struct efx_nic *efx, u16 vid) +{ + struct efx_mcdi_filter_vlan *vlan; + + /* See comment in efx_mcdi_filter_table_remove() */ + if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) + return; + + vlan = efx_mcdi_filter_find_vlan(efx, vid); + if (!vlan) { + netif_err(efx, drv, efx->net_dev, + "VLAN %u not found in filter state\n", vid); + return; + } + + efx_mcdi_filter_del_vlan_internal(efx, vlan); +} + +struct efx_mcdi_filter_vlan *efx_mcdi_filter_find_vlan(struct efx_nic *efx, + u16 vid) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + struct efx_mcdi_filter_vlan *vlan; + + WARN_ON(!rwsem_is_locked(&efx->filter_sem)); + + list_for_each_entry(vlan, &table->vlan_list, list) { + if (vlan->vid == vid) + return vlan; + } + + return NULL; +} + +void efx_mcdi_filter_cleanup_vlans(struct efx_nic *efx) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + struct efx_mcdi_filter_vlan *vlan, *next_vlan; + + /* See comment in efx_mcdi_filter_table_remove() */ + if (!efx_rwsem_assert_write_locked(&efx->filter_sem)) + return; + + if (!table) + return; + + list_for_each_entry_safe(vlan, next_vlan, &table->vlan_list, list) + efx_mcdi_filter_del_vlan_internal(efx, vlan); +} + +static void efx_mcdi_filter_uc_addr_list(struct efx_nic *efx) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + struct net_device *net_dev = efx->net_dev; + struct netdev_hw_addr *uc; + unsigned int i; + + table->uc_promisc = !!(net_dev->flags & IFF_PROMISC); + ether_addr_copy(table->dev_uc_list[0].addr, net_dev->dev_addr); + i = 1; + netdev_for_each_uc_addr(uc, net_dev) { + if (i >= EFX_EF10_FILTER_DEV_UC_MAX) { + table->uc_promisc = true; + break; + } + ether_addr_copy(table->dev_uc_list[i].addr, uc->addr); + i++; + } + + table->dev_uc_count = i; +} + +static void efx_mcdi_filter_mc_addr_list(struct efx_nic *efx) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + struct net_device *net_dev = efx->net_dev; + struct netdev_hw_addr *mc; + unsigned int i; + + table->mc_overflow = false; + table->mc_promisc = !!(net_dev->flags & (IFF_PROMISC | IFF_ALLMULTI)); + + i = 0; + netdev_for_each_mc_addr(mc, net_dev) { + if (i >= EFX_EF10_FILTER_DEV_MC_MAX) { + table->mc_promisc = true; + table->mc_overflow = true; + break; + } + ether_addr_copy(table->dev_mc_list[i].addr, mc->addr); + i++; + } + + table->dev_mc_count = i; +} + +/* + * Caller must hold efx->filter_sem for read if race against + * efx_mcdi_filter_table_remove() is possible + */ +void efx_mcdi_filter_sync_rx_mode(struct efx_nic *efx) +{ + struct efx_mcdi_filter_table *table = efx->filter_state; + struct net_device *net_dev = efx->net_dev; + struct efx_mcdi_filter_vlan *vlan; + bool vlan_filter; + + if (!efx_dev_registered(efx)) + return; + + if (!table) + return; + + efx_mcdi_filter_mark_old(efx); + + /* + * Copy/convert the address lists; add the primary station + * address and broadcast address + */ + netif_addr_lock_bh(net_dev); + efx_mcdi_filter_uc_addr_list(efx); + efx_mcdi_filter_mc_addr_list(efx); + netif_addr_unlock_bh(net_dev); + + /* + * If VLAN filtering changes, all old filters are finally removed. + * Do it in advance to avoid conflicts for unicast untagged and + * VLAN 0 tagged filters. 
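+ * (presumably because an untagged filter and a VLAN 0 tagged filter for
+ * the same address could otherwise coexist while the mode changes over)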
+ */ + vlan_filter = !!(net_dev->features & NETIF_F_HW_VLAN_CTAG_FILTER); + if (table->vlan_filter != vlan_filter) { + table->vlan_filter = vlan_filter; + efx_mcdi_filter_remove_old(efx); + } + + list_for_each_entry(vlan, &table->vlan_list, list) + efx_mcdi_filter_vlan_sync_rx_mode(efx, vlan); + + efx_mcdi_filter_remove_old(efx); + table->mc_promisc_last = table->mc_promisc; +} + +#ifdef CONFIG_RFS_ACCEL + +bool efx_mcdi_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id, + unsigned int filter_idx) +{ + struct efx_filter_spec *spec, saved_spec; + struct efx_mcdi_filter_table *table; + struct efx_arfs_rule *rule = NULL; + bool ret = true, force = false; + u16 arfs_id; + + down_read(&efx->filter_sem); + table = efx->filter_state; + down_write(&table->lock); + spec = efx_mcdi_filter_entry_spec(table, filter_idx); + + if (!spec || spec->priority != EFX_FILTER_PRI_HINT) + goto out_unlock; + + spin_lock_bh(&efx->rps_hash_lock); + if (!efx->rps_hash_table) { + /* In the absence of the table, we always return 0 to ARFS. */ + arfs_id = 0; + } else { + rule = efx_rps_hash_find(efx, spec); + if (!rule) + /* ARFS table doesn't know of this filter, so remove it */ + goto expire; + arfs_id = rule->arfs_id; + ret = efx_rps_check_rule(rule, filter_idx, &force); + if (force) + goto expire; + if (!ret) { + spin_unlock_bh(&efx->rps_hash_lock); + goto out_unlock; + } + } + if (!rps_may_expire_flow(efx->net_dev, spec->dmaq_id, flow_id, arfs_id)) + ret = false; + else if (rule) + rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING; +expire: + saved_spec = *spec; /* remove operation will kfree spec */ + spin_unlock_bh(&efx->rps_hash_lock); + /* + * At this point (since we dropped the lock), another thread might queue + * up a fresh insertion request (but the actual insertion will be held + * up by our possession of the filter table lock). In that case, it + * will set rule->filter_id to EFX_ARFS_FILTER_ID_PENDING, meaning that + * the rule is not removed by efx_rps_hash_del() below. + */ + if (ret) + ret = efx_mcdi_filter_remove_internal(efx, 1U << spec->priority, + filter_idx, true) == 0; + /* + * While we can't safely dereference rule (we dropped the lock), we can + * still test it for NULL. 
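+ * (and note that efx_rps_hash_del() is passed saved_spec rather than spec,
+ * since the remove above may already have freed the table's own copy)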
+ */ + if (ret && rule) { + /* Expiring, so remove entry from ARFS table */ + spin_lock_bh(&efx->rps_hash_lock); + efx_rps_hash_del(efx, &saved_spec); + spin_unlock_bh(&efx->rps_hash_lock); + } +out_unlock: + up_write(&table->lock); + up_read(&efx->filter_sem); + return ret; +} + +#endif /* CONFIG_RFS_ACCEL */ + +#define RSS_MODE_HASH_ADDRS (1 << RSS_MODE_HASH_SRC_ADDR_LBN |\ + 1 << RSS_MODE_HASH_DST_ADDR_LBN) +#define RSS_MODE_HASH_PORTS (1 << RSS_MODE_HASH_SRC_PORT_LBN |\ + 1 << RSS_MODE_HASH_DST_PORT_LBN) +#define RSS_CONTEXT_FLAGS_DEFAULT (1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV4_EN_LBN |\ + 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV4_EN_LBN |\ + 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV6_EN_LBN |\ + 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV6_EN_LBN |\ + (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV4_RSS_MODE_LBN |\ + RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN |\ + RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV4_RSS_MODE_LBN |\ + (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV6_RSS_MODE_LBN |\ + RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN |\ + RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV6_RSS_MODE_LBN) + +int efx_mcdi_get_rss_context_flags(struct efx_nic *efx, u32 context, u32 *flags) +{ + /* + * Firmware had a bug (sfc bug 61952) where it would not actually + * fill in the flags field in the response to MC_CMD_RSS_CONTEXT_GET_FLAGS. + * This meant that it would always contain whatever was previously + * in the MCDI buffer. Fortunately, all firmware versions with + * this bug have the same default flags value for a newly-allocated + * RSS context, and the only time we want to get the flags is just + * after allocating. Moreover, the response has a 32-bit hole + * where the context ID would be in the request, so we can use an + * overlength buffer in the request and pre-fill the flags field + * with what we believe the default to be. Thus if the firmware + * has the bug, it will leave our pre-filled value in the flags + * field of the response, and we will get the right answer. + * + * However, this does mean that this function should NOT be used if + * the RSS context flags might not be their defaults - it is ONLY + * reliably correct for a newly-allocated RSS context. + */ + MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN); + size_t outlen; + int rc; + + /* Check we have a hole for the context ID */ + BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_FLAGS_IN_LEN != MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_FLAGS_OFST); + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_IN_RSS_CONTEXT_ID, context); + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS, + RSS_CONTEXT_FLAGS_DEFAULT); + rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_FLAGS, inbuf, + sizeof(inbuf), outbuf, sizeof(outbuf), &outlen); + if (rc == 0) { + if (outlen < MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN) + rc = -EIO; + else + *flags = MCDI_DWORD(outbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS); + } + return rc; +} + +/* + * Attempt to enable 4-tuple UDP hashing on the specified RSS context. + * If we fail, we just leave the RSS context at its default hash settings, + * which is safe but may slightly reduce performance. + * Defaults are 4-tuple for TCP and 2-tuple for UDP and other-IP, so we + * just need to set the UDP ports flags (for both IP versions). 
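+ * (RSS_MODE_HASH_PORTS adds the source and destination port bits, which
+ * upgrades the UDP modes from 2-tuple to 4-tuple hashing)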
+ */ +void efx_mcdi_set_rss_context_flags(struct efx_nic *efx, + struct efx_rss_context *ctx) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN); + u32 flags; + + BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_SET_FLAGS_OUT_LEN != 0); + + if (efx_mcdi_get_rss_context_flags(efx, ctx->context_id, &flags) != 0) + return; + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_RSS_CONTEXT_ID, + ctx->context_id); + flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN; + flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN; + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_FLAGS, flags); + if (!efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_FLAGS, inbuf, sizeof(inbuf), + NULL, 0, NULL)) + /* Succeeded, so UDP 4-tuple is now enabled */ + ctx->rx_hash_udp_4tuple = true; +} + +static int efx_mcdi_filter_alloc_rss_context(struct efx_nic *efx, bool exclusive, + struct efx_rss_context *ctx, + unsigned *context_size) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + size_t outlen; + int rc; + u32 alloc_type = exclusive ? + MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE : + MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_SHARED; + unsigned rss_spread = exclusive ? + efx->rss_spread : + min(rounddown_pow_of_two(efx->rss_spread), + EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE); + + if (!exclusive && rss_spread == 1) { + ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; + if (context_size) + *context_size = 1; + return 0; + } + + if (nic_data->datapath_caps & + 1 << MC_CMD_GET_CAPABILITIES_OUT_RX_RSS_LIMITED_LBN) + return -EOPNOTSUPP; + + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_UPSTREAM_PORT_ID, + nic_data->vport_id); + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_TYPE, alloc_type); + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_NUM_QUEUES, rss_spread); + + rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_ALLOC, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc != 0) + return rc; + + if (outlen < MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN) + return -EIO; + + ctx->context_id = MCDI_DWORD(outbuf, RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID); + + if (context_size) + *context_size = rss_spread; + + if (nic_data->datapath_caps & + 1 << MC_CMD_GET_CAPABILITIES_OUT_ADDITIONAL_RSS_MODES_LBN) + efx_mcdi_set_rss_context_flags(efx, ctx); + + return 0; +} + +static int efx_mcdi_filter_free_rss_context(struct efx_nic *efx, u32 context) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_FREE_IN_LEN); + + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_FREE_IN_RSS_CONTEXT_ID, + context); + return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_FREE, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +static int efx_mcdi_filter_populate_rss_table(struct efx_nic *efx, u32 context, + const u32 *rx_indir_table, const u8 *key) +{ + MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_SET_TABLE_IN_LEN); + MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_SET_KEY_IN_LEN); + int i, rc; + + MCDI_SET_DWORD(tablebuf, RSS_CONTEXT_SET_TABLE_IN_RSS_CONTEXT_ID, + context); + BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) != + MC_CMD_RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE_LEN); + + /* This iterates over the length of efx->rss_context.rx_indir_table, but + * copies bytes from rx_indir_table. That's because the latter is a + * pointer rather than an array, but should have the same length. + * The efx->rss_context.rx_hash_key loop below is similar. 
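+ * (both lengths are fixed by the MCDI protocol definitions; the
+ * BUILD_BUG_ON() checks on either side enforce that they stay in step)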
+ */ + for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_indir_table); ++i) + MCDI_PTR(tablebuf, + RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE)[i] = + (u8) rx_indir_table[i]; + + rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_TABLE, tablebuf, + sizeof(tablebuf), NULL, 0, NULL); + if (rc != 0) + return rc; + + MCDI_SET_DWORD(keybuf, RSS_CONTEXT_SET_KEY_IN_RSS_CONTEXT_ID, + context); + BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_hash_key) != + MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN); + for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_hash_key); ++i) + MCDI_PTR(keybuf, RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY)[i] = key[i]; + + return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_KEY, keybuf, + sizeof(keybuf), NULL, 0, NULL); +} + +void efx_mcdi_rx_free_indir_table(struct efx_nic *efx) +{ + int rc; + + if (efx->rss_context.context_id != EFX_MCDI_RSS_CONTEXT_INVALID) { + rc = efx_mcdi_filter_free_rss_context(efx, efx->rss_context.context_id); + WARN_ON(rc != 0); + } + efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; +} + +static int efx_mcdi_filter_rx_push_shared_rss_config(struct efx_nic *efx, + unsigned *context_size) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int rc = efx_mcdi_filter_alloc_rss_context(efx, false, &efx->rss_context, + context_size); + + if (rc != 0) + return rc; + + nic_data->rx_rss_context_exclusive = false; + efx_set_default_rx_indir_table(efx, &efx->rss_context); + return 0; +} + +static int efx_mcdi_filter_rx_push_exclusive_rss_config(struct efx_nic *efx, + const u32 *rx_indir_table, + const u8 *key) +{ + u32 old_rx_rss_context = efx->rss_context.context_id; + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int rc; + + if (efx->rss_context.context_id == EFX_MCDI_RSS_CONTEXT_INVALID || + !nic_data->rx_rss_context_exclusive) { + rc = efx_mcdi_filter_alloc_rss_context(efx, true, &efx->rss_context, + NULL); + if (rc == -EOPNOTSUPP) + return rc; + else if (rc != 0) + goto fail1; + } + + rc = efx_mcdi_filter_populate_rss_table(efx, efx->rss_context.context_id, + rx_indir_table, key); + if (rc != 0) + goto fail2; + + if (efx->rss_context.context_id != old_rx_rss_context && + old_rx_rss_context != EFX_MCDI_RSS_CONTEXT_INVALID) + WARN_ON(efx_mcdi_filter_free_rss_context(efx, old_rx_rss_context) != 0); + nic_data->rx_rss_context_exclusive = true; + if (rx_indir_table != efx->rss_context.rx_indir_table) + memcpy(efx->rss_context.rx_indir_table, rx_indir_table, + sizeof(efx->rss_context.rx_indir_table)); + if (key != efx->rss_context.rx_hash_key) + memcpy(efx->rss_context.rx_hash_key, key, + efx->type->rx_hash_key_size); + + return 0; + +fail2: + if (old_rx_rss_context != efx->rss_context.context_id) { + WARN_ON(efx_mcdi_filter_free_rss_context(efx, efx->rss_context.context_id) != 0); + efx->rss_context.context_id = old_rx_rss_context; + } +fail1: + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); + return rc; +} + +int efx_mcdi_rx_push_rss_context_config(struct efx_nic *efx, + struct efx_rss_context *ctx, + const u32 *rx_indir_table, + const u8 *key) +{ + int rc; + + WARN_ON(!mutex_is_locked(&efx->rss_lock)); + + if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) { + rc = efx_mcdi_filter_alloc_rss_context(efx, true, ctx, NULL); + if (rc) + return rc; + } + + if (!rx_indir_table) /* Delete this context */ + return efx_mcdi_filter_free_rss_context(efx, ctx->context_id); + + rc = efx_mcdi_filter_populate_rss_table(efx, ctx->context_id, + rx_indir_table, key); + if (rc) + return rc; + + memcpy(ctx->rx_indir_table, rx_indir_table, + 
sizeof(efx->rss_context.rx_indir_table)); + memcpy(ctx->rx_hash_key, key, efx->type->rx_hash_key_size); + + return 0; +} + +int efx_mcdi_rx_pull_rss_context_config(struct efx_nic *efx, + struct efx_rss_context *ctx) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN); + MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN); + MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN); + size_t outlen; + int rc, i; + + WARN_ON(!mutex_is_locked(&efx->rss_lock)); + + BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN != + MC_CMD_RSS_CONTEXT_GET_KEY_IN_LEN); + + if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) + return -ENOENT; + + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_TABLE_IN_RSS_CONTEXT_ID, + ctx->context_id); + BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_indir_table) != + MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE_LEN); + rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_TABLE, inbuf, sizeof(inbuf), + tablebuf, sizeof(tablebuf), &outlen); + if (rc != 0) + return rc; + + if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN)) + return -EIO; + + for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++) + ctx->rx_indir_table[i] = MCDI_PTR(tablebuf, + RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE)[i]; + + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_KEY_IN_RSS_CONTEXT_ID, + ctx->context_id); + BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_hash_key) != + MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN); + rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_KEY, inbuf, sizeof(inbuf), + keybuf, sizeof(keybuf), &outlen); + if (rc != 0) + return rc; + + if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN)) + return -EIO; + + for (i = 0; i < ARRAY_SIZE(ctx->rx_hash_key); ++i) + ctx->rx_hash_key[i] = MCDI_PTR( + keybuf, RSS_CONTEXT_GET_KEY_OUT_TOEPLITZ_KEY)[i]; + + return 0; +} + +int efx_mcdi_rx_pull_rss_config(struct efx_nic *efx) +{ + int rc; + + mutex_lock(&efx->rss_lock); + rc = efx_mcdi_rx_pull_rss_context_config(efx, &efx->rss_context); + mutex_unlock(&efx->rss_lock); + return rc; +} + +void efx_mcdi_rx_restore_rss_contexts(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + struct efx_rss_context *ctx; + int rc; + + WARN_ON(!mutex_is_locked(&efx->rss_lock)); + + if (!nic_data->must_restore_rss_contexts) + return; + + list_for_each_entry(ctx, &efx->rss_context.list, list) { + /* previous NIC RSS context is gone */ + ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; + /* so try to allocate a new one */ + rc = efx_mcdi_rx_push_rss_context_config(efx, ctx, + ctx->rx_indir_table, + ctx->rx_hash_key); + if (rc) + netif_warn(efx, probe, efx->net_dev, + "failed to restore RSS context %u, rc=%d" + "; RSS filters may fail to be applied\n", + ctx->user_id, rc); + } + nic_data->must_restore_rss_contexts = false; +} + +int efx_mcdi_pf_rx_push_rss_config(struct efx_nic *efx, bool user, + const u32 *rx_indir_table, + const u8 *key) +{ + int rc; + + if (efx->rss_spread == 1) + return 0; + + if (!key) + key = efx->rss_context.rx_hash_key; + + rc = efx_mcdi_filter_rx_push_exclusive_rss_config(efx, rx_indir_table, key); + + if (rc == -ENOBUFS && !user) { + unsigned context_size; + bool mismatch = false; + size_t i; + + for (i = 0; + i < ARRAY_SIZE(efx->rss_context.rx_indir_table) && !mismatch; + i++) + mismatch = rx_indir_table[i] != + ethtool_rxfh_indir_default(i, efx->rss_spread); + + rc = efx_mcdi_filter_rx_push_shared_rss_config(efx, &context_size); + if (rc == 0) { + if (context_size != efx->rss_spread) + netif_warn(efx, probe, efx->net_dev, + "Could not allocate an exclusive RSS" + 
" context; allocated a shared one of" + " different size." + " Wanted %u, got %u.\n", + efx->rss_spread, context_size); + else if (mismatch) + netif_warn(efx, probe, efx->net_dev, + "Could not allocate an exclusive RSS" + " context; allocated a shared one but" + " could not apply custom" + " indirection.\n"); + else + netif_info(efx, probe, efx->net_dev, + "Could not allocate an exclusive RSS" + " context; allocated a shared one.\n"); + } + } + return rc; +} + +int efx_mcdi_vf_rx_push_rss_config(struct efx_nic *efx, bool user, + const u32 *rx_indir_table + __attribute__ ((unused)), + const u8 *key + __attribute__ ((unused))) +{ + if (user) + return -EOPNOTSUPP; + if (efx->rss_context.context_id != EFX_MCDI_RSS_CONTEXT_INVALID) + return 0; + return efx_mcdi_filter_rx_push_shared_rss_config(efx, NULL); +} diff --git a/drivers/net/ethernet/sfc/mcdi_filters.h b/drivers/net/ethernet/sfc/mcdi_filters.h new file mode 100644 index 000000000000..1837f4f5d661 --- /dev/null +++ b/drivers/net/ethernet/sfc/mcdi_filters.h @@ -0,0 +1,159 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2019 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ +#ifndef EFX_MCDI_FILTERS_H +#define EFX_MCDI_FILTERS_H + +#include "net_driver.h" +#include "filter.h" +#include "mcdi_pcol.h" + +#define EFX_EF10_FILTER_DEV_UC_MAX 32 +#define EFX_EF10_FILTER_DEV_MC_MAX 256 + +enum efx_mcdi_filter_default_filters { + EFX_EF10_BCAST, + EFX_EF10_UCDEF, + EFX_EF10_MCDEF, + EFX_EF10_VXLAN4_UCDEF, + EFX_EF10_VXLAN4_MCDEF, + EFX_EF10_VXLAN6_UCDEF, + EFX_EF10_VXLAN6_MCDEF, + EFX_EF10_NVGRE4_UCDEF, + EFX_EF10_NVGRE4_MCDEF, + EFX_EF10_NVGRE6_UCDEF, + EFX_EF10_NVGRE6_MCDEF, + EFX_EF10_GENEVE4_UCDEF, + EFX_EF10_GENEVE4_MCDEF, + EFX_EF10_GENEVE6_UCDEF, + EFX_EF10_GENEVE6_MCDEF, + + EFX_EF10_NUM_DEFAULT_FILTERS +}; + +/* Per-VLAN filters information */ +struct efx_mcdi_filter_vlan { + struct list_head list; + u16 vid; + u16 uc[EFX_EF10_FILTER_DEV_UC_MAX]; + u16 mc[EFX_EF10_FILTER_DEV_MC_MAX]; + u16 default_filters[EFX_EF10_NUM_DEFAULT_FILTERS]; +}; + +struct efx_mcdi_dev_addr { + u8 addr[ETH_ALEN]; +}; + +struct efx_mcdi_filter_table { +/* The MCDI match masks supported by this fw & hw, in order of priority */ + u32 rx_match_mcdi_flags[ + MC_CMD_GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES_MAXNUM * 2]; + unsigned int rx_match_count; + + struct rw_semaphore lock; /* Protects entries */ + struct { + unsigned long spec; /* pointer to spec plus flag bits */ +/* AUTO_OLD is used to mark and sweep MAC filters for the device address lists. 
*/ +/* unused flag 1UL */ +#define EFX_EF10_FILTER_FLAG_AUTO_OLD 2UL +#define EFX_EF10_FILTER_FLAGS 3UL + u64 handle; /* firmware handle */ + } *entry; +/* Shadow of net_device address lists, guarded by mac_lock */ + struct efx_mcdi_dev_addr dev_uc_list[EFX_EF10_FILTER_DEV_UC_MAX]; + struct efx_mcdi_dev_addr dev_mc_list[EFX_EF10_FILTER_DEV_MC_MAX]; + int dev_uc_count; + int dev_mc_count; + bool uc_promisc; + bool mc_promisc; +/* Whether in multicast promiscuous mode when last changed */ + bool mc_promisc_last; + bool mc_overflow; /* Too many MC addrs; should always imply mc_promisc */ + bool vlan_filter; + struct list_head vlan_list; +}; + +int efx_mcdi_filter_table_probe(struct efx_nic *efx); +void efx_mcdi_filter_table_remove(struct efx_nic *efx); +void efx_mcdi_filter_table_restore(struct efx_nic *efx); + +/* + * The filter table(s) are managed by firmware and we have write-only + * access. When removing filters we must identify them to the + * firmware by a 64-bit handle, but this is too wide for Linux kernel + * interfaces (32-bit for RX NFC, 16-bit for RFS). Also, we need to + * be able to tell in advance whether a requested insertion will + * replace an existing filter. Therefore we maintain a software hash + * table, which should be at least as large as the hardware hash + * table. + * + * Huntington has a single 8K filter table shared between all filter + * types and both ports. + */ +#define EFX_MCDI_FILTER_TBL_ROWS 8192 + +bool efx_mcdi_filter_match_supported(struct efx_mcdi_filter_table *table, + bool encap, + enum efx_filter_match_flags match_flags); + +void efx_mcdi_filter_sync_rx_mode(struct efx_nic *efx); +s32 efx_mcdi_filter_insert(struct efx_nic *efx, struct efx_filter_spec *spec, + bool replace_equal); +int efx_mcdi_filter_remove_safe(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 filter_id); +int efx_mcdi_filter_get_safe(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 filter_id, struct efx_filter_spec *spec); + +u32 efx_mcdi_filter_count_rx_used(struct efx_nic *efx, + enum efx_filter_priority priority); +int efx_mcdi_filter_clear_rx(struct efx_nic *efx, + enum efx_filter_priority priority); +u32 efx_mcdi_filter_get_rx_id_limit(struct efx_nic *efx); +s32 efx_mcdi_filter_get_rx_ids(struct efx_nic *efx, + enum efx_filter_priority priority, + u32 *buf, u32 size); + +void efx_mcdi_filter_cleanup_vlans(struct efx_nic *efx); +int efx_mcdi_filter_add_vlan(struct efx_nic *efx, u16 vid); +struct efx_mcdi_filter_vlan *efx_mcdi_filter_find_vlan(struct efx_nic *efx, u16 vid); +void efx_mcdi_filter_del_vlan(struct efx_nic *efx, u16 vid); + +void efx_mcdi_rx_free_indir_table(struct efx_nic *efx); +int efx_mcdi_rx_push_rss_context_config(struct efx_nic *efx, + struct efx_rss_context *ctx, + const u32 *rx_indir_table, + const u8 *key); +int efx_mcdi_pf_rx_push_rss_config(struct efx_nic *efx, bool user, + const u32 *rx_indir_table, + const u8 *key); +int efx_mcdi_vf_rx_push_rss_config(struct efx_nic *efx, bool user, + const u32 *rx_indir_table + __attribute__ ((unused)), + const u8 *key + __attribute__ ((unused))); +int efx_mcdi_rx_pull_rss_config(struct efx_nic *efx); +int efx_mcdi_rx_pull_rss_context_config(struct efx_nic *efx, + struct efx_rss_context *ctx); +int efx_mcdi_get_rss_context_flags(struct efx_nic *efx, u32 context, + u32 *flags); +void efx_mcdi_set_rss_context_flags(struct efx_nic *efx, + struct efx_rss_context *ctx); +void efx_mcdi_rx_restore_rss_contexts(struct efx_nic *efx); + +static inline void efx_mcdi_update_rx_scatter(struct efx_nic 
*efx) +{ + /* no need to do anything here */ +} + +bool efx_mcdi_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id, + unsigned int filter_idx); + +#endif diff --git a/drivers/net/ethernet/sfc/mcdi_functions.c b/drivers/net/ethernet/sfc/mcdi_functions.c new file mode 100644 index 000000000000..dcfe78b0fa5a --- /dev/null +++ b/drivers/net/ethernet/sfc/mcdi_functions.c @@ -0,0 +1,386 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2019 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "net_driver.h" +#include "efx.h" +#include "nic.h" +#include "mcdi_functions.h" +#include "mcdi.h" +#include "mcdi_pcol.h" + +int efx_mcdi_free_vis(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF_ERR(outbuf); + size_t outlen; + int rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FREE_VIS, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + + /* -EALREADY means nothing to free, so ignore */ + if (rc == -EALREADY) + rc = 0; + if (rc) + efx_mcdi_display_error(efx, MC_CMD_FREE_VIS, 0, outbuf, outlen, + rc); + return rc; +} + +int efx_mcdi_alloc_vis(struct efx_nic *efx, unsigned int min_vis, + unsigned int max_vis, unsigned int *vi_base, + unsigned int *allocated_vis) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_ALLOC_VIS_OUT_LEN); + MCDI_DECLARE_BUF(inbuf, MC_CMD_ALLOC_VIS_IN_LEN); + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MIN_VI_COUNT, min_vis); + MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MAX_VI_COUNT, max_vis); + rc = efx_mcdi_rpc(efx, MC_CMD_ALLOC_VIS, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc != 0) + return rc; + + if (outlen < MC_CMD_ALLOC_VIS_OUT_LEN) + return -EIO; + + netif_dbg(efx, drv, efx->net_dev, "base VI is A0x%03x\n", + MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE)); + + if (vi_base) + *vi_base = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE); + if (allocated_vis) + *allocated_vis = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_COUNT); + return 0; +} + +int efx_mcdi_ev_probe(struct efx_channel *channel) +{ + return efx_nic_alloc_buffer(channel->efx, &channel->eventq.buf, + (channel->eventq_mask + 1) * + sizeof(efx_qword_t), + GFP_KERNEL); +} + +int efx_mcdi_ev_init(struct efx_channel *channel, bool v1_cut_thru, bool v2) +{ + MCDI_DECLARE_BUF(inbuf, + MC_CMD_INIT_EVQ_V2_IN_LEN(EFX_MAX_EVQ_SIZE * 8 / + EFX_BUF_SIZE)); + MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_V2_OUT_LEN); + size_t entries = channel->eventq.buf.len / EFX_BUF_SIZE; + struct efx_nic *efx = channel->efx; + size_t inlen, outlen; + dma_addr_t dma_addr; + int rc, i; + + /* Fill event queue with all ones (i.e. 
empty events) */ + memset(channel->eventq.buf.addr, 0xff, channel->eventq.buf.len); + + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_SIZE, channel->eventq_mask + 1); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_INSTANCE, channel->channel); + /* INIT_EVQ expects index in vector table, not absolute */ + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_IRQ_NUM, channel->channel); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_MODE, + MC_CMD_INIT_EVQ_IN_TMR_MODE_DIS); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_LOAD, 0); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_RELOAD, 0); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_MODE, + MC_CMD_INIT_EVQ_IN_COUNT_MODE_DIS); + MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_THRSHLD, 0); + + if (v2) { + /* Use the new generic approach to specifying event queue + * configuration, requesting lower latency or higher throughput. + * The options that actually get used appear in the output. + */ + MCDI_POPULATE_DWORD_2(inbuf, INIT_EVQ_V2_IN_FLAGS, + INIT_EVQ_V2_IN_FLAG_INTERRUPTING, 1, + INIT_EVQ_V2_IN_FLAG_TYPE, + MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO); + } else { + MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS, + INIT_EVQ_IN_FLAG_INTERRUPTING, 1, + INIT_EVQ_IN_FLAG_RX_MERGE, 1, + INIT_EVQ_IN_FLAG_TX_MERGE, 1, + INIT_EVQ_IN_FLAG_CUT_THRU, v1_cut_thru); + } + + dma_addr = channel->eventq.buf.dma_addr; + for (i = 0; i < entries; ++i) { + MCDI_SET_ARRAY_QWORD(inbuf, INIT_EVQ_IN_DMA_ADDR, i, dma_addr); + dma_addr += EFX_BUF_SIZE; + } + + inlen = MC_CMD_INIT_EVQ_IN_LEN(entries); + + rc = efx_mcdi_rpc(efx, MC_CMD_INIT_EVQ, inbuf, inlen, + outbuf, sizeof(outbuf), &outlen); + + if (outlen >= MC_CMD_INIT_EVQ_V2_OUT_LEN) + netif_dbg(efx, drv, efx->net_dev, + "Channel %d using event queue flags %08x\n", + channel->channel, + MCDI_DWORD(outbuf, INIT_EVQ_V2_OUT_FLAGS)); + + return rc; +} + +void efx_mcdi_ev_remove(struct efx_channel *channel) +{ + efx_nic_free_buffer(channel->efx, &channel->eventq.buf); +} + +void efx_mcdi_ev_fini(struct efx_channel *channel) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_EVQ_IN_LEN); + MCDI_DECLARE_BUF_ERR(outbuf); + struct efx_nic *efx = channel->efx; + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, FINI_EVQ_IN_INSTANCE, channel->channel); + + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_EVQ, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + + if (rc && rc != -EALREADY) + goto fail; + + return; + +fail: + efx_mcdi_display_error(efx, MC_CMD_FINI_EVQ, MC_CMD_FINI_EVQ_IN_LEN, + outbuf, outlen, rc); +} + +int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue, bool tso_v2) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 / + EFX_BUF_SIZE)); + bool csum_offload = tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD; + size_t entries = tx_queue->txd.buf.len / EFX_BUF_SIZE; + struct efx_channel *channel = tx_queue->channel; + struct efx_nic *efx = tx_queue->efx; + struct efx_ef10_nic_data *nic_data; + dma_addr_t dma_addr; + size_t inlen; + int rc, i; + + BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0); + + nic_data = efx->nic_data; + + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue->ptr_mask + 1); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_LABEL, tx_queue->queue); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_INSTANCE, tx_queue->queue); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, nic_data->vport_id); + + dma_addr = tx_queue->txd.buf.dma_addr; + + netif_dbg(efx, hw, efx->net_dev, "pushing TXQ %d. 
%zu entries (%llx)\n", + tx_queue->queue, entries, (u64)dma_addr); + + for (i = 0; i < entries; ++i) { + MCDI_SET_ARRAY_QWORD(inbuf, INIT_TXQ_IN_DMA_ADDR, i, dma_addr); + dma_addr += EFX_BUF_SIZE; + } + + inlen = MC_CMD_INIT_TXQ_IN_LEN(entries); + + do { + MCDI_POPULATE_DWORD_4(inbuf, INIT_TXQ_IN_FLAGS, + /* This flag was removed from mcdi_pcol.h for + * the non-_EXT version of INIT_TXQ. However, + * firmware still honours it. + */ + INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2, + INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload, + INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload, + INIT_TXQ_EXT_IN_FLAG_TIMESTAMP, + tx_queue->timestamping); + + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_INIT_TXQ, inbuf, inlen, + NULL, 0, NULL); + if (rc == -ENOSPC && tso_v2) { + /* Retry without TSOv2 if we're short on contexts. */ + tso_v2 = false; + netif_warn(efx, probe, efx->net_dev, + "TSOv2 context not available to segment in " + "hardware. TCP performance may be reduced.\n" + ); + } else if (rc) { + efx_mcdi_display_error(efx, MC_CMD_INIT_TXQ, + MC_CMD_INIT_TXQ_EXT_IN_LEN, + NULL, 0, rc); + goto fail; + } + } while (rc); + + return 0; + +fail: + return rc; +} + +void efx_mcdi_tx_remove(struct efx_tx_queue *tx_queue) +{ + efx_nic_free_buffer(tx_queue->efx, &tx_queue->txd.buf); +} + +void efx_mcdi_tx_fini(struct efx_tx_queue *tx_queue) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_TXQ_IN_LEN); + MCDI_DECLARE_BUF_ERR(outbuf); + struct efx_nic *efx = tx_queue->efx; + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, FINI_TXQ_IN_INSTANCE, + tx_queue->queue); + + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_TXQ, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + + if (rc && rc != -EALREADY) + goto fail; + + return; + +fail: + efx_mcdi_display_error(efx, MC_CMD_FINI_TXQ, MC_CMD_FINI_TXQ_IN_LEN, + outbuf, outlen, rc); +} + +int efx_mcdi_rx_probe(struct efx_rx_queue *rx_queue) +{ + return efx_nic_alloc_buffer(rx_queue->efx, &rx_queue->rxd.buf, + (rx_queue->ptr_mask + 1) * + sizeof(efx_qword_t), + GFP_KERNEL); +} + +void efx_mcdi_rx_init(struct efx_rx_queue *rx_queue) +{ + MCDI_DECLARE_BUF(inbuf, + MC_CMD_INIT_RXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 / + EFX_BUF_SIZE)); + struct efx_channel *channel = efx_rx_queue_channel(rx_queue); + size_t entries = rx_queue->rxd.buf.len / EFX_BUF_SIZE; + struct efx_nic *efx = rx_queue->efx; + struct efx_ef10_nic_data *nic_data = efx->nic_data; + dma_addr_t dma_addr; + size_t inlen; + int rc; + int i; + BUILD_BUG_ON(MC_CMD_INIT_RXQ_OUT_LEN != 0); + + rx_queue->scatter_n = 0; + rx_queue->scatter_len = 0; + + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_SIZE, rx_queue->ptr_mask + 1); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_TARGET_EVQ, channel->channel); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_LABEL, efx_rx_queue_index(rx_queue)); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_INSTANCE, + efx_rx_queue_index(rx_queue)); + MCDI_POPULATE_DWORD_2(inbuf, INIT_RXQ_IN_FLAGS, + INIT_RXQ_IN_FLAG_PREFIX, 1, + INIT_RXQ_IN_FLAG_TIMESTAMP, 1); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_OWNER_ID, 0); + MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_PORT_ID, nic_data->vport_id); + + dma_addr = rx_queue->rxd.buf.dma_addr; + + netif_dbg(efx, hw, efx->net_dev, "pushing RXQ %d. 
%zu entries (%llx)\n", + efx_rx_queue_index(rx_queue), entries, (u64)dma_addr); + + for (i = 0; i < entries; ++i) { + MCDI_SET_ARRAY_QWORD(inbuf, INIT_RXQ_IN_DMA_ADDR, i, dma_addr); + dma_addr += EFX_BUF_SIZE; + } + + inlen = MC_CMD_INIT_RXQ_IN_LEN(entries); + + rc = efx_mcdi_rpc(efx, MC_CMD_INIT_RXQ, inbuf, inlen, + NULL, 0, NULL); + if (rc) + netdev_WARN(efx->net_dev, "failed to initialise RXQ %d\n", + efx_rx_queue_index(rx_queue)); +} + +void efx_mcdi_rx_remove(struct efx_rx_queue *rx_queue) +{ + efx_nic_free_buffer(rx_queue->efx, &rx_queue->rxd.buf); +} + +void efx_mcdi_rx_fini(struct efx_rx_queue *rx_queue) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_RXQ_IN_LEN); + MCDI_DECLARE_BUF_ERR(outbuf); + struct efx_nic *efx = rx_queue->efx; + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, FINI_RXQ_IN_INSTANCE, + efx_rx_queue_index(rx_queue)); + + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_RXQ, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + + if (rc && rc != -EALREADY) + goto fail; + + return; + +fail: + efx_mcdi_display_error(efx, MC_CMD_FINI_RXQ, MC_CMD_FINI_RXQ_IN_LEN, + outbuf, outlen, rc); +} + +int efx_mcdi_window_mode_to_stride(struct efx_nic *efx, u8 vi_window_mode) +{ + switch (vi_window_mode) { + case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K: + efx->vi_stride = 8192; + break; + case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K: + efx->vi_stride = 16384; + break; + case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K: + efx->vi_stride = 65536; + break; + default: + netif_err(efx, probe, efx->net_dev, + "Unrecognised VI window mode %d\n", + vi_window_mode); + return -EIO; + } + netif_dbg(efx, probe, efx->net_dev, "vi_stride = %u\n", + efx->vi_stride); + return 0; +} + +int efx_get_pf_index(struct efx_nic *efx, unsigned int *pf_index) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_FUNCTION_INFO_OUT_LEN); + size_t outlen; + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_FUNCTION_INFO, NULL, 0, outbuf, + sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < sizeof(outbuf)) + return -EIO; + + *pf_index = MCDI_DWORD(outbuf, GET_FUNCTION_INFO_OUT_PF); + return 0; +} diff --git a/drivers/net/ethernet/sfc/mcdi_functions.h b/drivers/net/ethernet/sfc/mcdi_functions.h new file mode 100644 index 000000000000..ca4a5ac1a66b --- /dev/null +++ b/drivers/net/ethernet/sfc/mcdi_functions.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
+ */ +#ifndef EFX_MCDI_FUNCTIONS_H +#define EFX_MCDI_FUNCTIONS_H + +int efx_mcdi_alloc_vis(struct efx_nic *efx, unsigned int min_vis, + unsigned int max_vis, unsigned int *vi_base, + unsigned int *allocated_vis); +int efx_mcdi_free_vis(struct efx_nic *efx); + +int efx_mcdi_ev_probe(struct efx_channel *channel); +int efx_mcdi_ev_init(struct efx_channel *channel, bool v1_cut_thru, bool v2); +void efx_mcdi_ev_remove(struct efx_channel *channel); +void efx_mcdi_ev_fini(struct efx_channel *channel); +int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue, bool tso_v2); +void efx_mcdi_tx_remove(struct efx_tx_queue *tx_queue); +void efx_mcdi_tx_fini(struct efx_tx_queue *tx_queue); +int efx_mcdi_rx_probe(struct efx_rx_queue *rx_queue); +void efx_mcdi_rx_init(struct efx_rx_queue *rx_queue); +void efx_mcdi_rx_remove(struct efx_rx_queue *rx_queue); +void efx_mcdi_rx_fini(struct efx_rx_queue *rx_queue); +int efx_mcdi_window_mode_to_stride(struct efx_nic *efx, u8 vi_window_mode); +int efx_get_pf_index(struct efx_nic *efx, unsigned int *pf_index); + +#endif diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c index fb7cde4980ed..ab5227b13ae6 100644 --- a/drivers/net/ethernet/sfc/mcdi_port.c +++ b/drivers/net/ethernet/sfc/mcdi_port.c @@ -14,106 +14,7 @@ #include "mcdi_pcol.h" #include "nic.h" #include "selftest.h" - -struct efx_mcdi_phy_data { - u32 flags; - u32 type; - u32 supported_cap; - u32 channel; - u32 port; - u32 stats_mask; - u8 name[20]; - u32 media; - u32 mmd_mask; - u8 revision[20]; - u32 forced_cap; -}; - -static int -efx_mcdi_get_phy_cfg(struct efx_nic *efx, struct efx_mcdi_phy_data *cfg) -{ - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_CFG_OUT_LEN); - size_t outlen; - int rc; - - BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_IN_LEN != 0); - BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_OUT_NAME_LEN != sizeof(cfg->name)); - - rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_CFG, NULL, 0, - outbuf, sizeof(outbuf), &outlen); - if (rc) - goto fail; - - if (outlen < MC_CMD_GET_PHY_CFG_OUT_LEN) { - rc = -EIO; - goto fail; - } - - cfg->flags = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_FLAGS); - cfg->type = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_TYPE); - cfg->supported_cap = - MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_SUPPORTED_CAP); - cfg->channel = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_CHANNEL); - cfg->port = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_PRT); - cfg->stats_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_STATS_MASK); - memcpy(cfg->name, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_NAME), - sizeof(cfg->name)); - cfg->media = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MEDIA_TYPE); - cfg->mmd_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MMD_MASK); - memcpy(cfg->revision, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_REVISION), - sizeof(cfg->revision)); - - return 0; - -fail: - netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); - return rc; -} - -static int efx_mcdi_set_link(struct efx_nic *efx, u32 capabilities, - u32 flags, u32 loopback_mode, - u32 loopback_speed) -{ - MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_LINK_IN_LEN); - int rc; - - BUILD_BUG_ON(MC_CMD_SET_LINK_OUT_LEN != 0); - - MCDI_SET_DWORD(inbuf, SET_LINK_IN_CAP, capabilities); - MCDI_SET_DWORD(inbuf, SET_LINK_IN_FLAGS, flags); - MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_MODE, loopback_mode); - MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_SPEED, loopback_speed); - - rc = efx_mcdi_rpc(efx, MC_CMD_SET_LINK, inbuf, sizeof(inbuf), - NULL, 0, NULL); - return rc; -} - -static int efx_mcdi_loopback_modes(struct efx_nic *efx, u64 *loopback_modes) -{ - MCDI_DECLARE_BUF(outbuf, 
MC_CMD_GET_LOOPBACK_MODES_OUT_LEN); - size_t outlen; - int rc; - - rc = efx_mcdi_rpc(efx, MC_CMD_GET_LOOPBACK_MODES, NULL, 0, - outbuf, sizeof(outbuf), &outlen); - if (rc) - goto fail; - - if (outlen < (MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_OFST + - MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_LEN)) { - rc = -EIO; - goto fail; - } - - *loopback_modes = MCDI_QWORD(outbuf, GET_LOOPBACK_MODES_OUT_SUGGESTED); - - return 0; - -fail: - netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); - return rc; -} +#include "mcdi_port_common.h" static int efx_mcdi_mdio_read(struct net_device *net_dev, int prtad, int devad, u16 addr) @@ -168,246 +69,6 @@ static int efx_mcdi_mdio_write(struct net_device *net_dev, return 0; } -static void mcdi_to_ethtool_linkset(u32 media, u32 cap, unsigned long *linkset) -{ - #define SET_BIT(name) __set_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \ - linkset) - - bitmap_zero(linkset, __ETHTOOL_LINK_MODE_MASK_NBITS); - switch (media) { - case MC_CMD_MEDIA_KX4: - SET_BIT(Backplane); - if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) - SET_BIT(1000baseKX_Full); - if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) - SET_BIT(10000baseKX4_Full); - if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) - SET_BIT(40000baseKR4_Full); - break; - - case MC_CMD_MEDIA_XFP: - case MC_CMD_MEDIA_SFP_PLUS: - case MC_CMD_MEDIA_QSFP_PLUS: - SET_BIT(FIBRE); - if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) - SET_BIT(1000baseT_Full); - if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) - SET_BIT(10000baseT_Full); - if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) - SET_BIT(40000baseCR4_Full); - if (cap & (1 << MC_CMD_PHY_CAP_100000FDX_LBN)) - SET_BIT(100000baseCR4_Full); - if (cap & (1 << MC_CMD_PHY_CAP_25000FDX_LBN)) - SET_BIT(25000baseCR_Full); - if (cap & (1 << MC_CMD_PHY_CAP_50000FDX_LBN)) - SET_BIT(50000baseCR2_Full); - break; - - case MC_CMD_MEDIA_BASE_T: - SET_BIT(TP); - if (cap & (1 << MC_CMD_PHY_CAP_10HDX_LBN)) - SET_BIT(10baseT_Half); - if (cap & (1 << MC_CMD_PHY_CAP_10FDX_LBN)) - SET_BIT(10baseT_Full); - if (cap & (1 << MC_CMD_PHY_CAP_100HDX_LBN)) - SET_BIT(100baseT_Half); - if (cap & (1 << MC_CMD_PHY_CAP_100FDX_LBN)) - SET_BIT(100baseT_Full); - if (cap & (1 << MC_CMD_PHY_CAP_1000HDX_LBN)) - SET_BIT(1000baseT_Half); - if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) - SET_BIT(1000baseT_Full); - if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) - SET_BIT(10000baseT_Full); - break; - } - - if (cap & (1 << MC_CMD_PHY_CAP_PAUSE_LBN)) - SET_BIT(Pause); - if (cap & (1 << MC_CMD_PHY_CAP_ASYM_LBN)) - SET_BIT(Asym_Pause); - if (cap & (1 << MC_CMD_PHY_CAP_AN_LBN)) - SET_BIT(Autoneg); - - #undef SET_BIT -} - -static u32 ethtool_linkset_to_mcdi_cap(const unsigned long *linkset) -{ - u32 result = 0; - - #define TEST_BIT(name) test_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \ - linkset) - - if (TEST_BIT(10baseT_Half)) - result |= (1 << MC_CMD_PHY_CAP_10HDX_LBN); - if (TEST_BIT(10baseT_Full)) - result |= (1 << MC_CMD_PHY_CAP_10FDX_LBN); - if (TEST_BIT(100baseT_Half)) - result |= (1 << MC_CMD_PHY_CAP_100HDX_LBN); - if (TEST_BIT(100baseT_Full)) - result |= (1 << MC_CMD_PHY_CAP_100FDX_LBN); - if (TEST_BIT(1000baseT_Half)) - result |= (1 << MC_CMD_PHY_CAP_1000HDX_LBN); - if (TEST_BIT(1000baseT_Full) || TEST_BIT(1000baseKX_Full)) - result |= (1 << MC_CMD_PHY_CAP_1000FDX_LBN); - if (TEST_BIT(10000baseT_Full) || TEST_BIT(10000baseKX4_Full)) - result |= (1 << MC_CMD_PHY_CAP_10000FDX_LBN); - if (TEST_BIT(40000baseCR4_Full) || TEST_BIT(40000baseKR4_Full)) - result |= (1 << MC_CMD_PHY_CAP_40000FDX_LBN); - if (TEST_BIT(100000baseCR4_Full)) 
- result |= (1 << MC_CMD_PHY_CAP_100000FDX_LBN); - if (TEST_BIT(25000baseCR_Full)) - result |= (1 << MC_CMD_PHY_CAP_25000FDX_LBN); - if (TEST_BIT(50000baseCR2_Full)) - result |= (1 << MC_CMD_PHY_CAP_50000FDX_LBN); - if (TEST_BIT(Pause)) - result |= (1 << MC_CMD_PHY_CAP_PAUSE_LBN); - if (TEST_BIT(Asym_Pause)) - result |= (1 << MC_CMD_PHY_CAP_ASYM_LBN); - if (TEST_BIT(Autoneg)) - result |= (1 << MC_CMD_PHY_CAP_AN_LBN); - - #undef TEST_BIT - - return result; -} - -static u32 efx_get_mcdi_phy_flags(struct efx_nic *efx) -{ - struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; - enum efx_phy_mode mode, supported; - u32 flags; - - /* TODO: Advertise the capabilities supported by this PHY */ - supported = 0; - if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_TXDIS_LBN)) - supported |= PHY_MODE_TX_DISABLED; - if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_LOWPOWER_LBN)) - supported |= PHY_MODE_LOW_POWER; - if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_POWEROFF_LBN)) - supported |= PHY_MODE_OFF; - - mode = efx->phy_mode & supported; - - flags = 0; - if (mode & PHY_MODE_TX_DISABLED) - flags |= (1 << MC_CMD_SET_LINK_IN_TXDIS_LBN); - if (mode & PHY_MODE_LOW_POWER) - flags |= (1 << MC_CMD_SET_LINK_IN_LOWPOWER_LBN); - if (mode & PHY_MODE_OFF) - flags |= (1 << MC_CMD_SET_LINK_IN_POWEROFF_LBN); - - return flags; -} - -static u8 mcdi_to_ethtool_media(u32 media) -{ - switch (media) { - case MC_CMD_MEDIA_XAUI: - case MC_CMD_MEDIA_CX4: - case MC_CMD_MEDIA_KX4: - return PORT_OTHER; - - case MC_CMD_MEDIA_XFP: - case MC_CMD_MEDIA_SFP_PLUS: - case MC_CMD_MEDIA_QSFP_PLUS: - return PORT_FIBRE; - - case MC_CMD_MEDIA_BASE_T: - return PORT_TP; - - default: - return PORT_OTHER; - } -} - -static void efx_mcdi_phy_decode_link(struct efx_nic *efx, - struct efx_link_state *link_state, - u32 speed, u32 flags, u32 fcntl) -{ - switch (fcntl) { - case MC_CMD_FCNTL_AUTO: - WARN_ON(1); /* This is not a link mode */ - link_state->fc = EFX_FC_AUTO | EFX_FC_TX | EFX_FC_RX; - break; - case MC_CMD_FCNTL_BIDIR: - link_state->fc = EFX_FC_TX | EFX_FC_RX; - break; - case MC_CMD_FCNTL_RESPOND: - link_state->fc = EFX_FC_RX; - break; - default: - WARN_ON(1); - /* Fall through */ - case MC_CMD_FCNTL_OFF: - link_state->fc = 0; - break; - } - - link_state->up = !!(flags & (1 << MC_CMD_GET_LINK_OUT_LINK_UP_LBN)); - link_state->fd = !!(flags & (1 << MC_CMD_GET_LINK_OUT_FULL_DUPLEX_LBN)); - link_state->speed = speed; -} - -/* The semantics of the ethtool FEC mode bitmask are not well defined, - * particularly the meaning of combinations of bits. Which means we get to - * define our own semantics, as follows: - * OFF overrides any other bits, and means "disable all FEC" (with the - * exception of 25G KR4/CR4, where it is not possible to reject it if AN - * partner requests it). - * AUTO on its own means use cable requirements and link partner autoneg with - * fw-default preferences for the cable type. - * AUTO and either RS or BASER means use the specified FEC type if cable and - * link partner support it, otherwise autoneg/fw-default. - * RS or BASER alone means use the specified FEC type if cable and link partner - * support it and either requests it, otherwise no FEC. - * Both RS and BASER (whether AUTO or not) means use FEC if cable and link - * partner support it, preferring RS to BASER. 
- */ -static u32 ethtool_fec_caps_to_mcdi(u32 ethtool_cap) -{ - u32 ret = 0; - - if (ethtool_cap & ETHTOOL_FEC_OFF) - return 0; - - if (ethtool_cap & ETHTOOL_FEC_AUTO) - ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) | - (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) | - (1 << MC_CMD_PHY_CAP_RS_FEC_LBN); - if (ethtool_cap & ETHTOOL_FEC_RS) - ret |= (1 << MC_CMD_PHY_CAP_RS_FEC_LBN) | - (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN); - if (ethtool_cap & ETHTOOL_FEC_BASER) - ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) | - (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) | - (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN) | - (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN); - return ret; -} - -/* Invert ethtool_fec_caps_to_mcdi. There are two combinations that function - * can never produce, (baser xor rs) and neither req; the implementation below - * maps both of those to AUTO. This should never matter, and it's not clear - * what a better mapping would be anyway. - */ -static u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g) -{ - bool rs = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_LBN), - rs_req = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN), - baser = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) - : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN), - baser_req = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN) - : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN); - - if (!baser && !rs) - return ETHTOOL_FEC_OFF; - return (rs_req ? ETHTOOL_FEC_RS : 0) | - (baser_req ? ETHTOOL_FEC_BASER : 0) | - (baser == baser_req && rs == rs_req ? 0 : ETHTOOL_FEC_AUTO); -} - static int efx_mcdi_phy_probe(struct efx_nic *efx) { struct efx_mcdi_phy_data *phy_data; @@ -527,58 +188,6 @@ int efx_mcdi_port_reconfigure(struct efx_nic *efx) efx->loopback_mode, 0); } -/* Verify that the forced flow control settings (!EFX_FC_AUTO) are - * supported by the link partner. 
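The rule this comment documents is small enough to model on its own: warn only when TX pause is forced on while the link partner advertises nothing but asymmetric pause, i.e. it will send pause frames but not honour ours. A standalone sketch, with illustrative constants standing in for the driver's EFX_FC_* and ethtool's ADVERTISED_* flags:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins; the real values come from the sfc driver and
 * <linux/ethtool.h>.
 */
#define FC_TX      0x1   /* generate pause frames */
#define FC_RX      0x2   /* honour received pause frames */
#define ADV_PAUSE  0x1   /* partner advertises symmetric pause */
#define ADV_ASYM   0x2   /* partner advertises asymmetric pause */

static bool pause_config_needs_warning(unsigned int wanted_fc,
                                       unsigned int partner_adv)
{
        /* Forced TX pause is only useful if the partner will honour it;
         * an advertisement of Asym_Pause alone means it will not.
         */
        return (wanted_fc & FC_TX) && partner_adv == ADV_ASYM;
}

int main(void)
{
        printf("%d\n", pause_config_needs_warning(FC_TX | FC_RX, ADV_ASYM));             /* 1 */
        printf("%d\n", pause_config_needs_warning(FC_TX | FC_RX, ADV_PAUSE | ADV_ASYM)); /* 0 */
        return 0;
}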
Warn the user if this isn't the case - */ -static void efx_mcdi_phy_check_fcntl(struct efx_nic *efx, u32 lpa) -{ - struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; - u32 rmtadv; - - /* The link partner capabilities are only relevant if the - * link supports flow control autonegotiation */ - if (~phy_cfg->supported_cap & (1 << MC_CMD_PHY_CAP_AN_LBN)) - return; - - /* If flow control autoneg is supported and enabled, then fine */ - if (efx->wanted_fc & EFX_FC_AUTO) - return; - - rmtadv = 0; - if (lpa & (1 << MC_CMD_PHY_CAP_PAUSE_LBN)) - rmtadv |= ADVERTISED_Pause; - if (lpa & (1 << MC_CMD_PHY_CAP_ASYM_LBN)) - rmtadv |= ADVERTISED_Asym_Pause; - - if ((efx->wanted_fc & EFX_FC_TX) && rmtadv == ADVERTISED_Asym_Pause) - netif_err(efx, link, efx->net_dev, - "warning: link partner doesn't support pause frames"); -} - -static bool efx_mcdi_phy_poll(struct efx_nic *efx) -{ - struct efx_link_state old_state = efx->link_state; - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN); - int rc; - - WARN_ON(!mutex_is_locked(&efx->mac_lock)); - - BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); - - rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, - outbuf, sizeof(outbuf), NULL); - if (rc) - efx->link_state.up = false; - else - efx_mcdi_phy_decode_link( - efx, &efx->link_state, - MCDI_DWORD(outbuf, GET_LINK_OUT_LINK_SPEED), - MCDI_DWORD(outbuf, GET_LINK_OUT_FLAGS), - MCDI_DWORD(outbuf, GET_LINK_OUT_FCNTL)); - - return !efx_link_state_equal(&efx->link_state, &old_state); -} - static void efx_mcdi_phy_remove(struct efx_nic *efx) { struct efx_mcdi_phy_data *phy_data = efx->phy_data; @@ -666,58 +275,6 @@ efx_mcdi_phy_set_link_ksettings(struct efx_nic *efx, return 0; } -static int efx_mcdi_phy_get_fecparam(struct efx_nic *efx, - struct ethtool_fecparam *fec) -{ - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_V2_LEN); - u32 caps, active, speed; /* MCDI format */ - bool is_25g = false; - size_t outlen; - int rc; - - BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); - rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, - outbuf, sizeof(outbuf), &outlen); - if (rc) - return rc; - if (outlen < MC_CMD_GET_LINK_OUT_V2_LEN) - return -EOPNOTSUPP; - - /* behaviour for 25G/50G links depends on 25G BASER bit */ - speed = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_LINK_SPEED); - is_25g = speed == 25000 || speed == 50000; - - caps = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_CAP); - fec->fec = mcdi_fec_caps_to_ethtool(caps, is_25g); - /* BASER is never supported on 100G */ - if (speed == 100000) - fec->fec &= ~ETHTOOL_FEC_BASER; - - active = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_FEC_TYPE); - switch (active) { - case MC_CMD_FEC_NONE: - fec->active_fec = ETHTOOL_FEC_OFF; - break; - case MC_CMD_FEC_BASER: - fec->active_fec = ETHTOOL_FEC_BASER; - break; - case MC_CMD_FEC_RS: - fec->active_fec = ETHTOOL_FEC_RS; - break; - default: - netif_warn(efx, hw, efx->net_dev, - "Firmware reports unrecognised FEC_TYPE %u\n", - active); - /* We don't know what firmware has picked. AUTO is as good a - * "can't happen" value as any other. 
- */ - fec->active_fec = ETHTOOL_FEC_AUTO; - break; - } - - return 0; -} - static int efx_mcdi_phy_set_fecparam(struct efx_nic *efx, const struct ethtool_fecparam *fec) { @@ -745,27 +302,6 @@ static int efx_mcdi_phy_set_fecparam(struct efx_nic *efx, return 0; } -static int efx_mcdi_phy_test_alive(struct efx_nic *efx) -{ - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_STATE_OUT_LEN); - size_t outlen; - int rc; - - BUILD_BUG_ON(MC_CMD_GET_PHY_STATE_IN_LEN != 0); - - rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_STATE, NULL, 0, - outbuf, sizeof(outbuf), &outlen); - if (rc) - return rc; - - if (outlen < MC_CMD_GET_PHY_STATE_OUT_LEN) - return -EIO; - if (MCDI_DWORD(outbuf, GET_PHY_STATE_OUT_STATE) != MC_CMD_PHY_STATE_OK) - return -EINVAL; - - return 0; -} - static const char *const mcdi_sft9001_cable_diag_names[] = { "cable.pairA.length", "cable.pairB.length", @@ -1139,84 +675,6 @@ u32 efx_mcdi_phy_get_caps(struct efx_nic *efx) return phy_data->supported_cap; } -static unsigned int efx_mcdi_event_link_speed[] = { - [MCDI_EVENT_LINKCHANGE_SPEED_100M] = 100, - [MCDI_EVENT_LINKCHANGE_SPEED_1G] = 1000, - [MCDI_EVENT_LINKCHANGE_SPEED_10G] = 10000, - [MCDI_EVENT_LINKCHANGE_SPEED_40G] = 40000, - [MCDI_EVENT_LINKCHANGE_SPEED_25G] = 25000, - [MCDI_EVENT_LINKCHANGE_SPEED_50G] = 50000, - [MCDI_EVENT_LINKCHANGE_SPEED_100G] = 100000, -}; - -void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev) -{ - u32 flags, fcntl, speed, lpa; - - speed = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_SPEED); - EFX_WARN_ON_PARANOID(speed >= ARRAY_SIZE(efx_mcdi_event_link_speed)); - speed = efx_mcdi_event_link_speed[speed]; - - flags = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LINK_FLAGS); - fcntl = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_FCNTL); - lpa = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LP_CAP); - - /* efx->link_state is only modified by efx_mcdi_phy_get_link(), - * which is only run after flushing the event queues. Therefore, it - * is safe to modify the link state outside of the mac_lock here. 
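The efx_mcdi_event_link_speed[] table above exists because a LINKCHANGE event carries an enumeration index rather than a rate in Mb/s; decoding is a bounds check plus a sparse table lookup. A minimal model, with made-up index values standing in for the MCDI_EVENT_LINKCHANGE_SPEED_* constants from mcdi_pcol.h:

#include <stdio.h>

/* Illustrative indices; the real MCDI_EVENT_LINKCHANGE_SPEED_* values
 * are defined in mcdi_pcol.h.
 */
enum {
        SPEED_100M = 1, SPEED_1G, SPEED_10G, SPEED_40G,
        SPEED_25G, SPEED_50G, SPEED_100G, SPEED_COUNT
};

static const unsigned int link_speed_mbps[SPEED_COUNT] = {
        [SPEED_100M] = 100,
        [SPEED_1G]   = 1000,
        [SPEED_10G]  = 10000,
        [SPEED_40G]  = 40000,
        [SPEED_25G]  = 25000,
        [SPEED_50G]  = 50000,
        [SPEED_100G] = 100000,
};

int main(void)
{
        unsigned int ev_speed = SPEED_25G;      /* field decoded from the event */

        if (ev_speed < SPEED_COUNT && link_speed_mbps[ev_speed])
                printf("link speed: %u Mb/s\n", link_speed_mbps[ev_speed]);
        return 0;
}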
- */ - efx_mcdi_phy_decode_link(efx, &efx->link_state, speed, flags, fcntl); - - efx_mcdi_phy_check_fcntl(efx, lpa); - - efx_link_status_changed(efx); -} - -int efx_mcdi_set_mac(struct efx_nic *efx) -{ - u32 fcntl; - MCDI_DECLARE_BUF(cmdbytes, MC_CMD_SET_MAC_IN_LEN); - - BUILD_BUG_ON(MC_CMD_SET_MAC_OUT_LEN != 0); - - /* This has no effect on EF10 */ - ether_addr_copy(MCDI_PTR(cmdbytes, SET_MAC_IN_ADDR), - efx->net_dev->dev_addr); - - MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_MTU, - EFX_MAX_FRAME_LEN(efx->net_dev->mtu)); - MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_DRAIN, 0); - - /* Set simple MAC filter for Siena */ - MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_REJECT, - SET_MAC_IN_REJECT_UNCST, efx->unicast_filter); - - MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_FLAGS, - SET_MAC_IN_FLAG_INCLUDE_FCS, - !!(efx->net_dev->features & NETIF_F_RXFCS)); - - switch (efx->wanted_fc) { - case EFX_FC_RX | EFX_FC_TX: - fcntl = MC_CMD_FCNTL_BIDIR; - break; - case EFX_FC_RX: - fcntl = MC_CMD_FCNTL_RESPOND; - break; - default: - fcntl = MC_CMD_FCNTL_OFF; - break; - } - if (efx->wanted_fc & EFX_FC_AUTO) - fcntl = MC_CMD_FCNTL_AUTO; - if (efx->fc_disable) - fcntl = MC_CMD_FCNTL_OFF; - - MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_FCNTL, fcntl); - - return efx_mcdi_rpc(efx, MC_CMD_SET_MAC, cmdbytes, sizeof(cmdbytes), - NULL, 0, NULL); -} - bool efx_mcdi_mac_check_fault(struct efx_nic *efx) { MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN); @@ -1348,17 +806,3 @@ void efx_mcdi_port_remove(struct efx_nic *efx) efx->phy_op->remove(efx); efx_nic_free_buffer(efx, &efx->stats_buffer); } - -/* Get physical port number (EF10 only; on Siena it is same as PF number) */ -int efx_mcdi_port_get_number(struct efx_nic *efx) -{ - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PORT_ASSIGNMENT_OUT_LEN); - int rc; - - rc = efx_mcdi_rpc(efx, MC_CMD_GET_PORT_ASSIGNMENT, NULL, 0, - outbuf, sizeof(outbuf), NULL); - if (rc) - return rc; - - return MCDI_DWORD(outbuf, GET_PORT_ASSIGNMENT_OUT_PORT); -} diff --git a/drivers/net/ethernet/sfc/mcdi_port_common.c b/drivers/net/ethernet/sfc/mcdi_port_common.c new file mode 100644 index 000000000000..a6a072ba46d3 --- /dev/null +++ b/drivers/net/ethernet/sfc/mcdi_port_common.c @@ -0,0 +1,568 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
+ */ + +#include "mcdi_port_common.h" +#include "efx_common.h" + +int efx_mcdi_get_phy_cfg(struct efx_nic *efx, struct efx_mcdi_phy_data *cfg) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_CFG_OUT_LEN); + size_t outlen; + int rc; + + BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_IN_LEN != 0); + BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_OUT_NAME_LEN != sizeof(cfg->name)); + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_CFG, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto fail; + + if (outlen < MC_CMD_GET_PHY_CFG_OUT_LEN) { + rc = -EIO; + goto fail; + } + + cfg->flags = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_FLAGS); + cfg->type = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_TYPE); + cfg->supported_cap = + MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_SUPPORTED_CAP); + cfg->channel = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_CHANNEL); + cfg->port = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_PRT); + cfg->stats_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_STATS_MASK); + memcpy(cfg->name, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_NAME), + sizeof(cfg->name)); + cfg->media = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MEDIA_TYPE); + cfg->mmd_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MMD_MASK); + memcpy(cfg->revision, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_REVISION), + sizeof(cfg->revision)); + + return 0; + +fail: + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); + return rc; +} + +void efx_link_set_advertising(struct efx_nic *efx, + const unsigned long *advertising) +{ + memcpy(efx->link_advertising, advertising, + sizeof(__ETHTOOL_DECLARE_LINK_MODE_MASK())); + + efx->link_advertising[0] |= ADVERTISED_Autoneg; + if (advertising[0] & ADVERTISED_Pause) + efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX); + else + efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX); + if (advertising[0] & ADVERTISED_Asym_Pause) + efx->wanted_fc ^= EFX_FC_TX; +} + +int efx_mcdi_set_link(struct efx_nic *efx, u32 capabilities, + u32 flags, u32 loopback_mode, u32 loopback_speed) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_LINK_IN_LEN); + int rc; + + BUILD_BUG_ON(MC_CMD_SET_LINK_OUT_LEN != 0); + + MCDI_SET_DWORD(inbuf, SET_LINK_IN_CAP, capabilities); + MCDI_SET_DWORD(inbuf, SET_LINK_IN_FLAGS, flags); + MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_MODE, loopback_mode); + MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_SPEED, loopback_speed); + + rc = efx_mcdi_rpc(efx, MC_CMD_SET_LINK, inbuf, sizeof(inbuf), + NULL, 0, NULL); + return rc; +} + +int efx_mcdi_loopback_modes(struct efx_nic *efx, u64 *loopback_modes) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LOOPBACK_MODES_OUT_LEN); + size_t outlen; + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_LOOPBACK_MODES, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto fail; + + if (outlen < (MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_OFST + + MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_LEN)) { + rc = -EIO; + goto fail; + } + + *loopback_modes = MCDI_QWORD(outbuf, GET_LOOPBACK_MODES_OUT_SUGGESTED); + + return 0; + +fail: + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); + return rc; +} + +void mcdi_to_ethtool_linkset(u32 media, u32 cap, unsigned long *linkset) +{ + #define SET_BIT(name) __set_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \ + linkset) + + bitmap_zero(linkset, __ETHTOOL_LINK_MODE_MASK_NBITS); + switch (media) { + case MC_CMD_MEDIA_KX4: + SET_BIT(Backplane); + if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) + SET_BIT(1000baseKX_Full); + if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) + SET_BIT(10000baseKX4_Full); + if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) + SET_BIT(40000baseKR4_Full); + break; + + case MC_CMD_MEDIA_XFP: + case 
MC_CMD_MEDIA_SFP_PLUS: + case MC_CMD_MEDIA_QSFP_PLUS: + SET_BIT(FIBRE); + if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) + SET_BIT(1000baseT_Full); + if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) + SET_BIT(10000baseT_Full); + if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) + SET_BIT(40000baseCR4_Full); + if (cap & (1 << MC_CMD_PHY_CAP_100000FDX_LBN)) + SET_BIT(100000baseCR4_Full); + if (cap & (1 << MC_CMD_PHY_CAP_25000FDX_LBN)) + SET_BIT(25000baseCR_Full); + if (cap & (1 << MC_CMD_PHY_CAP_50000FDX_LBN)) + SET_BIT(50000baseCR2_Full); + break; + + case MC_CMD_MEDIA_BASE_T: + SET_BIT(TP); + if (cap & (1 << MC_CMD_PHY_CAP_10HDX_LBN)) + SET_BIT(10baseT_Half); + if (cap & (1 << MC_CMD_PHY_CAP_10FDX_LBN)) + SET_BIT(10baseT_Full); + if (cap & (1 << MC_CMD_PHY_CAP_100HDX_LBN)) + SET_BIT(100baseT_Half); + if (cap & (1 << MC_CMD_PHY_CAP_100FDX_LBN)) + SET_BIT(100baseT_Full); + if (cap & (1 << MC_CMD_PHY_CAP_1000HDX_LBN)) + SET_BIT(1000baseT_Half); + if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) + SET_BIT(1000baseT_Full); + if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) + SET_BIT(10000baseT_Full); + break; + } + + if (cap & (1 << MC_CMD_PHY_CAP_PAUSE_LBN)) + SET_BIT(Pause); + if (cap & (1 << MC_CMD_PHY_CAP_ASYM_LBN)) + SET_BIT(Asym_Pause); + if (cap & (1 << MC_CMD_PHY_CAP_AN_LBN)) + SET_BIT(Autoneg); + + #undef SET_BIT +} + +u32 ethtool_linkset_to_mcdi_cap(const unsigned long *linkset) +{ + u32 result = 0; + + #define TEST_BIT(name) test_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \ + linkset) + + if (TEST_BIT(10baseT_Half)) + result |= (1 << MC_CMD_PHY_CAP_10HDX_LBN); + if (TEST_BIT(10baseT_Full)) + result |= (1 << MC_CMD_PHY_CAP_10FDX_LBN); + if (TEST_BIT(100baseT_Half)) + result |= (1 << MC_CMD_PHY_CAP_100HDX_LBN); + if (TEST_BIT(100baseT_Full)) + result |= (1 << MC_CMD_PHY_CAP_100FDX_LBN); + if (TEST_BIT(1000baseT_Half)) + result |= (1 << MC_CMD_PHY_CAP_1000HDX_LBN); + if (TEST_BIT(1000baseT_Full) || TEST_BIT(1000baseKX_Full)) + result |= (1 << MC_CMD_PHY_CAP_1000FDX_LBN); + if (TEST_BIT(10000baseT_Full) || TEST_BIT(10000baseKX4_Full)) + result |= (1 << MC_CMD_PHY_CAP_10000FDX_LBN); + if (TEST_BIT(40000baseCR4_Full) || TEST_BIT(40000baseKR4_Full)) + result |= (1 << MC_CMD_PHY_CAP_40000FDX_LBN); + if (TEST_BIT(100000baseCR4_Full)) + result |= (1 << MC_CMD_PHY_CAP_100000FDX_LBN); + if (TEST_BIT(25000baseCR_Full)) + result |= (1 << MC_CMD_PHY_CAP_25000FDX_LBN); + if (TEST_BIT(50000baseCR2_Full)) + result |= (1 << MC_CMD_PHY_CAP_50000FDX_LBN); + if (TEST_BIT(Pause)) + result |= (1 << MC_CMD_PHY_CAP_PAUSE_LBN); + if (TEST_BIT(Asym_Pause)) + result |= (1 << MC_CMD_PHY_CAP_ASYM_LBN); + if (TEST_BIT(Autoneg)) + result |= (1 << MC_CMD_PHY_CAP_AN_LBN); + + #undef TEST_BIT + + return result; +} + +u32 efx_get_mcdi_phy_flags(struct efx_nic *efx) +{ + struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; + enum efx_phy_mode mode, supported; + u32 flags; + + /* TODO: Advertise the capabilities supported by this PHY */ + supported = 0; + if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_TXDIS_LBN)) + supported |= PHY_MODE_TX_DISABLED; + if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_LOWPOWER_LBN)) + supported |= PHY_MODE_LOW_POWER; + if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_POWEROFF_LBN)) + supported |= PHY_MODE_OFF; + + mode = efx->phy_mode & supported; + + flags = 0; + if (mode & PHY_MODE_TX_DISABLED) + flags |= (1 << MC_CMD_SET_LINK_IN_TXDIS_LBN); + if (mode & PHY_MODE_LOW_POWER) + flags |= (1 << MC_CMD_SET_LINK_IN_LOWPOWER_LBN); + if (mode & PHY_MODE_OFF) + flags |= (1 << MC_CMD_SET_LINK_IN_POWEROFF_LBN); + + 
return flags; +} + +u8 mcdi_to_ethtool_media(u32 media) +{ + switch (media) { + case MC_CMD_MEDIA_XAUI: + case MC_CMD_MEDIA_CX4: + case MC_CMD_MEDIA_KX4: + return PORT_OTHER; + + case MC_CMD_MEDIA_XFP: + case MC_CMD_MEDIA_SFP_PLUS: + case MC_CMD_MEDIA_QSFP_PLUS: + return PORT_FIBRE; + + case MC_CMD_MEDIA_BASE_T: + return PORT_TP; + + default: + return PORT_OTHER; + } +} + +void efx_mcdi_phy_decode_link(struct efx_nic *efx, + struct efx_link_state *link_state, + u32 speed, u32 flags, u32 fcntl) +{ + switch (fcntl) { + case MC_CMD_FCNTL_AUTO: + WARN_ON(1); /* This is not a link mode */ + link_state->fc = EFX_FC_AUTO | EFX_FC_TX | EFX_FC_RX; + break; + case MC_CMD_FCNTL_BIDIR: + link_state->fc = EFX_FC_TX | EFX_FC_RX; + break; + case MC_CMD_FCNTL_RESPOND: + link_state->fc = EFX_FC_RX; + break; + default: + WARN_ON(1); + /* Fall through */ + case MC_CMD_FCNTL_OFF: + link_state->fc = 0; + break; + } + + link_state->up = !!(flags & (1 << MC_CMD_GET_LINK_OUT_LINK_UP_LBN)); + link_state->fd = !!(flags & (1 << MC_CMD_GET_LINK_OUT_FULL_DUPLEX_LBN)); + link_state->speed = speed; +} + +/* The semantics of the ethtool FEC mode bitmask are not well defined, + * particularly the meaning of combinations of bits. Which means we get to + * define our own semantics, as follows: + * OFF overrides any other bits, and means "disable all FEC" (with the + * exception of 25G KR4/CR4, where it is not possible to reject it if AN + * partner requests it). + * AUTO on its own means use cable requirements and link partner autoneg with + * fw-default preferences for the cable type. + * AUTO and either RS or BASER means use the specified FEC type if cable and + * link partner support it, otherwise autoneg/fw-default. + * RS or BASER alone means use the specified FEC type if cable and link partner + * support it and either requests it, otherwise no FEC. + * Both RS and BASER (whether AUTO or not) means use FEC if cable and link + * partner support it, preferring RS to BASER. + */ +u32 ethtool_fec_caps_to_mcdi(u32 ethtool_cap) +{ + u32 ret = 0; + + if (ethtool_cap & ETHTOOL_FEC_OFF) + return 0; + + if (ethtool_cap & ETHTOOL_FEC_AUTO) + ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_RS_FEC_LBN); + if (ethtool_cap & ETHTOOL_FEC_RS) + ret |= (1 << MC_CMD_PHY_CAP_RS_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN); + if (ethtool_cap & ETHTOOL_FEC_BASER) + ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) | + (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN) | + (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN); + return ret; +} + +/* Invert ethtool_fec_caps_to_mcdi. There are two combinations that function + * can never produce, (baser xor rs) and neither req; the implementation below + * maps both of those to AUTO. This should never matter, and it's not clear + * what a better mapping would be anyway. + */ +u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g) +{ + bool rs = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_LBN), + rs_req = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN), + baser = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) + : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN), + baser_req = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN) + : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN); + + if (!baser && !rs) + return ETHTOOL_FEC_OFF; + return (rs_req ? ETHTOOL_FEC_RS : 0) | + (baser_req ? ETHTOOL_FEC_BASER : 0) | + (baser == baser_req && rs == rs_req ? 
0 : ETHTOOL_FEC_AUTO); +} + +/* Verify that the forced flow control settings (!EFX_FC_AUTO) are + * supported by the link partner. Warn the user if this isn't the case + */ +void efx_mcdi_phy_check_fcntl(struct efx_nic *efx, u32 lpa) +{ + struct efx_mcdi_phy_data *phy_cfg = efx->phy_data; + u32 rmtadv; + + /* The link partner capabilities are only relevant if the + * link supports flow control autonegotiation + */ + if (~phy_cfg->supported_cap & (1 << MC_CMD_PHY_CAP_AN_LBN)) + return; + + /* If flow control autoneg is supported and enabled, then fine */ + if (efx->wanted_fc & EFX_FC_AUTO) + return; + + rmtadv = 0; + if (lpa & (1 << MC_CMD_PHY_CAP_PAUSE_LBN)) + rmtadv |= ADVERTISED_Pause; + if (lpa & (1 << MC_CMD_PHY_CAP_ASYM_LBN)) + rmtadv |= ADVERTISED_Asym_Pause; + + if ((efx->wanted_fc & EFX_FC_TX) && rmtadv == ADVERTISED_Asym_Pause) + netif_err(efx, link, efx->net_dev, + "warning: link partner doesn't support pause frames"); +} + +bool efx_mcdi_phy_poll(struct efx_nic *efx) +{ + struct efx_link_state old_state = efx->link_state; + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN); + int rc; + + WARN_ON(!mutex_is_locked(&efx->mac_lock)); + + BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, + outbuf, sizeof(outbuf), NULL); + if (rc) + efx->link_state.up = false; + else + efx_mcdi_phy_decode_link( + efx, &efx->link_state, + MCDI_DWORD(outbuf, GET_LINK_OUT_LINK_SPEED), + MCDI_DWORD(outbuf, GET_LINK_OUT_FLAGS), + MCDI_DWORD(outbuf, GET_LINK_OUT_FCNTL)); + + return !efx_link_state_equal(&efx->link_state, &old_state); +} + +int efx_mcdi_phy_get_fecparam(struct efx_nic *efx, struct ethtool_fecparam *fec) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_V2_LEN); + u32 caps, active, speed; /* MCDI format */ + bool is_25g = false; + size_t outlen; + int rc; + + BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0); + rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < MC_CMD_GET_LINK_OUT_V2_LEN) + return -EOPNOTSUPP; + + /* behaviour for 25G/50G links depends on 25G BASER bit */ + speed = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_LINK_SPEED); + is_25g = speed == 25000 || speed == 50000; + + caps = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_CAP); + fec->fec = mcdi_fec_caps_to_ethtool(caps, is_25g); + /* BASER is never supported on 100G */ + if (speed == 100000) + fec->fec &= ~ETHTOOL_FEC_BASER; + + active = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_FEC_TYPE); + switch (active) { + case MC_CMD_FEC_NONE: + fec->active_fec = ETHTOOL_FEC_OFF; + break; + case MC_CMD_FEC_BASER: + fec->active_fec = ETHTOOL_FEC_BASER; + break; + case MC_CMD_FEC_RS: + fec->active_fec = ETHTOOL_FEC_RS; + break; + default: + netif_warn(efx, hw, efx->net_dev, + "Firmware reports unrecognised FEC_TYPE %u\n", + active); + /* We don't know what firmware has picked. AUTO is as good a + * "can't happen" value as any other. 
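The FEC semantics spelled out above boil down to two bitmask translations that are nearly, but not exactly, inverses. A self-contained model of both directions follows; the CAP_* bit positions are placeholders for the MC_CMD_PHY_CAP_*_LBN values in mcdi_pcol.h, the 25G variants are folded away for brevity, and the FEC_* flags mirror the ethtool bitmask only in shape:

#include <stdint.h>
#include <stdio.h>

/* Illustrative flags mirroring the shape of the ethtool FEC bitmask */
#define FEC_OFF    0x1
#define FEC_AUTO   0x2
#define FEC_RS     0x4
#define FEC_BASER  0x8

/* Illustrative capability bits; the real MC_CMD_PHY_CAP_*_LBN positions
 * come from mcdi_pcol.h. */
#define CAP_BASER_FEC            (1u << 0)
#define CAP_BASER_FEC_REQUESTED  (1u << 1)
#define CAP_RS_FEC               (1u << 2)
#define CAP_RS_FEC_REQUESTED     (1u << 3)

/* Same shape as ethtool_fec_caps_to_mcdi(), 25G variants omitted */
static uint32_t fec_to_mcdi(uint32_t ethtool_cap)
{
        uint32_t ret = 0;

        if (ethtool_cap & FEC_OFF)
                return 0;               /* OFF overrides everything else */
        if (ethtool_cap & FEC_AUTO)
                ret |= CAP_BASER_FEC | CAP_RS_FEC;
        if (ethtool_cap & FEC_RS)
                ret |= CAP_RS_FEC | CAP_RS_FEC_REQUESTED;
        if (ethtool_cap & FEC_BASER)
                ret |= CAP_BASER_FEC | CAP_BASER_FEC_REQUESTED;
        return ret;
}

/* Same shape as mcdi_fec_caps_to_ethtool() */
static uint32_t mcdi_to_fec(uint32_t caps)
{
        int rs        = !!(caps & CAP_RS_FEC);
        int rs_req    = !!(caps & CAP_RS_FEC_REQUESTED);
        int baser     = !!(caps & CAP_BASER_FEC);
        int baser_req = !!(caps & CAP_BASER_FEC_REQUESTED);

        if (!baser && !rs)
                return FEC_OFF;
        return (rs_req ? FEC_RS : 0) | (baser_req ? FEC_BASER : 0) |
               (baser == baser_req && rs == rs_req ? 0 : FEC_AUTO);
}

int main(void)
{
        /* "RS if supported, else firmware default" round-trips to RS|AUTO,
         * because BASER is advertised but not requested. */
        printf("0x%x\n", mcdi_to_fec(fec_to_mcdi(FEC_AUTO | FEC_RS)));  /* 0x6 */
        return 0;
}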
+ */ + fec->active_fec = ETHTOOL_FEC_AUTO; + break; + } + + return 0; +} + +int efx_mcdi_phy_test_alive(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_STATE_OUT_LEN); + size_t outlen; + int rc; + + BUILD_BUG_ON(MC_CMD_GET_PHY_STATE_IN_LEN != 0); + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_STATE, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + + if (outlen < MC_CMD_GET_PHY_STATE_OUT_LEN) + return -EIO; + if (MCDI_DWORD(outbuf, GET_PHY_STATE_OUT_STATE) != MC_CMD_PHY_STATE_OK) + return -EINVAL; + + return 0; +} + +int efx_mcdi_set_mac(struct efx_nic *efx) +{ + u32 fcntl; + MCDI_DECLARE_BUF(cmdbytes, MC_CMD_SET_MAC_IN_LEN); + + BUILD_BUG_ON(MC_CMD_SET_MAC_OUT_LEN != 0); + + /* This has no effect on EF10 */ + ether_addr_copy(MCDI_PTR(cmdbytes, SET_MAC_IN_ADDR), + efx->net_dev->dev_addr); + + MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_MTU, + EFX_MAX_FRAME_LEN(efx->net_dev->mtu)); + MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_DRAIN, 0); + + /* Set simple MAC filter for Siena */ + MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_REJECT, + SET_MAC_IN_REJECT_UNCST, efx->unicast_filter); + + MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_FLAGS, + SET_MAC_IN_FLAG_INCLUDE_FCS, + !!(efx->net_dev->features & NETIF_F_RXFCS)); + + switch (efx->wanted_fc) { + case EFX_FC_RX | EFX_FC_TX: + fcntl = MC_CMD_FCNTL_BIDIR; + break; + case EFX_FC_RX: + fcntl = MC_CMD_FCNTL_RESPOND; + break; + default: + fcntl = MC_CMD_FCNTL_OFF; + break; + } + if (efx->wanted_fc & EFX_FC_AUTO) + fcntl = MC_CMD_FCNTL_AUTO; + if (efx->fc_disable) + fcntl = MC_CMD_FCNTL_OFF; + + MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_FCNTL, fcntl); + + return efx_mcdi_rpc(efx, MC_CMD_SET_MAC, cmdbytes, sizeof(cmdbytes), + NULL, 0, NULL); +} + +/* Get physical port number (EF10 only; on Siena it is same as PF number) */ +int efx_mcdi_port_get_number(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PORT_ASSIGNMENT_OUT_LEN); + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_PORT_ASSIGNMENT, NULL, 0, + outbuf, sizeof(outbuf), NULL); + if (rc) + return rc; + + return MCDI_DWORD(outbuf, GET_PORT_ASSIGNMENT_OUT_PORT); +} + +static unsigned int efx_mcdi_event_link_speed[] = { + [MCDI_EVENT_LINKCHANGE_SPEED_100M] = 100, + [MCDI_EVENT_LINKCHANGE_SPEED_1G] = 1000, + [MCDI_EVENT_LINKCHANGE_SPEED_10G] = 10000, + [MCDI_EVENT_LINKCHANGE_SPEED_40G] = 40000, + [MCDI_EVENT_LINKCHANGE_SPEED_25G] = 25000, + [MCDI_EVENT_LINKCHANGE_SPEED_50G] = 50000, + [MCDI_EVENT_LINKCHANGE_SPEED_100G] = 100000, +}; + +void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev) +{ + u32 flags, fcntl, speed, lpa; + + speed = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_SPEED); + EFX_WARN_ON_PARANOID(speed >= ARRAY_SIZE(efx_mcdi_event_link_speed)); + speed = efx_mcdi_event_link_speed[speed]; + + flags = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LINK_FLAGS); + fcntl = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_FCNTL); + lpa = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LP_CAP); + + /* efx->link_state is only modified by efx_mcdi_phy_get_link(), + * which is only run after flushing the event queues. Therefore, it + * is safe to modify the link state outside of the mac_lock here. 
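efx_mcdi_set_mac() above folds the driver's pause configuration into a single MCDI flow-control word, and the order of the overrides matters: autonegotiation beats the RX/TX pair, and fc_disable beats everything. A sketch of just that resolution, with illustrative stand-ins for the EFX_FC_* and MC_CMD_FCNTL_* values:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for EFX_FC_* and MC_CMD_FCNTL_* */
#define FC_TX    0x1
#define FC_RX    0x2
#define FC_AUTO  0x4

enum { FCNTL_OFF, FCNTL_RESPOND, FCNTL_BIDIR, FCNTL_AUTO };

static int resolve_fcntl(unsigned int wanted_fc, bool fc_disable)
{
        int fcntl;

        switch (wanted_fc) {
        case FC_RX | FC_TX:
                fcntl = FCNTL_BIDIR;
                break;
        case FC_RX:
                fcntl = FCNTL_RESPOND;
                break;
        default:
                fcntl = FCNTL_OFF;
                break;
        }
        /* Overrides, applied last as in efx_mcdi_set_mac() */
        if (wanted_fc & FC_AUTO)
                fcntl = FCNTL_AUTO;
        if (fc_disable)
                fcntl = FCNTL_OFF;
        return fcntl;
}

int main(void)
{
        printf("%d\n", resolve_fcntl(FC_RX | FC_TX, false));           /* FCNTL_BIDIR (2) */
        printf("%d\n", resolve_fcntl(FC_RX | FC_TX | FC_AUTO, false)); /* FCNTL_AUTO (3) */
        printf("%d\n", resolve_fcntl(FC_RX | FC_TX | FC_AUTO, true));  /* FCNTL_OFF (0) */
        return 0;
}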
+ */ + efx_mcdi_phy_decode_link(efx, &efx->link_state, speed, flags, fcntl); + + efx_mcdi_phy_check_fcntl(efx, lpa); + + efx_link_status_changed(efx); +} diff --git a/drivers/net/ethernet/sfc/mcdi_port_common.h b/drivers/net/ethernet/sfc/mcdi_port_common.h new file mode 100644 index 000000000000..b16f11265269 --- /dev/null +++ b/drivers/net/ethernet/sfc/mcdi_port_common.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ +#ifndef EFX_MCDI_PORT_COMMON_H +#define EFX_MCDI_PORT_COMMON_H + +#include "net_driver.h" +#include "mcdi.h" +#include "mcdi_pcol.h" + +struct efx_mcdi_phy_data { + u32 flags; + u32 type; + u32 supported_cap; + u32 channel; + u32 port; + u32 stats_mask; + u8 name[20]; + u32 media; + u32 mmd_mask; + u8 revision[20]; + u32 forced_cap; +}; + +#define EFX_MC_STATS_GENERATION_INVALID ((__force __le64)(-1)) + +int efx_mcdi_get_phy_cfg(struct efx_nic *efx, struct efx_mcdi_phy_data *cfg); +void efx_link_set_advertising(struct efx_nic *efx, + const unsigned long *advertising); +int efx_mcdi_set_link(struct efx_nic *efx, u32 capabilities, + u32 flags, u32 loopback_mode, u32 loopback_speed); +int efx_mcdi_loopback_modes(struct efx_nic *efx, u64 *loopback_modes); +void mcdi_to_ethtool_linkset(u32 media, u32 cap, unsigned long *linkset); +u32 ethtool_linkset_to_mcdi_cap(const unsigned long *linkset); +u32 efx_get_mcdi_phy_flags(struct efx_nic *efx); +u8 mcdi_to_ethtool_media(u32 media); +void efx_mcdi_phy_decode_link(struct efx_nic *efx, + struct efx_link_state *link_state, + u32 speed, u32 flags, u32 fcntl); +u32 ethtool_fec_caps_to_mcdi(u32 ethtool_cap); +u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g); +void efx_mcdi_phy_check_fcntl(struct efx_nic *efx, u32 lpa); +bool efx_mcdi_phy_poll(struct efx_nic *efx); +int efx_mcdi_phy_get_fecparam(struct efx_nic *efx, + struct ethtool_fecparam *fec); +int efx_mcdi_phy_test_alive(struct efx_nic *efx); +int efx_mcdi_set_mac(struct efx_nic *efx); +int efx_mcdi_port_get_number(struct efx_nic *efx); +void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev); + +#endif diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index dfd5182d9e47..9f9886f222c8 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -24,7 +24,6 @@ #include <linux/mutex.h> #include <linux/rwsem.h> #include <linux/vmalloc.h> -#include <linux/i2c.h> #include <linux/mtd/mtd.h> #include <net/busy_poll.h> #include <net/xdp.h> @@ -139,6 +138,8 @@ struct efx_special_buffer { * freed when descriptor completes * @xdpf: When @flags & %EFX_TX_BUF_XDP, the XDP frame information; its @data * member is the associated buffer to drop a page reference on. + * @option: When @flags & %EFX_TX_BUF_OPTION, an EF10-specific option + * descriptor. * @dma_addr: DMA address of the fragment. * @flags: Flags for allocation and DMA mapping type * @len: Length of this fragment. 
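A note on the ring indexing used throughout these files, including the efx_rx_buf_next() helper added in the next hunk: descriptor rings are power-of-two sized, so a free-running counter masked with ptr_mask names a slot, and the pointer-based successor wraps after the slot at ptr_mask. A minimal model, assuming only the power-of-two invariant:

#include <stdio.h>

#define RING_SIZE 8                     /* must be a power of two */
#define PTR_MASK  (RING_SIZE - 1)

struct buffer { int id; };

static struct buffer ring[RING_SIZE];

/* Pointer-based successor, wrapping after the last slot, in the same
 * shape as efx_rx_buf_next() */
static struct buffer *buf_next(struct buffer *buf)
{
        if (buf == &ring[PTR_MASK])
                return &ring[0];
        return buf + 1;
}

int main(void)
{
        unsigned int added_count = 13;  /* free-running counter */
        struct buffer *buf = &ring[added_count & PTR_MASK];     /* slot 5 */

        printf("%td -> %td\n", buf - ring, buf_next(buf) - ring);  /* 5 -> 6 */
        printf("%td\n", buf_next(&ring[PTR_MASK]) - ring);         /* wraps to 0 */
        return 0;
}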
@@ -153,7 +154,7 @@ struct efx_tx_buffer { struct xdp_frame *xdpf; }; union { - efx_qword_t option; + efx_qword_t option; /* EF10 */ dma_addr_t dma_addr; }; unsigned short flags; @@ -743,13 +744,13 @@ union efx_multicast_hash { struct vfdi_status; /* The reserved RSS context value */ -#define EFX_EF10_RSS_CONTEXT_INVALID 0xffffffff +#define EFX_MCDI_RSS_CONTEXT_INVALID 0xffffffff /** * struct efx_rss_context - A user-defined RSS context for filtering * @list: node of linked list on which this struct is stored * @context_id: the RSS_CONTEXT_ID returned by MC firmware, or - * %EFX_EF10_RSS_CONTEXT_INVALID if this context is not present on the NIC. - * For Siena, 0 if RSS is active, else %EFX_EF10_RSS_CONTEXT_INVALID. + * %EFX_MCDI_RSS_CONTEXT_INVALID if this context is not present on the NIC. + * For Siena, 0 if RSS is active, else %EFX_MCDI_RSS_CONTEXT_INVALID. * @user_id: the rss_context ID exposed to userspace over ethtool. * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled * @rx_hash_key: Toeplitz hash key for this RSS context @@ -1611,6 +1612,15 @@ static inline struct efx_rx_buffer *efx_rx_buffer(struct efx_rx_queue *rx_queue, return &rx_queue->buffer[index]; } +static inline struct efx_rx_buffer * +efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf) +{ + if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask))) + return efx_rx_buffer(rx_queue, 0); + else + return rx_buf + 1; +} + /** * EFX_MAX_FRAME_LEN - calculate maximum frame length * diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 1f7c5717de75..6670fda8f35a 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -9,9 +9,9 @@ #define EFX_NIC_H #include <linux/net_tstamp.h> -#include <linux/i2c-algo-bit.h> #include "net_driver.h" #include "efx.h" +#include "efx_common.h" #include "mcdi.h" enum { @@ -506,6 +506,9 @@ static inline void efx_nic_push_buffers(struct efx_tx_queue *tx_queue) tx_queue->efx->type->tx_write(tx_queue); } +int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb, + bool *data_mapped); + /* RX data path */ static inline int efx_nic_probe_rx(struct efx_rx_queue *rx_queue) { @@ -554,6 +557,7 @@ static inline void efx_nic_eventq_read_ack(struct efx_channel *channel) { channel->efx->type->ev_read_ack(channel); } + void efx_nic_event_test_start(struct efx_channel *channel); /* Falcon/Siena queue operations */ @@ -671,6 +675,7 @@ struct efx_farch_register_test { unsigned address; efx_oword_t mask; }; + int efx_farch_test_registers(struct efx_nic *efx, const struct efx_farch_register_test *regs, size_t n_regs); diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index c29bf862a94c..a2042f16babc 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -21,6 +21,7 @@ #include <linux/bpf_trace.h> #include "net_driver.h" #include "efx.h" +#include "rx_common.h" #include "filter.h" #include "nic.h" #include "selftest.h" @@ -32,60 +33,13 @@ /* Maximum rx prefix used by any architecture. */ #define EFX_MAX_RX_PREFIX_SIZE 16 -/* Number of RX buffers to recycle pages for. When creating the RX page recycle - * ring, this number is divided by the number of buffers per page to calculate - * the number of pages to store in the RX page recycle ring. - */ -#define EFX_RECYCLE_RING_SIZE_IOMMU 4096 -#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH) - /* Size of buffer allocated for skb header area. 
*/ #define EFX_SKB_HEADERS 128u -/* This is the percentage fill level below which new RX descriptors - * will be added to the RX descriptor ring. - */ -static unsigned int rx_refill_threshold; - /* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */ #define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \ EFX_RX_USR_BUF_SIZE) -/* - * RX maximum head room required. - * - * This must be at least 1 to prevent overflow, plus one packet-worth - * to allow pipelined receives. - */ -#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS) - -static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf) -{ - return page_address(buf->page) + buf->page_offset; -} - -static inline u32 efx_rx_buf_hash(struct efx_nic *efx, const u8 *eh) -{ -#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) - return __le32_to_cpup((const __le32 *)(eh + efx->rx_packet_hash_offset)); -#else - const u8 *data = eh + efx->rx_packet_hash_offset; - return (u32)data[0] | - (u32)data[1] << 8 | - (u32)data[2] << 16 | - (u32)data[3] << 24; -#endif -} - -static inline struct efx_rx_buffer * -efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf) -{ - if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask))) - return efx_rx_buffer(rx_queue, 0); - else - return rx_buf + 1; -} - static inline void efx_sync_rx_buffer(struct efx_nic *efx, struct efx_rx_buffer *rx_buf, unsigned int len) @@ -94,301 +48,6 @@ static inline void efx_sync_rx_buffer(struct efx_nic *efx, DMA_FROM_DEVICE); } -void efx_rx_config_page_split(struct efx_nic *efx) -{ - efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align + - XDP_PACKET_HEADROOM, - EFX_RX_BUF_ALIGNMENT); - efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 : - ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) / - efx->rx_page_buf_step); - efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) / - efx->rx_bufs_per_page; - efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH, - efx->rx_bufs_per_page); -} - -/* Check the RX page recycle ring for a page that can be reused. */ -static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue) -{ - struct efx_nic *efx = rx_queue->efx; - struct page *page; - struct efx_rx_page_state *state; - unsigned index; - - index = rx_queue->page_remove & rx_queue->page_ptr_mask; - page = rx_queue->page_ring[index]; - if (page == NULL) - return NULL; - - rx_queue->page_ring[index] = NULL; - /* page_remove cannot exceed page_add. */ - if (rx_queue->page_remove != rx_queue->page_add) - ++rx_queue->page_remove; - - /* If page_count is 1 then we hold the only reference to this page. */ - if (page_count(page) == 1) { - ++rx_queue->page_recycle_count; - return page; - } else { - state = page_address(page); - dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, - PAGE_SIZE << efx->rx_buffer_order, - DMA_FROM_DEVICE); - put_page(page); - ++rx_queue->page_recycle_failed; - } - - return NULL; -} - -/** - * efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers - * - * @rx_queue: Efx RX queue - * - * This allocates a batch of pages, maps them for DMA, and populates - * struct efx_rx_buffers for each one. Return a negative error code or - * 0 on success. If a single page can be used for multiple buffers, - * then the page will either be inserted fully, or not at all. 
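efx_rx_config_page_split() above (also on its way to rx_common.c) is pure arithmetic: each buffer occupies its DMA length plus IP alignment and XDP headroom, rounded up to the buffer alignment, and the page reserves room for a struct efx_rx_page_state. Worked through with plausible example values, assuming order-0 pages; the real numbers depend on MTU, platform alignment and structure sizes:

#include <stdio.h>

#define ALIGN_UP(x, a)     (((x) + (a) - 1) / (a) * (a))
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        /* Example values only; see efx_rx_config_page_split() for the
         * real inputs. */
        unsigned int page_size       = 4096;
        unsigned int rx_dma_len      = 1536;    /* payload DMA area */
        unsigned int rx_ip_align     = 2;       /* NET_IP_ALIGN */
        unsigned int xdp_headroom    = 256;     /* XDP_PACKET_HEADROOM */
        unsigned int buf_alignment   = 64;      /* EFX_RX_BUF_ALIGNMENT */
        unsigned int page_state_sz   = 16;      /* struct efx_rx_page_state */
        unsigned int preferred_batch = 8;       /* assumed EFX_RX_PREFERRED_BATCH */

        unsigned int step = ALIGN_UP(rx_dma_len + rx_ip_align + xdp_headroom,
                                     buf_alignment);
        unsigned int bufs_per_page = (page_size - page_state_sz) / step;
        unsigned int truesize = page_size / bufs_per_page;
        unsigned int pages_per_batch = DIV_ROUND_UP(preferred_batch,
                                                    bufs_per_page);

        printf("step=%u bufs/page=%u truesize=%u pages/batch=%u\n",
               step, bufs_per_page, truesize, pages_per_batch);
        /* step=1856 bufs/page=2 truesize=2048 pages/batch=4 */
        return 0;
}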
- */ -static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic) -{ - struct efx_nic *efx = rx_queue->efx; - struct efx_rx_buffer *rx_buf; - struct page *page; - unsigned int page_offset; - struct efx_rx_page_state *state; - dma_addr_t dma_addr; - unsigned index, count; - - count = 0; - do { - page = efx_reuse_page(rx_queue); - if (page == NULL) { - page = alloc_pages(__GFP_COMP | - (atomic ? GFP_ATOMIC : GFP_KERNEL), - efx->rx_buffer_order); - if (unlikely(page == NULL)) - return -ENOMEM; - dma_addr = - dma_map_page(&efx->pci_dev->dev, page, 0, - PAGE_SIZE << efx->rx_buffer_order, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(&efx->pci_dev->dev, - dma_addr))) { - __free_pages(page, efx->rx_buffer_order); - return -EIO; - } - state = page_address(page); - state->dma_addr = dma_addr; - } else { - state = page_address(page); - dma_addr = state->dma_addr; - } - - dma_addr += sizeof(struct efx_rx_page_state); - page_offset = sizeof(struct efx_rx_page_state); - - do { - index = rx_queue->added_count & rx_queue->ptr_mask; - rx_buf = efx_rx_buffer(rx_queue, index); - rx_buf->dma_addr = dma_addr + efx->rx_ip_align + - XDP_PACKET_HEADROOM; - rx_buf->page = page; - rx_buf->page_offset = page_offset + efx->rx_ip_align + - XDP_PACKET_HEADROOM; - rx_buf->len = efx->rx_dma_len; - rx_buf->flags = 0; - ++rx_queue->added_count; - get_page(page); - dma_addr += efx->rx_page_buf_step; - page_offset += efx->rx_page_buf_step; - } while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE); - - rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE; - } while (++count < efx->rx_pages_per_batch); - - return 0; -} - -/* Unmap a DMA-mapped page. This function is only called for the final RX - * buffer in a page. - */ -static void efx_unmap_rx_buffer(struct efx_nic *efx, - struct efx_rx_buffer *rx_buf) -{ - struct page *page = rx_buf->page; - - if (page) { - struct efx_rx_page_state *state = page_address(page); - dma_unmap_page(&efx->pci_dev->dev, - state->dma_addr, - PAGE_SIZE << efx->rx_buffer_order, - DMA_FROM_DEVICE); - } -} - -static void efx_free_rx_buffers(struct efx_rx_queue *rx_queue, - struct efx_rx_buffer *rx_buf, - unsigned int num_bufs) -{ - do { - if (rx_buf->page) { - put_page(rx_buf->page); - rx_buf->page = NULL; - } - rx_buf = efx_rx_buf_next(rx_queue, rx_buf); - } while (--num_bufs); -} - -/* Attempt to recycle the page if there is an RX recycle ring; the page can - * only be added if this is the final RX buffer, to prevent pages being used in - * the descriptor ring and appearing in the recycle ring simultaneously. - */ -static void efx_recycle_rx_page(struct efx_channel *channel, - struct efx_rx_buffer *rx_buf) -{ - struct page *page = rx_buf->page; - struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); - struct efx_nic *efx = rx_queue->efx; - unsigned index; - - /* Only recycle the page after processing the final buffer. */ - if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE)) - return; - - index = rx_queue->page_add & rx_queue->page_ptr_mask; - if (rx_queue->page_ring[index] == NULL) { - unsigned read_index = rx_queue->page_remove & - rx_queue->page_ptr_mask; - - /* The next slot in the recycle ring is available, but - * increment page_remove if the read pointer currently - * points here. 
- */ - if (read_index == index) - ++rx_queue->page_remove; - rx_queue->page_ring[index] = page; - ++rx_queue->page_add; - return; - } - ++rx_queue->page_recycle_full; - efx_unmap_rx_buffer(efx, rx_buf); - put_page(rx_buf->page); -} - -static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, - struct efx_rx_buffer *rx_buf) -{ - /* Release the page reference we hold for the buffer. */ - if (rx_buf->page) - put_page(rx_buf->page); - - /* If this is the last buffer in a page, unmap and free it. */ - if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) { - efx_unmap_rx_buffer(rx_queue->efx, rx_buf); - efx_free_rx_buffers(rx_queue, rx_buf, 1); - } - rx_buf->page = NULL; -} - -/* Recycle the pages that are used by buffers that have just been received. */ -static void efx_recycle_rx_pages(struct efx_channel *channel, - struct efx_rx_buffer *rx_buf, - unsigned int n_frags) -{ - struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); - - do { - efx_recycle_rx_page(channel, rx_buf); - rx_buf = efx_rx_buf_next(rx_queue, rx_buf); - } while (--n_frags); -} - -static void efx_discard_rx_packet(struct efx_channel *channel, - struct efx_rx_buffer *rx_buf, - unsigned int n_frags) -{ - struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); - - efx_recycle_rx_pages(channel, rx_buf, n_frags); - - efx_free_rx_buffers(rx_queue, rx_buf, n_frags); -} - -/** - * efx_fast_push_rx_descriptors - push new RX descriptors quickly - * @rx_queue: RX descriptor queue - * - * This will aim to fill the RX descriptor queue up to - * @rx_queue->@max_fill. If there is insufficient atomic - * memory to do so, a slow fill will be scheduled. - * - * The caller must provide serialisation (none is used here). In practise, - * this means this function must run from the NAPI handler, or be called - * when NAPI is disabled. 
- */ -void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic) -{ - struct efx_nic *efx = rx_queue->efx; - unsigned int fill_level, batch_size; - int space, rc = 0; - - if (!rx_queue->refill_enabled) - return; - - /* Calculate current fill level, and exit if we don't need to fill */ - fill_level = (rx_queue->added_count - rx_queue->removed_count); - EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries); - if (fill_level >= rx_queue->fast_fill_trigger) - goto out; - - /* Record minimum fill level */ - if (unlikely(fill_level < rx_queue->min_fill)) { - if (fill_level) - rx_queue->min_fill = fill_level; - } - - batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page; - space = rx_queue->max_fill - fill_level; - EFX_WARN_ON_ONCE_PARANOID(space < batch_size); - - netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, - "RX queue %d fast-filling descriptor ring from" - " level %d to level %d\n", - efx_rx_queue_index(rx_queue), fill_level, - rx_queue->max_fill); - - - do { - rc = efx_init_rx_buffers(rx_queue, atomic); - if (unlikely(rc)) { - /* Ensure that we don't leave the rx queue empty */ - efx_schedule_slow_fill(rx_queue); - goto out; - } - } while ((space -= batch_size) >= batch_size); - - netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, - "RX queue %d fast-filled descriptor ring " - "to level %d\n", efx_rx_queue_index(rx_queue), - rx_queue->added_count - rx_queue->removed_count); - - out: - if (rx_queue->notified_count != rx_queue->added_count) - efx_nic_notify_rx_desc(rx_queue); -} - -void efx_rx_slow_fill(struct timer_list *t) -{ - struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill); - - /* Post an event to cause NAPI to run and refill the queue */ - efx_nic_generate_fill_event(rx_queue); - ++rx_queue->slow_fill_count; -} - static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf, int len) @@ -412,53 +71,6 @@ static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue, efx_rx_queue_channel(rx_queue)->n_rx_overlength++; } -/* Pass a received packet up through GRO. GRO can handle pages - * regardless of checksum state and skbs with a good checksum. - */ -static void -efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, - unsigned int n_frags, u8 *eh) -{ - struct napi_struct *napi = &channel->napi_str; - struct efx_nic *efx = channel->efx; - struct sk_buff *skb; - - skb = napi_get_frags(napi); - if (unlikely(!skb)) { - struct efx_rx_queue *rx_queue; - - rx_queue = efx_channel_get_rx_queue(channel); - efx_free_rx_buffers(rx_queue, rx_buf, n_frags); - return; - } - - if (efx->net_dev->features & NETIF_F_RXHASH) - skb_set_hash(skb, efx_rx_buf_hash(efx, eh), - PKT_HASH_TYPE_L3); - skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ? 
- CHECKSUM_UNNECESSARY : CHECKSUM_NONE); - skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL); - - for (;;) { - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, - rx_buf->page, rx_buf->page_offset, - rx_buf->len); - rx_buf->page = NULL; - skb->len += rx_buf->len; - if (skb_shinfo(skb)->nr_frags == n_frags) - break; - - rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf); - } - - skb->data_len = skb->len; - skb->truesize += n_frags * efx->rx_buffer_truesize; - - skb_record_rx_queue(skb, channel->rx_queue.core_index); - - napi_gro_frags(napi); -} - /* Allocate and construct an SKB around page fragments */ static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, @@ -805,174 +417,6 @@ out: channel->rx_pkt_n_frags = 0; } -int efx_probe_rx_queue(struct efx_rx_queue *rx_queue) -{ - struct efx_nic *efx = rx_queue->efx; - unsigned int entries; - int rc; - - /* Create the smallest power-of-two aligned ring */ - entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE); - EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE); - rx_queue->ptr_mask = entries - 1; - - netif_dbg(efx, probe, efx->net_dev, - "creating RX queue %d size %#x mask %#x\n", - efx_rx_queue_index(rx_queue), efx->rxq_entries, - rx_queue->ptr_mask); - - /* Allocate RX buffers */ - rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer), - GFP_KERNEL); - if (!rx_queue->buffer) - return -ENOMEM; - - rc = efx_nic_probe_rx(rx_queue); - if (rc) { - kfree(rx_queue->buffer); - rx_queue->buffer = NULL; - } - - return rc; -} - -static void efx_init_rx_recycle_ring(struct efx_nic *efx, - struct efx_rx_queue *rx_queue) -{ - unsigned int bufs_in_recycle_ring, page_ring_size; - - /* Set the RX recycle ring size */ -#ifdef CONFIG_PPC64 - bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; -#else - if (iommu_present(&pci_bus_type)) - bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; - else - bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU; -#endif /* CONFIG_PPC64 */ - - page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring / - efx->rx_bufs_per_page); - rx_queue->page_ring = kcalloc(page_ring_size, - sizeof(*rx_queue->page_ring), GFP_KERNEL); - rx_queue->page_ptr_mask = page_ring_size - 1; -} - -void efx_init_rx_queue(struct efx_rx_queue *rx_queue) -{ - struct efx_nic *efx = rx_queue->efx; - unsigned int max_fill, trigger, max_trigger; - int rc = 0; - - netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, - "initialising RX queue %d\n", efx_rx_queue_index(rx_queue)); - - /* Initialise ptr fields */ - rx_queue->added_count = 0; - rx_queue->notified_count = 0; - rx_queue->removed_count = 0; - rx_queue->min_fill = -1U; - efx_init_rx_recycle_ring(efx, rx_queue); - - rx_queue->page_remove = 0; - rx_queue->page_add = rx_queue->page_ptr_mask + 1; - rx_queue->page_recycle_count = 0; - rx_queue->page_recycle_failed = 0; - rx_queue->page_recycle_full = 0; - - /* Initialise limit fields */ - max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM; - max_trigger = - max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page; - if (rx_refill_threshold != 0) { - trigger = max_fill * min(rx_refill_threshold, 100U) / 100U; - if (trigger > max_trigger) - trigger = max_trigger; - } else { - trigger = max_trigger; - } - - rx_queue->max_fill = max_fill; - rx_queue->fast_fill_trigger = trigger; - rx_queue->refill_enabled = true; - - /* Initialise XDP queue information */ - rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev, - rx_queue->core_index); - - if (rc) { - netif_err(efx, rx_err, 
efx->net_dev, - "Failure to initialise XDP queue information rc=%d\n", - rc); - efx->xdp_rxq_info_failed = true; - } else { - rx_queue->xdp_rxq_info_valid = true; - } - - /* Set up RX descriptor ring */ - efx_nic_init_rx(rx_queue); -} - -void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) -{ - int i; - struct efx_nic *efx = rx_queue->efx; - struct efx_rx_buffer *rx_buf; - - netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, - "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue)); - - del_timer_sync(&rx_queue->slow_fill); - - /* Release RX buffers from the current read ptr to the write ptr */ - if (rx_queue->buffer) { - for (i = rx_queue->removed_count; i < rx_queue->added_count; - i++) { - unsigned index = i & rx_queue->ptr_mask; - rx_buf = efx_rx_buffer(rx_queue, index); - efx_fini_rx_buffer(rx_queue, rx_buf); - } - } - - /* Unmap and release the pages in the recycle ring. Remove the ring. */ - for (i = 0; i <= rx_queue->page_ptr_mask; i++) { - struct page *page = rx_queue->page_ring[i]; - struct efx_rx_page_state *state; - - if (page == NULL) - continue; - - state = page_address(page); - dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, - PAGE_SIZE << efx->rx_buffer_order, - DMA_FROM_DEVICE); - put_page(page); - } - kfree(rx_queue->page_ring); - rx_queue->page_ring = NULL; - - if (rx_queue->xdp_rxq_info_valid) - xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info); - - rx_queue->xdp_rxq_info_valid = false; -} - -void efx_remove_rx_queue(struct efx_rx_queue *rx_queue) -{ - netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, - "destroying RX queue %d\n", efx_rx_queue_index(rx_queue)); - - efx_nic_remove_rx(rx_queue); - - kfree(rx_queue->buffer); - rx_queue->buffer = NULL; -} - - -module_param(rx_refill_threshold, uint, 0444); -MODULE_PARM_DESC(rx_refill_threshold, - "RX descriptor ring refill threshold (%)"); - #ifdef CONFIG_RFS_ACCEL static void efx_filter_rfs_work(struct work_struct *data) @@ -1206,37 +650,3 @@ bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota) } #endif /* CONFIG_RFS_ACCEL */ - -/** - * efx_filter_is_mc_recipient - test whether spec is a multicast recipient - * @spec: Specification to test - * - * Return: %true if the specification is a non-drop RX filter that - * matches a local MAC address I/G bit value of 1 or matches a local - * IPv4 or IPv6 address value in the respective multicast address - * range. Otherwise %false. 
- */ -bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec) -{ - if (!(spec->flags & EFX_FILTER_FLAG_RX) || - spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP) - return false; - - if (spec->match_flags & - (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) && - is_multicast_ether_addr(spec->loc_mac)) - return true; - - if ((spec->match_flags & - (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) == - (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) { - if (spec->ether_type == htons(ETH_P_IP) && - ipv4_is_multicast(spec->loc_host[0])) - return true; - if (spec->ether_type == htons(ETH_P_IPV6) && - ((const u8 *)spec->loc_host)[0] == 0xff) - return true; - } - - return false; -} diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c new file mode 100644 index 000000000000..ee8beb87bdc1 --- /dev/null +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -0,0 +1,851 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "net_driver.h" +#include <linux/module.h> +#include <linux/iommu.h> +#include "efx.h" +#include "nic.h" +#include "rx_common.h" + +/* This is the percentage fill level below which new RX descriptors + * will be added to the RX descriptor ring. + */ +static unsigned int rx_refill_threshold; +module_param(rx_refill_threshold, uint, 0444); +MODULE_PARM_DESC(rx_refill_threshold, + "RX descriptor ring refill threshold (%)"); + +/* Number of RX buffers to recycle pages for. When creating the RX page recycle + * ring, this number is divided by the number of buffers per page to calculate + * the number of pages to store in the RX page recycle ring. + */ +#define EFX_RECYCLE_RING_SIZE_IOMMU 4096 +#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH) + +/* RX maximum head room required. + * + * This must be at least 1 to prevent overflow, plus one packet-worth + * to allow pipelined receives. + */ +#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS) + +/* Check the RX page recycle ring for a page that can be reused. */ +static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue) +{ + struct efx_nic *efx = rx_queue->efx; + struct efx_rx_page_state *state; + unsigned int index; + struct page *page; + + index = rx_queue->page_remove & rx_queue->page_ptr_mask; + page = rx_queue->page_ring[index]; + if (page == NULL) + return NULL; + + rx_queue->page_ring[index] = NULL; + /* page_remove cannot exceed page_add. */ + if (rx_queue->page_remove != rx_queue->page_add) + ++rx_queue->page_remove; + + /* If page_count is 1 then we hold the only reference to this page. */ + if (page_count(page) == 1) { + ++rx_queue->page_recycle_count; + return page; + } else { + state = page_address(page); + dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); + put_page(page); + ++rx_queue->page_recycle_failed; + } + + return NULL; +} + +/* Attempt to recycle the page if there is an RX recycle ring; the page can + * only be added if this is the final RX buffer, to prevent pages being used in + * the descriptor ring and appearing in the recycle ring simultaneously. 
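+ *
+ * Pages held in the recycle ring stay DMA-mapped, so a page handed back
+ * by efx_reuse_page() can be refilled without a fresh dma_map_page() call.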
+ */ +static void efx_recycle_rx_page(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf) +{ + struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); + struct efx_nic *efx = rx_queue->efx; + struct page *page = rx_buf->page; + unsigned int index; + + /* Only recycle the page after processing the final buffer. */ + if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE)) + return; + + index = rx_queue->page_add & rx_queue->page_ptr_mask; + if (rx_queue->page_ring[index] == NULL) { + unsigned int read_index = rx_queue->page_remove & + rx_queue->page_ptr_mask; + + /* The next slot in the recycle ring is available, but + * increment page_remove if the read pointer currently + * points here. + */ + if (read_index == index) + ++rx_queue->page_remove; + rx_queue->page_ring[index] = page; + ++rx_queue->page_add; + return; + } + ++rx_queue->page_recycle_full; + efx_unmap_rx_buffer(efx, rx_buf); + put_page(rx_buf->page); +} + +/* Recycle the pages that are used by buffers that have just been received. */ +void efx_recycle_rx_pages(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags) +{ + struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); + + do { + efx_recycle_rx_page(channel, rx_buf); + rx_buf = efx_rx_buf_next(rx_queue, rx_buf); + } while (--n_frags); +} + +void efx_discard_rx_packet(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags) +{ + struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); + + efx_recycle_rx_pages(channel, rx_buf, n_frags); + + efx_free_rx_buffers(rx_queue, rx_buf, n_frags); +} + +static void efx_init_rx_recycle_ring(struct efx_rx_queue *rx_queue) +{ + unsigned int bufs_in_recycle_ring, page_ring_size; + struct efx_nic *efx = rx_queue->efx; + + /* Set the RX recycle ring size */ +#ifdef CONFIG_PPC64 + bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; +#else + if (iommu_present(&pci_bus_type)) + bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU; + else + bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU; +#endif /* CONFIG_PPC64 */ + + page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring / + efx->rx_bufs_per_page); + rx_queue->page_ring = kcalloc(page_ring_size, + sizeof(*rx_queue->page_ring), GFP_KERNEL); + rx_queue->page_ptr_mask = page_ring_size - 1; +} + +static void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue) +{ + struct efx_nic *efx = rx_queue->efx; + int i; + + /* Unmap and release the pages in the recycle ring. Remove the ring. */ + for (i = 0; i <= rx_queue->page_ptr_mask; i++) { + struct page *page = rx_queue->page_ring[i]; + struct efx_rx_page_state *state; + + if (page == NULL) + continue; + + state = page_address(page); + dma_unmap_page(&efx->pci_dev->dev, state->dma_addr, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); + put_page(page); + } + kfree(rx_queue->page_ring); + rx_queue->page_ring = NULL; +} + +static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *rx_buf) +{ + /* Release the page reference we hold for the buffer. */ + if (rx_buf->page) + put_page(rx_buf->page); + + /* If this is the last buffer in a page, unmap and free it. 
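+ * The final buffer owns an extra reference left over from the original
+ * page allocation; releasing it here lets the page go back to the allocator.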
*/ + if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) { + efx_unmap_rx_buffer(rx_queue->efx, rx_buf); + efx_free_rx_buffers(rx_queue, rx_buf, 1); + } + rx_buf->page = NULL; +} + +int efx_probe_rx_queue(struct efx_rx_queue *rx_queue) +{ + struct efx_nic *efx = rx_queue->efx; + unsigned int entries; + int rc; + + /* Create the smallest power-of-two aligned ring */ + entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE); + EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE); + rx_queue->ptr_mask = entries - 1; + + netif_dbg(efx, probe, efx->net_dev, + "creating RX queue %d size %#x mask %#x\n", + efx_rx_queue_index(rx_queue), efx->rxq_entries, + rx_queue->ptr_mask); + + /* Allocate RX buffers */ + rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer), + GFP_KERNEL); + if (!rx_queue->buffer) + return -ENOMEM; + + rc = efx_nic_probe_rx(rx_queue); + if (rc) { + kfree(rx_queue->buffer); + rx_queue->buffer = NULL; + } + + return rc; +} + +void efx_init_rx_queue(struct efx_rx_queue *rx_queue) +{ + unsigned int max_fill, trigger, max_trigger; + struct efx_nic *efx = rx_queue->efx; + int rc = 0; + + netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, + "initialising RX queue %d\n", efx_rx_queue_index(rx_queue)); + + /* Initialise ptr fields */ + rx_queue->added_count = 0; + rx_queue->notified_count = 0; + rx_queue->removed_count = 0; + rx_queue->min_fill = -1U; + efx_init_rx_recycle_ring(rx_queue); + + rx_queue->page_remove = 0; + rx_queue->page_add = rx_queue->page_ptr_mask + 1; + rx_queue->page_recycle_count = 0; + rx_queue->page_recycle_failed = 0; + rx_queue->page_recycle_full = 0; + + /* Initialise limit fields */ + max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM; + max_trigger = + max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page; + if (rx_refill_threshold != 0) { + trigger = max_fill * min(rx_refill_threshold, 100U) / 100U; + if (trigger > max_trigger) + trigger = max_trigger; + } else { + trigger = max_trigger; + } + + rx_queue->max_fill = max_fill; + rx_queue->fast_fill_trigger = trigger; + rx_queue->refill_enabled = true; + + /* Initialise XDP queue information */ + rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev, + rx_queue->core_index); + + if (rc) { + netif_err(efx, rx_err, efx->net_dev, + "Failure to initialise XDP queue information rc=%d\n", + rc); + efx->xdp_rxq_info_failed = true; + } else { + rx_queue->xdp_rxq_info_valid = true; + } + + /* Set up RX descriptor ring */ + efx_nic_init_rx(rx_queue); +} + +void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) +{ + struct efx_rx_buffer *rx_buf; + int i; + + netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, + "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue)); + + del_timer_sync(&rx_queue->slow_fill); + + /* Release RX buffers from the current read ptr to the write ptr */ + if (rx_queue->buffer) { + for (i = rx_queue->removed_count; i < rx_queue->added_count; + i++) { + unsigned int index = i & rx_queue->ptr_mask; + + rx_buf = efx_rx_buffer(rx_queue, index); + efx_fini_rx_buffer(rx_queue, rx_buf); + } + } + + efx_fini_rx_recycle_ring(rx_queue); + + if (rx_queue->xdp_rxq_info_valid) + xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info); + + rx_queue->xdp_rxq_info_valid = false; +} + +void efx_remove_rx_queue(struct efx_rx_queue *rx_queue) +{ + netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, + "destroying RX queue %d\n", efx_rx_queue_index(rx_queue)); + + efx_nic_remove_rx(rx_queue); + + kfree(rx_queue->buffer); + rx_queue->buffer = NULL; +} + +/* Unmap a DMA-mapped page. 
This function is only called for the final RX + * buffer in a page. + */ +void efx_unmap_rx_buffer(struct efx_nic *efx, + struct efx_rx_buffer *rx_buf) +{ + struct page *page = rx_buf->page; + + if (page) { + struct efx_rx_page_state *state = page_address(page); + + dma_unmap_page(&efx->pci_dev->dev, + state->dma_addr, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); + } +} + +void efx_free_rx_buffers(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *rx_buf, + unsigned int num_bufs) +{ + do { + if (rx_buf->page) { + put_page(rx_buf->page); + rx_buf->page = NULL; + } + rx_buf = efx_rx_buf_next(rx_queue, rx_buf); + } while (--num_bufs); +} + +void efx_rx_slow_fill(struct timer_list *t) +{ + struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill); + + /* Post an event to cause NAPI to run and refill the queue */ + efx_nic_generate_fill_event(rx_queue); + ++rx_queue->slow_fill_count; +} + +void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue) +{ + mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10)); +} + +/* efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers + * + * @rx_queue: Efx RX queue + * + * This allocates a batch of pages, maps them for DMA, and populates + * struct efx_rx_buffers for each one. Return a negative error code or + * 0 on success. If a single page can be used for multiple buffers, + * then the page will either be inserted fully, or not at all. + */ +static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic) +{ + unsigned int page_offset, index, count; + struct efx_nic *efx = rx_queue->efx; + struct efx_rx_page_state *state; + struct efx_rx_buffer *rx_buf; + dma_addr_t dma_addr; + struct page *page; + + count = 0; + do { + page = efx_reuse_page(rx_queue); + if (page == NULL) { + page = alloc_pages(__GFP_COMP | + (atomic ? GFP_ATOMIC : GFP_KERNEL), + efx->rx_buffer_order); + if (unlikely(page == NULL)) + return -ENOMEM; + dma_addr = + dma_map_page(&efx->pci_dev->dev, page, 0, + PAGE_SIZE << efx->rx_buffer_order, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(&efx->pci_dev->dev, + dma_addr))) { + __free_pages(page, efx->rx_buffer_order); + return -EIO; + } + state = page_address(page); + state->dma_addr = dma_addr; + } else { + state = page_address(page); + dma_addr = state->dma_addr; + } + + dma_addr += sizeof(struct efx_rx_page_state); + page_offset = sizeof(struct efx_rx_page_state); + + do { + index = rx_queue->added_count & rx_queue->ptr_mask; + rx_buf = efx_rx_buffer(rx_queue, index); + rx_buf->dma_addr = dma_addr + efx->rx_ip_align + + XDP_PACKET_HEADROOM; + rx_buf->page = page; + rx_buf->page_offset = page_offset + efx->rx_ip_align + + XDP_PACKET_HEADROOM; + rx_buf->len = efx->rx_dma_len; + rx_buf->flags = 0; + ++rx_queue->added_count; + get_page(page); + dma_addr += efx->rx_page_buf_step; + page_offset += efx->rx_page_buf_step; + } while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE); + + rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE; + } while (++count < efx->rx_pages_per_batch); + + return 0; +} + +void efx_rx_config_page_split(struct efx_nic *efx) +{ + efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align + + XDP_PACKET_HEADROOM, + EFX_RX_BUF_ALIGNMENT); + efx->rx_bufs_per_page = efx->rx_buffer_order ? 
1 : + ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) / + efx->rx_page_buf_step); + efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) / + efx->rx_bufs_per_page; + efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH, + efx->rx_bufs_per_page); +} + +/* efx_fast_push_rx_descriptors - push new RX descriptors quickly + * @rx_queue: RX descriptor queue + * + * This will aim to fill the RX descriptor queue up to + * @rx_queue->@max_fill. If there is insufficient atomic + * memory to do so, a slow fill will be scheduled. + * + * The caller must provide serialisation (none is used here). In practise, + * this means this function must run from the NAPI handler, or be called + * when NAPI is disabled. + */ +void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic) +{ + struct efx_nic *efx = rx_queue->efx; + unsigned int fill_level, batch_size; + int space, rc = 0; + + if (!rx_queue->refill_enabled) + return; + + /* Calculate current fill level, and exit if we don't need to fill */ + fill_level = (rx_queue->added_count - rx_queue->removed_count); + EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries); + if (fill_level >= rx_queue->fast_fill_trigger) + goto out; + + /* Record minimum fill level */ + if (unlikely(fill_level < rx_queue->min_fill)) { + if (fill_level) + rx_queue->min_fill = fill_level; + } + + batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page; + space = rx_queue->max_fill - fill_level; + EFX_WARN_ON_ONCE_PARANOID(space < batch_size); + + netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, + "RX queue %d fast-filling descriptor ring from" + " level %d to level %d\n", + efx_rx_queue_index(rx_queue), fill_level, + rx_queue->max_fill); + + do { + rc = efx_init_rx_buffers(rx_queue, atomic); + if (unlikely(rc)) { + /* Ensure that we don't leave the rx queue empty */ + efx_schedule_slow_fill(rx_queue); + goto out; + } + } while ((space -= batch_size) >= batch_size); + + netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev, + "RX queue %d fast-filled descriptor ring " + "to level %d\n", efx_rx_queue_index(rx_queue), + rx_queue->added_count - rx_queue->removed_count); + + out: + if (rx_queue->notified_count != rx_queue->added_count) + efx_nic_notify_rx_desc(rx_queue); +} + +/* Pass a received packet up through GRO. GRO can handle pages + * regardless of checksum state and skbs with a good checksum. + */ +void +efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, + unsigned int n_frags, u8 *eh) +{ + struct napi_struct *napi = &channel->napi_str; + struct efx_nic *efx = channel->efx; + struct sk_buff *skb; + + skb = napi_get_frags(napi); + if (unlikely(!skb)) { + struct efx_rx_queue *rx_queue; + + rx_queue = efx_channel_get_rx_queue(channel); + efx_free_rx_buffers(rx_queue, rx_buf, n_frags); + return; + } + + if (efx->net_dev->features & NETIF_F_RXHASH) + skb_set_hash(skb, efx_rx_buf_hash(efx, eh), + PKT_HASH_TYPE_L3); + skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ? 
+ CHECKSUM_UNNECESSARY : CHECKSUM_NONE); + skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL); + + for (;;) { + skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, + rx_buf->page, rx_buf->page_offset, + rx_buf->len); + rx_buf->page = NULL; + skb->len += rx_buf->len; + if (skb_shinfo(skb)->nr_frags == n_frags) + break; + + rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf); + } + + skb->data_len = skb->len; + skb->truesize += n_frags * efx->rx_buffer_truesize; + + skb_record_rx_queue(skb, channel->rx_queue.core_index); + + napi_gro_frags(napi); +} + +/* RSS contexts. We're using linked lists and crappy O(n) algorithms, because + * (a) this is an infrequent control-plane operation and (b) n is small (max 64) + */ +struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx) +{ + struct list_head *head = &efx->rss_context.list; + struct efx_rss_context *ctx, *new; + u32 id = 1; /* Don't use zero, that refers to the master RSS context */ + + WARN_ON(!mutex_is_locked(&efx->rss_lock)); + + /* Search for first gap in the numbering */ + list_for_each_entry(ctx, head, list) { + if (ctx->user_id != id) + break; + id++; + /* Check for wrap. If this happens, we have nearly 2^32 + * allocated RSS contexts, which seems unlikely. + */ + if (WARN_ON_ONCE(!id)) + return NULL; + } + + /* Create the new entry */ + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return NULL; + new->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; + new->rx_hash_udp_4tuple = false; + + /* Insert the new entry into the gap */ + new->user_id = id; + list_add_tail(&new->list, &ctx->list); + return new; +} + +struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id) +{ + struct list_head *head = &efx->rss_context.list; + struct efx_rss_context *ctx; + + WARN_ON(!mutex_is_locked(&efx->rss_lock)); + + list_for_each_entry(ctx, head, list) + if (ctx->user_id == id) + return ctx; + return NULL; +} + +void efx_free_rss_context_entry(struct efx_rss_context *ctx) +{ + list_del(&ctx->list); + kfree(ctx); +} + +void efx_set_default_rx_indir_table(struct efx_nic *efx, + struct efx_rss_context *ctx) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++) + ctx->rx_indir_table[i] = + ethtool_rxfh_indir_default(i, efx->rss_spread); +} + +/** + * efx_filter_is_mc_recipient - test whether spec is a multicast recipient + * @spec: Specification to test + * + * Return: %true if the specification is a non-drop RX filter that + * matches a local MAC address I/G bit value of 1 or matches a local + * IPv4 or IPv6 address value in the respective multicast address + * range. Otherwise %false. 
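+ *
+ * For example, a non-drop RX filter on MAC 01:00:5e:00:00:01, on IPv4
+ * address 224.0.0.1 or on IPv6 address ff02::1 is a multicast recipient;
+ * a TX filter or an RX drop filter is not.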
+ */ +bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec) +{ + if (!(spec->flags & EFX_FILTER_FLAG_RX) || + spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP) + return false; + + if (spec->match_flags & + (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) && + is_multicast_ether_addr(spec->loc_mac)) + return true; + + if ((spec->match_flags & + (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) == + (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) { + if (spec->ether_type == htons(ETH_P_IP) && + ipv4_is_multicast(spec->loc_host[0])) + return true; + if (spec->ether_type == htons(ETH_P_IPV6) && + ((const u8 *)spec->loc_host)[0] == 0xff) + return true; + } + + return false; +} + +bool efx_filter_spec_equal(const struct efx_filter_spec *left, + const struct efx_filter_spec *right) +{ + if ((left->match_flags ^ right->match_flags) | + ((left->flags ^ right->flags) & + (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))) + return false; + + return memcmp(&left->outer_vid, &right->outer_vid, + sizeof(struct efx_filter_spec) - + offsetof(struct efx_filter_spec, outer_vid)) == 0; +} + +u32 efx_filter_spec_hash(const struct efx_filter_spec *spec) +{ + BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3); + return jhash2((const u32 *)&spec->outer_vid, + (sizeof(struct efx_filter_spec) - + offsetof(struct efx_filter_spec, outer_vid)) / 4, + 0); +} + +#ifdef CONFIG_RFS_ACCEL +bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, + bool *force) +{ + if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) { + /* ARFS is currently updating this entry, leave it */ + return false; + } + if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) { + /* ARFS tried and failed to update this, so it's probably out + * of date. Remove the filter and the ARFS rule entry. + */ + rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING; + *force = true; + return true; + } else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */ + /* ARFS has moved on, so old filter is not needed. Since we did + * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will + * not be removed by efx_rps_hash_del() subsequently. + */ + *force = true; + return true; + } + /* Remove it iff ARFS wants to. 
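+ * Returning true only makes the filter a candidate for removal; the
+ * caller is still expected to consult the RPS core before expiring it.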
*/ + return true; +} + +static +struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx, + const struct efx_filter_spec *spec) +{ + u32 hash = efx_filter_spec_hash(spec); + + lockdep_assert_held(&efx->rps_hash_lock); + if (!efx->rps_hash_table) + return NULL; + return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE]; +} + +struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, + const struct efx_filter_spec *spec) +{ + struct efx_arfs_rule *rule; + struct hlist_head *head; + struct hlist_node *node; + + head = efx_rps_hash_bucket(efx, spec); + if (!head) + return NULL; + hlist_for_each(node, head) { + rule = container_of(node, struct efx_arfs_rule, node); + if (efx_filter_spec_equal(spec, &rule->spec)) + return rule; + } + return NULL; +} + +struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, + const struct efx_filter_spec *spec, + bool *new) +{ + struct efx_arfs_rule *rule; + struct hlist_head *head; + struct hlist_node *node; + + head = efx_rps_hash_bucket(efx, spec); + if (!head) + return NULL; + hlist_for_each(node, head) { + rule = container_of(node, struct efx_arfs_rule, node); + if (efx_filter_spec_equal(spec, &rule->spec)) { + *new = false; + return rule; + } + } + rule = kmalloc(sizeof(*rule), GFP_ATOMIC); + *new = true; + if (rule) { + memcpy(&rule->spec, spec, sizeof(rule->spec)); + hlist_add_head(&rule->node, head); + } + return rule; +} + +void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec) +{ + struct efx_arfs_rule *rule; + struct hlist_head *head; + struct hlist_node *node; + + head = efx_rps_hash_bucket(efx, spec); + if (WARN_ON(!head)) + return; + hlist_for_each(node, head) { + rule = container_of(node, struct efx_arfs_rule, node); + if (efx_filter_spec_equal(spec, &rule->spec)) { + /* Someone already reused the entry. We know that if + * this check doesn't fire (i.e. filter_id == REMOVING) + * then the REMOVING mark was put there by our caller, + * because caller is holding a lock on filter table and + * only holders of that lock set REMOVING. + */ + if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING) + return; + hlist_del(node); + kfree(rule); + return; + } + } + /* We didn't find it. 
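+ * Our caller set the REMOVING mark, so the entry should still have been
+ * present in the hash table; warn, as this indicates a bookkeeping bug.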
*/ + WARN_ON(1); +} +#endif + +int efx_probe_filters(struct efx_nic *efx) +{ + int rc; + + init_rwsem(&efx->filter_sem); + mutex_lock(&efx->mac_lock); + down_write(&efx->filter_sem); + rc = efx->type->filter_table_probe(efx); + if (rc) + goto out_unlock; + +#ifdef CONFIG_RFS_ACCEL + if (efx->type->offload_features & NETIF_F_NTUPLE) { + struct efx_channel *channel; + int i, success = 1; + + efx_for_each_channel(channel, efx) { + channel->rps_flow_id = + kcalloc(efx->type->max_rx_ip_filters, + sizeof(*channel->rps_flow_id), + GFP_KERNEL); + if (!channel->rps_flow_id) + success = 0; + else + for (i = 0; + i < efx->type->max_rx_ip_filters; + ++i) + channel->rps_flow_id[i] = + RPS_FLOW_ID_INVALID; + channel->rfs_expire_index = 0; + channel->rfs_filter_count = 0; + } + + if (!success) { + efx_for_each_channel(channel, efx) + kfree(channel->rps_flow_id); + efx->type->filter_table_remove(efx); + rc = -ENOMEM; + goto out_unlock; + } + } +#endif +out_unlock: + up_write(&efx->filter_sem); + mutex_unlock(&efx->mac_lock); + return rc; +} + +void efx_remove_filters(struct efx_nic *efx) +{ +#ifdef CONFIG_RFS_ACCEL + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) { + cancel_delayed_work_sync(&channel->filter_work); + kfree(channel->rps_flow_id); + } +#endif + down_write(&efx->filter_sem); + efx->type->filter_table_remove(efx); + up_write(&efx->filter_sem); +} diff --git a/drivers/net/ethernet/sfc/rx_common.h b/drivers/net/ethernet/sfc/rx_common.h new file mode 100644 index 000000000000..c41f12a89477 --- /dev/null +++ b/drivers/net/ethernet/sfc/rx_common.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
+ */ + +#ifndef EFX_RX_COMMON_H +#define EFX_RX_COMMON_H + +/* Preferred number of descriptors to fill at once */ +#define EFX_RX_PREFERRED_BATCH 8U + +/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */ +#define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \ + EFX_RX_USR_BUF_SIZE) + +static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf) +{ + return page_address(buf->page) + buf->page_offset; +} + +static inline u32 efx_rx_buf_hash(struct efx_nic *efx, const u8 *eh) +{ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + return __le32_to_cpup((const __le32 *)(eh + efx->rx_packet_hash_offset)); +#else + const u8 *data = eh + efx->rx_packet_hash_offset; + + return (u32)data[0] | + (u32)data[1] << 8 | + (u32)data[2] << 16 | + (u32)data[3] << 24; +#endif +} + +void efx_rx_slow_fill(struct timer_list *t); + +void efx_recycle_rx_pages(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags); +void efx_discard_rx_packet(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags); + +int efx_probe_rx_queue(struct efx_rx_queue *rx_queue); +void efx_init_rx_queue(struct efx_rx_queue *rx_queue); +void efx_fini_rx_queue(struct efx_rx_queue *rx_queue); +void efx_remove_rx_queue(struct efx_rx_queue *rx_queue); +void efx_destroy_rx_queue(struct efx_rx_queue *rx_queue); + +void efx_init_rx_buffer(struct efx_rx_queue *rx_queue, + struct page *page, + unsigned int page_offset, + u16 flags); +void efx_unmap_rx_buffer(struct efx_nic *efx, struct efx_rx_buffer *rx_buf); +void efx_free_rx_buffers(struct efx_rx_queue *rx_queue, + struct efx_rx_buffer *rx_buf, + unsigned int num_bufs); + +void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue); +void efx_rx_config_page_split(struct efx_nic *efx); +void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic); + +void +efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, + unsigned int n_frags, u8 *eh); + +struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx); +struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id); +void efx_free_rss_context_entry(struct efx_rss_context *ctx); +void efx_set_default_rx_indir_table(struct efx_nic *efx, + struct efx_rss_context *ctx); + +bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec); +bool efx_filter_spec_equal(const struct efx_filter_spec *left, + const struct efx_filter_spec *right); +u32 efx_filter_spec_hash(const struct efx_filter_spec *spec); + +#ifdef CONFIG_RFS_ACCEL +bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, + bool *force); +struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, + const struct efx_filter_spec *spec); +struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, + const struct efx_filter_spec *spec, + bool *new); +void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec); +#endif + +int efx_probe_filters(struct efx_nic *efx); +void efx_remove_filters(struct efx_nic *efx); + +#endif diff --git a/drivers/net/ethernet/sfc/selftest.c b/drivers/net/ethernet/sfc/selftest.c index 8474cf8ea7d3..1ae369022d7d 100644 --- a/drivers/net/ethernet/sfc/selftest.c +++ b/drivers/net/ethernet/sfc/selftest.c @@ -18,6 +18,8 @@ #include <linux/slab.h> #include "net_driver.h" #include "efx.h" +#include "efx_common.h" +#include "efx_channels.h" #include "nic.h" #include "selftest.h" #include "workarounds.h" @@ -783,7 +785,7 @@ void efx_selftest_async_cancel(struct 
efx_nic *efx) cancel_delayed_work_sync(&efx->selftest_work); } -void efx_selftest_async_work(struct work_struct *data) +static void efx_selftest_async_work(struct work_struct *data) { struct efx_nic *efx = container_of(data, struct efx_nic, selftest_work.work); @@ -802,3 +804,8 @@ void efx_selftest_async_work(struct work_struct *data) channel->channel, cpu); } } + +void efx_selftest_async_init(struct efx_nic *efx) +{ + INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work); +} diff --git a/drivers/net/ethernet/sfc/selftest.h b/drivers/net/ethernet/sfc/selftest.h index a3553816d92c..ca88ebb4f6b1 100644 --- a/drivers/net/ethernet/sfc/selftest.h +++ b/drivers/net/ethernet/sfc/selftest.h @@ -45,8 +45,8 @@ void efx_loopback_rx_packet(struct efx_nic *efx, const char *buf_ptr, int pkt_len); int efx_selftest(struct efx_nic *efx, struct efx_self_tests *tests, unsigned flags); +void efx_selftest_async_init(struct efx_nic *efx); void efx_selftest_async_start(struct efx_nic *efx); void efx_selftest_async_cancel(struct efx_nic *efx); -void efx_selftest_async_work(struct work_struct *data); #endif /* EFX_SELFTEST_H */ diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index 81499244a4b4..baa464161626 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -14,12 +14,14 @@ #include "net_driver.h" #include "bitfield.h" #include "efx.h" +#include "efx_common.h" #include "nic.h" #include "farch_regs.h" #include "io.h" #include "workarounds.h" #include "mcdi.h" #include "mcdi_pcol.h" +#include "mcdi_port_common.h" #include "selftest.h" #include "siena_sriov.h" diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c index dfbdf05dcf79..83dcfcae3d4b 100644 --- a/drivers/net/ethernet/sfc/siena_sriov.c +++ b/drivers/net/ethernet/sfc/siena_sriov.c @@ -7,6 +7,7 @@ #include <linux/module.h> #include "net_driver.h" #include "efx.h" +#include "efx_channels.h" #include "nic.h" #include "io.h" #include "mcdi.h" diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 00c1c4402451..04d7f41d7ed9 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -20,6 +20,7 @@ #include "io.h" #include "nic.h" #include "tx.h" +#include "tx_common.h" #include "workarounds.h" #include "ef10_regs.h" @@ -56,72 +57,6 @@ u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue, return efx_tx_get_copy_buffer(tx_queue, buffer); } -static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, - struct efx_tx_buffer *buffer, - unsigned int *pkts_compl, - unsigned int *bytes_compl) -{ - if (buffer->unmap_len) { - struct device *dma_dev = &tx_queue->efx->pci_dev->dev; - dma_addr_t unmap_addr = buffer->dma_addr - buffer->dma_offset; - if (buffer->flags & EFX_TX_BUF_MAP_SINGLE) - dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len, - DMA_TO_DEVICE); - else - dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len, - DMA_TO_DEVICE); - buffer->unmap_len = 0; - } - - if (buffer->flags & EFX_TX_BUF_SKB) { - struct sk_buff *skb = (struct sk_buff *)buffer->skb; - - EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl); - (*pkts_compl)++; - (*bytes_compl) += skb->len; - if (tx_queue->timestamping && - (tx_queue->completed_timestamp_major || - tx_queue->completed_timestamp_minor)) { - struct skb_shared_hwtstamps hwtstamp; - - hwtstamp.hwtstamp = - efx_ptp_nic_to_kernel_time(tx_queue); - skb_tstamp_tx(skb, &hwtstamp); - - tx_queue->completed_timestamp_major = 0; - 
tx_queue->completed_timestamp_minor = 0; - } - dev_consume_skb_any((struct sk_buff *)buffer->skb); - netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev, - "TX queue %d transmission id %x complete\n", - tx_queue->queue, tx_queue->read_count); - } else if (buffer->flags & EFX_TX_BUF_XDP) { - xdp_return_frame_rx_napi(buffer->xdpf); - } - - buffer->len = 0; - buffer->flags = 0; -} - -unsigned int efx_tx_max_skb_descs(struct efx_nic *efx) -{ - /* Header and payload descriptor for each output segment, plus - * one for every input fragment boundary within a segment - */ - unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS; - - /* Possibly one more per segment for option descriptors */ - if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) - max_descs += EFX_TSO_MAX_SEGS; - - /* Possibly more for PCIe page boundaries within input fragments */ - if (PAGE_SIZE > EFX_PAGE_SIZE) - max_descs += max_t(unsigned int, MAX_SKB_FRAGS, - DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE)); - - return max_descs; -} - static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1) { /* We need to consider both queues that the net core sees as one */ @@ -333,125 +268,6 @@ static int efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, } #endif /* EFX_USE_PIO */ -static struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue, - dma_addr_t dma_addr, - size_t len) -{ - const struct efx_nic_type *nic_type = tx_queue->efx->type; - struct efx_tx_buffer *buffer; - unsigned int dma_len; - - /* Map the fragment taking account of NIC-dependent DMA limits. */ - do { - buffer = efx_tx_queue_get_insert_buffer(tx_queue); - dma_len = nic_type->tx_limit_len(tx_queue, dma_addr, len); - - buffer->len = dma_len; - buffer->dma_addr = dma_addr; - buffer->flags = EFX_TX_BUF_CONT; - len -= dma_len; - dma_addr += dma_len; - ++tx_queue->insert_count; - } while (len); - - return buffer; -} - -/* Map all data from an SKB for DMA and create descriptors on the queue. - */ -static int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb, - unsigned int segment_count) -{ - struct efx_nic *efx = tx_queue->efx; - struct device *dma_dev = &efx->pci_dev->dev; - unsigned int frag_index, nr_frags; - dma_addr_t dma_addr, unmap_addr; - unsigned short dma_flags; - size_t len, unmap_len; - - nr_frags = skb_shinfo(skb)->nr_frags; - frag_index = 0; - - /* Map header data. */ - len = skb_headlen(skb); - dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE); - dma_flags = EFX_TX_BUF_MAP_SINGLE; - unmap_len = len; - unmap_addr = dma_addr; - - if (unlikely(dma_mapping_error(dma_dev, dma_addr))) - return -EIO; - - if (segment_count) { - /* For TSO we need to put the header in to a separate - * descriptor. Map this separately if necessary. - */ - size_t header_len = skb_transport_header(skb) - skb->data + - (tcp_hdr(skb)->doff << 2u); - - if (header_len != len) { - tx_queue->tso_long_headers++; - efx_tx_map_chunk(tx_queue, dma_addr, header_len); - len -= header_len; - dma_addr += header_len; - } - } - - /* Add descriptors for each fragment. */ - do { - struct efx_tx_buffer *buffer; - skb_frag_t *fragment; - - buffer = efx_tx_map_chunk(tx_queue, dma_addr, len); - - /* The final descriptor for a fragment is responsible for - * unmapping the whole fragment. - */ - buffer->flags = EFX_TX_BUF_CONT | dma_flags; - buffer->unmap_len = unmap_len; - buffer->dma_offset = buffer->dma_addr - unmap_addr; - - if (frag_index >= nr_frags) { - /* Store SKB details with the final buffer for - * the completion. 
- */ - buffer->skb = skb; - buffer->flags = EFX_TX_BUF_SKB | dma_flags; - return 0; - } - - /* Move on to the next fragment. */ - fragment = &skb_shinfo(skb)->frags[frag_index++]; - len = skb_frag_size(fragment); - dma_addr = skb_frag_dma_map(dma_dev, fragment, - 0, len, DMA_TO_DEVICE); - dma_flags = 0; - unmap_len = len; - unmap_addr = dma_addr; - - if (unlikely(dma_mapping_error(dma_dev, dma_addr))) - return -EIO; - } while (1); -} - -/* Remove buffers put into a tx_queue for the current packet. - * None of the buffers must have an skb attached. - */ -static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue, - unsigned int insert_count) -{ - struct efx_tx_buffer *buffer; - unsigned int bytes_compl = 0; - unsigned int pkts_compl = 0; - - /* Work backwards until we hit the original insert pointer value */ - while (tx_queue->insert_count != insert_count) { - --tx_queue->insert_count; - buffer = __efx_tx_queue_get_insert_buffer(tx_queue); - efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); - } -} - /* * Fallback to software TSO. * @@ -473,12 +289,9 @@ static int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue, dev_consume_skb_any(skb); skb = segments; - while (skb) { - next = skb->next; - skb->next = NULL; - + skb_list_walk_safe(skb, skb, next) { + skb_mark_not_on_list(skb); efx_enqueue_skb(tx_queue, skb); - skb = next; } return 0; @@ -687,41 +500,6 @@ int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs, return i; } -/* Remove packets from the TX queue - * - * This removes packets from the TX queue, up to and including the - * specified index. - */ -static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue, - unsigned int index, - unsigned int *pkts_compl, - unsigned int *bytes_compl) -{ - struct efx_nic *efx = tx_queue->efx; - unsigned int stop_index, read_ptr; - - stop_index = (index + 1) & tx_queue->ptr_mask; - read_ptr = tx_queue->read_count & tx_queue->ptr_mask; - - while (read_ptr != stop_index) { - struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr]; - - if (!(buffer->flags & EFX_TX_BUF_OPTION) && - unlikely(buffer->len == 0)) { - netif_err(efx, tx_err, efx->net_dev, - "TX queue %d spurious TX completion id %x\n", - tx_queue->queue, read_ptr); - efx_schedule_reset(efx, RESET_TYPE_TX_SKIP); - return; - } - - efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl); - - ++tx_queue->read_count; - read_ptr = tx_queue->read_count & tx_queue->ptr_mask; - } -} - /* Initiate a packet transmission. We use one channel per CPU * (sharing when we have more CPUs than channels). On Falcon, the TX * completion events will be directed back to the CPU that transmitted @@ -834,173 +612,3 @@ int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type, net_dev->num_tc = num_tc; return 0; } - -void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) -{ - unsigned fill_level; - struct efx_nic *efx = tx_queue->efx; - struct efx_tx_queue *txq2; - unsigned int pkts_compl = 0, bytes_compl = 0; - - EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask); - - efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl); - tx_queue->pkts_compl += pkts_compl; - tx_queue->bytes_compl += bytes_compl; - - if (pkts_compl > 1) - ++tx_queue->merge_events; - - /* See if we need to restart the netif queue. This memory - * barrier ensures that we write read_count (inside - * efx_dequeue_buffers()) before reading the queue status. 
- */ - smp_mb(); - if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) && - likely(efx->port_enabled) && - likely(netif_device_present(efx->net_dev))) { - txq2 = efx_tx_queue_partner(tx_queue); - fill_level = max(tx_queue->insert_count - tx_queue->read_count, - txq2->insert_count - txq2->read_count); - if (fill_level <= efx->txq_wake_thresh) - netif_tx_wake_queue(tx_queue->core_txq); - } - - /* Check whether the hardware queue is now empty */ - if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) { - tx_queue->old_write_count = READ_ONCE(tx_queue->write_count); - if (tx_queue->read_count == tx_queue->old_write_count) { - smp_mb(); - tx_queue->empty_read_count = - tx_queue->read_count | EFX_EMPTY_COUNT_VALID; - } - } -} - -static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue) -{ - return DIV_ROUND_UP(tx_queue->ptr_mask + 1, PAGE_SIZE >> EFX_TX_CB_ORDER); -} - -int efx_probe_tx_queue(struct efx_tx_queue *tx_queue) -{ - struct efx_nic *efx = tx_queue->efx; - unsigned int entries; - int rc; - - /* Create the smallest power-of-two aligned ring */ - entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE); - EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE); - tx_queue->ptr_mask = entries - 1; - - netif_dbg(efx, probe, efx->net_dev, - "creating TX queue %d size %#x mask %#x\n", - tx_queue->queue, efx->txq_entries, tx_queue->ptr_mask); - - /* Allocate software ring */ - tx_queue->buffer = kcalloc(entries, sizeof(*tx_queue->buffer), - GFP_KERNEL); - if (!tx_queue->buffer) - return -ENOMEM; - - tx_queue->cb_page = kcalloc(efx_tx_cb_page_count(tx_queue), - sizeof(tx_queue->cb_page[0]), GFP_KERNEL); - if (!tx_queue->cb_page) { - rc = -ENOMEM; - goto fail1; - } - - /* Allocate hardware ring */ - rc = efx_nic_probe_tx(tx_queue); - if (rc) - goto fail2; - - return 0; - -fail2: - kfree(tx_queue->cb_page); - tx_queue->cb_page = NULL; -fail1: - kfree(tx_queue->buffer); - tx_queue->buffer = NULL; - return rc; -} - -void efx_init_tx_queue(struct efx_tx_queue *tx_queue) -{ - struct efx_nic *efx = tx_queue->efx; - - netif_dbg(efx, drv, efx->net_dev, - "initialising TX queue %d\n", tx_queue->queue); - - tx_queue->insert_count = 0; - tx_queue->write_count = 0; - tx_queue->packet_write_count = 0; - tx_queue->old_write_count = 0; - tx_queue->read_count = 0; - tx_queue->old_read_count = 0; - tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID; - tx_queue->xmit_more_available = false; - tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) && - tx_queue->channel == efx_ptp_channel(efx)); - tx_queue->completed_desc_ptr = tx_queue->ptr_mask; - tx_queue->completed_timestamp_major = 0; - tx_queue->completed_timestamp_minor = 0; - - tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel); - - /* Set up default function pointers. These may get replaced by - * efx_nic_init_tx() based off NIC/queue capabilities. 
- */ - tx_queue->handle_tso = efx_enqueue_skb_tso; - - /* Set up TX descriptor ring */ - efx_nic_init_tx(tx_queue); - - tx_queue->initialised = true; -} - -void efx_fini_tx_queue(struct efx_tx_queue *tx_queue) -{ - struct efx_tx_buffer *buffer; - - netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev, - "shutting down TX queue %d\n", tx_queue->queue); - - if (!tx_queue->buffer) - return; - - /* Free any buffers left in the ring */ - while (tx_queue->read_count != tx_queue->write_count) { - unsigned int pkts_compl = 0, bytes_compl = 0; - buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask]; - efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); - - ++tx_queue->read_count; - } - tx_queue->xmit_more_available = false; - netdev_tx_reset_queue(tx_queue->core_txq); -} - -void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) -{ - int i; - - if (!tx_queue->buffer) - return; - - netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev, - "destroying TX queue %d\n", tx_queue->queue); - efx_nic_remove_tx(tx_queue); - - if (tx_queue->cb_page) { - for (i = 0; i < efx_tx_cb_page_count(tx_queue); i++) - efx_nic_free_buffer(tx_queue->efx, - &tx_queue->cb_page[i]); - kfree(tx_queue->cb_page); - tx_queue->cb_page = NULL; - } - - kfree(tx_queue->buffer); - tx_queue->buffer = NULL; -} diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c new file mode 100644 index 000000000000..b1571e9789d0 --- /dev/null +++ b/drivers/net/ethernet/sfc/tx_common.c @@ -0,0 +1,404 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2018 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c
new file mode 100644
index 000000000000..b1571e9789d0
--- /dev/null
+++ b/drivers/net/ethernet/sfc/tx_common.c
@@ -0,0 +1,404 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include "net_driver.h"
+#include "efx.h"
+#include "nic.h"
+#include "tx_common.h"
+
+static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue)
+{
+        return DIV_ROUND_UP(tx_queue->ptr_mask + 1,
+                            PAGE_SIZE >> EFX_TX_CB_ORDER);
+}
+
+int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
+{
+        struct efx_nic *efx = tx_queue->efx;
+        unsigned int entries;
+        int rc;
+
+        /* Create the smallest power-of-two aligned ring */
+        entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE);
+        EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
+        tx_queue->ptr_mask = entries - 1;
+
+        netif_dbg(efx, probe, efx->net_dev,
+                  "creating TX queue %d size %#x mask %#x\n",
+                  tx_queue->queue, efx->txq_entries, tx_queue->ptr_mask);
+
+        /* Allocate software ring */
+        tx_queue->buffer = kcalloc(entries, sizeof(*tx_queue->buffer),
+                                   GFP_KERNEL);
+        if (!tx_queue->buffer)
+                return -ENOMEM;
+
+        tx_queue->cb_page = kcalloc(efx_tx_cb_page_count(tx_queue),
+                                    sizeof(tx_queue->cb_page[0]), GFP_KERNEL);
+        if (!tx_queue->cb_page) {
+                rc = -ENOMEM;
+                goto fail1;
+        }
+
+        /* Allocate hardware ring */
+        rc = efx_nic_probe_tx(tx_queue);
+        if (rc)
+                goto fail2;
+
+        return 0;
+
+fail2:
+        kfree(tx_queue->cb_page);
+        tx_queue->cb_page = NULL;
+fail1:
+        kfree(tx_queue->buffer);
+        tx_queue->buffer = NULL;
+        return rc;
+}
+
+void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
+{
+        struct efx_nic *efx = tx_queue->efx;
+
+        netif_dbg(efx, drv, efx->net_dev,
+                  "initialising TX queue %d\n", tx_queue->queue);
+
+        tx_queue->insert_count = 0;
+        tx_queue->write_count = 0;
+        tx_queue->packet_write_count = 0;
+        tx_queue->old_write_count = 0;
+        tx_queue->read_count = 0;
+        tx_queue->old_read_count = 0;
+        tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID;
+        tx_queue->xmit_more_available = false;
+        tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) &&
+                                  tx_queue->channel == efx_ptp_channel(efx));
+        tx_queue->completed_desc_ptr = tx_queue->ptr_mask;
+        tx_queue->completed_timestamp_major = 0;
+        tx_queue->completed_timestamp_minor = 0;
+
+        tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel);
+
+        /* Set up default function pointers. These may get replaced by
+         * efx_nic_init_tx() based off NIC/queue capabilities.
+         */
+        tx_queue->handle_tso = efx_enqueue_skb_tso;
+
+        /* Set up TX descriptor ring */
+        efx_nic_init_tx(tx_queue);
+
+        tx_queue->initialised = true;
+}
+
+void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
+{
+        struct efx_tx_buffer *buffer;
+
+        netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
+                  "shutting down TX queue %d\n", tx_queue->queue);
+
+        if (!tx_queue->buffer)
+                return;
+
+        /* Free any buffers left in the ring */
+        while (tx_queue->read_count != tx_queue->write_count) {
+                unsigned int pkts_compl = 0, bytes_compl = 0;
+
+                buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];
+                efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
+
+                ++tx_queue->read_count;
+        }
+        tx_queue->xmit_more_available = false;
+        netdev_tx_reset_queue(tx_queue->core_txq);
+}
+
+void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
+{
+        int i;
+
+        if (!tx_queue->buffer)
+                return;
+
+        netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
+                  "destroying TX queue %d\n", tx_queue->queue);
+        efx_nic_remove_tx(tx_queue);
+
+        if (tx_queue->cb_page) {
+                for (i = 0; i < efx_tx_cb_page_count(tx_queue); i++)
+                        efx_nic_free_buffer(tx_queue->efx,
+                                            &tx_queue->cb_page[i]);
+                kfree(tx_queue->cb_page);
+                tx_queue->cb_page = NULL;
+        }
+
+        kfree(tx_queue->buffer);
+        tx_queue->buffer = NULL;
+}
+
+void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
+                        struct efx_tx_buffer *buffer,
+                        unsigned int *pkts_compl,
+                        unsigned int *bytes_compl)
+{
+        if (buffer->unmap_len) {
+                struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
+                dma_addr_t unmap_addr = buffer->dma_addr - buffer->dma_offset;
+
+                if (buffer->flags & EFX_TX_BUF_MAP_SINGLE)
+                        dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len,
+                                         DMA_TO_DEVICE);
+                else
+                        dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len,
+                                       DMA_TO_DEVICE);
+                buffer->unmap_len = 0;
+        }
+
+        if (buffer->flags & EFX_TX_BUF_SKB) {
+                struct sk_buff *skb = (struct sk_buff *)buffer->skb;
+
+                EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl);
+                (*pkts_compl)++;
+                (*bytes_compl) += skb->len;
+                if (tx_queue->timestamping &&
+                    (tx_queue->completed_timestamp_major ||
+                     tx_queue->completed_timestamp_minor)) {
+                        struct skb_shared_hwtstamps hwtstamp;
+
+                        hwtstamp.hwtstamp =
+                                efx_ptp_nic_to_kernel_time(tx_queue);
+                        skb_tstamp_tx(skb, &hwtstamp);
+
+                        tx_queue->completed_timestamp_major = 0;
+                        tx_queue->completed_timestamp_minor = 0;
+                }
+                dev_consume_skb_any((struct sk_buff *)buffer->skb);
+                netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
+                           "TX queue %d transmission id %x complete\n",
+                           tx_queue->queue, tx_queue->read_count);
+        } else if (buffer->flags & EFX_TX_BUF_XDP) {
+                xdp_return_frame_rx_napi(buffer->xdpf);
+        }
+
+        buffer->len = 0;
+        buffer->flags = 0;
+}
+
+/* Remove packets from the TX queue
+ *
+ * This removes packets from the TX queue, up to and including the
+ * specified index.
+ */
+static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
+                                unsigned int index,
+                                unsigned int *pkts_compl,
+                                unsigned int *bytes_compl)
+{
+        struct efx_nic *efx = tx_queue->efx;
+        unsigned int stop_index, read_ptr;
+
+        stop_index = (index + 1) & tx_queue->ptr_mask;
+        read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
+
+        while (read_ptr != stop_index) {
+                struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
+
+                if (!(buffer->flags & EFX_TX_BUF_OPTION) &&
+                    unlikely(buffer->len == 0)) {
+                        netif_err(efx, tx_err, efx->net_dev,
+                                  "TX queue %d spurious TX completion id %x\n",
+                                  tx_queue->queue, read_ptr);
+                        efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
+                        return;
+                }
+
+                efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl);
+
+                ++tx_queue->read_count;
+                read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
+        }
+}
+
+void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
+{
+        unsigned int fill_level, pkts_compl = 0, bytes_compl = 0;
+        struct efx_nic *efx = tx_queue->efx;
+        struct efx_tx_queue *txq2;
+
+        EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask);
+
+        efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
+        tx_queue->pkts_compl += pkts_compl;
+        tx_queue->bytes_compl += bytes_compl;
+
+        if (pkts_compl > 1)
+                ++tx_queue->merge_events;
+
+        /* See if we need to restart the netif queue. This memory
+         * barrier ensures that we write read_count (inside
+         * efx_dequeue_buffers()) before reading the queue status.
+         */
+        smp_mb();
+        if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) &&
+            likely(efx->port_enabled) &&
+            likely(netif_device_present(efx->net_dev))) {
+                txq2 = efx_tx_queue_partner(tx_queue);
+                fill_level = max(tx_queue->insert_count - tx_queue->read_count,
+                                 txq2->insert_count - txq2->read_count);
+                if (fill_level <= efx->txq_wake_thresh)
+                        netif_tx_wake_queue(tx_queue->core_txq);
+        }
+
+        /* Check whether the hardware queue is now empty */
+        if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
+                tx_queue->old_write_count = READ_ONCE(tx_queue->write_count);
+                if (tx_queue->read_count == tx_queue->old_write_count) {
+                        smp_mb();
+                        tx_queue->empty_read_count =
+                                tx_queue->read_count | EFX_EMPTY_COUNT_VALID;
+                }
+        }
+}
+
+/* Remove buffers put into a tx_queue for the current packet.
+ * None of the buffers must have an skb attached.
+ */
+void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
+                        unsigned int insert_count)
+{
+        struct efx_tx_buffer *buffer;
+        unsigned int bytes_compl = 0;
+        unsigned int pkts_compl = 0;
+
+        /* Work backwards until we hit the original insert pointer value */
+        while (tx_queue->insert_count != insert_count) {
+                --tx_queue->insert_count;
+                buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
+                efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
+        }
+}
+
+struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue,
+                                       dma_addr_t dma_addr, size_t len)
+{
+        const struct efx_nic_type *nic_type = tx_queue->efx->type;
+        struct efx_tx_buffer *buffer;
+        unsigned int dma_len;
+
+        /* Map the fragment taking account of NIC-dependent DMA limits. */
+        do {
+                buffer = efx_tx_queue_get_insert_buffer(tx_queue);
+                dma_len = nic_type->tx_limit_len(tx_queue, dma_addr, len);
+
+                buffer->len = dma_len;
+                buffer->dma_addr = dma_addr;
+                buffer->flags = EFX_TX_BUF_CONT;
+                len -= dma_len;
+                dma_addr += dma_len;
+                ++tx_queue->insert_count;
+        } while (len);
+
+        return buffer;
+}
+
+/* Map all data from an SKB for DMA and create descriptors on the queue.
+ */
+int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+                    unsigned int segment_count)
+{
+        struct efx_nic *efx = tx_queue->efx;
+        struct device *dma_dev = &efx->pci_dev->dev;
+        unsigned int frag_index, nr_frags;
+        dma_addr_t dma_addr, unmap_addr;
+        unsigned short dma_flags;
+        size_t len, unmap_len;
+
+        nr_frags = skb_shinfo(skb)->nr_frags;
+        frag_index = 0;
+
+        /* Map header data. */
+        len = skb_headlen(skb);
+        dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE);
+        dma_flags = EFX_TX_BUF_MAP_SINGLE;
+        unmap_len = len;
+        unmap_addr = dma_addr;
+
+        if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
+                return -EIO;
+
+        if (segment_count) {
+                /* For TSO we need to put the header in to a separate
+                 * descriptor. Map this separately if necessary.
+                 */
+                size_t header_len = skb_transport_header(skb) - skb->data +
+                                (tcp_hdr(skb)->doff << 2u);
+
+                if (header_len != len) {
+                        tx_queue->tso_long_headers++;
+                        efx_tx_map_chunk(tx_queue, dma_addr, header_len);
+                        len -= header_len;
+                        dma_addr += header_len;
+                }
+        }
+
+        /* Add descriptors for each fragment. */
+        do {
+                struct efx_tx_buffer *buffer;
+                skb_frag_t *fragment;
+
+                buffer = efx_tx_map_chunk(tx_queue, dma_addr, len);
+
+                /* The final descriptor for a fragment is responsible for
+                 * unmapping the whole fragment.
+                 */
+                buffer->flags = EFX_TX_BUF_CONT | dma_flags;
+                buffer->unmap_len = unmap_len;
+                buffer->dma_offset = buffer->dma_addr - unmap_addr;
+
+                if (frag_index >= nr_frags) {
+                        /* Store SKB details with the final buffer for
+                         * the completion.
+                         */
+                        buffer->skb = skb;
+                        buffer->flags = EFX_TX_BUF_SKB | dma_flags;
+                        return 0;
+                }
+
+                /* Move on to the next fragment. */
+                fragment = &skb_shinfo(skb)->frags[frag_index++];
+                len = skb_frag_size(fragment);
+                dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len,
+                                            DMA_TO_DEVICE);
+                dma_flags = 0;
+                unmap_len = len;
+                unmap_addr = dma_addr;
+
+                if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
+                        return -EIO;
+        } while (1);
+}
+
+unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
+{
+        /* Header and payload descriptor for each output segment, plus
+         * one for every input fragment boundary within a segment
+         */
+        unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS;
+
+        /* Possibly one more per segment for option descriptors */
+        if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
+                max_descs += EFX_TSO_MAX_SEGS;
+
+        /* Possibly more for PCIe page boundaries within input fragments */
+        if (PAGE_SIZE > EFX_PAGE_SIZE)
+                max_descs += max_t(unsigned int, MAX_SKB_FRAGS,
+                                   DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE));
+
+        return max_descs;
+}
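The worst-case budget computed by efx_tx_max_skb_descs() above is easy to check by hand. The sketch below restates the calculation with representative values; the real constants come from the kernel tree (EFX_TSO_MAX_SEGS, MAX_SKB_FRAGS, GSO_MAX_SIZE, EFX_PAGE_SIZE) and the values assumed here should be verified against your source:

#include <stdio.h>

/* Standalone re-statement of the efx_tx_max_skb_descs() arithmetic.
 * Assumed values: EFX_TSO_MAX_SEGS of 100 and MAX_SKB_FRAGS of 17
 * (the usual value with 4 KiB pages); check your tree.
 */
#define TSO_MAX_SEGS    100U    /* assumed: EFX_TSO_MAX_SEGS */
#define SKB_MAX_FRAGS   17U     /* assumed: MAX_SKB_FRAGS */

int main(void)
{
        /* Two descriptors (header + payload) per output segment, plus
         * one for every input fragment boundary within a segment.
         */
        unsigned int max_descs = TSO_MAX_SEGS * 2 + SKB_MAX_FRAGS;

        /* EF10-class NICs may add one option descriptor per segment. */
        max_descs += TSO_MAX_SEGS;

        printf("worst-case descriptors per TSO skb: %u\n", max_descs);
        return 0;
}

With those values the EF10 path budgets 100 * 2 + 17 + 100 = 317 descriptors per TSO skb; the PAGE_SIZE > EFX_PAGE_SIZE term only contributes on architectures whose CPU pages are larger than the NIC's DMA page size.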
diff --git a/drivers/net/ethernet/sfc/tx_common.h b/drivers/net/ethernet/sfc/tx_common.h
new file mode 100644
index 000000000000..f92f1fe3a87f
--- /dev/null
+++ b/drivers/net/ethernet/sfc/tx_common.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_TX_COMMON_H
+#define EFX_TX_COMMON_H
+
+int efx_probe_tx_queue(struct efx_tx_queue *tx_queue);
+void efx_init_tx_queue(struct efx_tx_queue *tx_queue);
+void efx_fini_tx_queue(struct efx_tx_queue *tx_queue);
+void efx_remove_tx_queue(struct efx_tx_queue *tx_queue);
+
+void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
+                        struct efx_tx_buffer *buffer,
+                        unsigned int *pkts_compl,
+                        unsigned int *bytes_compl);
+
+void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
+
+void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
+                        unsigned int insert_count);
+
+struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue,
+                                       dma_addr_t dma_addr, size_t len);
+int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+                    unsigned int segment_count);
+
+unsigned int efx_tx_max_skb_descs(struct efx_nic *efx);
+
+#endif
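For reference, the contract these prototypes imply for a transmit path: map an skb with efx_tx_map_data(), and on failure roll back with efx_enqueue_unwind(), which requires that none of the unwound buffers own an skb. efx_tx_map_data() guarantees that by setting EFX_TX_BUF_SKB only on the final buffer of a fully mapped packet. A hedged sketch of that calling pattern follows; it assumes the driver's net_driver.h/tx_common.h context, simplifies the real efx_enqueue_skb(), and toy_enqueue_skb is an illustrative name, not a driver function:

/* Sketch of a transmit path built on the tx_common API; error
 * handling and descriptor push are elided for brevity.
 */
static netdev_tx_t toy_enqueue_skb(struct efx_tx_queue *tx_queue,
                                   struct sk_buff *skb)
{
        /* Remember where this packet starts so a failure can be undone. */
        unsigned int old_insert_count = tx_queue->insert_count;

        /* segment_count is 0 for non-TSO skbs, gso_segs otherwise. */
        if (efx_tx_map_data(tx_queue, skb,
                            skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 0))
                goto err;

        /* ...create option descriptors, push the ring to hardware,
         * update BQL accounting...
         */
        return NETDEV_TX_OK;

err:
        /* Roll the insert pointer back; no unwound buffer holds the
         * skb yet, so it is safe to free it here.
         */
        efx_enqueue_unwind(tx_queue, old_insert_count);
        dev_kfree_skb_any(skb);
        return NETDEV_TX_OK;    /* packet dropped, but the ring is clean */
}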