Diffstat (limited to 'drivers/net/ethernet/sfc')
-rw-r--r--  drivers/net/ethernet/sfc/Kconfig             |   1
-rw-r--r--  drivers/net/ethernet/sfc/Makefile            |   3
-rw-r--r--  drivers/net/ethernet/sfc/ef100_netdev.c      |  85
-rw-r--r--  drivers/net/ethernet/sfc/efx.c               |   9
-rw-r--r--  drivers/net/ethernet/sfc/efx_devlink.c       |  80
-rw-r--r--  drivers/net/ethernet/sfc/mae.c               | 141
-rw-r--r--  drivers/net/ethernet/sfc/mae.h               |   9
-rw-r--r--  drivers/net/ethernet/sfc/net_driver.h        |   3
-rw-r--r--  drivers/net/ethernet/sfc/siena/tx_common.c   |   1
-rw-r--r--  drivers/net/ethernet/sfc/tc.c                | 387
-rw-r--r--  drivers/net/ethernet/sfc/tc.h                |  50
-rw-r--r--  drivers/net/ethernet/sfc/tc_bindings.c       |  13
-rw-r--r--  drivers/net/ethernet/sfc/tc_bindings.h       |  14
-rw-r--r--  drivers/net/ethernet/sfc/tc_encap_actions.c  | 747
-rw-r--r--  drivers/net/ethernet/sfc/tc_encap_actions.h  | 114
-rw-r--r--  drivers/net/ethernet/sfc/tx_common.c         |   1
16 files changed, 1542 insertions, 116 deletions
diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig
index 4af36ba8906b..3eb55dcfa8a6 100644
--- a/drivers/net/ethernet/sfc/Kconfig
+++ b/drivers/net/ethernet/sfc/Kconfig
@@ -50,6 +50,7 @@ config SFC_MCDI_MON
config SFC_SRIOV
bool "Solarflare SFC9100-family SR-IOV support"
depends on SFC && PCI_IOV
+ depends on INET
default y
help
This enables support for the Single Root I/O Virtualization
diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile
index 55b9c73cd8ef..16293b58e0a8 100644
--- a/drivers/net/ethernet/sfc/Makefile
+++ b/drivers/net/ethernet/sfc/Makefile
@@ -10,7 +10,8 @@ sfc-y += efx.o efx_common.o efx_channels.o nic.o \
efx_devlink.o
sfc-$(CONFIG_SFC_MTD) += mtd.o
sfc-$(CONFIG_SFC_SRIOV) += sriov.o ef10_sriov.o ef100_sriov.o ef100_rep.o \
- mae.o tc.o tc_bindings.o tc_counters.o
+ mae.o tc.o tc_bindings.o tc_counters.o \
+ tc_encap_actions.o
obj-$(CONFIG_SFC) += sfc.o
diff --git a/drivers/net/ethernet/sfc/ef100_netdev.c b/drivers/net/ethernet/sfc/ef100_netdev.c
index be395cd8770b..7f7d560cb2b4 100644
--- a/drivers/net/ethernet/sfc/ef100_netdev.c
+++ b/drivers/net/ethernet/sfc/ef100_netdev.c
@@ -24,6 +24,7 @@
#include "rx_common.h"
#include "ef100_sriov.h"
#include "tc_bindings.h"
+#include "tc_encap_actions.h"
#include "efx_devlink.h"
static void ef100_update_name(struct efx_nic *efx)
@@ -40,19 +41,26 @@ static int ef100_alloc_vis(struct efx_nic *efx, unsigned int *allocated_vis)
unsigned int tx_vis = efx->n_tx_channels + efx->n_extra_tx_channels;
unsigned int rx_vis = efx->n_rx_channels;
unsigned int min_vis, max_vis;
+ int rc;
EFX_WARN_ON_PARANOID(efx->tx_queues_per_channel != 1);
tx_vis += efx->n_xdp_channels * efx->xdp_tx_per_channel;
max_vis = max(rx_vis, tx_vis);
- /* Currently don't handle resource starvation and only accept
- * our maximum needs and no less.
+ /* We require at least a single complete TX channel worth of queues. */
+ min_vis = efx->tx_queues_per_channel;
+
+ rc = efx_mcdi_alloc_vis(efx, min_vis, max_vis,
+ NULL, allocated_vis);
+
+ /* We retry allocating VIs by reallocating channels when we have not
+ * been able to allocate the maximum VIs.
*/
- min_vis = max_vis;
+ if (!rc && *allocated_vis < max_vis)
+ rc = -EAGAIN;
- return efx_mcdi_alloc_vis(efx, min_vis, max_vis,
- NULL, allocated_vis);
+ return rc;
}
static int ef100_remap_bar(struct efx_nic *efx, int max_vis)
@@ -133,9 +141,41 @@ static int ef100_net_open(struct net_device *net_dev)
goto fail;
rc = ef100_alloc_vis(efx, &allocated_vis);
- if (rc)
+ if (rc && rc != -EAGAIN)
goto fail;
+ /* Try one more time but with the maximum number of channels
+ * equal to the allocated VIs, which would more likely succeed.
+ */
+ if (rc == -EAGAIN) {
+ rc = efx_mcdi_free_vis(efx);
+ if (rc)
+ goto fail;
+
+ efx_remove_interrupts(efx);
+ efx->max_channels = allocated_vis;
+
+ rc = efx_probe_interrupts(efx);
+ if (rc)
+ goto fail;
+
+ rc = efx_set_channels(efx);
+ if (rc)
+ goto fail;
+
+ rc = ef100_alloc_vis(efx, &allocated_vis);
+ if (rc && rc != -EAGAIN)
+ goto fail;
+
+ /* It should be very unlikely that we failed here again, but in
+ * such a case we return ENOSPC.
+ */
+ if (rc == -EAGAIN) {
+ rc = -ENOSPC;
+ goto fail;
+ }
+ }
+
rc = efx_probe_channels(efx);
if (rc)
return rc;
@@ -261,14 +301,38 @@ int ef100_netdev_event(struct notifier_block *this,
{
struct efx_nic *efx = container_of(this, struct efx_nic, netdev_notifier);
struct net_device *net_dev = netdev_notifier_info_to_dev(ptr);
+ struct ef100_nic_data *nic_data = efx->nic_data;
+ int err;
if (efx->net_dev == net_dev &&
(event == NETDEV_CHANGENAME || event == NETDEV_REGISTER))
ef100_update_name(efx);
+ if (!nic_data->grp_mae)
+ return NOTIFY_DONE;
+ err = efx_tc_netdev_event(efx, event, net_dev);
+ if (err & NOTIFY_STOP_MASK)
+ return err;
+
return NOTIFY_DONE;
}
+static int ef100_netevent_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct efx_nic *efx = container_of(this, struct efx_nic, netevent_notifier);
+ struct ef100_nic_data *nic_data = efx->nic_data;
+ int err;
+
+ if (!nic_data->grp_mae)
+ return NOTIFY_DONE;
+ err = efx_tc_netevent_event(efx, event, ptr);
+ if (err & NOTIFY_STOP_MASK)
+ return err;
+
+ return NOTIFY_DONE;
+};
+
static int ef100_register_netdev(struct efx_nic *efx)
{
struct net_device *net_dev = efx->net_dev;
@@ -328,6 +392,7 @@ void ef100_remove_netdev(struct efx_probe_data *probe_data)
rtnl_unlock();
unregister_netdevice_notifier(&efx->netdev_notifier);
+ unregister_netevent_notifier(&efx->netevent_notifier);
#if defined(CONFIG_SFC_SRIOV)
if (!efx->type->is_vf)
efx_ef100_pci_sriov_disable(efx, true);
@@ -448,6 +513,14 @@ int ef100_probe_netdev(struct efx_probe_data *probe_data)
goto fail;
}
+ efx->netevent_notifier.notifier_call = ef100_netevent_event;
+ rc = register_netevent_notifier(&efx->netevent_notifier);
+ if (rc) {
+ netif_err(efx, probe, efx->net_dev,
+ "Failed to register netevent notifier, rc=%d\n", rc);
+ goto fail;
+ }
+
efx_probe_devlink_unlock(efx);
return rc;
fail:
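
[Note] The driver now registers a second notifier on the netevent chain, alongside the existing netdev notifier; both are removed in ef100_remove_netdev(). efx_tc_netevent_event() itself lives in the TC encap code rather than in this file. Purely to illustrate the netevent API being hooked here (everything below other than register_netevent_notifier()/NETEVENT_NEIGH_UPDATE is an assumption, not the driver's code), a handler reacting to neighbour updates looks roughly like:

    #include <net/netevent.h>
    #include <net/arp.h>

    static int example_netevent_cb(struct notifier_block *nb,
                                   unsigned long event, void *ptr)
    {
            struct neighbour *n = ptr;      /* payload of NETEVENT_NEIGH_UPDATE */

            if (event != NETEVENT_NEIGH_UPDATE)
                    return NOTIFY_DONE;
            if (n->tbl != &arp_tbl)         /* e.g. only IPv4 neighbours */
                    return NOTIFY_DONE;
            /* defer the real work (encap header regeneration) to process
             * context; this callback may run in atomic context
             */
            return NOTIFY_OK;
    }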
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index a4f22d8e6ac7..d670a319b379 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -32,6 +32,7 @@
#include "io.h"
#include "selftest.h"
#include "sriov.h"
+#include "efx_devlink.h"
#include "mcdi_port_common.h"
#include "mcdi_pcol.h"
@@ -877,6 +878,7 @@ static void efx_pci_remove(struct pci_dev *pci_dev)
if (efx->type->sriov_fini)
efx->type->sriov_fini(efx);
+ efx_fini_devlink_lock(efx);
efx_unregister_netdev(efx);
efx_mtd_remove(efx);
@@ -886,6 +888,7 @@ static void efx_pci_remove(struct pci_dev *pci_dev)
efx_fini_io(efx);
pci_dbg(efx->pci_dev, "shutdown successful\n");
+ efx_fini_devlink_and_unlock(efx);
efx_fini_struct(efx);
free_netdev(efx->net_dev);
probe_data = container_of(efx, struct efx_probe_data, efx);
@@ -1025,7 +1028,13 @@ static int efx_pci_probe_post_io(struct efx_nic *efx)
NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_NDO_XMIT;
+ /* devlink creation, registration and lock */
+ rc = efx_probe_devlink_and_lock(efx);
+ if (rc)
+ pci_err(efx->pci_dev, "devlink registration failed");
+
rc = efx_register_netdev(efx);
+ efx_probe_devlink_unlock(efx);
if (!rc)
return 0;
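
[Note] The efx.c changes bracket netdev (un)registration with the devlink instance lock, so that devl_port_register() calls made while the netdev and MAE state are being set up run under that lock. The resulting ordering, condensed (sketch, assuming the helpers behave as their names suggest):

    /* probe */
    efx_probe_devlink_and_lock(efx);    /* alloc + register devlink, return with instance lock held */
    efx_register_netdev(efx);
    efx_probe_devlink_unlock(efx);

    /* remove (mirror image) */
    efx_fini_devlink_lock(efx);         /* retake the devlink instance lock */
    efx_unregister_netdev(efx);
    /* ... MTD, channels, I/O teardown ... */
    efx_fini_devlink_and_unlock(efx);   /* unregister + free devlink */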
diff --git a/drivers/net/ethernet/sfc/efx_devlink.c b/drivers/net/ethernet/sfc/efx_devlink.c
index ef9971cbb695..b82dad50a5b1 100644
--- a/drivers/net/ethernet/sfc/efx_devlink.c
+++ b/drivers/net/ethernet/sfc/efx_devlink.c
@@ -25,40 +25,6 @@ struct efx_devlink {
};
#ifdef CONFIG_SFC_SRIOV
-static void efx_devlink_del_port(struct devlink_port *dl_port)
-{
- if (!dl_port)
- return;
- devl_port_unregister(dl_port);
-}
-
-static int efx_devlink_add_port(struct efx_nic *efx,
- struct mae_mport_desc *mport)
-{
- bool external = false;
-
- if (!ef100_mport_on_local_intf(efx, mport))
- external = true;
-
- switch (mport->mport_type) {
- case MAE_MPORT_DESC_MPORT_TYPE_VNIC:
- if (mport->vf_idx != MAE_MPORT_DESC_VF_IDX_NULL)
- devlink_port_attrs_pci_vf_set(&mport->dl_port, 0, mport->pf_idx,
- mport->vf_idx,
- external);
- else
- devlink_port_attrs_pci_pf_set(&mport->dl_port, 0, mport->pf_idx,
- external);
- break;
- default:
- /* MAE_MPORT_DESC_MPORT_ALIAS and UNDEFINED */
- return 0;
- }
-
- mport->dl_port.index = mport->mport_id;
-
- return devl_port_register(efx->devlink, &mport->dl_port, mport->mport_id);
-}
static int efx_devlink_port_addr_get(struct devlink_port *port, u8 *hw_addr,
int *hw_addr_len,
@@ -158,6 +124,48 @@ static int efx_devlink_port_addr_set(struct devlink_port *port,
return rc;
}
+static const struct devlink_port_ops sfc_devlink_port_ops = {
+ .port_fn_hw_addr_get = efx_devlink_port_addr_get,
+ .port_fn_hw_addr_set = efx_devlink_port_addr_set,
+};
+
+static void efx_devlink_del_port(struct devlink_port *dl_port)
+{
+ if (!dl_port)
+ return;
+ devl_port_unregister(dl_port);
+}
+
+static int efx_devlink_add_port(struct efx_nic *efx,
+ struct mae_mport_desc *mport)
+{
+ bool external = false;
+
+ if (!ef100_mport_on_local_intf(efx, mport))
+ external = true;
+
+ switch (mport->mport_type) {
+ case MAE_MPORT_DESC_MPORT_TYPE_VNIC:
+ if (mport->vf_idx != MAE_MPORT_DESC_VF_IDX_NULL)
+ devlink_port_attrs_pci_vf_set(&mport->dl_port, 0, mport->pf_idx,
+ mport->vf_idx,
+ external);
+ else
+ devlink_port_attrs_pci_pf_set(&mport->dl_port, 0, mport->pf_idx,
+ external);
+ break;
+ default:
+ /* MAE_MPORT_DESC_MPORT_ALIAS and UNDEFINED */
+ return 0;
+ }
+
+ mport->dl_port.index = mport->mport_id;
+
+ return devl_port_register_with_ops(efx->devlink, &mport->dl_port,
+ mport->mport_id,
+ &sfc_devlink_port_ops);
+}
+
#endif
static int efx_devlink_info_nvram_partition(struct efx_nic *efx,
@@ -609,10 +617,6 @@ static int efx_devlink_info_get(struct devlink *devlink,
static const struct devlink_ops sfc_devlink_ops = {
.info_get = efx_devlink_info_get,
-#ifdef CONFIG_SFC_SRIOV
- .port_function_hw_addr_get = efx_devlink_port_addr_get,
- .port_function_hw_addr_set = efx_devlink_port_addr_set,
-#endif
};
#ifdef CONFIG_SFC_SRIOV
diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c
index 49706a7b94bf..0cab508f2f9d 100644
--- a/drivers/net/ethernet/sfc/mae.c
+++ b/drivers/net/ethernet/sfc/mae.c
@@ -15,6 +15,7 @@
#include "mcdi.h"
#include "mcdi_pcol.h"
#include "mcdi_pcol_mae.h"
+#include "tc_encap_actions.h"
int efx_mae_allocate_mport(struct efx_nic *efx, u32 *id, u32 *label)
{
@@ -482,12 +483,14 @@ int efx_mae_match_check_caps(struct efx_nic *efx,
rc; \
})
/* Checks that the fields needed for encap-rule matches are supported by the
- * MAE. All the fields are exact-match.
+ * MAE. All the fields are exact-match, except possibly ENC_IP_TOS and
+ * ENC_L4_SPORT.
*/
int efx_mae_check_encap_match_caps(struct efx_nic *efx, bool ipv6,
+ u8 ip_tos_mask, __be16 udp_sport_mask,
struct netlink_ext_ack *extack)
{
u8 *supported_fields = efx->tc->caps->outer_rule_fields;
+ enum mask_type typ;
int rc;
if (CHECK(ENC_ETHER_TYPE))
@@ -504,6 +507,22 @@ int efx_mae_check_encap_match_caps(struct efx_nic *efx, bool ipv6,
if (CHECK(ENC_L4_DPORT) ||
CHECK(ENC_IP_PROTO))
return rc;
+ typ = classify_mask((const u8 *)&udp_sport_mask, sizeof(udp_sport_mask));
+ rc = efx_mae_match_check_cap_typ(supported_fields[MAE_FIELD_ENC_L4_SPORT],
+ typ);
+ if (rc) {
+ NL_SET_ERR_MSG_FMT_MOD(extack, "No support for %s mask in field %s",
+ mask_type_name(typ), "enc_src_port");
+ return rc;
+ }
+ typ = classify_mask(&ip_tos_mask, sizeof(ip_tos_mask));
+ rc = efx_mae_match_check_cap_typ(supported_fields[MAE_FIELD_ENC_IP_TOS],
+ typ);
+ if (rc) {
+ NL_SET_ERR_MSG_FMT_MOD(extack, "No support for %s mask in field %s",
+ mask_type_name(typ), "enc_ip_tos");
+ return rc;
+ }
return 0;
}
#undef CHECK
@@ -592,6 +611,87 @@ static int efx_mae_encap_type_to_mae_type(enum efx_encap_type type)
}
}
+int efx_mae_allocate_encap_md(struct efx_nic *efx,
+ struct efx_tc_encap_action *encap)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_ENCAP_HEADER_ALLOC_IN_LEN(EFX_TC_MAX_ENCAP_HDR));
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_ENCAP_HEADER_ALLOC_OUT_LEN);
+ size_t inlen, outlen;
+ int rc;
+
+ rc = efx_mae_encap_type_to_mae_type(encap->type);
+ if (rc < 0)
+ return rc;
+ MCDI_SET_DWORD(inbuf, MAE_ENCAP_HEADER_ALLOC_IN_ENCAP_TYPE, rc);
+ inlen = MC_CMD_MAE_ENCAP_HEADER_ALLOC_IN_LEN(encap->encap_hdr_len);
+ if (WARN_ON(inlen > sizeof(inbuf))) /* can't happen */
+ return -EINVAL;
+ memcpy(MCDI_PTR(inbuf, MAE_ENCAP_HEADER_ALLOC_IN_HDR_DATA),
+ encap->encap_hdr,
+ encap->encap_hdr_len);
+ rc = efx_mcdi_rpc(efx, MC_CMD_MAE_ENCAP_HEADER_ALLOC, inbuf,
+ inlen, outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ return rc;
+ if (outlen < sizeof(outbuf))
+ return -EIO;
+ encap->fw_id = MCDI_DWORD(outbuf, MAE_ENCAP_HEADER_ALLOC_OUT_ENCAP_HEADER_ID);
+ return 0;
+}
+
+int efx_mae_update_encap_md(struct efx_nic *efx,
+ struct efx_tc_encap_action *encap)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_ENCAP_HEADER_UPDATE_IN_LEN(EFX_TC_MAX_ENCAP_HDR));
+ size_t inlen;
+ int rc;
+
+ rc = efx_mae_encap_type_to_mae_type(encap->type);
+ if (rc < 0)
+ return rc;
+ MCDI_SET_DWORD(inbuf, MAE_ENCAP_HEADER_UPDATE_IN_ENCAP_TYPE, rc);
+ MCDI_SET_DWORD(inbuf, MAE_ENCAP_HEADER_UPDATE_IN_EH_ID,
+ encap->fw_id);
+ inlen = MC_CMD_MAE_ENCAP_HEADER_UPDATE_IN_LEN(encap->encap_hdr_len);
+ if (WARN_ON(inlen > sizeof(inbuf))) /* can't happen */
+ return -EINVAL;
+ memcpy(MCDI_PTR(inbuf, MAE_ENCAP_HEADER_UPDATE_IN_HDR_DATA),
+ encap->encap_hdr,
+ encap->encap_hdr_len);
+
+ BUILD_BUG_ON(MC_CMD_MAE_ENCAP_HEADER_UPDATE_OUT_LEN != 0);
+ return efx_mcdi_rpc(efx, MC_CMD_MAE_ENCAP_HEADER_UPDATE, inbuf,
+ inlen, NULL, 0, NULL);
+}
+
+int efx_mae_free_encap_md(struct efx_nic *efx,
+ struct efx_tc_encap_action *encap)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_ENCAP_HEADER_FREE_OUT_LEN(1));
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_ENCAP_HEADER_FREE_IN_LEN(1));
+ size_t outlen;
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, MAE_ENCAP_HEADER_FREE_IN_EH_ID, encap->fw_id);
+ rc = efx_mcdi_rpc(efx, MC_CMD_MAE_ENCAP_HEADER_FREE, inbuf,
+ sizeof(inbuf), outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ return rc;
+ if (outlen < sizeof(outbuf))
+ return -EIO;
+ /* FW freed a different ID than we asked for, should also never happen.
+ * Warn because it means we've now got a different idea to the FW of
+ * what encap_mds exist, which could cause mayhem later.
+ */
+ if (WARN_ON(MCDI_DWORD(outbuf, MAE_ENCAP_HEADER_FREE_OUT_FREED_EH_ID) != encap->fw_id))
+ return -EIO;
+ /* We're probably about to free @encap, but let's just make sure its
+ * fw_id is blatted so that it won't look valid if it leaks out.
+ */
+ encap->fw_id = MC_CMD_MAE_ENCAP_HEADER_ALLOC_OUT_ENCAP_HEADER_ID_NULL;
+ return 0;
+}
+
int efx_mae_lookup_mport(struct efx_nic *efx, u32 vf_idx, u32 *id)
{
struct ef100_nic_data *nic_data = efx->nic_data;
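
[Note] These three MCDI wrappers manage entries in the firmware's encap-header table. A typical lifecycle, as wired up by the TC code later in this patch (sketch only; encap header bytes are prebuilt by the static efx_gen_encap_header() helper in tc_encap_actions.c):

    /* encap->encap_hdr / encap->encap_hdr_len already populated */
    rc = efx_mae_allocate_encap_md(efx, encap);    /* firmware assigns encap->fw_id */

    /* action sets then reference encap->fw_id via
     * MAE_ACTION_SET_ALLOC_IN_ENCAP_HEADER_ID (see efx_mae_alloc_action_set())
     */

    /* when the neighbour, and hence the Ethernet header, changes: */
    rc = efx_mae_update_encap_md(efx, encap);      /* same fw_id, new header bytes */

    /* on teardown, once no action set uses it: */
    rc = efx_mae_free_encap_md(efx, encap);        /* fw_id blatted to ..._ID_NULL */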
@@ -815,8 +915,12 @@ int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act)
MCDI_SET_WORD_BE(inbuf, MAE_ACTION_SET_ALLOC_IN_VLAN1_PROTO_BE,
act->vlan_proto[1]);
}
- MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_ENCAP_HEADER_ID,
- MC_CMD_MAE_ENCAP_HEADER_ALLOC_OUT_ENCAP_HEADER_ID_NULL);
+ if (act->encap_md)
+ MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_ENCAP_HEADER_ID,
+ act->encap_md->fw_id);
+ else
+ MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_ENCAP_HEADER_ID,
+ MC_CMD_MAE_ENCAP_HEADER_ALLOC_OUT_ENCAP_HEADER_ID_NULL);
if (act->deliver)
MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_DELIVER,
act->dest_mport);
@@ -1001,8 +1105,16 @@ int efx_mae_register_encap_match(struct efx_nic *efx,
encap->udp_dport);
MCDI_STRUCT_SET_WORD_BE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_L4_DPORT_BE_MASK,
~(__be16)0);
+ MCDI_STRUCT_SET_WORD_BE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_L4_SPORT_BE,
+ encap->udp_sport);
+ MCDI_STRUCT_SET_WORD_BE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_L4_SPORT_BE_MASK,
+ encap->udp_sport_mask);
MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_PROTO, IPPROTO_UDP);
MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_PROTO_MASK, ~0);
+ MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_TOS,
+ encap->ip_tos);
+ MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_TOS_MASK,
+ encap->ip_tos_mask);
rc = efx_mcdi_rpc(efx, MC_CMD_MAE_OUTER_RULE_INSERT, inbuf,
sizeof(inbuf), outbuf, sizeof(outbuf), &outlen);
if (rc)
@@ -1203,6 +1315,29 @@ int efx_mae_insert_rule(struct efx_nic *efx, const struct efx_tc_match *match,
return 0;
}
+int efx_mae_update_rule(struct efx_nic *efx, u32 acts_id, u32 id)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_ACTION_RULE_UPDATE_IN_LEN);
+ MCDI_DECLARE_STRUCT_PTR(response);
+
+ BUILD_BUG_ON(MC_CMD_MAE_ACTION_RULE_UPDATE_OUT_LEN);
+ response = _MCDI_DWORD(inbuf, MAE_ACTION_RULE_UPDATE_IN_RESPONSE);
+
+ MCDI_SET_DWORD(inbuf, MAE_ACTION_RULE_UPDATE_IN_AR_ID, id);
+ if (efx_mae_asl_id(acts_id)) {
+ MCDI_STRUCT_SET_DWORD(response, MAE_ACTION_RULE_RESPONSE_ASL_ID, acts_id);
+ MCDI_STRUCT_SET_DWORD(response, MAE_ACTION_RULE_RESPONSE_AS_ID,
+ MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL);
+ } else {
+ /* We only had one AS, so we didn't wrap it in an ASL */
+ MCDI_STRUCT_SET_DWORD(response, MAE_ACTION_RULE_RESPONSE_ASL_ID,
+ MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL);
+ MCDI_STRUCT_SET_DWORD(response, MAE_ACTION_RULE_RESPONSE_AS_ID, acts_id);
+ }
+ return efx_mcdi_rpc(efx, MC_CMD_MAE_ACTION_RULE_UPDATE, inbuf, sizeof(inbuf),
+ NULL, 0, NULL);
+}
+
int efx_mae_delete_rule(struct efx_nic *efx, u32 id)
{
MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_ACTION_RULE_DELETE_OUT_LEN(1));
diff --git a/drivers/net/ethernet/sfc/mae.h b/drivers/net/ethernet/sfc/mae.h
index 9226219491a0..24abfe509690 100644
--- a/drivers/net/ethernet/sfc/mae.h
+++ b/drivers/net/ethernet/sfc/mae.h
@@ -82,6 +82,7 @@ int efx_mae_match_check_caps(struct efx_nic *efx,
const struct efx_tc_match_fields *mask,
struct netlink_ext_ack *extack);
int efx_mae_check_encap_match_caps(struct efx_nic *efx, bool ipv6,
+ u8 ip_tos_mask, __be16 udp_sport_mask,
struct netlink_ext_ack *extack);
int efx_mae_check_encap_type_supported(struct efx_nic *efx,
enum efx_encap_type typ);
@@ -89,6 +90,13 @@ int efx_mae_check_encap_type_supported(struct efx_nic *efx,
int efx_mae_allocate_counter(struct efx_nic *efx, struct efx_tc_counter *cnt);
int efx_mae_free_counter(struct efx_nic *efx, struct efx_tc_counter *cnt);
+int efx_mae_allocate_encap_md(struct efx_nic *efx,
+ struct efx_tc_encap_action *encap);
+int efx_mae_update_encap_md(struct efx_nic *efx,
+ struct efx_tc_encap_action *encap);
+int efx_mae_free_encap_md(struct efx_nic *efx,
+ struct efx_tc_encap_action *encap);
+
int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act);
int efx_mae_free_action_set(struct efx_nic *efx, u32 fw_id);
@@ -104,6 +112,7 @@ int efx_mae_unregister_encap_match(struct efx_nic *efx,
int efx_mae_insert_rule(struct efx_nic *efx, const struct efx_tc_match *match,
u32 prio, u32 acts_id, u32 *id);
+int efx_mae_update_rule(struct efx_nic *efx, u32 acts_id, u32 id);
int efx_mae_delete_rule(struct efx_nic *efx, u32 id);
int efx_init_mae(struct efx_nic *efx);
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index fcd51d3992fa..a7a22b019794 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -27,6 +27,7 @@
#include <linux/mtd/mtd.h>
#include <net/busy_poll.h>
#include <net/xdp.h>
+#include <net/netevent.h>
#include "enum.h"
#include "bitfield.h"
@@ -996,6 +997,7 @@ struct efx_mae;
* @xdp_rxq_info_failed: Have any of the rx queues failed to initialise their
* xdp_rxq_info structures?
* @netdev_notifier: Netdevice notifier.
+ * @netevent_notifier: Netevent notifier (for neighbour updates).
* @tc: state for TC offload (EF100).
* @devlink: reference to devlink structure owned by this device
* @dl_port: devlink port associated with the PF
@@ -1183,6 +1185,7 @@ struct efx_nic {
bool xdp_rxq_info_failed;
struct notifier_block netdev_notifier;
+ struct notifier_block netevent_notifier;
struct efx_tc_state *tc;
struct devlink *devlink;
diff --git a/drivers/net/ethernet/sfc/siena/tx_common.c b/drivers/net/ethernet/sfc/siena/tx_common.c
index 93a32d61944f..a7a9ab304e13 100644
--- a/drivers/net/ethernet/sfc/siena/tx_common.c
+++ b/drivers/net/ethernet/sfc/siena/tx_common.c
@@ -12,6 +12,7 @@
#include "efx.h"
#include "nic_common.h"
#include "tx_common.h"
+#include <net/gso.h>
static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue)
{
diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c
index c004443c1d58..77acdb60381e 100644
--- a/drivers/net/ethernet/sfc/tc.c
+++ b/drivers/net/ethernet/sfc/tc.c
@@ -14,11 +14,12 @@
#include <net/geneve.h>
#include "tc.h"
#include "tc_bindings.h"
+#include "tc_encap_actions.h"
#include "mae.h"
#include "ef100_rep.h"
#include "efx.h"
-static enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
+enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
{
if (netif_is_vxlan(net_dev))
return EFX_ENCAP_TYPE_VXLAN;
@@ -33,8 +34,8 @@ static enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
* May return NULL for the PF (us), or an error pointer for a device that
* isn't supported as a TC offload endpoint
*/
-static struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx,
- struct net_device *dev)
+struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx,
+ struct net_device *dev)
{
struct efx_rep *efv;
@@ -70,7 +71,7 @@ static s64 efx_tc_flower_internal_mport(struct efx_nic *efx, struct efx_rep *efv
}
/* Convert a driver-internal vport ID into an external device (wire or VF) */
-static s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv)
+s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv)
{
u32 mport;
@@ -111,6 +112,10 @@ static void efx_tc_free_action_set(struct efx_nic *efx,
}
if (act->count)
efx_tc_flower_put_counter_index(efx, act->count);
+ if (act->encap_md) {
+ list_del(&act->encap_user);
+ efx_tc_flower_release_encap_md(efx, act->encap_md);
+ }
kfree(act);
}
@@ -132,23 +137,6 @@ static void efx_tc_free_action_set_list(struct efx_nic *efx,
/* Don't kfree, as acts is embedded inside a struct efx_tc_flow_rule */
}
-static void efx_tc_flow_free(void *ptr, void *arg)
-{
- struct efx_tc_flow_rule *rule = ptr;
- struct efx_nic *efx = arg;
-
- netif_err(efx, drv, efx->net_dev,
- "tc rule %lx still present at teardown, removing\n",
- rule->cookie);
-
- efx_mae_delete_rule(efx, rule->fw_id);
-
- /* Release entries in subsidiary tables */
- efx_tc_free_action_set_list(efx, &rule->acts, true);
-
- kfree(rule);
-}
-
/* Boilerplate for the simple 'copy a field' cases */
#define _MAP_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field) \
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_##_name)) { \
@@ -219,6 +207,7 @@ static int efx_tc_flower_parse_match(struct efx_nic *efx,
BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
BIT(FLOW_DISSECTOR_KEY_TCP) |
@@ -363,20 +352,48 @@ static int efx_tc_flower_parse_match(struct efx_nic *efx,
return 0;
}
+static void efx_tc_flower_release_encap_match(struct efx_nic *efx,
+ struct efx_tc_encap_match *encap)
+{
+ int rc;
+
+ if (!refcount_dec_and_test(&encap->ref))
+ return; /* still in use */
+
+ if (encap->type == EFX_TC_EM_DIRECT) {
+ rc = efx_mae_unregister_encap_match(efx, encap);
+ if (rc)
+ /* Display message but carry on and remove entry from our
+ * SW tables, because there's not much we can do about it.
+ */
+ netif_err(efx, drv, efx->net_dev,
+ "Failed to release encap match %#x, rc %d\n",
+ encap->fw_id, rc);
+ }
+ rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
+ efx_tc_encap_match_ht_params);
+ if (encap->pseudo)
+ efx_tc_flower_release_encap_match(efx, encap->pseudo);
+ kfree(encap);
+}
+
static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
struct efx_tc_match *match,
enum efx_encap_type type,
+ enum efx_tc_em_pseudo_type em_type,
+ u8 child_ip_tos_mask,
+ __be16 child_udp_sport_mask,
struct netlink_ext_ack *extack)
{
- struct efx_tc_encap_match *encap, *old;
+ struct efx_tc_encap_match *encap, *old, *pseudo = NULL;
bool ipv6 = false;
int rc;
/* We require that the socket-defining fields (IP addrs and UDP dest
- * port) are present and exact-match. Other fields are currently not
- * allowed. This meets what OVS will ask for, and means that we don't
- * need to handle difficult checks for overlapping matches as could
- * come up if we allowed masks or varying sets of match fields.
+ * port) are present and exact-match. Other fields may only be used
+ * if the field-set (and any masks) are the same for all encap
+ * matches on the same <sip,dip,dport> tuple; this is enforced by
+ * pseudo encap matches.
*/
if (match->mask.enc_dst_ip | match->mask.enc_src_ip) {
if (!IS_ALL_ONES(match->mask.enc_dst_ip)) {
@@ -414,29 +431,42 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
NL_SET_ERR_MSG_MOD(extack, "Egress encap match is not exact on dst UDP port");
return -EOPNOTSUPP;
}
- if (match->mask.enc_sport) {
- NL_SET_ERR_MSG_MOD(extack, "Egress encap match on src UDP port not supported");
- return -EOPNOTSUPP;
- }
- if (match->mask.enc_ip_tos) {
- NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP ToS not supported");
- return -EOPNOTSUPP;
+ if (match->mask.enc_sport || match->mask.enc_ip_tos) {
+ struct efx_tc_match pmatch = *match;
+
+ if (em_type == EFX_TC_EM_PSEUDO_MASK) { /* can't happen */
+ NL_SET_ERR_MSG_MOD(extack, "Bad recursion in egress encap match handler");
+ return -EOPNOTSUPP;
+ }
+ pmatch.value.enc_ip_tos = 0;
+ pmatch.mask.enc_ip_tos = 0;
+ pmatch.value.enc_sport = 0;
+ pmatch.mask.enc_sport = 0;
+ rc = efx_tc_flower_record_encap_match(efx, &pmatch, type,
+ EFX_TC_EM_PSEUDO_MASK,
+ match->mask.enc_ip_tos,
+ match->mask.enc_sport,
+ extack);
+ if (rc)
+ return rc;
+ pseudo = pmatch.encap;
}
if (match->mask.enc_ip_ttl) {
NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP TTL not supported");
- return -EOPNOTSUPP;
+ rc = -EOPNOTSUPP;
+ goto fail_pseudo;
}
- rc = efx_mae_check_encap_match_caps(efx, ipv6, extack);
- if (rc) {
- NL_SET_ERR_MSG_FMT_MOD(extack, "MAE hw reports no support for IPv%d encap matches",
- ipv6 ? 6 : 4);
- return -EOPNOTSUPP;
- }
+ rc = efx_mae_check_encap_match_caps(efx, ipv6, match->mask.enc_ip_tos,
+ match->mask.enc_sport, extack);
+ if (rc)
+ goto fail_pseudo;
encap = kzalloc(sizeof(*encap), GFP_USER);
- if (!encap)
- return -ENOMEM;
+ if (!encap) {
+ rc = -ENOMEM;
+ goto fail_pseudo;
+ }
encap->src_ip = match->value.enc_src_ip;
encap->dst_ip = match->value.enc_dst_ip;
#ifdef CONFIG_IPV6
@@ -445,12 +475,66 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
#endif
encap->udp_dport = match->value.enc_dport;
encap->tun_type = type;
+ encap->ip_tos = match->value.enc_ip_tos;
+ encap->ip_tos_mask = match->mask.enc_ip_tos;
+ encap->child_ip_tos_mask = child_ip_tos_mask;
+ encap->udp_sport = match->value.enc_sport;
+ encap->udp_sport_mask = match->mask.enc_sport;
+ encap->child_udp_sport_mask = child_udp_sport_mask;
+ encap->type = em_type;
+ encap->pseudo = pseudo;
old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_match_ht,
&encap->linkage,
efx_tc_encap_match_ht_params);
if (old) {
/* don't need our new entry */
kfree(encap);
+ if (pseudo) /* don't need our new pseudo either */
+ efx_tc_flower_release_encap_match(efx, pseudo);
+ /* check old and new em_types are compatible */
+ switch (old->type) {
+ case EFX_TC_EM_DIRECT:
+ /* old EM is in hardware, so mustn't overlap with a
+ * pseudo, but may be shared with another direct EM
+ */
+ if (em_type == EFX_TC_EM_DIRECT)
+ break;
+ NL_SET_ERR_MSG_MOD(extack, "Pseudo encap match conflicts with existing direct entry");
+ return -EEXIST;
+ case EFX_TC_EM_PSEUDO_MASK:
+ /* old EM is protecting a ToS- or src port-qualified
+ * filter, so may only be shared with another pseudo
+ * for the same ToS and src port masks.
+ */
+ if (em_type != EFX_TC_EM_PSEUDO_MASK) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "%s encap match conflicts with existing pseudo(MASK) entry",
+ em_type ? "Pseudo" : "Direct");
+ return -EEXIST;
+ }
+ if (child_ip_tos_mask != old->child_ip_tos_mask) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Pseudo encap match for TOS mask %#04x conflicts with existing pseudo(MASK) entry for TOS mask %#04x",
+ child_ip_tos_mask,
+ old->child_ip_tos_mask);
+ return -EEXIST;
+ }
+ if (child_udp_sport_mask != old->child_udp_sport_mask) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Pseudo encap match for UDP src port mask %#x conflicts with existing pseudo(MASK) entry for mask %#x",
+ child_udp_sport_mask,
+ old->child_udp_sport_mask);
+ return -EEXIST;
+ }
+ break;
+ default: /* Unrecognised pseudo-type. Just say no */
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "%s encap match conflicts with existing pseudo(%d) entry",
+ em_type ? "Pseudo" : "Direct",
+ old->type);
+ return -EEXIST;
+ }
+ /* check old and new tun_types are compatible */
if (old->tun_type != type) {
NL_SET_ERR_MSG_FMT_MOD(extack,
"Egress encap match with conflicting tun_type %u != %u",
@@ -462,10 +546,12 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
/* existing entry found */
encap = old;
} else {
- rc = efx_mae_register_encap_match(efx, encap);
- if (rc) {
- NL_SET_ERR_MSG_MOD(extack, "Failed to record egress encap match in HW");
- goto fail;
+ if (em_type == EFX_TC_EM_DIRECT) {
+ rc = efx_mae_register_encap_match(efx, encap);
+ if (rc) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to record egress encap match in HW");
+ goto fail;
+ }
}
refcount_set(&encap->ref, 1);
}
@@ -475,30 +561,12 @@ fail:
rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
efx_tc_encap_match_ht_params);
kfree(encap);
+fail_pseudo:
+ if (pseudo)
+ efx_tc_flower_release_encap_match(efx, pseudo);
return rc;
}
-static void efx_tc_flower_release_encap_match(struct efx_nic *efx,
- struct efx_tc_encap_match *encap)
-{
- int rc;
-
- if (!refcount_dec_and_test(&encap->ref))
- return; /* still in use */
-
- rc = efx_mae_unregister_encap_match(efx, encap);
- if (rc)
- /* Display message but carry on and remove entry from our
- * SW tables, because there's not much we can do about it.
- */
- netif_err(efx, drv, efx->net_dev,
- "Failed to release encap match %#x, rc %d\n",
- encap->fw_id, rc);
- rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
- efx_tc_encap_match_ht_params);
- kfree(encap);
-}
-
static void efx_tc_delete_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
{
efx_mae_delete_rule(efx, rule->fw_id);
@@ -531,6 +599,7 @@ enum efx_tc_action_order {
EFX_TC_AO_VLAN_POP,
EFX_TC_AO_VLAN_PUSH,
EFX_TC_AO_COUNT,
+ EFX_TC_AO_ENCAP,
EFX_TC_AO_DELIVER
};
/* Determine whether we can add @new action without violating order */
@@ -560,6 +629,10 @@ static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
if (act->count)
return false;
fallthrough;
+ case EFX_TC_AO_ENCAP:
+ if (act->encap_md)
+ return false;
+ fallthrough;
case EFX_TC_AO_DELIVER:
return !act->deliver;
default:
@@ -650,6 +723,7 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
}
rc = efx_tc_flower_record_encap_match(efx, &match, type,
+ EFX_TC_EM_DIRECT, 0, 0,
extack);
if (rc)
return rc;
@@ -854,11 +928,13 @@ static int efx_tc_flower_replace(struct efx_nic *efx,
{
struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
struct netlink_ext_ack *extack = tc->common.extack;
+ const struct ip_tunnel_info *encap_info = NULL;
struct efx_tc_flow_rule *rule = NULL, *old;
struct efx_tc_action_set *act = NULL;
const struct flow_action_entry *fa;
struct efx_rep *from_efv, *to_efv;
struct efx_tc_match match;
+ u32 acts_id;
s64 rc;
int i;
@@ -1023,6 +1099,48 @@ static int efx_tc_flower_replace(struct efx_nic *efx,
case FLOW_ACTION_MIRRED:
save = *act;
+ if (encap_info) {
+ struct efx_tc_encap_action *encap;
+
+ if (!efx_tc_flower_action_order_ok(act,
+ EFX_TC_AO_ENCAP)) {
+ rc = -EOPNOTSUPP;
+ NL_SET_ERR_MSG_MOD(extack, "Encap action violates action order");
+ goto release;
+ }
+ encap = efx_tc_flower_create_encap_md(
+ efx, encap_info, fa->dev, extack);
+ if (IS_ERR_OR_NULL(encap)) {
+ rc = PTR_ERR(encap);
+ if (!rc)
+ rc = -EIO; /* arbitrary */
+ goto release;
+ }
+ act->encap_md = encap;
+ list_add_tail(&act->encap_user, &encap->users);
+ act->dest_mport = encap->dest_mport;
+ act->deliver = 1;
+ rc = efx_mae_alloc_action_set(efx, act);
+ if (rc) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (encap)");
+ goto release;
+ }
+ list_add_tail(&act->list, &rule->acts.list);
+ act->user = &rule->acts;
+ act = NULL;
+ if (fa->id == FLOW_ACTION_REDIRECT)
+ break; /* end of the line */
+ /* Mirror, so continue on with saved act */
+ save.count = NULL;
+ act = kzalloc(sizeof(*act), GFP_USER);
+ if (!act) {
+ rc = -ENOMEM;
+ goto release;
+ }
+ *act = save;
+ break;
+ }
+
if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) {
/* can't happen */
rc = -EOPNOTSUPP;
@@ -1086,6 +1204,37 @@ static int efx_tc_flower_replace(struct efx_nic *efx,
act->vlan_proto[act->vlan_push] = fa->vlan.proto;
act->vlan_push++;
break;
+ case FLOW_ACTION_TUNNEL_ENCAP:
+ if (encap_info) {
+ /* Can't specify encap multiple times.
+ * If you want to overwrite an existing
+ * encap_info, use an intervening
+ * FLOW_ACTION_TUNNEL_DECAP to clear it.
+ */
+ NL_SET_ERR_MSG_MOD(extack, "Tunnel key set when already set");
+ rc = -EINVAL;
+ goto release;
+ }
+ if (!fa->tunnel) {
+ NL_SET_ERR_MSG_MOD(extack, "Tunnel key set is missing key");
+ rc = -EOPNOTSUPP;
+ goto release;
+ }
+ encap_info = fa->tunnel;
+ break;
+ case FLOW_ACTION_TUNNEL_DECAP:
+ if (encap_info) {
+ encap_info = NULL;
+ break;
+ }
+ /* Since we don't support enc_key matches on ingress
+ * (and if we did there'd be no tunnel-device to give
+ * us a type), we can't offload a decap that's not
+ * just undoing a previous encap action.
+ */
+ NL_SET_ERR_MSG_MOD(extack, "Cannot offload tunnel decap action without tunnel device");
+ rc = -EOPNOTSUPP;
+ goto release;
default:
NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
fa->id);
@@ -1129,8 +1278,21 @@ static int efx_tc_flower_replace(struct efx_nic *efx,
NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw");
goto release;
}
+ if (from_efv == EFX_EFV_PF)
+ /* PF netdev, so rule applies to traffic from wire */
+ rule->fallback = &efx->tc->facts.pf;
+ else
+ /* repdev, so rule applies to traffic from representee */
+ rule->fallback = &efx->tc->facts.reps;
+ if (!efx_tc_check_ready(efx, rule)) {
+ netif_dbg(efx, drv, efx->net_dev, "action not ready for hw\n");
+ acts_id = rule->fallback->fw_id;
+ } else {
+ netif_dbg(efx, drv, efx->net_dev, "ready for hw\n");
+ acts_id = rule->acts.fw_id;
+ }
rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
- rule->acts.fw_id, &rule->fw_id);
+ acts_id, &rule->fw_id);
if (rc) {
NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
goto release_acts;
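
[Note] The effect of the block above: a newly offloaded rule whose encap header cannot be generated yet (for example, the neighbour is still unresolved) is inserted pointing at a fallback action-set-list rather than its own, and is repointed with efx_mae_update_rule() once the encap entry becomes valid. In outline (sketch; the "ready" transition happens in code outside this excerpt):

    /* at insert time */
    acts_id = efx_tc_check_ready(efx, rule) ? rule->acts.fw_id
                                            : rule->fallback->fw_id;
    efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC, acts_id, &rule->fw_id);

    /* later, when the neighbour resolves and the header is written */
    efx_mae_update_rule(efx, rule->acts.fw_id, rule->fw_id);

    /* ...and back to the fallback if it goes stale (see efx_tc_update_encap()) */
    efx_mae_update_rule(efx, rule->fallback->fw_id, rule->fw_id);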
@@ -1327,6 +1489,58 @@ void efx_tc_deconfigure_default_rule(struct efx_nic *efx,
rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
}
+static int efx_tc_configure_fallback_acts(struct efx_nic *efx, u32 eg_port,
+ struct efx_tc_action_set_list *acts)
+{
+ struct efx_tc_action_set *act;
+ int rc;
+
+ act = kzalloc(sizeof(*act), GFP_KERNEL);
+ if (!act)
+ return -ENOMEM;
+ act->deliver = 1;
+ act->dest_mport = eg_port;
+ rc = efx_mae_alloc_action_set(efx, act);
+ if (rc)
+ goto fail1;
+ EFX_WARN_ON_PARANOID(!list_empty(&acts->list));
+ list_add_tail(&act->list, &acts->list);
+ rc = efx_mae_alloc_action_set_list(efx, acts);
+ if (rc)
+ goto fail2;
+ return 0;
+fail2:
+ list_del(&act->list);
+ efx_mae_free_action_set(efx, act->fw_id);
+fail1:
+ kfree(act);
+ return rc;
+}
+
+static int efx_tc_configure_fallback_acts_pf(struct efx_nic *efx)
+{
+ struct efx_tc_action_set_list *acts = &efx->tc->facts.pf;
+ u32 eg_port;
+
+ efx_mae_mport_uplink(efx, &eg_port);
+ return efx_tc_configure_fallback_acts(efx, eg_port, acts);
+}
+
+static int efx_tc_configure_fallback_acts_reps(struct efx_nic *efx)
+{
+ struct efx_tc_action_set_list *acts = &efx->tc->facts.reps;
+ u32 eg_port;
+
+ efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port);
+ return efx_tc_configure_fallback_acts(efx, eg_port, acts);
+}
+
+static void efx_tc_deconfigure_fallback_acts(struct efx_nic *efx,
+ struct efx_tc_action_set_list *acts)
+{
+ efx_tc_free_action_set_list(efx, acts, true);
+}
+
static int efx_tc_configure_rep_mport(struct efx_nic *efx)
{
u32 rep_mport_label;
@@ -1419,6 +1633,12 @@ int efx_init_tc(struct efx_nic *efx)
rc = efx_tc_configure_rep_mport(efx);
if (rc)
return rc;
+ rc = efx_tc_configure_fallback_acts_pf(efx);
+ if (rc)
+ return rc;
+ rc = efx_tc_configure_fallback_acts_reps(efx);
+ if (rc)
+ return rc;
efx->tc->up = true;
rc = flow_indr_dev_register(efx_tc_indr_setup_cb, efx);
if (rc)
@@ -1436,6 +1656,8 @@ void efx_fini_tc(struct efx_nic *efx)
efx_tc_deconfigure_rep_mport(efx);
efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.pf);
efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.wire);
+ efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.pf);
+ efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.reps);
efx->tc->up = false;
}
@@ -1451,6 +1673,21 @@ static void efx_tc_encap_match_free(void *ptr, void *__unused)
kfree(encap);
}
+static void efx_tc_flow_free(void *ptr, void *arg)
+{
+ struct efx_tc_flow_rule *rule = ptr;
+ struct efx_nic *efx = arg;
+
+ netif_err(efx, drv, efx->net_dev,
+ "tc rule %lx still present at teardown, removing\n",
+ rule->cookie);
+
+ /* Also releases entries in subsidiary tables */
+ efx_tc_delete_rule(efx, rule);
+
+ kfree(rule);
+}
+
int efx_init_struct_tc(struct efx_nic *efx)
{
int rc;
@@ -1470,6 +1707,9 @@ int efx_init_struct_tc(struct efx_nic *efx)
mutex_init(&efx->tc->mutex);
init_waitqueue_head(&efx->tc->flush_wq);
+ rc = efx_tc_init_encap_actions(efx);
+ if (rc < 0)
+ goto fail_encap_actions;
rc = efx_tc_init_counters(efx);
if (rc < 0)
goto fail_counters;
@@ -1485,6 +1725,10 @@ int efx_init_struct_tc(struct efx_nic *efx)
efx->tc->dflt.pf.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
INIT_LIST_HEAD(&efx->tc->dflt.wire.acts.list);
efx->tc->dflt.wire.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
+ INIT_LIST_HEAD(&efx->tc->facts.pf.list);
+ efx->tc->facts.pf.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL;
+ INIT_LIST_HEAD(&efx->tc->facts.reps.list);
+ efx->tc->facts.reps.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL;
efx->extra_channel_type[EFX_EXTRA_CHANNEL_TC] = &efx_tc_channel_type;
return 0;
fail_match_action_ht:
@@ -1492,6 +1736,8 @@ fail_match_action_ht:
fail_encap_match_ht:
efx_tc_destroy_counters(efx);
fail_counters:
+ efx_tc_destroy_encap_actions(efx);
+fail_encap_actions:
mutex_destroy(&efx->tc->mutex);
kfree(efx->tc->caps);
fail_alloc_caps:
@@ -1510,11 +1756,16 @@ void efx_fini_struct_tc(struct efx_nic *efx)
MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL);
EFX_WARN_ON_PARANOID(efx->tc->dflt.wire.fw_id !=
MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL);
+ EFX_WARN_ON_PARANOID(efx->tc->facts.pf.fw_id !=
+ MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL);
+ EFX_WARN_ON_PARANOID(efx->tc->facts.reps.fw_id !=
+ MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL);
rhashtable_free_and_destroy(&efx->tc->match_action_ht, efx_tc_flow_free,
efx);
rhashtable_free_and_destroy(&efx->tc->encap_match_ht,
efx_tc_encap_match_free, NULL);
efx_tc_fini_counters(efx);
+ efx_tc_fini_encap_actions(efx);
mutex_unlock(&efx->tc->mutex);
mutex_destroy(&efx->tc->mutex);
kfree(efx->tc->caps);
diff --git a/drivers/net/ethernet/sfc/tc.h b/drivers/net/ethernet/sfc/tc.h
index 04cced6a2d39..607429f8bb28 100644
--- a/drivers/net/ethernet/sfc/tc.h
+++ b/drivers/net/ethernet/sfc/tc.h
@@ -25,6 +25,8 @@ static inline bool efx_ipv6_addr_all_ones(struct in6_addr *addr)
}
#endif
+struct efx_tc_encap_action; /* see tc_encap_actions.h */
+
struct efx_tc_action_set {
u16 vlan_push:2;
u16 vlan_pop:2;
@@ -33,6 +35,9 @@ struct efx_tc_action_set {
__be16 vlan_tci[2]; /* TCIs for vlan_push */
__be16 vlan_proto[2]; /* Ethertypes for vlan_push */
struct efx_tc_counter_index *count;
+ struct efx_tc_encap_action *encap_md; /* entry in tc_encap_ht table */
+ struct list_head encap_user; /* entry on encap_md->users list */
+ struct efx_tc_action_set_list *user; /* Only populated if encap_md */
u32 dest_mport;
u32 fw_id; /* index of this entry in firmware actions table */
struct list_head list;
@@ -74,14 +79,41 @@ static inline bool efx_tc_match_is_encap(const struct efx_tc_match_fields *mask)
mask->enc_ip_ttl || mask->enc_sport || mask->enc_dport;
}
+/**
+ * enum efx_tc_em_pseudo_type - &struct efx_tc_encap_match pseudo type
+ *
+ * These are used to classify "pseudo" encap matches, which don't refer
+ * to an entry in hardware but rather indicate that a section of the
+ * match space is in use by another Outer Rule.
+ *
+ * @EFX_TC_EM_DIRECT: real HW entry in Outer Rule table; not a pseudo.
+ * Hardware index in &struct efx_tc_encap_match.fw_id is valid.
+ * @EFX_TC_EM_PSEUDO_MASK: registered by an encap match which includes a
+ * match on an optional field (currently ip_tos and/or udp_sport),
+ * to prevent an overlapping encap match _without_ optional fields.
+ * The pseudo encap match may be referenced again by an encap match
+ * with different values for these fields, but all masks must match the
+ * first (stored in our child_* fields).
+ */
+enum efx_tc_em_pseudo_type {
+ EFX_TC_EM_DIRECT,
+ EFX_TC_EM_PSEUDO_MASK,
+};
+
struct efx_tc_encap_match {
__be32 src_ip, dst_ip;
struct in6_addr src_ip6, dst_ip6;
__be16 udp_dport;
+ __be16 udp_sport, udp_sport_mask;
+ u8 ip_tos, ip_tos_mask;
struct rhash_head linkage;
enum efx_encap_type tun_type;
+ u8 child_ip_tos_mask;
+ __be16 child_udp_sport_mask;
refcount_t ref;
+ enum efx_tc_em_pseudo_type type;
u32 fw_id; /* index of this entry in firmware encap match table */
+ struct efx_tc_encap_match *pseudo; /* Referenced pseudo EM if needed */
};
struct efx_tc_match {
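
[Note] A concrete way to read the pseudo-type machinery above: suppose several decap filters share the same <src IP, dst IP, UDP dport> tuple but differ on the optional enc_ip_tos match. Per efx_tc_flower_record_encap_match() in tc.c (illustrative values only):

    /* filter A: enc_ip_tos 0x1c/0xfc on tuple T
     *   -> DIRECT      entry { T, ip_tos = 0x1c, ip_tos_mask = 0xfc }
     *   -> PSEUDO_MASK entry { T }, child_ip_tos_mask = 0xfc
     */
    /* filter B: enc_ip_tos 0x04/0xfc on the same tuple T
     *   -> new DIRECT entry; refcounts the existing PSEUDO_MASK
     */
    /* filter C: no ToS match on tuple T
     *   -> needs a DIRECT entry for the bare tuple, which overlaps the
     *      PSEUDO_MASK entry -> rejected with -EEXIST
     */
    /* filter D: enc_ip_tos 0x10/0xf0 (different mask) on tuple T
     *   -> child_ip_tos_mask mismatch against the PSEUDO_MASK -> -EEXIST
     */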
@@ -100,6 +132,7 @@ struct efx_tc_flow_rule {
struct rhash_head linkage;
struct efx_tc_match match;
struct efx_tc_action_set_list acts;
+ struct efx_tc_action_set_list *fallback; /* what to use when unready? */
u32 fw_id;
};
@@ -117,8 +150,10 @@ enum efx_tc_rule_prios {
* @mutex: Used to serialise operations on TC hashtables
* @counter_ht: Hashtable of TC counters (FW IDs and counter values)
* @counter_id_ht: Hashtable mapping TC counter cookies to counters
+ * @encap_ht: Hashtable of TC encap actions
* @encap_match_ht: Hashtable of TC encap matches
* @match_action_ht: Hashtable of TC match-action rules
+ * @neigh_ht: Hashtable of neighbour watches (&struct efx_neigh_binder)
* @reps_mport_id: MAE port allocated for representor RX
* @reps_filter_uc: VNIC filter for representor unicast RX (promisc)
* @reps_filter_mc: VNIC filter for representor multicast RX (allmulti)
@@ -133,6 +168,11 @@ enum efx_tc_rule_prios {
* %EFX_TC_PRIO_DFLT. Named by *ingress* port
* @dflt.pf: rule for traffic ingressing from PF (egresses to wire)
* @dflt.wire: rule for traffic ingressing from wire (egresses to PF)
+ * @facts: Fallback action-set-lists for unready rules. Named by *egress* port
+ * @facts.pf: action-set-list for unready rules on PF netdev, hence applying to
+ * traffic from wire, and egressing to PF
+ * @facts.reps: action-set-list for unready rules on representors, hence
+ * applying to traffic from representees, and egressing to the reps mport
* @up: have TC datastructures been set up?
*/
struct efx_tc_state {
@@ -141,8 +181,10 @@ struct efx_tc_state {
struct mutex mutex;
struct rhashtable counter_ht;
struct rhashtable counter_id_ht;
+ struct rhashtable encap_ht;
struct rhashtable encap_match_ht;
struct rhashtable match_action_ht;
+ struct rhashtable neigh_ht;
u32 reps_mport_id, reps_mport_vport_id;
s32 reps_filter_uc, reps_filter_mc;
bool flush_counters;
@@ -153,11 +195,19 @@ struct efx_tc_state {
struct efx_tc_flow_rule pf;
struct efx_tc_flow_rule wire;
} dflt;
+ struct {
+ struct efx_tc_action_set_list pf;
+ struct efx_tc_action_set_list reps;
+ } facts;
bool up;
};
struct efx_rep;
+enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev);
+struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx,
+ struct net_device *dev);
+s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv);
int efx_tc_configure_default_rule_rep(struct efx_rep *efv);
void efx_tc_deconfigure_default_rule(struct efx_nic *efx,
struct efx_tc_flow_rule *rule);
diff --git a/drivers/net/ethernet/sfc/tc_bindings.c b/drivers/net/ethernet/sfc/tc_bindings.c
index c18d64519c2d..1b79c535c54e 100644
--- a/drivers/net/ethernet/sfc/tc_bindings.c
+++ b/drivers/net/ethernet/sfc/tc_bindings.c
@@ -10,6 +10,7 @@
#include "tc_bindings.h"
#include "tc.h"
+#include "tc_encap_actions.h"
struct efx_tc_block_binding {
struct list_head list;
@@ -226,3 +227,15 @@ int efx_tc_setup(struct net_device *net_dev, enum tc_setup_type type,
return -EOPNOTSUPP;
}
+
+int efx_tc_netdev_event(struct efx_nic *efx, unsigned long event,
+ struct net_device *net_dev)
+{
+ if (efx->type->is_vf)
+ return NOTIFY_DONE;
+
+ if (event == NETDEV_UNREGISTER)
+ efx_tc_unregister_egdev(efx, net_dev);
+
+ return NOTIFY_OK;
+}
diff --git a/drivers/net/ethernet/sfc/tc_bindings.h b/drivers/net/ethernet/sfc/tc_bindings.h
index c210bb09150e..a326d23d322b 100644
--- a/drivers/net/ethernet/sfc/tc_bindings.h
+++ b/drivers/net/ethernet/sfc/tc_bindings.h
@@ -12,6 +12,7 @@
#define EFX_TC_BINDINGS_H
#include "net_driver.h"
+#if IS_ENABLED(CONFIG_SFC_SRIOV)
#include <net/sch_generic.h>
struct efx_rep;
@@ -26,4 +27,17 @@ int efx_tc_indr_setup_cb(struct net_device *net_dev, struct Qdisc *sch,
void *cb_priv, enum tc_setup_type type,
void *type_data, void *data,
void (*cleanup)(struct flow_block_cb *block_cb));
+int efx_tc_netdev_event(struct efx_nic *efx, unsigned long event,
+ struct net_device *net_dev);
+
+#else /* CONFIG_SFC_SRIOV */
+
+static inline int efx_tc_netdev_event(struct efx_nic *efx, unsigned long event,
+ struct net_device *net_dev)
+{
+ return NOTIFY_DONE;
+}
+
+#endif /* CONFIG_SFC_SRIOV */
+
#endif /* EFX_TC_BINDINGS_H */
diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.c b/drivers/net/ethernet/sfc/tc_encap_actions.c
new file mode 100644
index 000000000000..7e8bcdb222ad
--- /dev/null
+++ b/drivers/net/ethernet/sfc/tc_encap_actions.c
@@ -0,0 +1,747 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2023, Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include "tc_encap_actions.h"
+#include "tc.h"
+#include "mae.h"
+#include <net/vxlan.h>
+#include <net/geneve.h>
+#include <net/netevent.h>
+#include <net/arp.h>
+
+static const struct rhashtable_params efx_neigh_ht_params = {
+ .key_len = offsetof(struct efx_neigh_binder, ha),
+ .key_offset = 0,
+ .head_offset = offsetof(struct efx_neigh_binder, linkage),
+};
+
+static const struct rhashtable_params efx_tc_encap_ht_params = {
+ .key_len = offsetofend(struct efx_tc_encap_action, key),
+ .key_offset = 0,
+ .head_offset = offsetof(struct efx_tc_encap_action, linkage),
+};
+
+static void efx_tc_encap_free(void *ptr, void *__unused)
+{
+ struct efx_tc_encap_action *enc = ptr;
+
+ WARN_ON(refcount_read(&enc->ref));
+ kfree(enc);
+}
+
+static void efx_neigh_free(void *ptr, void *__unused)
+{
+ struct efx_neigh_binder *neigh = ptr;
+
+ WARN_ON(refcount_read(&neigh->ref));
+ WARN_ON(!list_empty(&neigh->users));
+ put_net_track(neigh->net, &neigh->ns_tracker);
+ netdev_put(neigh->egdev, &neigh->dev_tracker);
+ kfree(neigh);
+}
+
+int efx_tc_init_encap_actions(struct efx_nic *efx)
+{
+ int rc;
+
+ rc = rhashtable_init(&efx->tc->neigh_ht, &efx_neigh_ht_params);
+ if (rc < 0)
+ goto fail_neigh_ht;
+ rc = rhashtable_init(&efx->tc->encap_ht, &efx_tc_encap_ht_params);
+ if (rc < 0)
+ goto fail_encap_ht;
+ return 0;
+fail_encap_ht:
+ rhashtable_destroy(&efx->tc->neigh_ht);
+fail_neigh_ht:
+ return rc;
+}
+
+/* Only call this in init failure teardown.
+ * Normal exit should fini instead as there may be entries in the table.
+ */
+void efx_tc_destroy_encap_actions(struct efx_nic *efx)
+{
+ rhashtable_destroy(&efx->tc->encap_ht);
+ rhashtable_destroy(&efx->tc->neigh_ht);
+}
+
+void efx_tc_fini_encap_actions(struct efx_nic *efx)
+{
+ rhashtable_free_and_destroy(&efx->tc->encap_ht, efx_tc_encap_free, NULL);
+ rhashtable_free_and_destroy(&efx->tc->neigh_ht, efx_neigh_free, NULL);
+}
+
+static void efx_neigh_update(struct work_struct *work);
+
+static int efx_bind_neigh(struct efx_nic *efx,
+ struct efx_tc_encap_action *encap, struct net *net,
+ struct netlink_ext_ack *extack)
+{
+ struct efx_neigh_binder *neigh, *old;
+ struct flowi6 flow6 = {};
+ struct flowi4 flow4 = {};
+ int rc;
+
+ /* GCC stupidly thinks that only values explicitly listed in the enum
+ * definition can _possibly_ be sensible case values, so without this
+ * cast it complains about the IPv6 versions.
+ */
+ switch ((int)encap->type) {
+ case EFX_ENCAP_TYPE_VXLAN:
+ case EFX_ENCAP_TYPE_GENEVE:
+ flow4.flowi4_proto = IPPROTO_UDP;
+ flow4.fl4_dport = encap->key.tp_dst;
+ flow4.flowi4_tos = encap->key.tos;
+ flow4.daddr = encap->key.u.ipv4.dst;
+ flow4.saddr = encap->key.u.ipv4.src;
+ break;
+ case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6:
+ case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6:
+ flow6.flowi6_proto = IPPROTO_UDP;
+ flow6.fl6_dport = encap->key.tp_dst;
+ flow6.flowlabel = ip6_make_flowinfo(encap->key.tos,
+ encap->key.label);
+ flow6.daddr = encap->key.u.ipv6.dst;
+ flow6.saddr = encap->key.u.ipv6.src;
+ break;
+ default:
+ NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported encap type %d",
+ (int)encap->type);
+ return -EOPNOTSUPP;
+ }
+
+ neigh = kzalloc(sizeof(*neigh), GFP_KERNEL_ACCOUNT);
+ if (!neigh)
+ return -ENOMEM;
+ neigh->net = get_net_track(net, &neigh->ns_tracker, GFP_KERNEL_ACCOUNT);
+ neigh->dst_ip = flow4.daddr;
+ neigh->dst_ip6 = flow6.daddr;
+
+ old = rhashtable_lookup_get_insert_fast(&efx->tc->neigh_ht,
+ &neigh->linkage,
+ efx_neigh_ht_params);
+ if (old) {
+ /* don't need our new entry */
+ put_net_track(neigh->net, &neigh->ns_tracker);
+ kfree(neigh);
+ if (!refcount_inc_not_zero(&old->ref))
+ return -EAGAIN;
+ /* existing entry found, ref taken */
+ neigh = old;
+ } else {
+ /* New entry. We need to initiate a lookup */
+ struct neighbour *n;
+ struct rtable *rt;
+
+ if (encap->type & EFX_ENCAP_FLAG_IPV6) {
+#if IS_ENABLED(CONFIG_IPV6)
+ struct dst_entry *dst;
+
+ dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow6,
+ NULL);
+ rc = PTR_ERR_OR_ZERO(dst);
+ if (rc) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for IPv6 encap");
+ goto out_free;
+ }
+ neigh->egdev = dst->dev;
+ netdev_hold(neigh->egdev, &neigh->dev_tracker,
+ GFP_KERNEL_ACCOUNT);
+ neigh->ttl = ip6_dst_hoplimit(dst);
+ n = dst_neigh_lookup(dst, &flow6.daddr);
+ dst_release(dst);
+#else
+ /* We shouldn't ever get here, because if IPv6 isn't
+ * enabled how did someone create an IPv6 tunnel_key?
+ */
+ rc = -EOPNOTSUPP;
+ NL_SET_ERR_MSG_MOD(extack, "No IPv6 support (neigh bind)");
+ goto out_free;
+#endif
+ } else {
+ rt = ip_route_output_key(net, &flow4);
+ if (IS_ERR_OR_NULL(rt)) {
+ rc = PTR_ERR_OR_ZERO(rt);
+ if (!rc)
+ rc = -EIO;
+ NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for encap");
+ goto out_free;
+ }
+ neigh->egdev = rt->dst.dev;
+ netdev_hold(neigh->egdev, &neigh->dev_tracker,
+ GFP_KERNEL_ACCOUNT);
+ neigh->ttl = ip4_dst_hoplimit(&rt->dst);
+ n = dst_neigh_lookup(&rt->dst, &flow4.daddr);
+ ip_rt_put(rt);
+ }
+ if (!n) {
+ rc = -ENETUNREACH;
+ NL_SET_ERR_MSG_MOD(extack, "Failed to lookup neighbour for encap");
+ netdev_put(neigh->egdev, &neigh->dev_tracker);
+ goto out_free;
+ }
+ refcount_set(&neigh->ref, 1);
+ INIT_LIST_HEAD(&neigh->users);
+ read_lock_bh(&n->lock);
+ ether_addr_copy(neigh->ha, n->ha);
+ neigh->n_valid = n->nud_state & NUD_VALID;
+ read_unlock_bh(&n->lock);
+ rwlock_init(&neigh->lock);
+ INIT_WORK(&neigh->work, efx_neigh_update);
+ neigh->efx = efx;
+ neigh->used = jiffies;
+ if (!neigh->n_valid)
+ /* Prod ARP to find us a neighbour */
+ neigh_event_send(n, NULL);
+ neigh_release(n);
+ }
+ /* Add us to this neigh */
+ encap->neigh = neigh;
+ list_add_tail(&encap->list, &neigh->users);
+ return 0;
+
+out_free:
+ /* cleanup common to several error paths */
+ rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage,
+ efx_neigh_ht_params);
+ synchronize_rcu();
+ put_net_track(net, &neigh->ns_tracker);
+ kfree(neigh);
+ return rc;
+}
+
+static void efx_free_neigh(struct efx_neigh_binder *neigh)
+{
+ struct efx_nic *efx = neigh->efx;
+
+ rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage,
+ efx_neigh_ht_params);
+ synchronize_rcu();
+ netdev_put(neigh->egdev, &neigh->dev_tracker);
+ put_net_track(neigh->net, &neigh->ns_tracker);
+ kfree(neigh);
+}
+
+static void efx_release_neigh(struct efx_nic *efx,
+ struct efx_tc_encap_action *encap)
+{
+ struct efx_neigh_binder *neigh = encap->neigh;
+
+ if (!neigh)
+ return;
+ list_del(&encap->list);
+ encap->neigh = NULL;
+ if (!refcount_dec_and_test(&neigh->ref))
+ return; /* still in use */
+ efx_free_neigh(neigh);
+}
+
+static void efx_gen_tun_header_eth(struct efx_tc_encap_action *encap, u16 proto)
+{
+ struct efx_neigh_binder *neigh = encap->neigh;
+ struct ethhdr *eth;
+
+ encap->encap_hdr_len = sizeof(*eth);
+ eth = (struct ethhdr *)encap->encap_hdr;
+
+ if (encap->neigh->n_valid)
+ ether_addr_copy(eth->h_dest, neigh->ha);
+ else
+ eth_zero_addr(eth->h_dest);
+ ether_addr_copy(eth->h_source, neigh->egdev->dev_addr);
+ eth->h_proto = htons(proto);
+}
+
+static void efx_gen_tun_header_ipv4(struct efx_tc_encap_action *encap, u8 ipproto, u8 len)
+{
+ struct efx_neigh_binder *neigh = encap->neigh;
+ struct ip_tunnel_key *key = &encap->key;
+ struct iphdr *ip;
+
+ ip = (struct iphdr *)(encap->encap_hdr + encap->encap_hdr_len);
+ encap->encap_hdr_len += sizeof(*ip);
+
+ ip->daddr = key->u.ipv4.dst;
+ ip->saddr = key->u.ipv4.src;
+ ip->ttl = neigh->ttl;
+ ip->protocol = ipproto;
+ ip->version = 0x4;
+ ip->ihl = 0x5;
+ ip->tot_len = cpu_to_be16(ip->ihl * 4 + len);
+ ip_send_check(ip);
+}
+
+#ifdef CONFIG_IPV6
+static void efx_gen_tun_header_ipv6(struct efx_tc_encap_action *encap, u8 ipproto, u8 len)
+{
+ struct efx_neigh_binder *neigh = encap->neigh;
+ struct ip_tunnel_key *key = &encap->key;
+ struct ipv6hdr *ip;
+
+ ip = (struct ipv6hdr *)(encap->encap_hdr + encap->encap_hdr_len);
+ encap->encap_hdr_len += sizeof(*ip);
+
+ ip6_flow_hdr(ip, key->tos, key->label);
+ ip->daddr = key->u.ipv6.dst;
+ ip->saddr = key->u.ipv6.src;
+ ip->hop_limit = neigh->ttl;
+ ip->nexthdr = ipproto;
+ ip->version = 0x6;
+ ip->payload_len = cpu_to_be16(len);
+}
+#endif
+
+static void efx_gen_tun_header_udp(struct efx_tc_encap_action *encap, u8 len)
+{
+ struct ip_tunnel_key *key = &encap->key;
+ struct udphdr *udp;
+
+ udp = (struct udphdr *)(encap->encap_hdr + encap->encap_hdr_len);
+ encap->encap_hdr_len += sizeof(*udp);
+
+ udp->dest = key->tp_dst;
+ udp->len = cpu_to_be16(sizeof(*udp) + len);
+}
+
+static void efx_gen_tun_header_vxlan(struct efx_tc_encap_action *encap)
+{
+ struct ip_tunnel_key *key = &encap->key;
+ struct vxlanhdr *vxlan;
+
+ vxlan = (struct vxlanhdr *)(encap->encap_hdr + encap->encap_hdr_len);
+ encap->encap_hdr_len += sizeof(*vxlan);
+
+ vxlan->vx_flags = VXLAN_HF_VNI;
+ vxlan->vx_vni = vxlan_vni_field(tunnel_id_to_key32(key->tun_id));
+}
+
+static void efx_gen_tun_header_geneve(struct efx_tc_encap_action *encap)
+{
+ struct ip_tunnel_key *key = &encap->key;
+ struct genevehdr *geneve;
+ u32 vni;
+
+ geneve = (struct genevehdr *)(encap->encap_hdr + encap->encap_hdr_len);
+ encap->encap_hdr_len += sizeof(*geneve);
+
+ geneve->proto_type = htons(ETH_P_TEB);
+ /* convert tun_id to host-endian so we can use host arithmetic to
+ * extract individual bytes.
+ */
+ vni = ntohl(tunnel_id_to_key32(key->tun_id));
+ geneve->vni[0] = vni >> 16;
+ geneve->vni[1] = vni >> 8;
+ geneve->vni[2] = vni;
+}
+
+#define vxlan_header_l4_len (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
+#define vxlan4_header_len (sizeof(struct ethhdr) + sizeof(struct iphdr) + vxlan_header_l4_len)
+static void efx_gen_vxlan_header_ipv4(struct efx_tc_encap_action *encap)
+{
+ BUILD_BUG_ON(sizeof(encap->encap_hdr) < vxlan4_header_len);
+ efx_gen_tun_header_eth(encap, ETH_P_IP);
+ efx_gen_tun_header_ipv4(encap, IPPROTO_UDP, vxlan_header_l4_len);
+ efx_gen_tun_header_udp(encap, sizeof(struct vxlanhdr));
+ efx_gen_tun_header_vxlan(encap);
+}
+
+#define geneve_header_l4_len (sizeof(struct udphdr) + sizeof(struct genevehdr))
+#define geneve4_header_len (sizeof(struct ethhdr) + sizeof(struct iphdr) + geneve_header_l4_len)
+static void efx_gen_geneve_header_ipv4(struct efx_tc_encap_action *encap)
+{
+ BUILD_BUG_ON(sizeof(encap->encap_hdr) < geneve4_header_len);
+ efx_gen_tun_header_eth(encap, ETH_P_IP);
+ efx_gen_tun_header_ipv4(encap, IPPROTO_UDP, geneve_header_l4_len);
+ efx_gen_tun_header_udp(encap, sizeof(struct genevehdr));
+ efx_gen_tun_header_geneve(encap);
+}
+
+#ifdef CONFIG_IPV6
+#define vxlan6_header_len (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + vxlan_header_l4_len)
+static void efx_gen_vxlan_header_ipv6(struct efx_tc_encap_action *encap)
+{
+ BUILD_BUG_ON(sizeof(encap->encap_hdr) < vxlan6_header_len);
+ efx_gen_tun_header_eth(encap, ETH_P_IPV6);
+ efx_gen_tun_header_ipv6(encap, IPPROTO_UDP, vxlan_header_l4_len);
+ efx_gen_tun_header_udp(encap, sizeof(struct vxlanhdr));
+ efx_gen_tun_header_vxlan(encap);
+}
+
+#define geneve6_header_len (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + geneve_header_l4_len)
+static void efx_gen_geneve_header_ipv6(struct efx_tc_encap_action *encap)
+{
+ BUILD_BUG_ON(sizeof(encap->encap_hdr) < geneve6_header_len);
+ efx_gen_tun_header_eth(encap, ETH_P_IPV6);
+ efx_gen_tun_header_ipv6(encap, IPPROTO_UDP, geneve_header_l4_len);
+ efx_gen_tun_header_udp(encap, sizeof(struct genevehdr));
+ efx_gen_tun_header_geneve(encap);
+}
+#endif
+
+static void efx_gen_encap_header(struct efx_nic *efx,
+ struct efx_tc_encap_action *encap)
+{
+ encap->n_valid = encap->neigh->n_valid;
+
+ /* GCC stupidly thinks that only values explicitly listed in the enum
+ * definition can _possibly_ be sensible case values, so without this
+ * cast it complains about the IPv6 versions.
+ */
+ switch ((int)encap->type) {
+ case EFX_ENCAP_TYPE_VXLAN:
+ efx_gen_vxlan_header_ipv4(encap);
+ break;
+ case EFX_ENCAP_TYPE_GENEVE:
+ efx_gen_geneve_header_ipv4(encap);
+ break;
+#ifdef CONFIG_IPV6
+ case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6:
+ efx_gen_vxlan_header_ipv6(encap);
+ break;
+ case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6:
+ efx_gen_geneve_header_ipv6(encap);
+ break;
+#endif
+ default:
+ /* unhandled encap type, can't happen */
+ if (net_ratelimit())
+ netif_err(efx, drv, efx->net_dev,
+ "Bogus encap type %d, can't generate\n",
+ encap->type);
+
+ /* Use fallback action. */
+ encap->n_valid = false;
+ break;
+ }
+}
+
+static void efx_tc_update_encap(struct efx_nic *efx,
+ struct efx_tc_encap_action *encap)
+{
+ struct efx_tc_action_set_list *acts, *fallback;
+ struct efx_tc_flow_rule *rule;
+ struct efx_tc_action_set *act;
+ int rc;
+
+ if (encap->n_valid) {
+ /* Make sure no rules are using this encap while we change it */
+ list_for_each_entry(act, &encap->users, encap_user) {
+ acts = act->user;
+ if (WARN_ON(!acts)) /* can't happen */
+ continue;
+ rule = container_of(acts, struct efx_tc_flow_rule, acts);
+ if (rule->fallback)
+ fallback = rule->fallback;
+ else /* fallback fallback: deliver to PF */
+ fallback = &efx->tc->facts.pf;
+ rc = efx_mae_update_rule(efx, fallback->fw_id,
+ rule->fw_id);
+ if (rc)
+ netif_err(efx, drv, efx->net_dev,
+ "Failed to update (f) rule %08x rc %d\n",
+ rule->fw_id, rc);
+ else
+ netif_dbg(efx, drv, efx->net_dev, "Updated (f) rule %08x\n",
+ rule->fw_id);
+ }
+ }
+
+ /* Make sure we don't leak arbitrary bytes on the wire;
+ * set an all-0s ethernet header. A successful call to
+ * efx_gen_encap_header() will overwrite this.
+ */
+ memset(encap->encap_hdr, 0, sizeof(encap->encap_hdr));
+ encap->encap_hdr_len = ETH_HLEN;
+
+ if (encap->neigh) {
+ read_lock_bh(&encap->neigh->lock);
+ efx_gen_encap_header(efx, encap);
+ read_unlock_bh(&encap->neigh->lock);
+ } else {
+ encap->n_valid = false;
+ }
+
+ rc = efx_mae_update_encap_md(efx, encap);
+ if (rc) {
+ netif_err(efx, drv, efx->net_dev,
+ "Failed to update encap hdr %08x rc %d\n",
+ encap->fw_id, rc);
+ return;
+ }
+ netif_dbg(efx, drv, efx->net_dev, "Updated encap hdr %08x\n",
+ encap->fw_id);
+ if (!encap->n_valid)
+ return;
+ /* Update rule users: use the action if they are now ready */
+ list_for_each_entry(act, &encap->users, encap_user) {
+ acts = act->user;
+ if (WARN_ON(!acts)) /* can't happen */
+ continue;
+ rule = container_of(acts, struct efx_tc_flow_rule, acts);
+ if (!efx_tc_check_ready(efx, rule))
+ continue;
+ rc = efx_mae_update_rule(efx, acts->fw_id, rule->fw_id);
+ if (rc)
+ netif_err(efx, drv, efx->net_dev,
+ "Failed to update rule %08x rc %d\n",
+ rule->fw_id, rc);
+ else
+ netif_dbg(efx, drv, efx->net_dev, "Updated rule %08x\n",
+ rule->fw_id);
+ }
+}
+
+static void efx_neigh_update(struct work_struct *work)
+{
+ struct efx_neigh_binder *neigh = container_of(work, struct efx_neigh_binder, work);
+ struct efx_tc_encap_action *encap;
+ struct efx_nic *efx = neigh->efx;
+
+ mutex_lock(&efx->tc->mutex);
+ list_for_each_entry(encap, &neigh->users, list)
+ efx_tc_update_encap(neigh->efx, encap);
+ /* release ref taken in efx_neigh_event() */
+ if (refcount_dec_and_test(&neigh->ref))
+ efx_free_neigh(neigh);
+ mutex_unlock(&efx->tc->mutex);
+}
+
+static int efx_neigh_event(struct efx_nic *efx, struct neighbour *n)
+{
+ struct efx_neigh_binder keys = {NULL}, *neigh;
+ bool n_valid, ipv6 = false;
+ char ha[ETH_ALEN];
+ size_t keysize;
+
+ if (WARN_ON(!efx->tc))
+ return NOTIFY_DONE;
+
+ if (n->tbl == &arp_tbl) {
+ keysize = sizeof(keys.dst_ip);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (n->tbl == ipv6_stub->nd_tbl) {
+ ipv6 = true;
+ keysize = sizeof(keys.dst_ip6);
+#endif
+ } else {
+ return NOTIFY_DONE;
+ }
+ if (!n->parms) {
+ netif_warn(efx, drv, efx->net_dev, "neigh_event with no parms!\n");
+ return NOTIFY_DONE;
+ }
+ keys.net = read_pnet(&n->parms->net);
+ if (n->tbl->key_len != keysize) {
+ netif_warn(efx, drv, efx->net_dev, "neigh_event with bad key_len %u\n",
+ n->tbl->key_len);
+ return NOTIFY_DONE;
+ }
+ read_lock_bh(&n->lock); /* Get a consistent view */
+ memcpy(ha, n->ha, ETH_ALEN);
+ n_valid = (n->nud_state & NUD_VALID) && !n->dead;
+ read_unlock_bh(&n->lock);
+ if (ipv6)
+ memcpy(&keys.dst_ip6, n->primary_key, n->tbl->key_len);
+ else
+ memcpy(&keys.dst_ip, n->primary_key, n->tbl->key_len);
+ rcu_read_lock();
+ neigh = rhashtable_lookup_fast(&efx->tc->neigh_ht, &keys,
+ efx_neigh_ht_params);
+ if (!neigh || neigh->dying)
+ /* We're not interested in this neighbour */
+ goto done;
+ write_lock_bh(&neigh->lock);
+ if (n_valid == neigh->n_valid && !memcmp(ha, neigh->ha, ETH_ALEN)) {
+ write_unlock_bh(&neigh->lock);
+ /* Nothing has changed; no work to do */
+ goto done;
+ }
+ neigh->n_valid = n_valid;
+ memcpy(neigh->ha, ha, ETH_ALEN);
+ write_unlock_bh(&neigh->lock);
+ if (refcount_inc_not_zero(&neigh->ref)) {
+ rcu_read_unlock();
+ if (!schedule_work(&neigh->work))
+ /* failed to schedule, release the ref we just took */
+ if (refcount_dec_and_test(&neigh->ref))
+ efx_free_neigh(neigh);
+ } else {
+done:
+ rcu_read_unlock();
+ }
+ return NOTIFY_DONE;
+}
+
+bool efx_tc_check_ready(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
+{
+ struct efx_tc_action_set *act;
+
+ /* Encap actions can only be offloaded if they have valid
+ * neighbour info for the outer Ethernet header.
+ */
+ list_for_each_entry(act, &rule->acts.list, list)
+ if (act->encap_md && !act->encap_md->n_valid)
+ return false;
+ return true;
+}
+
+struct efx_tc_encap_action *efx_tc_flower_create_encap_md(
+ struct efx_nic *efx, const struct ip_tunnel_info *info,
+ struct net_device *egdev, struct netlink_ext_ack *extack)
+{
+ enum efx_encap_type type = efx_tc_indr_netdev_type(egdev);
+ struct efx_tc_encap_action *encap, *old;
+ struct efx_rep *to_efv;
+ s64 rc;
+
+ if (type == EFX_ENCAP_TYPE_NONE) {
+ /* dest is not an encap device */
+ NL_SET_ERR_MSG_MOD(extack, "Not a (supported) tunnel device but tunnel_key is set");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ rc = efx_mae_check_encap_type_supported(efx, type);
+ if (rc < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware reports no support for this tunnel type");
+ return ERR_PTR(rc);
+ }
+ /* No support yet for Geneve options */
+ if (info->options_len) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel options");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ switch (info->mode) {
+ case IP_TUNNEL_INFO_TX:
+ break;
+ case IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6:
+ type |= EFX_ENCAP_FLAG_IPV6;
+ break;
+ default:
+ NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported tunnel mode %u",
+ info->mode);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ encap = kzalloc(sizeof(*encap), GFP_KERNEL_ACCOUNT);
+ if (!encap)
+ return ERR_PTR(-ENOMEM);
+ encap->type = type;
+ encap->key = info->key;
+ INIT_LIST_HEAD(&encap->users);
+ old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_ht,
+ &encap->linkage,
+ efx_tc_encap_ht_params);
+ if (old) {
+ /* don't need our new entry */
+ kfree(encap);
+ if (!refcount_inc_not_zero(&old->ref))
+ return ERR_PTR(-EAGAIN);
+ /* existing entry found, ref taken */
+ return old;
+ }
+
+ rc = efx_bind_neigh(efx, encap, dev_net(egdev), extack);
+ if (rc < 0)
+ goto out_remove;
+ to_efv = efx_tc_flower_lookup_efv(efx, encap->neigh->egdev);
+ if (IS_ERR(to_efv)) {
+ /* neigh->egdev isn't ours */
+ NL_SET_ERR_MSG_MOD(extack, "Tunnel egress device not on switch");
+ rc = PTR_ERR(to_efv);
+ goto out_release;
+ }
+ rc = efx_tc_flower_external_mport(efx, to_efv);
+ if (rc < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to identify tunnel egress m-port");
+ goto out_release;
+ }
+ encap->dest_mport = rc;
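+ /* Hold the neigh read lock so @ha and @n_valid are consistent while
+ * we generate the encap header.
+ */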
+ read_lock_bh(&encap->neigh->lock);
+ efx_gen_encap_header(efx, encap);
+ read_unlock_bh(&encap->neigh->lock);
+
+ rc = efx_mae_allocate_encap_md(efx, encap);
+ if (rc < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to write tunnel header to hw");
+ goto out_release;
+ }
+
+ /* ref and return */
+ refcount_set(&encap->ref, 1);
+ return encap;
+out_release:
+ efx_release_neigh(efx, encap);
+out_remove:
+ rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage,
+ efx_tc_encap_ht_params);
+ kfree(encap);
+ return ERR_PTR(rc);
+}
+
+void efx_tc_flower_release_encap_md(struct efx_nic *efx,
+ struct efx_tc_encap_action *encap)
+{
+ if (!refcount_dec_and_test(&encap->ref))
+ return; /* still in use */
+ efx_release_neigh(efx, encap);
+ rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage,
+ efx_tc_encap_ht_params);
+ efx_mae_free_encap_md(efx, encap);
+ kfree(encap);
+}
+
+static void efx_tc_remove_neigh_users(struct efx_nic *efx, struct efx_neigh_binder *neigh)
+{
+ struct efx_tc_encap_action *encap, *next;
+
+ list_for_each_entry_safe(encap, next, &neigh->users, list) {
+ /* Should cause neigh usage count to fall to zero, freeing it */
+ efx_release_neigh(efx, encap);
+ /* The encap has lost its neigh, so it's now unready */
+ efx_tc_update_encap(efx, encap);
+ }
+}
+
+void efx_tc_unregister_egdev(struct efx_nic *efx, struct net_device *net_dev)
+{
+ struct efx_neigh_binder *neigh;
+ struct rhashtable_iter walk;
+
+ mutex_lock(&efx->tc->mutex);
+ rhashtable_walk_enter(&efx->tc->neigh_ht, &walk);
+ rhashtable_walk_start(&walk);
+ while ((neigh = rhashtable_walk_next(&walk)) != NULL) {
+ if (IS_ERR(neigh))
+ continue;
+ if (neigh->egdev != net_dev)
+ continue;
+ neigh->dying = true;
+ rhashtable_walk_stop(&walk);
+ synchronize_rcu(); /* Make sure any updates see dying flag */
+ efx_tc_remove_neigh_users(efx, neigh); /* might sleep */
+ rhashtable_walk_start(&walk);
+ }
+ rhashtable_walk_stop(&walk);
+ rhashtable_walk_exit(&walk);
+ mutex_unlock(&efx->tc->mutex);
+}
+
+int efx_tc_netevent_event(struct efx_nic *efx, unsigned long event,
+ void *ptr)
+{
+ if (efx->type->is_vf)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETEVENT_NEIGH_UPDATE:
+ return efx_neigh_event(efx, ptr);
+ default:
+ return NOTIFY_DONE;
+ }
+}
diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.h b/drivers/net/ethernet/sfc/tc_encap_actions.h
new file mode 100644
index 000000000000..c3c7904ad7ff
--- /dev/null
+++ b/drivers/net/ethernet/sfc/tc_encap_actions.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2023, Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_TC_ENCAP_ACTIONS_H
+#define EFX_TC_ENCAP_ACTIONS_H
+#include "net_driver.h"
+
+#if IS_ENABLED(CONFIG_SFC_SRIOV)
+#include <linux/refcount.h>
+#include <net/tc_act/tc_tunnel_key.h>
+
+/**
+ * struct efx_neigh_binder - driver state for a neighbour entry
+ * @net: the network namespace in which this neigh resides
+ * @dst_ip: the IPv4 destination address resolved by this neigh
+ * @dst_ip6: the IPv6 destination address resolved by this neigh
+ * @ha: the hardware (Ethernet) address of the neighbour
+ * @n_valid: true if the neighbour is in NUD_VALID state
+ * @lock: protects @ha and @n_valid
+ * @ttl: Time To Live associated with the route used
+ * @dying: set when egdev is going away, to skip further updates
+ * @egdev: egress device from the route lookup. Holds a reference
+ * @dev_tracker: reference tracker entry for @egdev
+ * @ns_tracker: reference tracker entry for @net
+ * @ref: counts encap actions referencing this entry
+ * @used: jiffies of last time traffic hit any encap action using this.
+ * When counter reads update this, a new neighbour event is sent to
+ * indicate that the neighbour entry is still in use.
+ * @users: list of &struct efx_tc_encap_action
+ * @linkage: entry in efx->tc->neigh_ht (keys are @net, @dst_ip, @dst_ip6).
+ * @work: processes neighbour state changes, updates the encap actions
+ * @efx: owning NIC instance.
+ *
+ * Associates a neighbour entry with the encap actions that are
+ * interested in it, allowing the latter to be updated when the
+ * neighbour details change.
+ * Whichever of @dst_ip and @dst_ip6 is not in use will be all-zeroes;
+ * this distinguishes IPv4 from IPv6 entries.
+ */
+struct efx_neigh_binder {
+ struct net *net;
+ __be32 dst_ip;
+ struct in6_addr dst_ip6;
+ char ha[ETH_ALEN];
+ bool n_valid;
+ rwlock_t lock;
+ u8 ttl;
+ bool dying;
+ struct net_device *egdev;
+ netdevice_tracker dev_tracker;
+ netns_tracker ns_tracker;
+ refcount_t ref;
+ unsigned long used;
+ struct list_head users;
+ struct rhash_head linkage;
+ struct work_struct work;
+ struct efx_nic *efx;
+};
+
+/* This limit is arbitrary; current hardware (SN1022) handles encap headers
+ * of up to 126 bytes, but that limit is not enshrined in the MCDI protocol.
+ */
+#define EFX_TC_MAX_ENCAP_HDR 126
+struct efx_tc_encap_action {
+ enum efx_encap_type type;
+ struct ip_tunnel_key key; /* 52 bytes */
+ u32 dest_mport; /* is copied into struct efx_tc_action_set */
+ u8 encap_hdr_len;
+ bool n_valid;
+ u8 encap_hdr[EFX_TC_MAX_ENCAP_HDR];
+ struct efx_neigh_binder *neigh;
+ struct list_head list; /* entry on neigh->users list */
+ struct list_head users; /* action sets using this encap_md */
+ struct rhash_head linkage; /* entry in efx->tc->encap_ht */
+ refcount_t ref;
+ u32 fw_id; /* index of this entry in firmware encap table */
+};
+
+/* create/destroy/teardown hashtables */
+int efx_tc_init_encap_actions(struct efx_nic *efx);
+void efx_tc_destroy_encap_actions(struct efx_nic *efx);
+void efx_tc_fini_encap_actions(struct efx_nic *efx);
+
+struct efx_tc_flow_rule;
+bool efx_tc_check_ready(struct efx_nic *efx, struct efx_tc_flow_rule *rule);
+
+struct efx_tc_encap_action *efx_tc_flower_create_encap_md(
+ struct efx_nic *efx, const struct ip_tunnel_info *info,
+ struct net_device *egdev, struct netlink_ext_ack *extack);
+void efx_tc_flower_release_encap_md(struct efx_nic *efx,
+ struct efx_tc_encap_action *encap);
+
+void efx_tc_unregister_egdev(struct efx_nic *efx, struct net_device *net_dev);
+int efx_tc_netevent_event(struct efx_nic *efx, unsigned long event,
+ void *ptr);
+
+#else /* CONFIG_SFC_SRIOV */
+
+static inline int efx_tc_netevent_event(struct efx_nic *efx,
+ unsigned long event, void *ptr)
+{
+ return NOTIFY_DONE;
+}
+
+#endif /* CONFIG_SFC_SRIOV */
+
+#endif /* EFX_TC_ENCAP_ACTIONS_H */
diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c
index 755aa92bf823..9f2393d34371 100644
--- a/drivers/net/ethernet/sfc/tx_common.c
+++ b/drivers/net/ethernet/sfc/tx_common.c
@@ -12,6 +12,7 @@
#include "efx.h"
#include "nic_common.h"
#include "tx_common.h"
+#include <net/gso.h>
static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue)
{