From e283546c0465dd3026bc94f7b1a9de7f6b8969ec Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 16 Apr 2014 19:27:48 +0100 Subject: sfc: On MCDI timeout, issue an FLR (and mark MCDI to fail-fast) When an MCDI command times out (whether or not we find it completed when we poll), call efx_mcdi_abandon(), which tells all subsequent MCDI calls to fail-fast, and queues up an FLR. Because an FLR doesn't lead to receiving any reboot event from the MC (unlike most other types of reset), we have to call efx_ef10_reset_mc_allocations. In efx_start_all(), if a reset (of any kind) is pending, we bail out. Without this, attempts to reconfigure (e.g. change mtu) can cause driver/mc state inconsistency if the first MCDI call triggers an FLR. For similar reasons, on EF10, in efx_reset_down(method=RESET_TYPE_MCDI_TIMEOUT), set the number of active queues to zero before calling efx_stop_all(). And, on farch, in efx_reset_up(method=RESET_TYPE_MCDI_TIMEOUT), set active_queues and flushes pending & outstanding to zero. efx_mcdi_mode_{poll,event}() should not take us out of fail-fast mode. Instead, this is done by efx_mcdi_reset() after the FLR completes. The new FLR reset_type RESET_TYPE_MCDI_TIMEOUT doesn't really fit into the hierarchy of reset 'scopes' whereby efx_reset() decides some resets subsume others. Thus, it uses separate logic. Also, fixed up some inconsistency around RESET_TYPE_MC_BIST, which was in the wrong place in that hierarchy. Signed-off-by: Shradha Shah Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sfc/efx.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet/sfc/efx.c') diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 57b971e5e6b2..63d595fd3cc5 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -76,6 +76,7 @@ const char *const efx_reset_type_names[] = { [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL", [RESET_TYPE_WORLD] = "WORLD", [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE", + [RESET_TYPE_MC_BIST] = "MC_BIST", [RESET_TYPE_DISABLE] = "DISABLE", [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", [RESET_TYPE_INT_ERROR] = "INT_ERROR", @@ -83,7 +84,7 @@ const char *const efx_reset_type_names[] = { [RESET_TYPE_DMA_ERROR] = "DMA_ERROR", [RESET_TYPE_TX_SKIP] = "TX_SKIP", [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", - [RESET_TYPE_MC_BIST] = "MC_BIST", + [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)", }; /* Reset workqueue. If any NIC has a hardware failure then a reset will be @@ -1739,7 +1740,8 @@ static void efx_start_all(struct efx_nic *efx) /* Check that it is appropriate to restart the interface. 
All * of these flags are safe to read under just the rtnl lock */ - if (efx->port_enabled || !netif_running(efx->net_dev)) + if (efx->port_enabled || !netif_running(efx->net_dev) || + efx->reset_pending) return; efx_start_port(efx); @@ -2334,6 +2336,9 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method) { EFX_ASSERT_RESET_SERIALISED(efx); + if (method == RESET_TYPE_MCDI_TIMEOUT) + efx->type->prepare_flr(efx); + efx_stop_all(efx); efx_disable_interrupts(efx); @@ -2354,6 +2359,10 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) EFX_ASSERT_RESET_SERIALISED(efx); + if (method == RESET_TYPE_MCDI_TIMEOUT) + efx->type->finish_flr(efx); + + /* Ensure that SRAM is initialised even if we're disabling the device */ rc = efx->type->init(efx); if (rc) { netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n"); @@ -2417,7 +2426,10 @@ int efx_reset(struct efx_nic *efx, enum reset_type method) /* Clear flags for the scopes we covered. We assume the NIC and * driver are now quiescent so that there is no race here. */ - efx->reset_pending &= -(1 << (method + 1)); + if (method < RESET_TYPE_MAX_METHOD) + efx->reset_pending &= -(1 << (method + 1)); + else /* it doesn't fit into the well-ordered scope hierarchy */ + __clear_bit(method, &efx->reset_pending); /* Reinitialise bus-mastering, which may have been turned off before * the reset was scheduled. This is still appropriate, even in the @@ -2546,6 +2558,7 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) case RESET_TYPE_DISABLE: case RESET_TYPE_RECOVER_OR_DISABLE: case RESET_TYPE_MC_BIST: + case RESET_TYPE_MCDI_TIMEOUT: method = type; netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", RESET_TYPE(method)); -- cgit v1.2.3