summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Chan <michael.chan@broadcom.com>2020-09-03 14:28:54 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2020-09-12 11:47:34 +0200
commitdbf1ea0b3803b29675cc3dbea873d9ff25333e7d (patch)
tree26883e960d37675260cd4a13ef60c59a04fb8e04
parent8238ee93a30a5ff6fc75751e122a28e0d92f3e12 (diff)
downloadlinux-stable-dbf1ea0b3803b29675cc3dbea873d9ff25333e7d.tar.gz
linux-stable-dbf1ea0b3803b29675cc3dbea873d9ff25333e7d.tar.bz2
linux-stable-dbf1ea0b3803b29675cc3dbea873d9ff25333e7d.zip
tg3: Fix soft lockup when tg3_reset_task() fails.
[ Upstream commit 556699341efa98243e08e34401b3f601da91f5a3 ] If tg3_reset_task() fails, the device state is left in an inconsistent state with IFF_RUNNING still set but NAPI state not enabled. A subsequent operation, such as ifdown or AER error can cause it to soft lock up when it tries to disable NAPI state. Fix it by bringing down the device to !IFF_RUNNING state when tg3_reset_task() fails. tg3_reset_task() running from workqueue will now call tg3_close() when the reset fails. We need to modify tg3_reset_task_cancel() slightly to avoid tg3_close() calling cancel_work_sync() to cancel tg3_reset_task(). Otherwise cancel_work_sync() will wait forever for tg3_reset_task() to finish. Reported-by: David Christensen <drc@linux.vnet.ibm.com> Reported-by: Baptiste Covolato <baptiste@arista.com> Fixes: db2199737990 ("tg3: Schedule at most one tg3_reset_task run") Signed-off-by: Michael Chan <michael.chan@broadcom.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Sasha Levin <sashal@kernel.org>
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c17
1 files changed, 13 insertions, 4 deletions
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 5790b35064a8..2db6102ed584 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -7201,8 +7201,8 @@ static inline void tg3_reset_task_schedule(struct tg3 *tp)
static inline void tg3_reset_task_cancel(struct tg3 *tp)
{
- cancel_work_sync(&tp->reset_task);
- tg3_flag_clear(tp, RESET_TASK_PENDING);
+ if (test_and_clear_bit(TG3_FLAG_RESET_TASK_PENDING, tp->tg3_flags))
+ cancel_work_sync(&tp->reset_task);
tg3_flag_clear(tp, TX_RECOVERY_PENDING);
}
@@ -11174,18 +11174,27 @@ static void tg3_reset_task(struct work_struct *work)
tg3_halt(tp, RESET_KIND_SHUTDOWN, 0);
err = tg3_init_hw(tp, true);
- if (err)
+ if (err) {
+ tg3_full_unlock(tp);
+ tp->irq_sync = 0;
+ tg3_napi_enable(tp);
+ /* Clear this flag so that tg3_reset_task_cancel() will not
+ * call cancel_work_sync() and wait forever.
+ */
+ tg3_flag_clear(tp, RESET_TASK_PENDING);
+ dev_close(tp->dev);
goto out;
+ }
tg3_netif_start(tp);
-out:
tg3_full_unlock(tp);
if (!err)
tg3_phy_start(tp);
tg3_flag_clear(tp, RESET_TASK_PENDING);
+out:
rtnl_unlock();
}