diff options
author | Ofir Bitton <obitton@habana.ai> | 2021-11-23 16:34:28 +0200 |
---|---|---|
committer | Oded Gabbay <ogabbay@kernel.org> | 2021-12-26 14:42:31 +0200 |
commit | ce80098db2439ee44403ec6fccd3a10be21c7aff (patch) | |
tree | 47c6bf1e8b426a455211d715ad1ed1c9dca18304 /drivers/misc | |
parent | 42eb2872e0867679c996bb19ee9063e6141fa974 (diff) | |
download | linux-ce80098db2439ee44403ec6fccd3a10be21c7aff.tar.gz linux-ce80098db2439ee44403ec6fccd3a10be21c7aff.tar.bz2 linux-ce80098db2439ee44403ec6fccd3a10be21c7aff.zip |
habanalabs: support hard-reset scheduling during soft-reset
As a hard-reset can be requested during a soft-reset, the driver must
allow it to be scheduled; otherwise, critical events received during
the soft-reset will be ignored.
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Diffstat (limited to 'drivers/misc')
-rw-r--r-- | drivers/misc/habanalabs/common/device.c | 31 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/habanalabs.h | 3 |
2 files changed, 31 insertions, 3 deletions
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 84621ad765bc..733338ab6f1d 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -978,7 +978,7 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags) int hl_device_reset(struct hl_device *hdev, u32 flags) { bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false, - reset_upon_device_release = false; + reset_upon_device_release = false, schedule_hard_reset = false; u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; struct hl_ctx *ctx; int i, rc; @@ -1031,6 +1031,9 @@ do_reset: /* Block future CS/VM/JOB completion operations */ spin_lock(&hdev->reset_info.lock); if (hdev->reset_info.in_reset) { + /* We only allow scheduling of a hard reset during soft reset */ + if (hard_reset && hdev->reset_info.is_in_soft_reset) + hdev->reset_info.hard_reset_schedule_flags = flags; spin_unlock(&hdev->reset_info.lock); return 0; } @@ -1193,7 +1196,6 @@ kill_processes: * is required for the initialization itself */ hdev->disabled = false; - hdev->reset_info.is_in_soft_reset = false; rc = hdev->asic_funcs->hw_init(hdev); if (rc) { @@ -1243,7 +1245,20 @@ kill_processes: } } - hdev->reset_info.in_reset = 0; + spin_lock(&hdev->reset_info.lock); + hdev->reset_info.is_in_soft_reset = false; + + /* Schedule hard reset only if requested and if not already in hard reset. 
+ * We keep 'in_reset' enabled, so no other reset can go in during the hard + * reset schedule + */ + if (!hard_reset && hdev->reset_info.hard_reset_schedule_flags) + schedule_hard_reset = true; + else + hdev->reset_info.in_reset = 0; + + spin_unlock(&hdev->reset_info.lock); + hdev->reset_info.needs_reset = false; dev_notice(hdev->dev, "Successfully finished resetting the device\n"); @@ -1261,6 +1276,16 @@ kill_processes: hdev->reset_info.soft_reset_cnt++; } + if (schedule_hard_reset) { + dev_info(hdev->dev, "Performing hard reset scheduled during soft reset\n"); + flags = hdev->reset_info.hard_reset_schedule_flags; + hdev->reset_info.hard_reset_schedule_flags = 0; + hdev->disabled = true; + hard_reset = true; + handle_reset_trigger(hdev, flags); + goto again; + } + return 0; out_err: diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 37a3a469b42f..cb710fd478b6 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2460,6 +2460,8 @@ struct last_error_session_info { * @lock: lock to protect critical reset flows. * @soft_reset_cnt: number of soft reset since the driver was loaded. * @hard_reset_cnt: number of hard reset since the driver was loaded. + * @hard_reset_schedule_flags: hard reset is scheduled to after current soft reset, + * here we hold the hard reset flags. * @in_reset: is device in reset flow. * @is_in_soft_reset: Device is currently in soft reset process. * @needs_reset: true if reset_on_lockup is false and device should be reset @@ -2478,6 +2480,7 @@ struct hl_reset_info { spinlock_t lock; u32 soft_reset_cnt; u32 hard_reset_cnt; + u32 hard_reset_schedule_flags; u8 in_reset; u8 is_in_soft_reset; u8 needs_reset; |