summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Dani Liberman <dliberman@habana.ai> 2022-10-31 11:44:45 +0200
committer: Oded Gabbay <ogabbay@kernel.org> 2022-11-23 16:13:46 +0200
commit: aff6354afd1f9eae1e10658c157c26e316806f56 (patch)
tree: 6d2dc9f1934640e5145ee43fecea1d3eaf7bed05
parent: cd21701cde33123fc53c6401192219ba14832da3 (diff)
download: linux-stable-aff6354afd1f9eae1e10658c157c26e316806f56.tar.gz
linux-stable-aff6354afd1f9eae1e10658c157c26e316806f56.tar.bz2
linux-stable-aff6354afd1f9eae1e10658c157c26e316806f56.zip
habanalabs/gaudi: add page fault notify event
Each time a page fault happens, in addition to capturing its data, also notify the user about it.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
-rw-r--r-- drivers/misc/habanalabs/common/device.c 9
-rw-r--r-- drivers/misc/habanalabs/common/habanalabs.h 2
-rw-r--r-- drivers/misc/habanalabs/gaudi/gaudi.c 6
-rw-r--r-- include/uapi/misc/habanalabs.h 2
4 files changed, 16 insertions, 3 deletions
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 65bb40f81901..31818121ef4d 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -2490,3 +2490,12 @@ void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is
hdev->captured_err_info.pgf_info.pgf.engine_id = eng_id;
hl_capture_user_mappings(hdev, is_pmmu);
}
+
+void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu,
+ u64 *event_mask)
+{
+ hl_capture_page_fault(hdev, addr, eng_id, is_pmmu);
+
+ if (event_mask)
+ *event_mask |= HL_NOTIFIER_EVENT_PAGE_FAULT;
+}
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index d9335f3769b8..0781b8698f74 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -3815,6 +3815,8 @@ void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_
void hl_handle_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines,
u8 flags, u64 *event_mask);
void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu);
+void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu,
+ u64 *event_mask);
#ifdef CONFIG_DEBUG_FS
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 035865cb097c..cbe1daf5a793 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -6740,7 +6740,7 @@ static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_i
}
}
-static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr)
+static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
{
struct gaudi_device *gaudi = hdev->asic_specific;
u32 val;
@@ -6755,7 +6755,7 @@ static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr
*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
- hl_capture_page_fault(hdev, *addr, 0, true);
+ hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
}
@@ -7323,7 +7323,7 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
if (razwi) {
gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
&is_write);
- gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr);
+ gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
if (is_read)
razwi_flags |= HL_RAZWI_READ;
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 7747e19e81fe..e50cb71df081 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -722,6 +722,7 @@ enum hl_server_type {
* HL_NOTIFIER_EVENT_USER_ENGINE_ERR - Indicates device engine in error state
* HL_NOTIFIER_EVENT_GENERAL_HW_ERR - Indicates device HW error
* HL_NOTIFIER_EVENT_RAZWI - Indicates razwi happened
+ * HL_NOTIFIER_EVENT_PAGE_FAULT - Indicates page fault happened
*/
#define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0)
#define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1)
@@ -731,6 +732,7 @@ enum hl_server_type {
#define HL_NOTIFIER_EVENT_USER_ENGINE_ERR (1ULL << 5)
#define HL_NOTIFIER_EVENT_GENERAL_HW_ERR (1ULL << 6)
#define HL_NOTIFIER_EVENT_RAZWI (1ULL << 7)
+#define HL_NOTIFIER_EVENT_PAGE_FAULT (1ULL << 8)
/* Opcode for management ioctl
*