summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/debugfs-driver-habanalabs7
-rw-r--r--drivers/misc/habanalabs/debugfs.c55
-rw-r--r--drivers/misc/habanalabs/goya/goya.c6
-rw-r--r--drivers/misc/habanalabs/habanalabs.h2
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h3
5 files changed, 70 insertions, 3 deletions
diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
index a73601c5121e..67e04f2d7e1d 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -150,3 +150,10 @@ KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Description: Displays a list with information about all the active virtual
address mappings per ASID
+
+What: /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
+Date: Mar 2020
+KernelVersion: 5.6
+Contact: oded.gabbay@gmail.com
+Description: Sets the stop-on_error option for the device engines. Value of
+ "0" is for disable, otherwise enable.
diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c
index 756d36ed5d95..37beff3096f8 100644
--- a/drivers/misc/habanalabs/debugfs.c
+++ b/drivers/misc/habanalabs/debugfs.c
@@ -970,6 +970,49 @@ static ssize_t hl_device_write(struct file *f, const char __user *buf,
return count;
}
+static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ char tmp_buf[200];
+ ssize_t rc;
+
+ if (*ppos)
+ return 0;
+
+ sprintf(tmp_buf, "%d\n", hdev->stop_on_err);
+ rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
+ strlen(tmp_buf) + 1);
+
+ return rc;
+}
+
+static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ u32 value;
+ ssize_t rc;
+
+ if (atomic_read(&hdev->in_reset)) {
+ dev_warn_ratelimited(hdev->dev,
+ "Can't change stop on error during reset\n");
+ return 0;
+ }
+
+ rc = kstrtouint_from_user(buf, count, 10, &value);
+ if (rc)
+ return rc;
+
+ hdev->stop_on_err = value ? 1 : 0;
+
+ hl_device_reset(hdev, false, false);
+
+ return count;
+}
+
static const struct file_operations hl_data32b_fops = {
.owner = THIS_MODULE,
.read = hl_data_read32,
@@ -1015,6 +1058,12 @@ static const struct file_operations hl_device_fops = {
.write = hl_device_write
};
+static const struct file_operations hl_stop_on_err_fops = {
+ .owner = THIS_MODULE,
+ .read = hl_stop_on_err_read,
+ .write = hl_stop_on_err_write
+};
+
static const struct hl_info_list hl_debugfs_list[] = {
{"command_buffers", command_buffers_show, NULL},
{"command_submission", command_submission_show, NULL},
@@ -1152,6 +1201,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
dev_entry,
&hl_device_fops);
+ debugfs_create_file("stop_on_err",
+ 0644,
+ dev_entry->root,
+ dev_entry,
+ &hl_stop_on_err_fops);
+
for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
ent = debugfs_create_file(hl_debugfs_list[i].name,
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index db125cf80850..08f1d4080008 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -800,6 +800,7 @@ static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
u32 so_base_lo, so_base_hi;
u32 gic_base_lo, gic_base_hi;
u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);
+ u32 dma_err_cfg = QMAN_DMA_ERR_MSG_EN;
mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
@@ -836,7 +837,10 @@ static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
else
WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);
- WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, QMAN_DMA_ERR_MSG_EN);
+ if (hdev->stop_on_err)
+ dma_err_cfg |= 1 << DMA_QM_0_GLBL_ERR_CFG_DMA_STOP_ON_ERR_SHIFT;
+
+ WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, dma_err_cfg);
WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
}
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 31ebcf9458fe..ae3db8eb2fb5 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -1300,6 +1300,7 @@ struct hl_device_idle_busy_ts {
* @in_debug: is device under debug. This, together with fpriv_list, enforces
* that only a single user is configuring the debug infrastructure.
* @cdev_sysfs_created: were char devices and sysfs nodes created.
+ * @stop_on_err: true if engines should stop on error.
*/
struct hl_device {
struct pci_dev *pdev;
@@ -1380,6 +1381,7 @@ struct hl_device {
u8 dma_mask;
u8 in_debug;
u8 cdev_sysfs_created;
+ u8 stop_on_err;
/* Parameters for bring-up */
u8 mmu_enable;
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h
index 3c44ef3a23ed..067489bd048e 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h
@@ -55,8 +55,7 @@
(1 << DMA_QM_0_GLBL_ERR_CFG_DMA_ERR_MSG_EN_SHIFT) | \
(1 << DMA_QM_0_GLBL_ERR_CFG_PQF_STOP_ON_ERR_SHIFT) | \
(1 << DMA_QM_0_GLBL_ERR_CFG_CQF_STOP_ON_ERR_SHIFT) | \
- (1 << DMA_QM_0_GLBL_ERR_CFG_CP_STOP_ON_ERR_SHIFT) | \
- (1 << DMA_QM_0_GLBL_ERR_CFG_DMA_STOP_ON_ERR_SHIFT))
+ (1 << DMA_QM_0_GLBL_ERR_CFG_CP_STOP_ON_ERR_SHIFT))
#define QMAN_MME_ENABLE (\
(1 << MME_QM_GLBL_CFG0_PQF_EN_SHIFT) | \