summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorShaoyun Liu <Shaoyun.Liu@amd.com>2018-07-11 22:33:04 -0400
committerOded Gabbay <oded.gabbay@gmail.com>2018-07-11 22:33:04 -0400
commita29ec470b19e58044005973301f233e0b20ed8c4 (patch)
treec1155668317cde74f8de3613783c19744b94d12b /drivers
parent1b0bfcff463f390c4032ebe36a4d5fb777c00a4c (diff)
downloadlinux-stable-a29ec470b19e58044005973301f233e0b20ed8c4.tar.gz
linux-stable-a29ec470b19e58044005973301f233e0b20ed8c4.tar.bz2
linux-stable-a29ec470b19e58044005973301f233e0b20ed8c4.zip
drm/amdkfd: Add debugfs interface to trigger HWS hang
Signed-off-by: Shaoyun Liu <Shaoyun.Liu@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c48
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c23
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c12
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c26
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h4
5 files changed, 113 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
index 4bd6ebfaf425..ab37d36d9cd6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
@@ -21,6 +21,8 @@
*/
#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+
#include "kfd_priv.h"
static struct dentry *debugfs_root;
@@ -32,6 +34,38 @@ static int kfd_debugfs_open(struct inode *inode, struct file *file)
return single_open(file, show, NULL);
}
+static ssize_t kfd_debugfs_hang_hws_write(struct file *file,
+ const char __user *user_buf, size_t size, loff_t *ppos)
+{
+ struct kfd_dev *dev;
+ char tmp[16];
+ uint32_t gpu_id;
+ int ret = -EINVAL;
+
+ memset(tmp, 0, 16);
+ if (size >= 16) {
+ pr_err("Invalid input for gpu id.\n");
+ goto out;
+ }
+ if (copy_from_user(tmp, user_buf, size)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ if (kstrtoint(tmp, 10, &gpu_id)) {
+ pr_err("Invalid input for gpu id.\n");
+ goto out;
+ }
+ dev = kfd_device_by_id(gpu_id);
+ if (dev) {
+ kfd_debugfs_hang_hws(dev);
+ ret = size;
+ } else
+ pr_err("Cannot find device %d.\n", gpu_id);
+
+out:
+ return ret;
+}
+
static const struct file_operations kfd_debugfs_fops = {
.owner = THIS_MODULE,
.open = kfd_debugfs_open,
@@ -40,6 +74,15 @@ static const struct file_operations kfd_debugfs_fops = {
.release = single_release,
};
+static const struct file_operations kfd_debugfs_hang_hws_fops = {
+ .owner = THIS_MODULE,
+ .open = kfd_debugfs_open,
+ .read = seq_read,
+ .write = kfd_debugfs_hang_hws_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
void kfd_debugfs_init(void)
{
struct dentry *ent;
@@ -65,6 +108,11 @@ void kfd_debugfs_init(void)
ent = debugfs_create_file("rls", S_IFREG | 0444, debugfs_root,
kfd_debugfs_rls_by_device,
&kfd_debugfs_fops);
+
+ ent = debugfs_create_file("hang_hws", S_IFREG | 0644, debugfs_root,
+ NULL,
+ &kfd_debugfs_hang_hws_fops);
+
if (!ent)
pr_warn("Failed to create rls in kfd debugfs\n");
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 9f63ac366284..8faa8db3eba5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -914,3 +914,26 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
kfree(mem_obj);
return 0;
}
+
+#if defined(CONFIG_DEBUG_FS)
+
+/* This function will send a package to HIQ to hang the HWS
+ * which will trigger a GPU reset and bring the HWS back to normal state
+ */
+int kfd_debugfs_hang_hws(struct kfd_dev *dev)
+{
+ int r = 0;
+
+ if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) {
+ pr_err("HWS is not enabled");
+ return -EINVAL;
+ }
+
+ r = pm_debugfs_hang_hws(&dev->dqm->packets);
+ if (!r)
+ r = dqm_debugfs_execute_queues(dev->dqm);
+
+ return r;
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 6b59eab39fbe..32e93b53e7e8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1801,4 +1801,16 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
return r;
}
+int dqm_debugfs_execute_queues(struct device_queue_manager *dqm)
+{
+ int r = 0;
+
+ dqm_lock(dqm);
+ dqm->active_runlist = true;
+ r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
+ dqm_unlock(dqm);
+
+ return r;
+}
+
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index c317feb43f69..1092631765cb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -418,4 +418,30 @@ out:
return 0;
}
+int pm_debugfs_hang_hws(struct packet_manager *pm)
+{
+ uint32_t *buffer, size;
+ int r = 0;
+
+ size = pm->pmf->query_status_size;
+ mutex_lock(&pm->lock);
+ pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
+ size / sizeof(uint32_t), (unsigned int **)&buffer);
+ if (!buffer) {
+ pr_err("Failed to allocate buffer on kernel queue\n");
+ r = -ENOMEM;
+ goto out;
+ }
+ memset(buffer, 0x55, size);
+ pm->priv_queue->ops.submit_packet(pm->priv_queue);
+
+ pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
+ buffer[0], buffer[1], buffer[2], buffer[3],
+ buffer[4], buffer[5], buffer[6]);
+out:
+ mutex_unlock(&pm->lock);
+ return r;
+}
+
+
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 2e03d6c80aa0..d9bf70b52857 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -995,6 +995,10 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data);
int kfd_debugfs_rls_by_device(struct seq_file *m, void *data);
int pm_debugfs_runlist(struct seq_file *m, void *data);
+int kfd_debugfs_hang_hws(struct kfd_dev *dev);
+int pm_debugfs_hang_hws(struct packet_manager *pm);
+int dqm_debugfs_execute_queues(struct device_queue_manager *dqm);
+
#else
static inline void kfd_debugfs_init(void) {}