summaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Kconfig2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c8
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cik_int.h3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c9
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c49
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h11
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c20
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c20
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c615
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.h18
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c83
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c34
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h41
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c72
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c9
-rw-r--r--drivers/gpu/drm/radeon/Makefile3
-rw-r--r--drivers/gpu/drm/radeon/cik.c14
-rw-r--r--drivers/gpu/drm/radeon/cikd.h2
-rw-r--r--drivers/gpu/drm/radeon/radeon.h3
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.c10
-rw-r--r--drivers/gpu/drm/radeon/radeon_kfd.c901
-rw-r--r--drivers/gpu/drm/radeon/radeon_kfd.h47
-rw-r--r--drivers/gpu/drm/radeon/radeon_kms.c7
25 files changed, 449 insertions, 1541 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index e13c67c8d2c0..bc5a2945bd2b 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -4,6 +4,6 @@
config HSA_AMD
tristate "HSA kernel driver for AMD GPU devices"
- depends on (DRM_RADEON || DRM_AMDGPU) && AMD_IOMMU_V2 && X86_64
+ depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64
help
Enable this if you want to use HSA features on AMD GPU devices.
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 211fc48697fa..3d5ccb3755d4 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -36,6 +36,7 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
/* Do not process in ISR, just request it to be forwarded to WQ. */
return (pasid != 0) &&
(ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
+ ihre->source_id == CIK_INTSRC_SDMA_TRAP ||
ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE);
}
@@ -46,6 +47,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
unsigned int pasid;
const struct cik_ih_ring_entry *ihre =
(const struct cik_ih_ring_entry *)ih_ring_entry;
+ uint32_t context_id = ihre->data & 0xfffffff;
pasid = (ihre->ring_id & 0xffff0000) >> 16;
@@ -53,9 +55,11 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
return;
if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE)
- kfd_signal_event_interrupt(pasid, 0, 0);
+ kfd_signal_event_interrupt(pasid, context_id, 28);
+ else if (ihre->source_id == CIK_INTSRC_SDMA_TRAP)
+ kfd_signal_event_interrupt(pasid, context_id, 28);
else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG)
- kfd_signal_event_interrupt(pasid, ihre->data & 0xFF, 8);
+ kfd_signal_event_interrupt(pasid, context_id & 0xff, 8);
else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE)
kfd_signal_hw_exception_event(pasid);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_int.h b/drivers/gpu/drm/amd/amdkfd/cik_int.h
index 79a16d24c1b8..109298b9d507 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_int.h
+++ b/drivers/gpu/drm/amd/amdkfd/cik_int.h
@@ -32,9 +32,10 @@ struct cik_ih_ring_entry {
uint32_t reserved;
};
-#define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6
#define CIK_INTSRC_CP_END_OF_PIPE 0xB5
#define CIK_INTSRC_CP_BAD_OPCODE 0xB7
+#define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6
+#define CIK_INTSRC_SDMA_TRAP 0xE0
#define CIK_INTSRC_SQ_INTERRUPT_MSG 0xEF
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 0ef82b229754..505d39156acd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -450,8 +450,8 @@ static int kfd_ioctl_dbg_register(struct file *filep,
return -EINVAL;
}
- mutex_lock(kfd_get_dbgmgr_mutex());
mutex_lock(&p->mutex);
+ mutex_lock(kfd_get_dbgmgr_mutex());
/*
* make sure that we have pdd, if this the first queue created for
@@ -479,8 +479,8 @@ static int kfd_ioctl_dbg_register(struct file *filep,
}
out:
- mutex_unlock(&p->mutex);
mutex_unlock(kfd_get_dbgmgr_mutex());
+ mutex_unlock(&p->mutex);
return status;
}
@@ -835,15 +835,12 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
void *data)
{
struct kfd_ioctl_wait_events_args *args = data;
- enum kfd_event_wait_result wait_result;
int err;
err = kfd_wait_on_events(p, args->num_events,
(void __user *)args->events_ptr,
(args->wait_for_all != 0),
- args->timeout, &wait_result);
-
- args->wait_result = wait_result;
+ args->timeout, &args->wait_result);
return err;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 46049f005b02..621a3b53a038 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -403,7 +403,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
if (kfd->interrupts_active
&& interrupt_is_wanted(kfd, ih_ring_entry)
&& enqueue_ih_ring_entry(kfd, ih_ring_entry))
- schedule_work(&kfd->interrupt_work);
+ queue_work(kfd->ih_wq, &kfd->interrupt_work);
spin_unlock(&kfd->interrupt_lock);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index da3b74315acf..e202921c150e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -389,12 +389,11 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
if (sched_policy != KFD_SCHED_POLICY_NO_HWS) {
retval = unmap_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
- if (retval != 0) {
+ if (retval) {
pr_err("unmap queue failed\n");
goto out_unlock;
}
- } else if (sched_policy == KFD_SCHED_POLICY_NO_HWS &&
- prev_active &&
+ } else if (prev_active &&
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
retval = mqd->destroy_mqd(mqd, q->mqd,
@@ -408,24 +407,25 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
- if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
- retval = map_queues_cpsch(dqm);
- else if (sched_policy == KFD_SCHED_POLICY_NO_HWS &&
- q->properties.is_active &&
- (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
- q->properties.type == KFD_QUEUE_TYPE_SDMA))
- retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue,
- &q->properties, q->process->mm);
-
/*
- * check active state vs. the previous state
- * and modify counter accordingly
+ * check active state vs. the previous state and modify
+ * counter accordingly. map_queues_cpsch uses the
+ * dqm->queue_count to determine whether a new runlist must be
+ * uploaded.
*/
if (q->properties.is_active && !prev_active)
dqm->queue_count++;
else if (!q->properties.is_active && prev_active)
dqm->queue_count--;
+ if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
+ retval = map_queues_cpsch(dqm);
+ else if (q->properties.is_active &&
+ (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA))
+ retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue,
+ &q->properties, q->process->mm);
+
out_unlock:
mutex_unlock(&dqm->lock);
return retval;
@@ -467,7 +467,7 @@ static int register_process(struct device_queue_manager *dqm,
mutex_lock(&dqm->lock);
list_add(&n->list, &dqm->queues);
- retval = dqm->ops_asic_specific.register_process(dqm, qpd);
+ retval = dqm->asic_ops.update_qpd(dqm, qpd);
dqm->processes_count++;
@@ -629,7 +629,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
- dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
+ dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
if (retval)
@@ -696,8 +696,6 @@ static int set_sched_resources(struct device_queue_manager *dqm)
static int initialize_cpsch(struct device_queue_manager *dqm)
{
- int retval;
-
pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
mutex_init(&dqm->lock);
@@ -706,11 +704,8 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
dqm->sdma_queue_count = 0;
dqm->active_runlist = false;
dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
- retval = dqm->ops_asic_specific.initialize(dqm);
- if (retval)
- mutex_destroy(&dqm->lock);
- return retval;
+ return 0;
}
static int start_cpsch(struct device_queue_manager *dqm)
@@ -835,7 +830,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
retval = allocate_sdma_queue(dqm, &q->sdma_id);
- if (retval != 0)
+ if (retval)
goto out;
q->properties.sdma_queue_id =
q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
@@ -850,7 +845,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
goto out;
}
- dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
+ dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
if (retval)
@@ -1095,7 +1090,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
qpd->sh_mem_ape1_limit = limit >> 16;
}
- retval = dqm->ops_asic_specific.set_cache_memory_policy(
+ retval = dqm->asic_ops.set_cache_memory_policy(
dqm,
qpd,
default_policy,
@@ -1270,11 +1265,11 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
switch (dev->device_info->asic_family) {
case CHIP_CARRIZO:
- device_queue_manager_init_vi(&dqm->ops_asic_specific);
+ device_queue_manager_init_vi(&dqm->asic_ops);
break;
case CHIP_KAVERI:
- device_queue_manager_init_cik(&dqm->ops_asic_specific);
+ device_queue_manager_init_cik(&dqm->asic_ops);
break;
default:
WARN(1, "Unexpected ASIC family %u",
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 31c2b1f9d320..5b77cb69f732 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -128,9 +128,8 @@ struct device_queue_manager_ops {
};
struct device_queue_manager_asic_ops {
- int (*register_process)(struct device_queue_manager *dqm,
+ int (*update_qpd)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
- int (*initialize)(struct device_queue_manager *dqm);
bool (*set_cache_memory_policy)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
@@ -156,7 +155,7 @@ struct device_queue_manager_asic_ops {
struct device_queue_manager {
struct device_queue_manager_ops ops;
- struct device_queue_manager_asic_ops ops_asic_specific;
+ struct device_queue_manager_asic_ops asic_ops;
struct mqd_manager *mqds[KFD_MQD_TYPE_MAX];
struct packet_manager packets;
@@ -179,8 +178,10 @@ struct device_queue_manager {
bool active_runlist;
};
-void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops);
-void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops);
+void device_queue_manager_init_cik(
+ struct device_queue_manager_asic_ops *asic_ops);
+void device_queue_manager_init_vi(
+ struct device_queue_manager_asic_ops *asic_ops);
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
unsigned int get_queues_num(struct device_queue_manager *dqm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index 72c3cbabc0a7..28e48c90c596 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -32,18 +32,17 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
-static int register_process_cik(struct device_queue_manager *dqm,
+static int update_qpd_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
-static int initialize_cpsch_cik(struct device_queue_manager *dqm);
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
-void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops)
+void device_queue_manager_init_cik(
+ struct device_queue_manager_asic_ops *asic_ops)
{
- ops->set_cache_memory_policy = set_cache_memory_policy_cik;
- ops->register_process = register_process_cik;
- ops->initialize = initialize_cpsch_cik;
- ops->init_sdma_vm = init_sdma_vm;
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
+ asic_ops->update_qpd = update_qpd_cik;
+ asic_ops->init_sdma_vm = init_sdma_vm;
}
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
@@ -99,7 +98,7 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
return true;
}
-static int register_process_cik(struct device_queue_manager *dqm,
+static int update_qpd_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct kfd_process_device *pdd;
@@ -148,8 +147,3 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
q->properties.sdma_vm_addr = value;
}
-
-static int initialize_cpsch_cik(struct device_queue_manager *dqm)
-{
- return 0;
-}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index 40e9ddd096cd..2fbce57a2f21 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -33,18 +33,17 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
-static int register_process_vi(struct device_queue_manager *dqm,
+static int update_qpd_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
-static int initialize_cpsch_vi(struct device_queue_manager *dqm);
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd);
-void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops)
+void device_queue_manager_init_vi(
+ struct device_queue_manager_asic_ops *asic_ops)
{
- ops->set_cache_memory_policy = set_cache_memory_policy_vi;
- ops->register_process = register_process_vi;
- ops->initialize = initialize_cpsch_vi;
- ops->init_sdma_vm = init_sdma_vm;
+ asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi;
+ asic_ops->update_qpd = update_qpd_vi;
+ asic_ops->init_sdma_vm = init_sdma_vm;
}
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
@@ -104,7 +103,7 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
return true;
}
-static int register_process_vi(struct device_queue_manager *dqm,
+static int update_qpd_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct kfd_process_device *pdd;
@@ -160,8 +159,3 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
q->properties.sdma_vm_addr = value;
}
-
-static int initialize_cpsch_vi(struct device_queue_manager *dqm)
-{
- return 0;
-}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 944abfad39c1..cb92d4b72400 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -24,8 +24,8 @@
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
#include <linux/uaccess.h>
-#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/memory.h>
#include "kfd_priv.h"
@@ -33,185 +33,89 @@
#include <linux/device.h>
/*
- * A task can only be on a single wait_queue at a time, but we need to support
- * waiting on multiple events (any/all).
- * Instead of each event simply having a wait_queue with sleeping tasks, it
- * has a singly-linked list of tasks.
- * A thread that wants to sleep creates an array of these, one for each event
- * and adds one to each event's waiter chain.
+ * Wrapper around wait_queue_entry_t
*/
struct kfd_event_waiter {
- struct list_head waiters;
- struct task_struct *sleeping_task;
-
- /* Transitions to true when the event this belongs to is signaled. */
- bool activated;
-
- /* Event */
- struct kfd_event *event;
- uint32_t input_index;
+ wait_queue_entry_t wait;
+ struct kfd_event *event; /* Event to wait for */
+ bool activated; /* Becomes true when event is signaled */
};
/*
- * Over-complicated pooled allocator for event notification slots.
- *
* Each signal event needs a 64-bit signal slot where the signaler will write
- * a 1 before sending an interrupt.l (This is needed because some interrupts
+ * a 1 before sending an interrupt. (This is needed because some interrupts
* do not contain enough spare data bits to identify an event.)
- * We get whole pages from vmalloc and map them to the process VA.
- * Individual signal events are then allocated a slot in a page.
+ * We get whole pages and map them to the process VA.
+ * Individual signal events use their event_id as slot index.
*/
-
-struct signal_page {
- struct list_head event_pages; /* kfd_process.signal_event_pages */
+struct kfd_signal_page {
uint64_t *kernel_address;
uint64_t __user *user_address;
- uint32_t page_index; /* Index into the mmap aperture. */
- unsigned int free_slots;
- unsigned long used_slot_bitmap[0];
};
-#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT
-#define SLOT_BITMAP_SIZE BITS_TO_LONGS(SLOTS_PER_PAGE)
-#define BITS_PER_PAGE (ilog2(SLOTS_PER_PAGE)+1)
-#define SIGNAL_PAGE_SIZE (sizeof(struct signal_page) + \
- SLOT_BITMAP_SIZE * sizeof(long))
-
-/*
- * For signal events, the event ID is used as the interrupt user data.
- * For SQ s_sendmsg interrupts, this is limited to 8 bits.
- */
-
-#define INTERRUPT_DATA_BITS 8
-#define SIGNAL_EVENT_ID_SLOT_SHIFT 0
-static uint64_t *page_slots(struct signal_page *page)
+static uint64_t *page_slots(struct kfd_signal_page *page)
{
return page->kernel_address;
}
-static bool allocate_free_slot(struct kfd_process *process,
- struct signal_page **out_page,
- unsigned int *out_slot_index)
-{
- struct signal_page *page;
-
- list_for_each_entry(page, &process->signal_event_pages, event_pages) {
- if (page->free_slots > 0) {
- unsigned int slot =
- find_first_zero_bit(page->used_slot_bitmap,
- SLOTS_PER_PAGE);
-
- __set_bit(slot, page->used_slot_bitmap);
- page->free_slots--;
-
- page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT;
-
- *out_page = page;
- *out_slot_index = slot;
-
- pr_debug("Allocated event signal slot in page %p, slot %d\n",
- page, slot);
-
- return true;
- }
- }
-
- pr_debug("No free event signal slots were found for process %p\n",
- process);
-
- return false;
-}
-
-#define list_tail_entry(head, type, member) \
- list_entry((head)->prev, type, member)
-
-static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p)
+static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
{
void *backing_store;
- struct signal_page *page;
+ struct kfd_signal_page *page;
- page = kzalloc(SIGNAL_PAGE_SIZE, GFP_KERNEL);
+ page = kzalloc(sizeof(*page), GFP_KERNEL);
if (!page)
- goto fail_alloc_signal_page;
+ return NULL;
- page->free_slots = SLOTS_PER_PAGE;
-
- backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ backing_store = (void *) __get_free_pages(GFP_KERNEL,
get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
if (!backing_store)
goto fail_alloc_signal_store;
- /* prevent user-mode info leaks */
+ /* Initialize all events to unsignaled */
memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT,
- KFD_SIGNAL_EVENT_LIMIT * 8);
+ KFD_SIGNAL_EVENT_LIMIT * 8);
page->kernel_address = backing_store;
-
- if (list_empty(&p->signal_event_pages))
- page->page_index = 0;
- else
- page->page_index = list_tail_entry(&p->signal_event_pages,
- struct signal_page,
- event_pages)->page_index + 1;
-
pr_debug("Allocated new event signal page at %p, for process %p\n",
page, p);
- pr_debug("Page index is %d\n", page->page_index);
- list_add(&page->event_pages, &p->signal_event_pages);
-
- return true;
+ return page;
fail_alloc_signal_store:
kfree(page);
-fail_alloc_signal_page:
- return false;
+ return NULL;
}
-static bool allocate_event_notification_slot(struct file *devkfd,
- struct kfd_process *p,
- struct signal_page **page,
- unsigned int *signal_slot_index)
+static int allocate_event_notification_slot(struct kfd_process *p,
+ struct kfd_event *ev)
{
- bool ret;
+ int id;
- ret = allocate_free_slot(p, page, signal_slot_index);
- if (!ret) {
- ret = allocate_signal_page(devkfd, p);
- if (ret)
- ret = allocate_free_slot(p, page, signal_slot_index);
+ if (!p->signal_page) {
+ p->signal_page = allocate_signal_page(p);
+ if (!p->signal_page)
+ return -ENOMEM;
+ /* Oldest user mode expects 256 event slots */
+ p->signal_mapped_size = 256*8;
}
- return ret;
-}
-
-/* Assumes that the process's event_mutex is locked. */
-static void release_event_notification_slot(struct signal_page *page,
- size_t slot_index)
-{
- __clear_bit(slot_index, page->used_slot_bitmap);
- page->free_slots++;
-
- /* We don't free signal pages, they are retained by the process
- * and reused until it exits.
- */
-}
-
-static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p,
- unsigned int page_index)
-{
- struct signal_page *page;
-
/*
- * This is safe because we don't delete signal pages until the
- * process exits.
+ * Compatibility with old user mode: Only use signal slots
+ * user mode has mapped, may be less than
+ * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
+ * of the event limit without breaking user mode.
*/
- list_for_each_entry(page, &p->signal_event_pages, event_pages)
- if (page->page_index == page_index)
- return page;
+ id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
+ GFP_KERNEL);
+ if (id < 0)
+ return id;
- return NULL;
+ ev->event_id = id;
+ page_slots(p->signal_page)[id] = UNSIGNALED_EVENT_SLOT;
+
+ return 0;
}
/*
@@ -220,99 +124,81 @@ static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p,
*/
static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
{
- struct kfd_event *ev;
-
- hash_for_each_possible(p->events, ev, events, id)
- if (ev->event_id == id)
- return ev;
-
- return NULL;
+ return idr_find(&p->event_idr, id);
}
-static u32 make_signal_event_id(struct signal_page *page,
- unsigned int signal_slot_index)
-{
- return page->page_index |
- (signal_slot_index << SIGNAL_EVENT_ID_SLOT_SHIFT);
-}
-
-/*
- * Produce a kfd event id for a nonsignal event.
- * These are arbitrary numbers, so we do a sequential search through
- * the hash table for an unused number.
+/**
+ * lookup_signaled_event_by_partial_id - Lookup signaled event from partial ID
+ * @p: Pointer to struct kfd_process
+ * @id: ID to look up
+ * @bits: Number of valid bits in @id
+ *
+ * Finds the first signaled event with a matching partial ID. If no
+ * matching signaled event is found, returns NULL. In that case the
+ * caller should assume that the partial ID is invalid and do an
+ * exhaustive search of all siglaned events.
+ *
+ * If multiple events with the same partial ID signal at the same
+ * time, they will be found one interrupt at a time, not necessarily
+ * in the same order the interrupts occurred. As long as the number of
+ * interrupts is correct, all signaled events will be seen by the
+ * driver.
*/
-static u32 make_nonsignal_event_id(struct kfd_process *p)
+static struct kfd_event *lookup_signaled_event_by_partial_id(
+ struct kfd_process *p, uint32_t id, uint32_t bits)
{
- u32 id;
-
- for (id = p->next_nonsignal_event_id;
- id < KFD_LAST_NONSIGNAL_EVENT_ID &&
- lookup_event_by_id(p, id);
- id++)
- ;
+ struct kfd_event *ev;
- if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
+ if (!p->signal_page || id >= KFD_SIGNAL_EVENT_LIMIT)
+ return NULL;
- /*
- * What if id == LAST_NONSIGNAL_EVENT_ID - 1?
- * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so
- * the first loop fails immediately and we proceed with the
- * wraparound loop below.
- */
- p->next_nonsignal_event_id = id + 1;
+ /* Fast path for the common case that @id is not a partial ID
+ * and we only need a single lookup.
+ */
+ if (bits > 31 || (1U << bits) >= KFD_SIGNAL_EVENT_LIMIT) {
+ if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT)
+ return NULL;
- return id;
+ return idr_find(&p->event_idr, id);
}
- for (id = KFD_FIRST_NONSIGNAL_EVENT_ID;
- id < KFD_LAST_NONSIGNAL_EVENT_ID &&
- lookup_event_by_id(p, id);
- id++)
- ;
-
+ /* General case for partial IDs: Iterate over all matching IDs
+ * and find the first one that has signaled.
+ */
+ for (ev = NULL; id < KFD_SIGNAL_EVENT_LIMIT && !ev; id += 1U << bits) {
+ if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT)
+ continue;
- if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
- p->next_nonsignal_event_id = id + 1;
- return id;
+ ev = idr_find(&p->event_idr, id);
}
- p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
- return 0;
-}
-
-static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p,
- struct signal_page *page,
- unsigned int signal_slot)
-{
- return lookup_event_by_id(p, make_signal_event_id(page, signal_slot));
+ return ev;
}
static int create_signal_event(struct file *devkfd,
struct kfd_process *p,
struct kfd_event *ev)
{
- if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) {
+ int ret;
+
+ if (p->signal_mapped_size &&
+ p->signal_event_count == p->signal_mapped_size / 8) {
if (!p->signal_event_limit_reached) {
pr_warn("Signal event wasn't created because limit was reached\n");
p->signal_event_limit_reached = true;
}
- return -ENOMEM;
+ return -ENOSPC;
}
- if (!allocate_event_notification_slot(devkfd, p, &ev->signal_page,
- &ev->signal_slot_index)) {
+ ret = allocate_event_notification_slot(p, ev);
+ if (ret) {
pr_warn("Signal event wasn't created because out of kernel memory\n");
- return -ENOMEM;
+ return ret;
}
p->signal_event_count++;
- ev->user_signal_address =
- &ev->signal_page->user_address[ev->signal_slot_index];
-
- ev->event_id = make_signal_event_id(ev->signal_page,
- ev->signal_slot_index);
-
+ ev->user_signal_address = &p->signal_page->user_address[ev->event_id];
pr_debug("Signal event number %zu created with id %d, address %p\n",
p->signal_event_count, ev->event_id,
ev->user_signal_address);
@@ -320,16 +206,20 @@ static int create_signal_event(struct file *devkfd,
return 0;
}
-/*
- * No non-signal events are supported yet.
- * We create them as events that never signal.
- * Set event calls from user-mode are failed.
- */
static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
{
- ev->event_id = make_nonsignal_event_id(p);
- if (ev->event_id == 0)
- return -ENOMEM;
+ /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
+ * intentional integer overflow to -1 without a compiler
+ * warning. idr_alloc treats a negative value as "maximum
+ * signed integer".
+ */
+ int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
+ (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
+ GFP_KERNEL);
+
+ if (id < 0)
+ return id;
+ ev->event_id = id;
return 0;
}
@@ -337,50 +227,47 @@ static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
void kfd_event_init_process(struct kfd_process *p)
{
mutex_init(&p->event_mutex);
- hash_init(p->events);
- INIT_LIST_HEAD(&p->signal_event_pages);
- p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+ idr_init(&p->event_idr);
+ p->signal_page = NULL;
p->signal_event_count = 0;
}
static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
{
- if (ev->signal_page) {
- release_event_notification_slot(ev->signal_page,
- ev->signal_slot_index);
- p->signal_event_count--;
- }
+ struct kfd_event_waiter *waiter;
- /*
- * Abandon the list of waiters. Individual waiting threads will
- * clean up their own data.
- */
- list_del(&ev->waiters);
+ /* Wake up pending waiters. They will return failure */
+ list_for_each_entry(waiter, &ev->wq.head, wait.entry)
+ waiter->event = NULL;
+ wake_up_all(&ev->wq);
+
+ if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
+ ev->type == KFD_EVENT_TYPE_DEBUG)
+ p->signal_event_count--;
- hash_del(&ev->events);
+ idr_remove(&p->event_idr, ev->event_id);
kfree(ev);
}
static void destroy_events(struct kfd_process *p)
{
struct kfd_event *ev;
- struct hlist_node *tmp;
- unsigned int hash_bkt;
+ uint32_t id;
- hash_for_each_safe(p->events, hash_bkt, tmp, ev, events)
+ idr_for_each_entry(&p->event_idr, ev, id)
destroy_event(p, ev);
+ idr_destroy(&p->event_idr);
}
/*
* We assume that the process is being destroyed and there is no need to
* unmap the pages or keep bookkeeping data in order.
*/
-static void shutdown_signal_pages(struct kfd_process *p)
+static void shutdown_signal_page(struct kfd_process *p)
{
- struct signal_page *page, *tmp;
+ struct kfd_signal_page *page = p->signal_page;
- list_for_each_entry_safe(page, tmp, &p->signal_event_pages,
- event_pages) {
+ if (page) {
free_pages((unsigned long)page->kernel_address,
get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
kfree(page);
@@ -390,7 +277,7 @@ static void shutdown_signal_pages(struct kfd_process *p)
void kfd_event_free_process(struct kfd_process *p)
{
destroy_events(p);
- shutdown_signal_pages(p);
+ shutdown_signal_page(p);
}
static bool event_can_be_gpu_signaled(const struct kfd_event *ev)
@@ -419,7 +306,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
ev->auto_reset = auto_reset;
ev->signaled = false;
- INIT_LIST_HEAD(&ev->waiters);
+ init_waitqueue_head(&ev->wq);
*event_page_offset = 0;
@@ -430,10 +317,9 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
case KFD_EVENT_TYPE_DEBUG:
ret = create_signal_event(devkfd, p, ev);
if (!ret) {
- *event_page_offset = (ev->signal_page->page_index |
- KFD_MMAP_EVENTS_MASK);
+ *event_page_offset = KFD_MMAP_EVENTS_MASK;
*event_page_offset <<= PAGE_SHIFT;
- *event_slot_index = ev->signal_slot_index;
+ *event_slot_index = ev->event_id;
}
break;
default:
@@ -442,8 +328,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
}
if (!ret) {
- hash_add(p->events, &ev->events, ev->event_id);
-
*event_id = ev->event_id;
*event_trigger_data = ev->event_id;
} else {
@@ -477,19 +361,18 @@ int kfd_event_destroy(struct kfd_process *p, uint32_t event_id)
static void set_event(struct kfd_event *ev)
{
struct kfd_event_waiter *waiter;
- struct kfd_event_waiter *next;
- /* Auto reset if the list is non-empty and we're waking someone. */
- ev->signaled = !ev->auto_reset || list_empty(&ev->waiters);
+ /* Auto reset if the list is non-empty and we're waking
+ * someone. waitqueue_active is safe here because we're
+ * protected by the p->event_mutex, which is also held when
+ * updating the wait queues in kfd_wait_on_events.
+ */
+ ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq);
- list_for_each_entry_safe(waiter, next, &ev->waiters, waiters) {
+ list_for_each_entry(waiter, &ev->wq.head, wait.entry)
waiter->activated = true;
- /* _init because free_waiters will call list_del */
- list_del_init(&waiter->waiters);
-
- wake_up_process(waiter->sleeping_task);
- }
+ wake_up_all(&ev->wq);
}
/* Assumes that p is current. */
@@ -538,13 +421,7 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
{
- page_slots(ev->signal_page)[ev->signal_slot_index] =
- UNSIGNALED_EVENT_SLOT;
-}
-
-static bool is_slot_signaled(struct signal_page *page, unsigned int index)
-{
- return page_slots(page)[index] != UNSIGNALED_EVENT_SLOT;
+ page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT;
}
static void set_event_from_interrupt(struct kfd_process *p,
@@ -559,7 +436,7 @@ static void set_event_from_interrupt(struct kfd_process *p,
void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
uint32_t valid_id_bits)
{
- struct kfd_event *ev;
+ struct kfd_event *ev = NULL;
/*
* Because we are called from arbitrary context (workqueue) as opposed
@@ -573,26 +450,46 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
mutex_lock(&p->event_mutex);
- if (valid_id_bits >= INTERRUPT_DATA_BITS) {
- /* Partial ID is a full ID. */
- ev = lookup_event_by_id(p, partial_id);
+ if (valid_id_bits)
+ ev = lookup_signaled_event_by_partial_id(p, partial_id,
+ valid_id_bits);
+ if (ev) {
set_event_from_interrupt(p, ev);
- } else {
+ } else if (p->signal_page) {
/*
- * Partial ID is in fact partial. For now we completely
- * ignore it, but we could use any bits we did receive to
- * search faster.
+ * Partial ID lookup failed. Assume that the event ID
+ * in the interrupt payload was invalid and do an
+ * exhaustive search of signaled events.
*/
- struct signal_page *page;
- unsigned int i;
-
- list_for_each_entry(page, &p->signal_event_pages, event_pages)
- for (i = 0; i < SLOTS_PER_PAGE; i++)
- if (is_slot_signaled(page, i)) {
- ev = lookup_event_by_page_slot(p,
- page, i);
+ uint64_t *slots = page_slots(p->signal_page);
+ uint32_t id;
+
+ if (valid_id_bits)
+ pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n",
+ partial_id, valid_id_bits);
+
+ if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) {
+ /* With relatively few events, it's faster to
+ * iterate over the event IDR
+ */
+ idr_for_each_entry(&p->event_idr, ev, id) {
+ if (id >= KFD_SIGNAL_EVENT_LIMIT)
+ break;
+
+ if (slots[id] != UNSIGNALED_EVENT_SLOT)
+ set_event_from_interrupt(p, ev);
+ }
+ } else {
+ /* With relatively many events, it's faster to
+ * iterate over the signal slots and lookup
+ * only signaled events from the IDR.
+ */
+ for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++)
+ if (slots[id] != UNSIGNALED_EVENT_SLOT) {
+ ev = lookup_event_by_id(p, id);
set_event_from_interrupt(p, ev);
}
+ }
}
mutex_unlock(&p->event_mutex);
@@ -609,18 +506,16 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
GFP_KERNEL);
for (i = 0; (event_waiters) && (i < num_events) ; i++) {
- INIT_LIST_HEAD(&event_waiters[i].waiters);
- event_waiters[i].sleeping_task = current;
+ init_wait(&event_waiters[i].wait);
event_waiters[i].activated = false;
}
return event_waiters;
}
-static int init_event_waiter(struct kfd_process *p,
+static int init_event_waiter_get_status(struct kfd_process *p,
struct kfd_event_waiter *waiter,
- uint32_t event_id,
- uint32_t input_index)
+ uint32_t event_id)
{
struct kfd_event *ev = lookup_event_by_id(p, event_id);
@@ -628,38 +523,60 @@ static int init_event_waiter(struct kfd_process *p,
return -EINVAL;
waiter->event = ev;
- waiter->input_index = input_index;
waiter->activated = ev->signaled;
ev->signaled = ev->signaled && !ev->auto_reset;
- list_add(&waiter->waiters, &ev->waiters);
-
return 0;
}
-static bool test_event_condition(bool all, uint32_t num_events,
+static void init_event_waiter_add_to_waitlist(struct kfd_event_waiter *waiter)
+{
+ struct kfd_event *ev = waiter->event;
+
+ /* Only add to the wait list if we actually need to
+ * wait on this event.
+ */
+ if (!waiter->activated)
+ add_wait_queue(&ev->wq, &waiter->wait);
+}
+
+/* test_event_condition - Test condition of events being waited for
+ * @all: Return completion only if all events have signaled
+ * @num_events: Number of events to wait for
+ * @event_waiters: Array of event waiters, one per event
+ *
+ * Returns KFD_IOC_WAIT_RESULT_COMPLETE if all (or one) event(s) have
+ * signaled. Returns KFD_IOC_WAIT_RESULT_TIMEOUT if no (or not all)
+ * events have signaled. Returns KFD_IOC_WAIT_RESULT_FAIL if any of
+ * the events have been destroyed.
+ */
+static uint32_t test_event_condition(bool all, uint32_t num_events,
struct kfd_event_waiter *event_waiters)
{
uint32_t i;
uint32_t activated_count = 0;
for (i = 0; i < num_events; i++) {
+ if (!event_waiters[i].event)
+ return KFD_IOC_WAIT_RESULT_FAIL;
+
if (event_waiters[i].activated) {
if (!all)
- return true;
+ return KFD_IOC_WAIT_RESULT_COMPLETE;
activated_count++;
}
}
- return activated_count == num_events;
+ return activated_count == num_events ?
+ KFD_IOC_WAIT_RESULT_COMPLETE : KFD_IOC_WAIT_RESULT_TIMEOUT;
}
/*
* Copy event specific data, if defined.
* Currently only memory exception events have additional data to copy to user
*/
-static bool copy_signaled_event_data(uint32_t num_events,
+static int copy_signaled_event_data(uint32_t num_events,
struct kfd_event_waiter *event_waiters,
struct kfd_event_data __user *data)
{
@@ -673,15 +590,15 @@ static bool copy_signaled_event_data(uint32_t num_events,
waiter = &event_waiters[i];
event = waiter->event;
if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) {
- dst = &data[waiter->input_index].memory_exception_data;
+ dst = &data[i].memory_exception_data;
src = &event->memory_exception_data;
if (copy_to_user(dst, src,
sizeof(struct kfd_hsa_memory_exception_data)))
- return false;
+ return -EFAULT;
}
}
- return true;
+ return 0;
}
@@ -710,7 +627,9 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
uint32_t i;
for (i = 0; i < num_events; i++)
- list_del(&waiters[i].waiters);
+ if (waiters[i].event)
+ remove_wait_queue(&waiters[i].event->wq,
+ &waiters[i].wait);
kfree(waiters);
}
@@ -718,38 +637,56 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
int kfd_wait_on_events(struct kfd_process *p,
uint32_t num_events, void __user *data,
bool all, uint32_t user_timeout_ms,
- enum kfd_event_wait_result *wait_result)
+ uint32_t *wait_result)
{
struct kfd_event_data __user *events =
(struct kfd_event_data __user *) data;
uint32_t i;
int ret = 0;
+
struct kfd_event_waiter *event_waiters = NULL;
long timeout = user_timeout_to_jiffies(user_timeout_ms);
- mutex_lock(&p->event_mutex);
-
event_waiters = alloc_event_waiters(num_events);
if (!event_waiters) {
ret = -ENOMEM;
- goto fail;
+ goto out;
}
+ mutex_lock(&p->event_mutex);
+
for (i = 0; i < num_events; i++) {
struct kfd_event_data event_data;
if (copy_from_user(&event_data, &events[i],
sizeof(struct kfd_event_data))) {
ret = -EFAULT;
- goto fail;
+ goto out_unlock;
}
- ret = init_event_waiter(p, &event_waiters[i],
- event_data.event_id, i);
+ ret = init_event_waiter_get_status(p, &event_waiters[i],
+ event_data.event_id);
if (ret)
- goto fail;
+ goto out_unlock;
}
+ /* Check condition once. */
+ *wait_result = test_event_condition(all, num_events, event_waiters);
+ if (*wait_result == KFD_IOC_WAIT_RESULT_COMPLETE) {
+ ret = copy_signaled_event_data(num_events,
+ event_waiters, events);
+ goto out_unlock;
+ } else if (WARN_ON(*wait_result == KFD_IOC_WAIT_RESULT_FAIL)) {
+ /* This should not happen. Events shouldn't be
+ * destroyed while we're holding the event_mutex
+ */
+ goto out_unlock;
+ }
+
+ /* Add to wait lists if we need to wait. */
+ for (i = 0; i < num_events; i++)
+ init_event_waiter_add_to_waitlist(&event_waiters[i]);
+
mutex_unlock(&p->event_mutex);
while (true) {
@@ -771,62 +708,66 @@ int kfd_wait_on_events(struct kfd_process *p,
break;
}
- if (test_event_condition(all, num_events, event_waiters)) {
- if (copy_signaled_event_data(num_events,
- event_waiters, events))
- *wait_result = KFD_WAIT_COMPLETE;
- else
- *wait_result = KFD_WAIT_ERROR;
+ /* Set task state to interruptible sleep before
+ * checking wake-up conditions. A concurrent wake-up
+ * will put the task back into runnable state. In that
+ * case schedule_timeout will not put the task to
+ * sleep and we'll get a chance to re-check the
+ * updated conditions almost immediately. Otherwise,
+ * this race condition would lead to a soft hang or a
+ * very long sleep.
+ */
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ *wait_result = test_event_condition(all, num_events,
+ event_waiters);
+ if (*wait_result != KFD_IOC_WAIT_RESULT_TIMEOUT)
break;
- }
- if (timeout <= 0) {
- *wait_result = KFD_WAIT_TIMEOUT;
+ if (timeout <= 0)
break;
- }
- timeout = schedule_timeout_interruptible(timeout);
+ timeout = schedule_timeout(timeout);
}
__set_current_state(TASK_RUNNING);
+ /* copy_signaled_event_data may sleep. So this has to happen
+ * after the task state is set back to RUNNING.
+ */
+ if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE)
+ ret = copy_signaled_event_data(num_events,
+ event_waiters, events);
+
mutex_lock(&p->event_mutex);
+out_unlock:
free_waiters(num_events, event_waiters);
mutex_unlock(&p->event_mutex);
-
- return ret;
-
-fail:
- if (event_waiters)
- free_waiters(num_events, event_waiters);
-
- mutex_unlock(&p->event_mutex);
-
- *wait_result = KFD_WAIT_ERROR;
+out:
+ if (ret)
+ *wait_result = KFD_IOC_WAIT_RESULT_FAIL;
+ else if (*wait_result == KFD_IOC_WAIT_RESULT_FAIL)
+ ret = -EIO;
return ret;
}
int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
{
-
- unsigned int page_index;
unsigned long pfn;
- struct signal_page *page;
+ struct kfd_signal_page *page;
+ int ret;
- /* check required size is logical */
- if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) !=
+ /* check required size doesn't exceed the allocated size */
+ if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) <
get_order(vma->vm_end - vma->vm_start)) {
pr_err("Event page mmap requested illegal size\n");
return -EINVAL;
}
- page_index = vma->vm_pgoff;
-
- page = lookup_signal_page_by_index(p, page_index);
+ page = p->signal_page;
if (!page) {
/* Probably KFD bug, but mmap is user-accessible. */
- pr_debug("Signal page could not be found for page_index %u\n",
- page_index);
+ pr_debug("Signal page could not be found\n");
return -EINVAL;
}
@@ -847,8 +788,12 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
page->user_address = (uint64_t __user *)vma->vm_start;
/* mapping the page to user process */
- return remap_pfn_range(vma, vma->vm_start, pfn,
+ ret = remap_pfn_range(vma, vma->vm_start, pfn,
vma->vm_end - vma->vm_start, vma->vm_page_prot);
+ if (!ret)
+ p->signal_mapped_size = vma->vm_end - vma->vm_start;
+
+ return ret;
}
/*
@@ -860,12 +805,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
{
struct kfd_hsa_memory_exception_data *ev_data;
struct kfd_event *ev;
- int bkt;
+ uint32_t id;
bool send_signal = true;
ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
- hash_for_each(p->events, bkt, ev, events)
+ id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+ idr_for_each_entry_continue(&p->event_idr, ev, id)
if (ev->type == type) {
send_signal = false;
dev_dbg(kfd_device,
@@ -904,14 +850,24 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
* running so the lookup function returns a locked process.
*/
struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+ struct mm_struct *mm;
if (!p)
return; /* Presumably process exited. */
+ /* Take a safe reference to the mm_struct, which may otherwise
+ * disappear even while the kfd_process is still referenced.
+ */
+ mm = get_task_mm(p->lead_thread);
+ if (!mm) {
+ mutex_unlock(&p->mutex);
+ return; /* Process is exiting */
+ }
+
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
- down_read(&p->mm->mmap_sem);
- vma = find_vma(p->mm, address);
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, address);
memory_exception_data.gpu_id = dev->id;
memory_exception_data.va = address;
@@ -937,7 +893,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
}
}
- up_read(&p->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
+ mmput(mm);
mutex_lock(&p->event_mutex);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
index 28f6838b1f4c..abca5bfebbff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
@@ -27,12 +27,17 @@
#include <linux/hashtable.h>
#include <linux/types.h>
#include <linux/list.h>
+#include <linux/wait.h>
#include "kfd_priv.h"
#include <uapi/linux/kfd_ioctl.h>
-#define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U
-#define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK
-#define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX
+/*
+ * IDR supports non-negative integer IDs. Small IDs are used for
+ * signal events to match their signal slot. Use the upper half of the
+ * ID space for non-signal events.
+ */
+#define KFD_FIRST_NONSIGNAL_EVENT_ID ((INT_MAX >> 1) + 1)
+#define KFD_LAST_NONSIGNAL_EVENT_ID INT_MAX
/*
* Written into kfd_signal_slot_t to indicate that the event is not signaled.
@@ -46,9 +51,6 @@ struct kfd_event_waiter;
struct signal_page;
struct kfd_event {
- /* All events in process, rooted at kfd_process.events. */
- struct hlist_node events;
-
u32 event_id;
bool signaled;
@@ -56,11 +58,9 @@ struct kfd_event {
int type;
- struct list_head waiters; /* List of kfd_event_waiter by waiters. */
+ wait_queue_head_t wq; /* List of event waiters. */
/* Only for signal events. */
- struct signal_page *signal_page;
- unsigned int signal_slot_index;
uint64_t __user *user_signal_address;
/* type specific data */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index 70b3a99cffc2..035c351f47c5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -42,26 +42,26 @@
#include <linux/slab.h>
#include <linux/device.h>
+#include <linux/kfifo.h>
#include "kfd_priv.h"
-#define KFD_INTERRUPT_RING_SIZE 1024
+#define KFD_IH_NUM_ENTRIES 8192
static void interrupt_wq(struct work_struct *);
int kfd_interrupt_init(struct kfd_dev *kfd)
{
- void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE,
- kfd->device_info->ih_ring_entry_size,
- GFP_KERNEL);
- if (!interrupt_ring)
- return -ENOMEM;
-
- kfd->interrupt_ring = interrupt_ring;
- kfd->interrupt_ring_size =
- KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size;
- atomic_set(&kfd->interrupt_ring_wptr, 0);
- atomic_set(&kfd->interrupt_ring_rptr, 0);
+ int r;
+
+ r = kfifo_alloc(&kfd->ih_fifo,
+ KFD_IH_NUM_ENTRIES * kfd->device_info->ih_ring_entry_size,
+ GFP_KERNEL);
+ if (r) {
+ dev_err(kfd_chardev(), "Failed to allocate IH fifo\n");
+ return r;
+ }
+ kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1);
spin_lock_init(&kfd->interrupt_lock);
INIT_WORK(&kfd->interrupt_work, interrupt_wq);
@@ -92,74 +92,47 @@ void kfd_interrupt_exit(struct kfd_dev *kfd)
spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
/*
- * Flush_scheduled_work ensures that there are no outstanding
+ * flush_work ensures that there are no outstanding
* work-queue items that will access interrupt_ring. New work items
* can't be created because we stopped interrupt handling above.
*/
- flush_scheduled_work();
+ flush_workqueue(kfd->ih_wq);
- kfree(kfd->interrupt_ring);
+ kfifo_free(&kfd->ih_fifo);
}
/*
- * This assumes that it can't be called concurrently with itself
- * but only with dequeue_ih_ring_entry.
+ * Assumption: single reader/writer. This function is not re-entrant
*/
bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry)
{
- unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
- unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
+ int count;
- if ((rptr - wptr) % kfd->interrupt_ring_size ==
- kfd->device_info->ih_ring_entry_size) {
- /* This is very bad, the system is likely to hang. */
+ count = kfifo_in(&kfd->ih_fifo, ih_ring_entry,
+ kfd->device_info->ih_ring_entry_size);
+ if (count != kfd->device_info->ih_ring_entry_size) {
dev_err_ratelimited(kfd_chardev(),
- "Interrupt ring overflow, dropping interrupt.\n");
+ "Interrupt ring overflow, dropping interrupt %d\n",
+ count);
return false;
}
- memcpy(kfd->interrupt_ring + wptr, ih_ring_entry,
- kfd->device_info->ih_ring_entry_size);
-
- wptr = (wptr + kfd->device_info->ih_ring_entry_size) %
- kfd->interrupt_ring_size;
- smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */
- atomic_set(&kfd->interrupt_ring_wptr, wptr);
-
return true;
}
/*
- * This assumes that it can't be called concurrently with itself
- * but only with enqueue_ih_ring_entry.
+ * Assumption: single reader/writer. This function is not re-entrant
*/
static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry)
{
- /*
- * Assume that wait queues have an implicit barrier, i.e. anything that
- * happened in the ISR before it queued work is visible.
- */
-
- unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
- unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
+ int count;
- if (rptr == wptr)
- return false;
-
- memcpy(ih_ring_entry, kfd->interrupt_ring + rptr,
- kfd->device_info->ih_ring_entry_size);
-
- rptr = (rptr + kfd->device_info->ih_ring_entry_size) %
- kfd->interrupt_ring_size;
+ count = kfifo_out(&kfd->ih_fifo, ih_ring_entry,
+ kfd->device_info->ih_ring_entry_size);
- /*
- * Ensure the rptr write update is not visible until
- * memcpy has finished reading.
- */
- smp_mb();
- atomic_set(&kfd->interrupt_ring_rptr, rptr);
+ WARN_ON(count && count != kfd->device_info->ih_ring_entry_size);
- return true;
+ return count == kfd->device_info->ih_ring_entry_size;
}
static void interrupt_wq(struct work_struct *work)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 44ffd23348fc..4859d263fa2a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -189,12 +189,9 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
if (q->format == KFD_QUEUE_FORMAT_AQL)
m->cp_hqd_pq_control |= NO_UPDATE_RPTR;
- q->is_active = false;
- if (q->queue_size > 0 &&
+ q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
- q->queue_percent > 0) {
- q->is_active = true;
- }
+ q->queue_percent > 0);
return 0;
}
@@ -215,24 +212,17 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8);
m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
- m->sdma_rlc_doorbell = q->doorbell_off <<
- SDMA0_RLC0_DOORBELL__OFFSET__SHIFT |
- 1 << SDMA0_RLC0_DOORBELL__ENABLE__SHIFT;
+ m->sdma_rlc_doorbell =
+ q->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;
m->sdma_rlc_virtual_addr = q->sdma_vm_addr;
m->sdma_engine_id = q->sdma_engine_id;
m->sdma_queue_id = q->sdma_queue_id;
- q->is_active = false;
- if (q->queue_size > 0 &&
+ q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
- q->queue_percent > 0) {
- m->sdma_rlc_rb_cntl |=
- 1 << SDMA0_RLC0_RB_CNTL__RB_ENABLE__SHIFT;
-
- q->is_active = true;
- }
+ q->queue_percent > 0);
return 0;
}
@@ -359,19 +349,13 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
- m->cp_hqd_pq_doorbell_control = DOORBELL_EN |
- DOORBELL_OFFSET(q->doorbell_off);
+ m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off);
m->cp_hqd_vmid = q->vmid;
- m->cp_hqd_active = 0;
- q->is_active = false;
- if (q->queue_size > 0 &&
+ q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
- q->queue_percent > 0) {
- m->cp_hqd_active = 1;
- q->is_active = true;
- }
+ q->queue_percent > 0);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 73cbfe186dd2..4ea854f9007b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -163,12 +163,9 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
}
- q->is_active = false;
- if (q->queue_size > 0 &&
+ q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
- q->queue_percent > 0) {
- q->is_active = true;
- }
+ q->queue_percent > 0);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 7d86ec9790d3..9e4134c5b481 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -31,6 +31,8 @@
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/kfd_ioctl.h>
+#include <linux/idr.h>
+#include <linux/kfifo.h>
#include <kgd_kfd_interface.h>
#include "amd_shared.h"
@@ -181,10 +183,8 @@ struct kfd_dev {
unsigned int gtt_sa_num_of_chunks;
/* Interrupts */
- void *interrupt_ring;
- size_t interrupt_ring_size;
- atomic_t interrupt_ring_rptr;
- atomic_t interrupt_ring_wptr;
+ struct kfifo ih_fifo;
+ struct workqueue_struct *ih_wq;
struct work_struct interrupt_work;
spinlock_t interrupt_lock;
@@ -494,7 +494,12 @@ struct kfd_process {
*/
struct hlist_node kfd_processes;
- struct mm_struct *mm;
+ /*
+ * Opaque pointer to mm_struct. We don't hold a reference to
+ * it so it should never be dereferenced from here. This is
+ * only used for looking up processes by their mm.
+ */
+ void *mm;
struct mutex mutex;
@@ -502,6 +507,8 @@ struct kfd_process {
* In any process, the thread that started main() is the lead
* thread and outlives the rest.
* It is here because amd_iommu_bind_pasid wants a task_struct.
+ * It can also be used for safely getting a reference to the
+ * mm_struct of the process.
*/
struct task_struct *lead_thread;
@@ -522,22 +529,16 @@ struct kfd_process {
struct process_queue_manager pqm;
- /* The process's queues. */
- size_t queue_array_size;
-
- /* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */
- struct kfd_queue **queues;
-
/*Is the user space process 32 bit?*/
bool is_32bit_user_mode;
/* Event-related data */
struct mutex event_mutex;
- /* All events in process hashed by ID, linked on kfd_event.events. */
- DECLARE_HASHTABLE(events, 4);
- /* struct slot_page_header.event_pages */
- struct list_head signal_event_pages;
- u32 next_nonsignal_event_id;
+ /* Event ID allocator and lookup */
+ struct idr event_idr;
+ /* Event page */
+ struct kfd_signal_page *signal_page;
+ size_t signal_mapped_size;
size_t signal_event_count;
bool signal_event_limit_reached;
};
@@ -721,19 +722,13 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
extern const struct kfd_device_global_init_class device_global_init_class_cik;
-enum kfd_event_wait_result {
- KFD_WAIT_COMPLETE,
- KFD_WAIT_TIMEOUT,
- KFD_WAIT_ERROR
-};
-
void kfd_event_init_process(struct kfd_process *p);
void kfd_event_free_process(struct kfd_process *p);
int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma);
int kfd_wait_on_events(struct kfd_process *p,
uint32_t num_events, void __user *data,
bool all, uint32_t user_timeout_ms,
- enum kfd_event_wait_result *wait_result);
+ uint32_t *wait_result);
void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
uint32_t valid_id_bits);
void kfd_signal_iommu_event(struct kfd_dev *dev,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 3ccb3b53216e..1f5ccd28bd41 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -35,13 +35,6 @@ struct mm_struct;
#include "kfd_dbgmgr.h"
/*
- * Initial size for the array of queues.
- * The allocated size is doubled each time
- * it is exceeded up to MAX_PROCESS_QUEUES.
- */
-#define INITIAL_QUEUE_ARRAY_SIZE 16
-
-/*
* List of struct kfd_process (field kfd_process).
* Unique/indexed by mm_struct*
*/
@@ -187,8 +180,6 @@ static void kfd_process_wq_release(struct work_struct *work)
mutex_destroy(&p->mutex);
- kfree(p->queues);
-
kfree(p);
kfree(work);
@@ -200,7 +191,6 @@ static void kfd_process_destroy_delayed(struct rcu_head *rcu)
struct kfd_process *p;
p = container_of(rcu, struct kfd_process, rcu);
- WARN_ON(atomic_read(&p->mm->mm_count) <= 0);
mmdrop(p->mm);
@@ -234,17 +224,26 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
mutex_lock(&p->mutex);
+ /* Iterate over all process device data structures and if the
+ * pdd is in debug mode, we should first force unregistration,
+ * then we will be able to destroy the queues
+ */
+ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+ struct kfd_dev *dev = pdd->dev;
+
+ mutex_lock(kfd_get_dbgmgr_mutex());
+ if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
+ if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
+ kfd_dbgmgr_destroy(dev->dbgmgr);
+ dev->dbgmgr = NULL;
+ }
+ }
+ mutex_unlock(kfd_get_dbgmgr_mutex());
+ }
+
kfd_process_dequeue_from_all_devices(p);
pqm_uninit(&p->pqm);
- /* Iterate over all process device data structure and check
- * if we should delete debug managers
- */
- list_for_each_entry(pdd, &p->per_device_data, per_device_list)
- if ((pdd->dev->dbgmgr) &&
- (pdd->dev->dbgmgr->pasid == p->pasid))
- kfd_dbgmgr_destroy(pdd->dev->dbgmgr);
-
mutex_unlock(&p->mutex);
/*
@@ -271,11 +270,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
if (!process)
goto err_alloc_process;
- process->queues = kmalloc_array(INITIAL_QUEUE_ARRAY_SIZE,
- sizeof(process->queues[0]), GFP_KERNEL);
- if (!process->queues)
- goto err_alloc_queues;
-
process->pasid = kfd_pasid_alloc();
if (process->pasid == 0)
goto err_alloc_pasid;
@@ -298,8 +292,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
process->lead_thread = thread->group_leader;
- process->queue_array_size = INITIAL_QUEUE_ARRAY_SIZE;
-
INIT_LIST_HEAD(&process->per_device_data);
kfd_event_init_process(process);
@@ -328,8 +320,6 @@ err_mmu_notifier:
err_alloc_doorbells:
kfd_pasid_free(process->pasid);
err_alloc_pasid:
- kfree(process->queues);
-err_alloc_queues:
kfree(process);
err_alloc_process:
return ERR_PTR(err);
@@ -426,7 +416,7 @@ int kfd_bind_processes_to_device(struct kfd_dev *dev)
err = amd_iommu_bind_pasid(dev->pdev, p->pasid,
p->lead_thread);
if (err < 0) {
- pr_err("unexpected pasid %d binding failure\n",
+ pr_err("Unexpected pasid %d binding failure\n",
p->pasid);
mutex_unlock(&p->mutex);
break;
@@ -442,29 +432,25 @@ int kfd_bind_processes_to_device(struct kfd_dev *dev)
}
/*
- * Temporarily unbind currently bound processes from the device and
- * mark them as PDD_BOUND_SUSPENDED. These processes will be restored
- * to PDD_BOUND state in kfd_bind_processes_to_device.
+ * Mark currently bound processes as PDD_BOUND_SUSPENDED. These
+ * processes will be restored to PDD_BOUND state in
+ * kfd_bind_processes_to_device.
*/
void kfd_unbind_processes_from_device(struct kfd_dev *dev)
{
struct kfd_process_device *pdd;
struct kfd_process *p;
- unsigned int temp, temp_bound, temp_pasid;
+ unsigned int temp;
int idx = srcu_read_lock(&kfd_processes_srcu);
hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
mutex_lock(&p->mutex);
pdd = kfd_get_process_device_data(dev, p);
- temp_bound = pdd->bound;
- temp_pasid = p->pasid;
+
if (pdd->bound == PDD_BOUND)
pdd->bound = PDD_BOUND_SUSPENDED;
mutex_unlock(&p->mutex);
-
- if (temp_bound == PDD_BOUND)
- amd_iommu_unbind_pasid(dev->pdev, temp_pasid);
}
srcu_read_unlock(&kfd_processes_srcu, idx);
@@ -486,8 +472,16 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
pr_debug("Unbinding process %d from IOMMU\n", pasid);
- if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
- kfd_dbgmgr_destroy(dev->dbgmgr);
+ mutex_lock(kfd_get_dbgmgr_mutex());
+
+ if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) {
+ if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) {
+ kfd_dbgmgr_destroy(dev->dbgmgr);
+ dev->dbgmgr = NULL;
+ }
+ }
+
+ mutex_unlock(kfd_get_dbgmgr_mutex());
pdd = kfd_get_process_device_data(dev, p);
if (pdd)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 5129dc139219..2bec902fc939 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -177,7 +177,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
if (retval != 0)
return retval;
- if (list_empty(&pqm->queues)) {
+ if (list_empty(&pdd->qpd.queues_list) &&
+ list_empty(&pdd->qpd.priv_queue_list)) {
pdd->qpd.pqm = pqm;
dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
}
@@ -248,7 +249,8 @@ err_create_queue:
err_allocate_pqn:
/* check if queues list is empty unregister process from device */
clear_bit(*qid, pqm->queue_slot_bitmap);
- if (list_empty(&pqm->queues))
+ if (list_empty(&pdd->qpd.queues_list) &&
+ list_empty(&pdd->qpd.priv_queue_list))
dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd);
return retval;
}
@@ -302,7 +304,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
kfree(pqn);
clear_bit(qid, pqm->queue_slot_bitmap);
- if (list_empty(&pqm->queues))
+ if (list_empty(&pdd->qpd.queues_list) &&
+ list_empty(&pdd->qpd.priv_queue_list))
dqm->ops.unregister_process(dqm, &pdd->qpd);
return retval;
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index be16c6390216..cf3e5985e3e7 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -102,8 +102,7 @@ radeon-y += \
radeon-y += \
radeon_vce.o \
vce_v1_0.o \
- vce_v2_0.o \
- radeon_kfd.o
+ vce_v2_0.o
radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o
radeon-$(CONFIG_ACPI) += radeon_acpi.o
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 3cb6c55b268d..898f9a078830 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -33,7 +33,6 @@
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
-#include "radeon_kfd.h"
#define SH_MEM_CONFIG_GFX_DEFAULT \
ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
@@ -5684,10 +5683,9 @@ int cik_vm_init(struct radeon_device *rdev)
/*
* number of VMs
* VMID 0 is reserved for System
- * radeon graphics/compute will use VMIDs 1-7
- * amdkfd will use VMIDs 8-15
+ * radeon graphics/compute will use VMIDs 1-15
*/
- rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
+ rdev->vm_manager.nvm = 16;
/* base offset of vram pages */
if (rdev->flags & RADEON_IS_IGP) {
u64 tmp = RREG32(MC_VM_FB_OFFSET);
@@ -7589,9 +7587,6 @@ restart_ih:
/* wptr/rptr are in bytes! */
ring_index = rptr / 4;
- radeon_kfd_interrupt(rdev,
- (const void *) &rdev->ih.ring[ring_index]);
-
src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
@@ -8486,10 +8481,6 @@ static int cik_startup(struct radeon_device *rdev)
if (r)
return r;
- r = radeon_kfd_resume(rdev);
- if (r)
- return r;
-
return 0;
}
@@ -8538,7 +8529,6 @@ int cik_resume(struct radeon_device *rdev)
*/
int cik_suspend(struct radeon_device *rdev)
{
- radeon_kfd_suspend(rdev);
radeon_pm_suspend(rdev);
radeon_audio_fini(rdev);
radeon_vm_manager_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h
index e21015475ed5..cda16fcd43bb 100644
--- a/drivers/gpu/drm/radeon/cikd.h
+++ b/drivers/gpu/drm/radeon/cikd.h
@@ -30,8 +30,6 @@
#define CIK_RB_BITMAP_WIDTH_PER_SH 2
#define HAWAII_RB_BITMAP_WIDTH_PER_SH 4
-#define RADEON_NUM_OF_VMIDS 8
-
/* DIDT IND registers */
#define DIDT_SQ_CTRL0 0x0
# define DIDT_CTRL_EN (1 << 0)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8cbaeec090c9..a8e546569858 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2456,9 +2456,6 @@ struct radeon_device {
u64 vram_pin_size;
u64 gart_pin_size;
- /* amdkfd interface */
- struct kfd_dev *kfd;
-
struct mutex mn_lock;
DECLARE_HASHTABLE(mn_hash, 7);
};
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index f4becad0a78c..31dd04f6baa1 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -43,7 +43,6 @@
#include <drm/drm_fb_helper.h>
#include <drm/drm_crtc_helper.h>
-#include "radeon_kfd.h"
/*
* KMS wrapper.
@@ -338,14 +337,6 @@ static int radeon_pci_probe(struct pci_dev *pdev,
{
int ret;
- /*
- * Initialize amdkfd before starting radeon. If it was not loaded yet,
- * defer radeon probing
- */
- ret = radeon_kfd_init();
- if (ret == -EPROBE_DEFER)
- return ret;
-
if (vga_switcheroo_client_probe_defer(pdev))
return -EPROBE_DEFER;
@@ -645,7 +636,6 @@ static int __init radeon_init(void)
static void __exit radeon_exit(void)
{
- radeon_kfd_fini();
pci_unregister_driver(pdriver);
radeon_unregister_atpx_handler();
}
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
deleted file mode 100644
index 385b4d76956d..000000000000
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ /dev/null
@@ -1,901 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/module.h>
-#include <linux/fdtable.h>
-#include <linux/uaccess.h>
-#include <drm/drmP.h>
-#include "radeon.h"
-#include "cikd.h"
-#include "cik_reg.h"
-#include "radeon_kfd.h"
-#include "radeon_ucode.h"
-#include <linux/firmware.h>
-#include "cik_structs.h"
-
-#define CIK_PIPE_PER_MEC (4)
-
-static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
- TCP_WATCH0_ADDR_H, TCP_WATCH0_ADDR_L, TCP_WATCH0_CNTL,
- TCP_WATCH1_ADDR_H, TCP_WATCH1_ADDR_L, TCP_WATCH1_CNTL,
- TCP_WATCH2_ADDR_H, TCP_WATCH2_ADDR_L, TCP_WATCH2_CNTL,
- TCP_WATCH3_ADDR_H, TCP_WATCH3_ADDR_L, TCP_WATCH3_CNTL
-};
-
-struct kgd_mem {
- struct radeon_bo *bo;
- uint64_t gpu_addr;
- void *cpu_ptr;
-};
-
-
-static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
- void **mem_obj, uint64_t *gpu_addr,
- void **cpu_ptr);
-
-static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
-
-static uint64_t get_vmem_size(struct kgd_dev *kgd);
-static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
-
-static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
-
-static int alloc_pasid(unsigned int bits);
-static void free_pasid(unsigned int pasid);
-
-static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
-
-/*
- * Register access functions
- */
-
-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
- uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
- uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);
-
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
- unsigned int vmid);
-
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr);
-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr,
- uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id);
-
-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id);
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
- unsigned int timeout);
-static int kgd_address_watch_disable(struct kgd_dev *kgd);
-static int kgd_address_watch_execute(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo);
-static int kgd_wave_control_execute(struct kgd_dev *kgd,
- uint32_t gfx_index_val,
- uint32_t sq_cmd);
-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset);
-
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- uint8_t vmid);
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
-
-static const struct kfd2kgd_calls kfd2kgd = {
- .init_gtt_mem_allocation = alloc_gtt_mem,
- .free_gtt_mem = free_gtt_mem,
- .get_vmem_size = get_vmem_size,
- .get_gpu_clock_counter = get_gpu_clock_counter,
- .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
- .alloc_pasid = alloc_pasid,
- .free_pasid = free_pasid,
- .program_sh_mem_settings = kgd_program_sh_mem_settings,
- .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
- .init_pipeline = kgd_init_pipeline,
- .init_interrupts = kgd_init_interrupts,
- .hqd_load = kgd_hqd_load,
- .hqd_sdma_load = kgd_hqd_sdma_load,
- .hqd_is_occupied = kgd_hqd_is_occupied,
- .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
- .hqd_destroy = kgd_hqd_destroy,
- .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
- .address_watch_disable = kgd_address_watch_disable,
- .address_watch_execute = kgd_address_watch_execute,
- .wave_control_execute = kgd_wave_control_execute,
- .address_watch_get_offset = kgd_address_watch_get_offset,
- .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
- .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
- .write_vmid_invalidate_request = write_vmid_invalidate_request,
- .get_fw_version = get_fw_version
-};
-
-static const struct kgd2kfd_calls *kgd2kfd;
-
-int radeon_kfd_init(void)
-{
- int ret;
-
-#if defined(CONFIG_HSA_AMD_MODULE)
- int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
-
- kgd2kfd_init_p = symbol_request(kgd2kfd_init);
-
- if (kgd2kfd_init_p == NULL)
- return -ENOENT;
-
- ret = kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd);
- if (ret) {
- symbol_put(kgd2kfd_init);
- kgd2kfd = NULL;
- }
-
-#elif defined(CONFIG_HSA_AMD)
- ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
- if (ret)
- kgd2kfd = NULL;
-
-#else
- ret = -ENOENT;
-#endif
-
- return ret;
-}
-
-void radeon_kfd_fini(void)
-{
- if (kgd2kfd) {
- kgd2kfd->exit();
- symbol_put(kgd2kfd_init);
- }
-}
-
-void radeon_kfd_device_probe(struct radeon_device *rdev)
-{
- if (kgd2kfd)
- rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
- rdev->pdev, &kfd2kgd);
-}
-
-void radeon_kfd_device_init(struct radeon_device *rdev)
-{
- int i, queue, pipe, mec;
-
- if (rdev->kfd) {
- struct kgd2kfd_shared_resources gpu_resources = {
- .compute_vmid_bitmap = 0xFF00,
- .num_pipe_per_mec = 4,
- .num_queue_per_pipe = 8
- };
-
- bitmap_zero(gpu_resources.queue_bitmap, KGD_MAX_QUEUES);
-
- for (i = 0; i < KGD_MAX_QUEUES; ++i) {
- queue = i % gpu_resources.num_queue_per_pipe;
- pipe = (i / gpu_resources.num_queue_per_pipe)
- % gpu_resources.num_pipe_per_mec;
- mec = (i / gpu_resources.num_queue_per_pipe)
- / gpu_resources.num_pipe_per_mec;
-
- if (mec == 0 && pipe > 0)
- set_bit(i, gpu_resources.queue_bitmap);
- }
-
- radeon_doorbell_get_kfd_info(rdev,
- &gpu_resources.doorbell_physical_address,
- &gpu_resources.doorbell_aperture_size,
- &gpu_resources.doorbell_start_offset);
-
- kgd2kfd->device_init(rdev->kfd, &gpu_resources);
- }
-}
-
-void radeon_kfd_device_fini(struct radeon_device *rdev)
-{
- if (rdev->kfd) {
- kgd2kfd->device_exit(rdev->kfd);
- rdev->kfd = NULL;
- }
-}
-
-void radeon_kfd_interrupt(struct radeon_device *rdev, const void *ih_ring_entry)
-{
- if (rdev->kfd)
- kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
-}
-
-void radeon_kfd_suspend(struct radeon_device *rdev)
-{
- if (rdev->kfd)
- kgd2kfd->suspend(rdev->kfd);
-}
-
-int radeon_kfd_resume(struct radeon_device *rdev)
-{
- int r = 0;
-
- if (rdev->kfd)
- r = kgd2kfd->resume(rdev->kfd);
-
- return r;
-}
-
-static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
- void **mem_obj, uint64_t *gpu_addr,
- void **cpu_ptr)
-{
- struct radeon_device *rdev = (struct radeon_device *)kgd;
- struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
- int r;
-
- BUG_ON(kgd == NULL);
- BUG_ON(gpu_addr == NULL);
- BUG_ON(cpu_ptr == NULL);
-
- *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
- if ((*mem) == NULL)
- return -ENOMEM;
-
- r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT,
- RADEON_GEM_GTT_WC, NULL, NULL, &(*mem)->bo);
- if (r) {
- dev_err(rdev->dev,
- "failed to allocate BO for amdkfd (%d)\n", r);
- return r;
- }
-
- /* map the buffer */
- r = radeon_bo_reserve((*mem)->bo, true);
- if (r) {
- dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
- goto allocate_mem_reserve_bo_failed;
- }
-
- r = radeon_bo_pin((*mem)->bo, RADEON_GEM_DOMAIN_GTT,
- &(*mem)->gpu_addr);
- if (r) {
- dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r);
- goto allocate_mem_pin_bo_failed;
- }
- *gpu_addr = (*mem)->gpu_addr;
-
- r = radeon_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
- if (r) {
- dev_err(rdev->dev,
- "(%d) failed to map bo to kernel for amdkfd\n", r);
- goto allocate_mem_kmap_bo_failed;
- }
- *cpu_ptr = (*mem)->cpu_ptr;
-
- radeon_bo_unreserve((*mem)->bo);
-
- return 0;
-
-allocate_mem_kmap_bo_failed:
- radeon_bo_unpin((*mem)->bo);
-allocate_mem_pin_bo_failed:
- radeon_bo_unreserve((*mem)->bo);
-allocate_mem_reserve_bo_failed:
- radeon_bo_unref(&(*mem)->bo);
-
- return r;
-}
-
-static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
-{
- struct kgd_mem *mem = (struct kgd_mem *) mem_obj;
-
- BUG_ON(mem == NULL);
-
- radeon_bo_reserve(mem->bo, true);
- radeon_bo_kunmap(mem->bo);
- radeon_bo_unpin(mem->bo);
- radeon_bo_unreserve(mem->bo);
- radeon_bo_unref(&(mem->bo));
- kfree(mem);
-}
-
-static uint64_t get_vmem_size(struct kgd_dev *kgd)
-{
- struct radeon_device *rdev = (struct radeon_device *)kgd;
-
- BUG_ON(kgd == NULL);
-
- return rdev->mc.real_vram_size;
-}
-
-static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
-{
- struct radeon_device *rdev = (struct radeon_device *)kgd;
-
- return rdev->asic->get_gpu_clock_counter(rdev);
-}
-
-static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
-{
- struct radeon_device *rdev = (struct radeon_device *)kgd;
-
- /* The sclk is in quantas of 10kHz */
- return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
-}
-
-/*
- * PASID manager
- */
-static DEFINE_IDA(pasid_ida);
-
-static int alloc_pasid(unsigned int bits)
-{
- int pasid = -EINVAL;
-
- for (bits = min(bits, 31U); bits > 0; bits--) {
- pasid = ida_simple_get(&pasid_ida,
- 1U << (bits - 1), 1U << bits,
- GFP_KERNEL);
- if (pasid != -ENOSPC)
- break;
- }
-
- return pasid;
-}
-
-static void free_pasid(unsigned int pasid)
-{
- ida_simple_remove(&pasid_ida, pasid);
-}
-
-static inline struct radeon_device *get_radeon_device(struct kgd_dev *kgd)
-{
- return (struct radeon_device *)kgd;
-}
-
-static void write_register(struct kgd_dev *kgd, uint32_t offset, uint32_t value)
-{
- struct radeon_device *rdev = get_radeon_device(kgd);
-
- writel(value, (void __iomem *)(rdev->rmmio + offset));
-}
-
-static uint32_t read_register(struct kgd_dev *kgd, uint32_t offset)
-{
- struct radeon_device *rdev = get_radeon_device(kgd);
-
- return readl((void __iomem *)(rdev->rmmio + offset));
-}
-
-static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
- uint32_t queue, uint32_t vmid)
-{
- struct radeon_device *rdev = get_radeon_device(kgd);
- uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
-
- mutex_lock(&rdev->srbm_mutex);
- write_register(kgd, SRBM_GFX_CNTL, value);
-}
-
-static void unlock_srbm(struct kgd_dev *kgd)
-{
- struct radeon_device *rdev = get_radeon_device(kgd);
-
- write_register(kgd, SRBM_GFX_CNTL, 0);
- mutex_unlock(&rdev->srbm_mutex);
-}
-
-static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t queue_id)
-{
- uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
- uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
-
- lock_srbm(kgd, mec, pipe, queue_id, 0);
-}
-
-static void release_queue(struct kgd_dev *kgd)
-{
- unlock_srbm(kgd);
-}
-
-static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
- uint32_t sh_mem_config,
- uint32_t sh_mem_ape1_base,
- uint32_t sh_mem_ape1_limit,
- uint32_t sh_mem_bases)
-{
- lock_srbm(kgd, 0, 0, 0, vmid);
-
- write_register(kgd, SH_MEM_CONFIG, sh_mem_config);
- write_register(kgd, SH_MEM_APE1_BASE, sh_mem_ape1_base);
- write_register(kgd, SH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
- write_register(kgd, SH_MEM_BASES, sh_mem_bases);
-
- unlock_srbm(kgd);
-}
-
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
- unsigned int vmid)
-{
- /*
- * We have to assume that there is no outstanding mapping.
- * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0
- * because a mapping is in progress or because a mapping finished and
- * the SW cleared it.
- * So the protocol is to always wait & clear.
- */
- uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
- ATC_VMID_PASID_MAPPING_VALID_MASK;
-
- write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t),
- pasid_mapping);
-
- while (!(read_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS) &
- (1U << vmid)))
- cpu_relax();
- write_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
-
- /* Mapping vmid to pasid also for IH block */
- write_register(kgd, IH_VMID_0_LUT + vmid * sizeof(uint32_t),
- pasid_mapping);
-
- return 0;
-}
-
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr)
-{
- /* nothing to do here */
- return 0;
-}
-
-static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
-{
- uint32_t mec;
- uint32_t pipe;
-
- mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
- pipe = (pipe_id % CIK_PIPE_PER_MEC);
-
- lock_srbm(kgd, mec, pipe, 0, 0);
-
- write_register(kgd, CPC_INT_CNTL,
- TIME_STAMP_INT_ENABLE | OPCODE_ERROR_INT_ENABLE);
-
- unlock_srbm(kgd);
-
- return 0;
-}
-
-static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
-{
- uint32_t retval;
-
- retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
- m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
-
- pr_debug("kfd: sdma base address: 0x%x\n", retval);
-
- return retval;
-}
-
-static inline struct cik_mqd *get_mqd(void *mqd)
-{
- return (struct cik_mqd *)mqd;
-}
-
-static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
-{
- return (struct cik_sdma_rlc_registers *)mqd;
-}
-
-static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr,
- uint32_t wptr_shift, uint32_t wptr_mask,
- struct mm_struct *mm)
-{
- uint32_t wptr_shadow, is_wptr_shadow_valid;
- struct cik_mqd *m;
-
- m = get_mqd(mqd);
-
- is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
-
- acquire_queue(kgd, pipe_id, queue_id);
- write_register(kgd, CP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
- write_register(kgd, CP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
- write_register(kgd, CP_MQD_CONTROL, m->cp_mqd_control);
-
- write_register(kgd, CP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
- write_register(kgd, CP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
- write_register(kgd, CP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
-
- write_register(kgd, CP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
- write_register(kgd, CP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
- write_register(kgd, CP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);
-
- write_register(kgd, CP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);
-
- write_register(kgd, CP_HQD_PERSISTENT_STATE,
- m->cp_hqd_persistent_state);
- write_register(kgd, CP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
- write_register(kgd, CP_HQD_MSG_TYPE, m->cp_hqd_msg_type);
-
- write_register(kgd, CP_HQD_ATOMIC0_PREOP_LO,
- m->cp_hqd_atomic0_preop_lo);
-
- write_register(kgd, CP_HQD_ATOMIC0_PREOP_HI,
- m->cp_hqd_atomic0_preop_hi);
-
- write_register(kgd, CP_HQD_ATOMIC1_PREOP_LO,
- m->cp_hqd_atomic1_preop_lo);
-
- write_register(kgd, CP_HQD_ATOMIC1_PREOP_HI,
- m->cp_hqd_atomic1_preop_hi);
-
- write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR,
- m->cp_hqd_pq_rptr_report_addr_lo);
-
- write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
- m->cp_hqd_pq_rptr_report_addr_hi);
-
- write_register(kgd, CP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
-
- write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR,
- m->cp_hqd_pq_wptr_poll_addr_lo);
-
- write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR_HI,
- m->cp_hqd_pq_wptr_poll_addr_hi);
-
- write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL,
- m->cp_hqd_pq_doorbell_control);
-
- write_register(kgd, CP_HQD_VMID, m->cp_hqd_vmid);
-
- write_register(kgd, CP_HQD_QUANTUM, m->cp_hqd_quantum);
-
- write_register(kgd, CP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
- write_register(kgd, CP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
-
- write_register(kgd, CP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);
-
- if (is_wptr_shadow_valid)
- write_register(kgd, CP_HQD_PQ_WPTR, wptr_shadow);
-
- write_register(kgd, CP_HQD_ACTIVE, m->cp_hqd_active);
- release_queue(kgd);
-
- return 0;
-}
-
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
-{
- struct cik_sdma_rlc_registers *m;
- uint32_t sdma_base_addr;
-
- m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(m);
-
- write_register(kgd,
- sdma_base_addr + SDMA0_RLC0_VIRTUAL_ADDR,
- m->sdma_rlc_virtual_addr);
-
- write_register(kgd,
- sdma_base_addr + SDMA0_RLC0_RB_BASE,
- m->sdma_rlc_rb_base);
-
- write_register(kgd,
- sdma_base_addr + SDMA0_RLC0_RB_BASE_HI,
- m->sdma_rlc_rb_base_hi);
-
- write_register(kgd,
- sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_LO,
- m->sdma_rlc_rb_rptr_addr_lo);
-
- write_register(kgd,
- sdma_base_addr + SDMA0_RLC0_RB_RPTR_ADDR_HI,
- m->sdma_rlc_rb_rptr_addr_hi);
-
- write_register(kgd,
- sdma_base_addr + SDMA0_RLC0_DOORBELL,
- m->sdma_rlc_doorbell);
-
- write_register(kgd,
- sdma_base_addr + SDMA0_RLC0_RB_CNTL,
- m->sdma_rlc_rb_cntl);
-
- return 0;
-}
-
-static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id)
-{
- uint32_t act;
- bool retval = false;
- uint32_t low, high;
-
- acquire_queue(kgd, pipe_id, queue_id);
- act = read_register(kgd, CP_HQD_ACTIVE);
- if (act) {
- low = lower_32_bits(queue_address >> 8);
- high = upper_32_bits(queue_address >> 8);
-
- if (low == read_register(kgd, CP_HQD_PQ_BASE) &&
- high == read_register(kgd, CP_HQD_PQ_BASE_HI))
- retval = true;
- }
- release_queue(kgd);
- return retval;
-}
-
-static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
-{
- struct cik_sdma_rlc_registers *m;
- uint32_t sdma_base_addr;
- uint32_t sdma_rlc_rb_cntl;
-
- m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(m);
-
- sdma_rlc_rb_cntl = read_register(kgd,
- sdma_base_addr + SDMA0_RLC0_RB_CNTL);
-
- if (sdma_rlc_rb_cntl & SDMA_RB_ENABLE)
- return true;
-
- return false;
-}
-
-static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id)
-{
- uint32_t temp;
-
- acquire_queue(kgd, pipe_id, queue_id);
- write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, 0);
-
- write_register(kgd, CP_HQD_DEQUEUE_REQUEST, reset_type);
-
- while (true) {
- temp = read_register(kgd, CP_HQD_ACTIVE);
- if (temp & 0x1)
- break;
- if (timeout == 0) {
- pr_err("kfd: cp queue preemption time out (%dms)\n",
- temp);
- release_queue(kgd);
- return -ETIME;
- }
- msleep(20);
- timeout -= 20;
- }
-
- release_queue(kgd);
- return 0;
-}
-
-static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
- unsigned int timeout)
-{
- struct cik_sdma_rlc_registers *m;
- uint32_t sdma_base_addr;
- uint32_t temp;
-
- m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(m);
-
- temp = read_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL);
- temp = temp & ~SDMA_RB_ENABLE;
- write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_CNTL, temp);
-
- while (true) {
- temp = read_register(kgd, sdma_base_addr +
- SDMA0_RLC0_CONTEXT_STATUS);
- if (temp & SDMA_RLC_IDLE)
- break;
- if (timeout == 0)
- return -ETIME;
- msleep(20);
- timeout -= 20;
- }
-
- write_register(kgd, sdma_base_addr + SDMA0_RLC0_DOORBELL, 0);
- write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_RPTR, 0);
- write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_WPTR, 0);
- write_register(kgd, sdma_base_addr + SDMA0_RLC0_RB_BASE, 0);
-
- return 0;
-}
-
-static int kgd_address_watch_disable(struct kgd_dev *kgd)
-{
- union TCP_WATCH_CNTL_BITS cntl;
- unsigned int i;
-
- cntl.u32All = 0;
-
- cntl.bitfields.valid = 0;
- cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
- cntl.bitfields.atc = 1;
-
- /* Turning off this address until we set all the registers */
- for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
- write_register(kgd,
- watchRegs[i * ADDRESS_WATCH_REG_MAX +
- ADDRESS_WATCH_REG_CNTL],
- cntl.u32All);
-
- return 0;
-}
-
-static int kgd_address_watch_execute(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- uint32_t cntl_val,
- uint32_t addr_hi,
- uint32_t addr_lo)
-{
- union TCP_WATCH_CNTL_BITS cntl;
-
- cntl.u32All = cntl_val;
-
- /* Turning off this watch point until we set all the registers */
- cntl.bitfields.valid = 0;
- write_register(kgd,
- watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
- ADDRESS_WATCH_REG_CNTL],
- cntl.u32All);
-
- write_register(kgd,
- watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
- ADDRESS_WATCH_REG_ADDR_HI],
- addr_hi);
-
- write_register(kgd,
- watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
- ADDRESS_WATCH_REG_ADDR_LO],
- addr_lo);
-
- /* Enable the watch point */
- cntl.bitfields.valid = 1;
-
- write_register(kgd,
- watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
- ADDRESS_WATCH_REG_CNTL],
- cntl.u32All);
-
- return 0;
-}
-
-static int kgd_wave_control_execute(struct kgd_dev *kgd,
- uint32_t gfx_index_val,
- uint32_t sq_cmd)
-{
- struct radeon_device *rdev = get_radeon_device(kgd);
- uint32_t data;
-
- mutex_lock(&rdev->grbm_idx_mutex);
-
- write_register(kgd, GRBM_GFX_INDEX, gfx_index_val);
- write_register(kgd, SQ_CMD, sq_cmd);
-
- /* Restore the GRBM_GFX_INDEX register */
-
- data = INSTANCE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
- SE_BROADCAST_WRITES;
-
- write_register(kgd, GRBM_GFX_INDEX, data);
-
- mutex_unlock(&rdev->grbm_idx_mutex);
-
- return 0;
-}
-
-static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset)
-{
- return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]
- / 4;
-}
-
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid)
-{
- uint32_t reg;
- struct radeon_device *rdev = (struct radeon_device *) kgd;
-
- reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4);
- return reg & ATC_VMID_PASID_MAPPING_VALID_MASK;
-}
-
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- uint8_t vmid)
-{
- uint32_t reg;
- struct radeon_device *rdev = (struct radeon_device *) kgd;
-
- reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4);
- return reg & ATC_VMID_PASID_MAPPING_PASID_MASK;
-}
-
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
-{
- struct radeon_device *rdev = (struct radeon_device *) kgd;
-
- return WREG32(VM_INVALIDATE_REQUEST, 1 << vmid);
-}
-
-static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
-{
- struct radeon_device *rdev = (struct radeon_device *) kgd;
- const union radeon_firmware_header *hdr;
-
- BUG_ON(kgd == NULL || rdev->mec_fw == NULL);
-
- switch (type) {
- case KGD_ENGINE_PFP:
- hdr = (const union radeon_firmware_header *) rdev->pfp_fw->data;
- break;
-
- case KGD_ENGINE_ME:
- hdr = (const union radeon_firmware_header *) rdev->me_fw->data;
- break;
-
- case KGD_ENGINE_CE:
- hdr = (const union radeon_firmware_header *) rdev->ce_fw->data;
- break;
-
- case KGD_ENGINE_MEC1:
- hdr = (const union radeon_firmware_header *) rdev->mec_fw->data;
- break;
-
- case KGD_ENGINE_MEC2:
- hdr = (const union radeon_firmware_header *)
- rdev->mec2_fw->data;
- break;
-
- case KGD_ENGINE_RLC:
- hdr = (const union radeon_firmware_header *) rdev->rlc_fw->data;
- break;
-
- case KGD_ENGINE_SDMA1:
- case KGD_ENGINE_SDMA2:
- hdr = (const union radeon_firmware_header *)
- rdev->sdma_fw->data;
- break;
-
- default:
- return 0;
- }
-
- if (hdr == NULL)
- return 0;
-
- /* Only 12 bit in use*/
- return hdr->common.ucode_version;
-}
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.h b/drivers/gpu/drm/radeon/radeon_kfd.h
deleted file mode 100644
index 9df1fea8e971..000000000000
--- a/drivers/gpu/drm/radeon/radeon_kfd.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * radeon_kfd.h defines the private interface between the
- * AMD kernel graphics drivers and the AMD KFD.
- */
-
-#ifndef RADEON_KFD_H_INCLUDED
-#define RADEON_KFD_H_INCLUDED
-
-#include <linux/types.h>
-#include "kgd_kfd_interface.h"
-
-struct radeon_device;
-
-int radeon_kfd_init(void);
-void radeon_kfd_fini(void);
-
-void radeon_kfd_suspend(struct radeon_device *rdev);
-int radeon_kfd_resume(struct radeon_device *rdev);
-void radeon_kfd_interrupt(struct radeon_device *rdev,
- const void *ih_ring_entry);
-void radeon_kfd_device_probe(struct radeon_device *rdev);
-void radeon_kfd_device_init(struct radeon_device *rdev);
-void radeon_kfd_device_fini(struct radeon_device *rdev);
-
-#endif /* RADEON_KFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index dfee8f7d94ae..cde037f213d7 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -34,8 +34,6 @@
#include <linux/slab.h>
#include <linux/pm_runtime.h>
-#include "radeon_kfd.h"
-
#if defined(CONFIG_VGA_SWITCHEROO)
bool radeon_has_atpx(void);
#else
@@ -68,8 +66,6 @@ void radeon_driver_unload_kms(struct drm_device *dev)
pm_runtime_forbid(dev->dev);
}
- radeon_kfd_device_fini(rdev);
-
radeon_acpi_fini(rdev);
radeon_modeset_fini(rdev);
@@ -174,9 +170,6 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags)
"Error during ACPI methods call\n");
}
- radeon_kfd_device_probe(rdev);
- radeon_kfd_device_init(rdev);
-
if (radeon_is_px(dev)) {
pm_runtime_use_autosuspend(dev->dev);
pm_runtime_set_autosuspend_delay(dev->dev, 5000);