summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c65
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_migrate.c16
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_migrate.h5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c24
5 files changed, 64 insertions, 47 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 16a57b70cc1a..4a416231b24c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -468,6 +468,7 @@ static const struct kfd_device_info navi10_device_info = {
.needs_iommu_device = false,
.supports_cwsr = true,
.needs_pci_atomics = true,
+ .no_atomic_fw_version = 145,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 8,
@@ -487,6 +488,7 @@ static const struct kfd_device_info navi12_device_info = {
.needs_iommu_device = false,
.supports_cwsr = true,
.needs_pci_atomics = true,
+ .no_atomic_fw_version = 145,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 8,
@@ -506,6 +508,7 @@ static const struct kfd_device_info navi14_device_info = {
.needs_iommu_device = false,
.supports_cwsr = true,
.needs_pci_atomics = true,
+ .no_atomic_fw_version = 145,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 8,
@@ -525,6 +528,7 @@ static const struct kfd_device_info sienna_cichlid_device_info = {
.needs_iommu_device = false,
.supports_cwsr = true,
.needs_pci_atomics = true,
+ .no_atomic_fw_version = 92,
.num_sdma_engines = 4,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 8,
@@ -544,6 +548,7 @@ static const struct kfd_device_info navy_flounder_device_info = {
.needs_iommu_device = false,
.supports_cwsr = true,
.needs_pci_atomics = true,
+ .no_atomic_fw_version = 92,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 8,
@@ -562,7 +567,8 @@ static const struct kfd_device_info vangogh_device_info = {
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.needs_iommu_device = false,
.supports_cwsr = true,
- .needs_pci_atomics = false,
+ .needs_pci_atomics = true,
+ .no_atomic_fw_version = 92,
.num_sdma_engines = 1,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
@@ -582,6 +588,7 @@ static const struct kfd_device_info dimgrey_cavefish_device_info = {
.needs_iommu_device = false,
.supports_cwsr = true,
.needs_pci_atomics = true,
+ .no_atomic_fw_version = 92,
.num_sdma_engines = 2,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 8,
@@ -601,6 +608,7 @@ static const struct kfd_device_info beige_goby_device_info = {
.needs_iommu_device = false,
.supports_cwsr = true,
.needs_pci_atomics = true,
+ .no_atomic_fw_version = 92,
.num_sdma_engines = 1,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 8,
@@ -619,7 +627,8 @@ static const struct kfd_device_info yellow_carp_device_info = {
.mqd_size_aligned = MQD_SIZE_ALIGNED,
.needs_iommu_device = false,
.supports_cwsr = true,
- .needs_pci_atomics = false,
+ .needs_pci_atomics = true,
+ .no_atomic_fw_version = 92,
.num_sdma_engines = 1,
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
@@ -708,20 +717,6 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
if (!kfd)
return NULL;
- /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
- * 32 and 64-bit requests are possible and must be
- * supported.
- */
- kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kgd);
- if (device_info->needs_pci_atomics &&
- !kfd->pci_atomic_requested) {
- dev_info(kfd_device,
- "skipped device %x:%x, PCI rejects atomics\n",
- pdev->vendor, pdev->device);
- kfree(kfd);
- return NULL;
- }
-
kfd->kgd = kgd;
kfd->device_info = device_info;
kfd->pdev = pdev;
@@ -821,6 +816,23 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
- kfd->vm_info.first_vmid_kfd + 1;
+ /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
+ * 32 and 64-bit requests are possible and must be
+ * supported.
+ */
+ kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->kgd);
+ if (!kfd->pci_atomic_requested &&
+ kfd->device_info->needs_pci_atomics &&
+ (!kfd->device_info->no_atomic_fw_version ||
+ kfd->mec_fw_version < kfd->device_info->no_atomic_fw_version)) {
+ dev_info(kfd_device,
+ "skipped device %x:%x, PCI rejects atomics %d<%d\n",
+ kfd->pdev->vendor, kfd->pdev->device,
+ kfd->mec_fw_version,
+ kfd->device_info->no_atomic_fw_version);
+ return false;
+ }
+
/* Verify module parameters regarding mapped process number*/
if ((hws_max_conc_proc < 0)
|| (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
@@ -959,7 +971,6 @@ out:
void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
if (kfd->init_complete) {
- svm_migrate_fini((struct amdgpu_device *)kfd->kgd);
device_queue_manager_uninit(kfd->dqm);
kfd_interrupt_exit(kfd);
kfd_topology_remove_device(kfd);
@@ -1057,31 +1068,29 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
return ret;
}
-static int kfd_resume(struct kfd_dev *kfd)
+int kgd2kfd_resume_iommu(struct kfd_dev *kfd)
{
int err = 0;
err = kfd_iommu_resume(kfd);
- if (err) {
+ if (err)
dev_err(kfd_device,
"Failed to resume IOMMU for device %x:%x\n",
kfd->pdev->vendor, kfd->pdev->device);
- return err;
- }
+ return err;
+}
+
+static int kfd_resume(struct kfd_dev *kfd)
+{
+ int err = 0;
err = kfd->dqm->ops.start(kfd->dqm);
- if (err) {
+ if (err)
dev_err(kfd_device,
"Error starting queue manager for device %x:%x\n",
kfd->pdev->vendor, kfd->pdev->device);
- goto dqm_start_error;
- }
return err;
-
-dqm_start_error:
- kfd_iommu_suspend(kfd);
- return err;
}
static inline void kfd_queue_work(struct workqueue_struct *wq,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index dab290a4d19d..4a16e3c257b9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -891,9 +891,16 @@ int svm_migrate_init(struct amdgpu_device *adev)
pgmap->ops = &svm_migrate_pgmap_ops;
pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev);
pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+
+ /* Device manager releases device-specific resources, memory region and
+ * pgmap when driver disconnects from device.
+ */
r = devm_memremap_pages(adev->dev, pgmap);
if (IS_ERR(r)) {
pr_err("failed to register HMM device memory\n");
+
+ /* Disable SVM support capability */
+ pgmap->type = 0;
devm_release_mem_region(adev->dev, res->start,
res->end - res->start + 1);
return PTR_ERR(r);
@@ -908,12 +915,3 @@ int svm_migrate_init(struct amdgpu_device *adev)
return 0;
}
-
-void svm_migrate_fini(struct amdgpu_device *adev)
-{
- struct dev_pagemap *pgmap = &adev->kfd.dev->pgmap;
-
- devm_memunmap_pages(adev->dev, pgmap);
- devm_release_mem_region(adev->dev, pgmap->range.start,
- pgmap->range.end - pgmap->range.start + 1);
-}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
index 0de76b5d4973..2f5b3394c9ed 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
@@ -47,7 +47,6 @@ unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
int svm_migrate_init(struct amdgpu_device *adev);
-void svm_migrate_fini(struct amdgpu_device *adev);
#else
@@ -55,10 +54,6 @@ static inline int svm_migrate_init(struct amdgpu_device *adev)
{
return 0;
}
-static inline void svm_migrate_fini(struct amdgpu_device *adev)
-{
- /* empty */
-}
#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index ab83b0de6b22..6d8f9bb2d905 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -207,6 +207,7 @@ struct kfd_device_info {
bool supports_cwsr;
bool needs_iommu_device;
bool needs_pci_atomics;
+ uint32_t no_atomic_fw_version;
unsigned int num_sdma_engines;
unsigned int num_xgmi_sdma_engines;
unsigned int num_sdma_queues_per_engine;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 9fc8021bb0ab..9d0f65a90002 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -118,6 +118,13 @@ static void svm_range_remove_notifier(struct svm_range *prange)
mmu_interval_notifier_remove(&prange->notifier);
}
+static bool
+svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr)
+{
+ return dma_addr && !dma_mapping_error(dev, dma_addr) &&
+ !(dma_addr & SVM_RANGE_VRAM_DOMAIN);
+}
+
static int
svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
unsigned long offset, unsigned long npages,
@@ -139,8 +146,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
addr += offset;
for (i = 0; i < npages; i++) {
- if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]),
- "leaking dma mapping\n"))
+ if (svm_is_valid_dma_mapping_addr(dev, addr[i]))
dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
page = hmm_pfn_to_page(hmm_pfns[i]);
@@ -209,7 +215,7 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
return;
for (i = offset; i < offset + npages; i++) {
- if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i]))
+ if (!svm_is_valid_dma_mapping_addr(dev, dma_addr[i]))
continue;
pr_debug("dma unmapping 0x%llx\n", dma_addr[i] >> PAGE_SHIFT);
dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir);
@@ -1165,7 +1171,7 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned long last_start;
int last_domain;
int r = 0;
- int64_t i;
+ int64_t i, j;
last_start = prange->start + offset;
@@ -1178,7 +1184,11 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
for (i = offset; i < offset + npages; i++) {
last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN;
- if ((prange->start + i) < prange->last &&
+
+ /* Collect all pages in the same address range and memory domain
+ * that can be mapped with a single call to update mapping.
+ */
+ if (i < offset + npages - 1 &&
last_domain == (dma_addr[i + 1] & SVM_RANGE_VRAM_DOMAIN))
continue;
@@ -1201,6 +1211,10 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
NULL, dma_addr,
&vm->last_update,
&table_freed);
+
+ for (j = last_start - prange->start; j <= i; j++)
+ dma_addr[j] |= last_domain;
+
if (r) {
pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
goto out;