diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device.c | 65 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 16 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 24 |
5 files changed, 64 insertions, 47 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 16a57b70cc1a..4a416231b24c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -468,6 +468,7 @@ static const struct kfd_device_info navi10_device_info = { .needs_iommu_device = false, .supports_cwsr = true, .needs_pci_atomics = true, + .no_atomic_fw_version = 145, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -487,6 +488,7 @@ static const struct kfd_device_info navi12_device_info = { .needs_iommu_device = false, .supports_cwsr = true, .needs_pci_atomics = true, + .no_atomic_fw_version = 145, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -506,6 +508,7 @@ static const struct kfd_device_info navi14_device_info = { .needs_iommu_device = false, .supports_cwsr = true, .needs_pci_atomics = true, + .no_atomic_fw_version = 145, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -525,6 +528,7 @@ static const struct kfd_device_info sienna_cichlid_device_info = { .needs_iommu_device = false, .supports_cwsr = true, .needs_pci_atomics = true, + .no_atomic_fw_version = 92, .num_sdma_engines = 4, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -544,6 +548,7 @@ static const struct kfd_device_info navy_flounder_device_info = { .needs_iommu_device = false, .supports_cwsr = true, .needs_pci_atomics = true, + .no_atomic_fw_version = 92, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -562,7 +567,8 @@ static const struct kfd_device_info vangogh_device_info = { .mqd_size_aligned = MQD_SIZE_ALIGNED, .needs_iommu_device = false, .supports_cwsr = true, - .needs_pci_atomics = false, + .needs_pci_atomics = true, + .no_atomic_fw_version = 92, .num_sdma_engines = 1, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, @@ -582,6 +588,7 @@ static const struct kfd_device_info dimgrey_cavefish_device_info = { .needs_iommu_device = false, .supports_cwsr = true, .needs_pci_atomics = true, + .no_atomic_fw_version = 92, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -601,6 +608,7 @@ static const struct kfd_device_info beige_goby_device_info = { .needs_iommu_device = false, .supports_cwsr = true, .needs_pci_atomics = true, + .no_atomic_fw_version = 92, .num_sdma_engines = 1, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -619,7 +627,8 @@ static const struct kfd_device_info yellow_carp_device_info = { .mqd_size_aligned = MQD_SIZE_ALIGNED, .needs_iommu_device = false, .supports_cwsr = true, - .needs_pci_atomics = false, + .needs_pci_atomics = true, + .no_atomic_fw_version = 92, .num_sdma_engines = 1, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, @@ -708,20 +717,6 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, if (!kfd) return NULL; - /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. - * 32 and 64-bit requests are possible and must be - * supported. - */ - kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kgd); - if (device_info->needs_pci_atomics && - !kfd->pci_atomic_requested) { - dev_info(kfd_device, - "skipped device %x:%x, PCI rejects atomics\n", - pdev->vendor, pdev->device); - kfree(kfd); - return NULL; - } - kfd->kgd = kgd; kfd->device_info = device_info; kfd->pdev = pdev; @@ -821,6 +816,23 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd - kfd->vm_info.first_vmid_kfd + 1; + /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. + * 32 and 64-bit requests are possible and must be + * supported. + */ + kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->kgd); + if (!kfd->pci_atomic_requested && + kfd->device_info->needs_pci_atomics && + (!kfd->device_info->no_atomic_fw_version || + kfd->mec_fw_version < kfd->device_info->no_atomic_fw_version)) { + dev_info(kfd_device, + "skipped device %x:%x, PCI rejects atomics %d<%d\n", + kfd->pdev->vendor, kfd->pdev->device, + kfd->mec_fw_version, + kfd->device_info->no_atomic_fw_version); + return false; + } + /* Verify module parameters regarding mapped process number*/ if ((hws_max_conc_proc < 0) || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) { @@ -959,7 +971,6 @@ out: void kgd2kfd_device_exit(struct kfd_dev *kfd) { if (kfd->init_complete) { - svm_migrate_fini((struct amdgpu_device *)kfd->kgd); device_queue_manager_uninit(kfd->dqm); kfd_interrupt_exit(kfd); kfd_topology_remove_device(kfd); @@ -1057,31 +1068,29 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) return ret; } -static int kfd_resume(struct kfd_dev *kfd) +int kgd2kfd_resume_iommu(struct kfd_dev *kfd) { int err = 0; err = kfd_iommu_resume(kfd); - if (err) { + if (err) dev_err(kfd_device, "Failed to resume IOMMU for device %x:%x\n", kfd->pdev->vendor, kfd->pdev->device); - return err; - } + return err; +} + +static int kfd_resume(struct kfd_dev *kfd) +{ + int err = 0; err = kfd->dqm->ops.start(kfd->dqm); - if (err) { + if (err) dev_err(kfd_device, "Error starting queue manager for device %x:%x\n", kfd->pdev->vendor, kfd->pdev->device); - goto dqm_start_error; - } return err; - -dqm_start_error: - kfd_iommu_suspend(kfd); - return err; } static inline void kfd_queue_work(struct workqueue_struct *wq, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index dab290a4d19d..4a16e3c257b9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -891,9 +891,16 @@ int svm_migrate_init(struct amdgpu_device *adev) pgmap->ops = &svm_migrate_pgmap_ops; pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev); pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; + + /* Device manager releases device-specific resources, memory region and + * pgmap when driver disconnects from device. + */ r = devm_memremap_pages(adev->dev, pgmap); if (IS_ERR(r)) { pr_err("failed to register HMM device memory\n"); + + /* Disable SVM support capability */ + pgmap->type = 0; devm_release_mem_region(adev->dev, res->start, res->end - res->start + 1); return PTR_ERR(r); @@ -908,12 +915,3 @@ int svm_migrate_init(struct amdgpu_device *adev) return 0; } - -void svm_migrate_fini(struct amdgpu_device *adev) -{ - struct dev_pagemap *pgmap = &adev->kfd.dev->pgmap; - - devm_memunmap_pages(adev->dev, pgmap); - devm_release_mem_region(adev->dev, pgmap->range.start, - pgmap->range.end - pgmap->range.start + 1); -} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index 0de76b5d4973..2f5b3394c9ed 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -47,7 +47,6 @@ unsigned long svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr); int svm_migrate_init(struct amdgpu_device *adev); -void svm_migrate_fini(struct amdgpu_device *adev); #else @@ -55,10 +54,6 @@ static inline int svm_migrate_init(struct amdgpu_device *adev) { return 0; } -static inline void svm_migrate_fini(struct amdgpu_device *adev) -{ - /* empty */ -} #endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index ab83b0de6b22..6d8f9bb2d905 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -207,6 +207,7 @@ struct kfd_device_info { bool supports_cwsr; bool needs_iommu_device; bool needs_pci_atomics; + uint32_t no_atomic_fw_version; unsigned int num_sdma_engines; unsigned int num_xgmi_sdma_engines; unsigned int num_sdma_queues_per_engine; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 9fc8021bb0ab..9d0f65a90002 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -118,6 +118,13 @@ static void svm_range_remove_notifier(struct svm_range *prange) mmu_interval_notifier_remove(&prange->notifier); } +static bool +svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr && !dma_mapping_error(dev, dma_addr) && + !(dma_addr & SVM_RANGE_VRAM_DOMAIN); +} + static int svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, unsigned long offset, unsigned long npages, @@ -139,8 +146,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, addr += offset; for (i = 0; i < npages; i++) { - if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]), - "leaking dma mapping\n")) + if (svm_is_valid_dma_mapping_addr(dev, addr[i])) dma_unmap_page(dev, addr[i], PAGE_SIZE, dir); page = hmm_pfn_to_page(hmm_pfns[i]); @@ -209,7 +215,7 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, return; for (i = offset; i < offset + npages; i++) { - if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i])) + if (!svm_is_valid_dma_mapping_addr(dev, dma_addr[i])) continue; pr_debug("dma unmapping 0x%llx\n", dma_addr[i] >> PAGE_SHIFT); dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir); @@ -1165,7 +1171,7 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned long last_start; int last_domain; int r = 0; - int64_t i; + int64_t i, j; last_start = prange->start + offset; @@ -1178,7 +1184,11 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, for (i = offset; i < offset + npages; i++) { last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN; dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN; - if ((prange->start + i) < prange->last && + + /* Collect all pages in the same address range and memory domain + * that can be mapped with a single call to update mapping. + */ + if (i < offset + npages - 1 && last_domain == (dma_addr[i + 1] & SVM_RANGE_VRAM_DOMAIN)) continue; @@ -1201,6 +1211,10 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, NULL, dma_addr, &vm->last_update, &table_freed); + + for (j = last_start - prange->start; j <= i; j++) + dma_addr[j] |= last_domain; + if (r) { pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start); goto out; |