diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 157 |
1 files changed, 116 insertions, 41 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 72f8018fa2a8..f35d7a554ad5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "gmc_v9_0.h" #include "amdgpu_atomfirmware.h" +#include "amdgpu_gem.h" #include "hdp/hdp_4_0_offset.h" #include "hdp/hdp_4_0_sh_mask.h" @@ -42,6 +43,7 @@ #include "gfxhub_v1_0.h" #include "mmhub_v1_0.h" +#include "gfxhub_v1_1.h" #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" @@ -264,12 +266,12 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); dev_err(adev->dev, - "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d\n)\n", + "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n", entry->vmid_src ? "mmhub" : "gfxhub", entry->src_id, entry->ring_id, entry->vmid, entry->pasid, task_info.process_name, task_info.tgid, task_info.task_name, task_info.pid); - dev_err(adev->dev, " at address 0x%016llx from %d\n", + dev_err(adev->dev, " in page starting at address 0x%016llx from %d\n", addr, entry->client_id); if (!amdgpu_sriov_vf(adev)) dev_err(adev->dev, @@ -310,6 +312,48 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid) return req; } +static signed long amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + signed long r, cnt = 0; + unsigned long flags; + uint32_t seq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *ring = &kiq->ring; + + spin_lock_irqsave(&kiq->ring_lock, flags); + + amdgpu_ring_alloc(ring, 32); + amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, + ref, mask); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + /* don't wait anymore for IRQ context */ + if (r < 1 && in_interrupt()) + goto failed_kiq; + + might_sleep(); + + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) + goto failed_kiq; + + return 0; + +failed_kiq: + pr_err("failed to invalidate tlb with kiq\n"); + return r; +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -331,13 +375,23 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, /* Use register 17 for GART */ const unsigned eng = 17; unsigned i, j; - - spin_lock(&adev->gmc.invalidate_lock); + int r; for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { struct amdgpu_vmhub *hub = &adev->vmhub[i]; u32 tmp = gmc_v9_0_get_invalidate_req(vmid); + if (adev->gfx.kiq.ring.ready && + (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && + !adev->in_gpu_reset) { + r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + eng, + hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid); + if (!r) + continue; + } + + spin_lock(&adev->gmc.invalidate_lock); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); /* Busy wait for ACK.*/ @@ -348,8 +402,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, break; cpu_relax(); } - if (j < 100) + if (j < 100) { + spin_unlock(&adev->gmc.invalidate_lock); continue; + } /* Wait for ACK with a delay.*/ for (j = 0; j < adev->usec_timeout; j++) { @@ -359,13 +415,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, break; udelay(1); } - if (j < adev->usec_timeout) + if (j < adev->usec_timeout) { + spin_unlock(&adev->gmc.invalidate_lock); continue; - + } + spin_unlock(&adev->gmc.invalidate_lock); DRM_ERROR("Timeout waiting for VM flush ACK!\n"); } - - spin_unlock(&adev->gmc.invalidate_lock); } static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, @@ -374,12 +430,8 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub]; uint32_t req = gmc_v9_0_get_invalidate_req(vmid); - uint64_t flags = AMDGPU_PTE_VALID; unsigned eng = ring->vm_inv_eng; - amdgpu_gmc_get_vm_pde(adev, -1, &pd_addr, &flags); - pd_addr |= flags; - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), lower_32_bits(pd_addr)); @@ -509,7 +561,7 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev, static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { - if (!(*flags & AMDGPU_PDE_PTE)) + if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM)) *addr = adev->vm_manager.vram_base_offset + *addr - adev->gmc.vram_start; BUG_ON(*addr & 0xFFFF00000000003FULL); @@ -541,8 +593,7 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) { - if (adev->gmc.gmc_funcs == NULL) - adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs; + adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs; } static int gmc_v9_0_early_init(void *handle) @@ -641,6 +692,29 @@ static int gmc_v9_0_ecc_available(struct amdgpu_device *adev) return lost_sheep == 0; } +static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev) +{ + + /* + * TODO: + * Currently there is a bug where some memory client outside + * of the driver writes to first 8M of VRAM on S3 resume, + * this overrides GART which by default gets placed in first 8M and + * causes VM_FAULTS once GTT is accessed. + * Keep the stolen memory reservation until the while this is not solved. + * Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init + */ + switch (adev->asic_type) { + case CHIP_VEGA10: + return true; + case CHIP_RAVEN: + case CHIP_VEGA12: + case CHIP_VEGA20: + default: + return false; + } +} + static int gmc_v9_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -657,10 +731,8 @@ static int gmc_v9_0_late_init(void *handle) unsigned i; int r; - /* - * TODO - Uncomment once GART corruption issue is fixed. - */ - /* amdgpu_bo_late_init(adev); */ + if (!gmc_v9_0_keep_stolen_memory(adev)) + amdgpu_bo_late_init(adev); for(i = 0; i < adev->num_rings; ++i) { struct amdgpu_ring *ring = adev->rings[i]; @@ -698,10 +770,18 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, u64 base = 0; if (!amdgpu_sriov_vf(adev)) base = mmhub_v1_0_get_fb_location(adev); - amdgpu_device_vram_location(adev, &adev->gmc, base); - amdgpu_device_gart_location(adev, mc); + /* add the xgmi offset of the physical node */ + base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; + amdgpu_gmc_vram_location(adev, &adev->gmc, base); + amdgpu_gmc_gart_location(adev, mc); + if (!amdgpu_sriov_vf(adev)) + amdgpu_gmc_agp_location(adev, mc); /* base offset of vram pages */ adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); + + /* XXX: add the xgmi offset of the physical node? */ + adev->vm_manager.vram_base_offset += + adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; } /** @@ -781,7 +861,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) { int r; - if (adev->gart.robj) { + if (adev->gart.bo) { WARN(1, "VEGA10 PCIE GART already initialized\n"); return 0; } @@ -797,18 +877,16 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) { -#if 0 u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); -#endif unsigned size; /* * TODO Remove once GART corruption is resolved * Check related code in gmc_v9_0_sw_fini * */ - size = 9 * 1024 * 1024; + if (gmc_v9_0_keep_stolen_memory(adev)) + return 9 * 1024 * 1024; -#if 0 if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ } else { @@ -825,6 +903,7 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) break; case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: default: viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE); size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) * @@ -837,7 +916,6 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) return 0; -#endif return size; } @@ -913,6 +991,12 @@ static int gmc_v9_0_sw_init(void *handle) } adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits); + if (adev->asic_type == CHIP_VEGA20) { + r = gfxhub_v1_1_get_xgmi_info(adev); + if (r) + return r; + } + r = gmc_v9_0_mc_init(adev); if (r) return r; @@ -949,16 +1033,8 @@ static int gmc_v9_0_sw_fini(void *handle) amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); - /* - * TODO: - * Currently there is a bug where some memory client outside - * of the driver writes to first 8M of VRAM on S3 resume, - * this overrides GART which by default gets placed in first 8M and - * causes VM_FAULTS once GTT is accessed. - * Keep the stolen memory reservation until the while this is not solved. - * Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init - */ - amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); + if (gmc_v9_0_keep_stolen_memory(adev)) + amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); amdgpu_gart_table_vram_free(adev); amdgpu_bo_fini(adev); @@ -1007,7 +1083,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) golden_settings_vega10_hdp, ARRAY_SIZE(golden_settings_vega10_hdp)); - if (adev->gart.robj == NULL) { + if (adev->gart.bo == NULL) { dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); return -EINVAL; } @@ -1017,7 +1093,6 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_RAVEN: - mmhub_v1_0_initialize_power_gating(adev); mmhub_v1_0_update_power_gating(adev, true); break; default: @@ -1051,7 +1126,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), - (unsigned long long)adev->gart.table_addr); + (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); adev->gart.ready = true; return 0; } |