diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-08 12:28:15 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-08 12:28:15 -0700 |
commit | f55966571d5eb2876a11e48e798b4592fa1ffbb7 (patch) | |
tree | fc609e3d8cd988f7d13da7abc36f8c77bd4dc0be /drivers/gpu | |
parent | 8c1bfd746030a14435c9b60d08a81af61332089b (diff) | |
parent | 21c355b09711e95f8f0e7c1890b343c6cd350042 (diff) | |
download | linux-stable-f55966571d5eb2876a11e48e798b4592fa1ffbb7.tar.gz linux-stable-f55966571d5eb2876a11e48e798b4592fa1ffbb7.tar.bz2 linux-stable-f55966571d5eb2876a11e48e798b4592fa1ffbb7.zip |
Merge tag 'drm-next-2021-07-08-1' of git://anongit.freedesktop.org/drm/drm
Pull drm fixes from Dave Airlie:
"Some fixes for rc1 that came in the past weeks, mainly a bunch of
amdgpu fixes, some i915 and the rest are misc around the place. I'm
sending this a bit early so some more stuff may show up, but I'll
probably take tomorrow off.
dma-buf:
- doc fixes
amdgpu:
- Misc Navi fixes
- Powergating fix
- Yellow Carp updates
- Beige Goby updates
- S0ix fix
- Revert overlay validation fix
- GPU reset fix for DC
- PPC64 fix
- Add new dimgrey cavefish DID
- RAS fix
- TTM fixes
amdkfd:
- SVM fixes
radeon:
- Fix missing drm_gem_object_put in error path
- NULL ptr deref fix
i915:
- display DP VSC fix
- DG1 display fix
- IRQ fixes
- IRQ demidlayering
gma500:
- bo leaks in error paths fixed"
* tag 'drm-next-2021-07-08-1' of git://anongit.freedesktop.org/drm/drm: (52 commits)
drm/i915: Drop all references to DRM IRQ midlayer
drm/i915: Use the correct IRQ during resume
drm/i915/display/dg1: Correctly map DPLLs during state readout
drm/i915/display: Do not zero past infoframes.vsc
drm/amdgpu: Conditionally reset SDMA RAS error counts
drm/amdkfd: Maintain svm_bo reference in page->zone_device_data
drm/amdkfd: add invalid pages debug at vram migration
drm/amdkfd: skip migration for pages already in VRAM
drm/amdkfd: skip invalid pages during migrations
drm/amdkfd: classify and map mixed svm range pages in GPU
drm/amdkfd: use hmm range fault to get both domain pfns
drm/amdgpu: get owner ref in validate and map
drm/amdkfd: set owner ref to svm range prefault
drm/amdkfd: add owner ref param to get hmm pages
drm/amdkfd: device pgmap owner at the svm migrate init
drm/amdkfd: inc counter on child ranges with xnack off
drm/amd/display: Extend DMUB diagnostic logging to DCN3.1
drm/amdgpu: Update NV SIMD-per-CU to 2
drm/amdgpu: add new dimgrey cavefish DID
drm/amd/pm: skip PrepareMp1ForUnload message in s0ix
...
Diffstat (limited to 'drivers/gpu')
52 files changed, 1047 insertions, 503 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 130a9adf09ef..d303e88e3c23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1369,6 +1369,38 @@ def_value: adev->pm.smu_prv_buffer_size = 0; } +static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev) +{ + if (!(adev->flags & AMD_IS_APU) || + adev->asic_type < CHIP_RAVEN) + return 0; + + switch (adev->asic_type) { + case CHIP_RAVEN: + if (adev->pdev->device == 0x15dd) + adev->apu_flags |= AMD_APU_IS_RAVEN; + if (adev->pdev->device == 0x15d8) + adev->apu_flags |= AMD_APU_IS_PICASSO; + break; + case CHIP_RENOIR: + if ((adev->pdev->device == 0x1636) || + (adev->pdev->device == 0x164c)) + adev->apu_flags |= AMD_APU_IS_RENOIR; + else + adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE; + break; + case CHIP_VANGOGH: + adev->apu_flags |= AMD_APU_IS_VANGOGH; + break; + case CHIP_YELLOW_CARP: + break; + default: + return -EINVAL; + } + + return 0; +} + /** * amdgpu_device_check_arguments - validate module params * @@ -3386,6 +3418,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->psp.mutex); mutex_init(&adev->notifier_lock); + r = amdgpu_device_init_apu_flags(adev); + if (r) + return r; + r = amdgpu_device_check_arguments(adev); if (r) return r; @@ -4304,6 +4340,7 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: + case CHIP_BEIGE_GOBY: case CHIP_VANGOGH: case CHIP_ALDEBARAN: break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 6f30c525caac..71beb0db0125 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -160,6 +160,7 @@ int amdgpu_smu_pptable_id = -1; * highest. That helps saving some idle power. * DISABLE_FRACTIONAL_PWM (bit 2) disabled by default * PSR (bit 3) disabled by default + * EDP NO POWER SEQUENCING (bit 4) disabled by default */ uint amdgpu_dc_feature_mask = 2; uint amdgpu_dc_debug_mask; @@ -1198,6 +1199,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x73E0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, {0x1002, 0x73E1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, {0x1002, 0x73E2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73E3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, {0x1002, 0x73FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, /* Aldebaran */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 0174f7817ce2..d0b8d415b63b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -562,6 +562,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_VANGOGH: + case CHIP_YELLOW_CARP: /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index d6c54c7f7679..4b153daf283d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -160,7 +160,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, struct mm_struct *mm, struct page **pages, uint64_t start, uint64_t npages, struct hmm_range **phmm_range, bool readonly, - bool mmap_locked) + bool mmap_locked, void *owner) { struct hmm_range *hmm_range; unsigned long timeout; @@ -185,6 +185,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, hmm_range->hmm_pfns = pfns; hmm_range->start = start; hmm_range->end = start + npages * PAGE_SIZE; + hmm_range->dev_private_owner = owner; /* Assuming 512MB takes maxmium 1 second to fault page address */ timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index 7f7d37a457c3..14a3c1864085 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -34,7 +34,7 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, struct mm_struct *mm, struct page **pages, uint64_t start, uint64_t npages, struct hmm_range **phmm_range, bool readonly, - bool mmap_locked); + bool mmap_locked, void *owner); int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range); #if defined(CONFIG_HMM_MIRROR) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 25ee53545837..45295dce5c3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -93,6 +93,8 @@ struct amdgpu_nbio_funcs { void (*enable_aspm)(struct amdgpu_device *adev, bool enable); void (*program_aspm)(struct amdgpu_device *adev); + void (*apply_lc_spc_mode_wa)(struct amdgpu_device *adev); + void (*apply_l1_link_width_reconfig_wa)(struct amdgpu_device *adev); }; struct amdgpu_nbio { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index cdfc20b0b2eb..3a55f08e00e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -590,10 +590,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, mem->bus.offset += adev->gmc.aper_base; mem->bus.is_iomem = true; - if (adev->gmc.xgmi.connected_to_cpu) - mem->bus.caching = ttm_cached; - else - mem->bus.caching = ttm_write_combined; break; default: return -EINVAL; @@ -695,7 +691,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) readonly = amdgpu_ttm_tt_is_readonly(ttm); r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start, ttm->num_pages, >t->range, readonly, - true); + true, NULL); out_unlock: mmap_read_unlock(mm); mmput(mm); @@ -923,7 +919,8 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, bo_mem->mem_type == AMDGPU_PL_OA) return -EINVAL; - if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) { + if (bo_mem->mem_type != TTM_PL_TT || + !amdgpu_gtt_mgr_has_gart_addr(bo_mem)) { gtt->offset = AMDGPU_BO_INVALID_OFFSET; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 436ec246a7da..2fd77c36a1ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -463,6 +463,11 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, if (i == 1) node->base.placement |= TTM_PL_FLAG_CONTIGUOUS; + if (adev->gmc.xgmi.connected_to_cpu) + node->base.bus.caching = ttm_cached; + else + node->base.bus.caching = ttm_write_combined; + atomic64_add(vis_usage, &mgr->vis_usage); *res = &node->base; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c index 5b90efd6f6d0..3ac505d954c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c @@ -36,9 +36,12 @@ athub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, { uint32_t def, data; + if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + return; + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + if (enable) data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; else data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; @@ -53,10 +56,13 @@ athub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, { uint32_t def, data; + if (!((adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && + (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))) + return; + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && - (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) + if (enable) data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; else data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 2d56b60bc058..f5e9c022960b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -5086,47 +5086,44 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) 4 + /* RMI */ 1); /* SQG */ - if (adev->asic_type == CHIP_NAVI10 || - adev->asic_type == CHIP_NAVI14 || - adev->asic_type == CHIP_NAVI12) { - mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { - for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); - wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev); - /* - * Set corresponding TCP bits for the inactive WGPs in - * GCRD_SA_TARGETS_DISABLE - */ - gcrd_targets_disable_tcp = 0; - /* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */ - utcl_invreq_disable = 0; - - for (k = 0; k < max_wgp_per_sh; k++) { - if (!(wgp_active_bitmap & (1 << k))) { - gcrd_targets_disable_tcp |= 3 << (2 * k); - utcl_invreq_disable |= (3 << (2 * k)) | - (3 << (2 * (max_wgp_per_sh + k))); - } + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); + wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev); + /* + * Set corresponding TCP bits for the inactive WGPs in + * GCRD_SA_TARGETS_DISABLE + */ + gcrd_targets_disable_tcp = 0; + /* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */ + utcl_invreq_disable = 0; + + for (k = 0; k < max_wgp_per_sh; k++) { + if (!(wgp_active_bitmap & (1 << k))) { + gcrd_targets_disable_tcp |= 3 << (2 * k); + gcrd_targets_disable_tcp |= 1 << (k + (max_wgp_per_sh * 2)); + utcl_invreq_disable |= (3 << (2 * k)) | + (3 << (2 * (max_wgp_per_sh + k))); } - - tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE); - /* only override TCP & SQC bits */ - tmp &= 0xffffffff << (4 * max_wgp_per_sh); - tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask); - WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp); - - tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE); - /* only override TCP bits */ - tmp &= 0xffffffff << (2 * max_wgp_per_sh); - tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask); - WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp); } - } - gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - mutex_unlock(&adev->grbm_idx_mutex); + tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE); + /* only override TCP & SQC bits */ + tmp &= (0xffffffffU << (4 * max_wgp_per_sh)); + tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask); + WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp); + + tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE); + /* only override TCP & SQC bits */ + tmp &= (0xffffffffU << (3 * max_wgp_per_sh)); + tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask); + WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp); + } } + + gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); } static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev) @@ -7404,7 +7401,10 @@ static int gfx_v10_0_hw_init(void *handle) * init golden registers and rlc resume may override some registers, * reconfig them here */ - gfx_v10_0_tcp_harvest(adev); + if (adev->asic_type == CHIP_NAVI10 || + adev->asic_type == CHIP_NAVI14 || + adev->asic_type == CHIP_NAVI12) + gfx_v10_0_tcp_harvest(adev); r = gfx_v10_0_cp_resume(adev); if (r) @@ -7777,6 +7777,9 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade { uint32_t data, def; + if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS))) + return; + /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { /* 0 - Disable some blocks' MGCG */ @@ -7791,6 +7794,7 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK); if (def != data) @@ -7813,13 +7817,15 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); } } - } else { + } else if (!enable || !(adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { /* 1 - MGCG_OVERRIDE */ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | - RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK); if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); @@ -7845,22 +7851,34 @@ static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev, { uint32_t data, def; + if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS))) + return; + /* Enable 3D CGCG/CGLS */ - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { + if (enable) { /* write cmd to clear cgcg/cgls ov */ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + /* unset CGCG override */ - data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; + /* update CGCG and CGLS override bits */ if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); + /* enable 3Dcgcg FSM(0x0000363f) */ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); - data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | - RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; + data = 0; + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) + data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; + if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); @@ -7873,9 +7891,14 @@ static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev, } else { /* Disable CGCG/CGLS */ def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); + /* disable cgcg, cgls should be disabled */ - data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | - RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) + data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) + data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; + /* disable cgcg and cgls in FSM */ if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); @@ -7887,25 +7910,35 @@ static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade { uint32_t def, data; - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { + if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS))) + return; + + if (enable) { def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + /* unset CGCG override */ - data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; - else - data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; + /* update CGCG and CGLS override bits */ if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); /* enable cgcg FSM(0x0000363F) */ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); - data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | - RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + data = 0; + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) + data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); @@ -7917,8 +7950,14 @@ static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); } else { def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); + /* reset CGCG/CGLS bits */ - data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) + data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) + data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + /* disable cgcg and cgls in FSM */ if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); @@ -7930,7 +7969,10 @@ static void gfx_v10_0_update_fine_grain_clock_gating(struct amdgpu_device *adev, { uint32_t def, data; - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) { + if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) + return; + + if (enable) { def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); /* unset FGCG override */ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; @@ -7961,6 +8003,97 @@ static void gfx_v10_0_update_fine_grain_clock_gating(struct amdgpu_device *adev, } } +static void gfx_v10_0_apply_medium_grain_clock_gating_workaround(struct amdgpu_device *adev) +{ + uint32_t reg_data = 0; + uint32_t reg_idx = 0; + uint32_t i; + + const uint32_t tcp_ctrl_regs[] = { + mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP00_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP01_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP01_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP02_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP02_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP10_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP10_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP11_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP11_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP12_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP12_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP00_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP00_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP01_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP01_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP02_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP02_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP10_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP10_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP11_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP11_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP12_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP12_CU1_TCP_CTRL_REG + }; + + const uint32_t tcp_ctrl_regs_nv12[] = { + mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP00_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP01_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP01_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP02_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP02_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP10_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP10_CU1_TCP_CTRL_REG, + mmCGTS_SA0_WGP11_CU0_TCP_CTRL_REG, + mmCGTS_SA0_WGP11_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP00_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP00_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP01_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP01_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP02_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP02_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP10_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP10_CU1_TCP_CTRL_REG, + mmCGTS_SA1_WGP11_CU0_TCP_CTRL_REG, + mmCGTS_SA1_WGP11_CU1_TCP_CTRL_REG, + }; + + const uint32_t sm_ctlr_regs[] = { + mmCGTS_SA0_QUAD0_SM_CTRL_REG, + mmCGTS_SA0_QUAD1_SM_CTRL_REG, + mmCGTS_SA1_QUAD0_SM_CTRL_REG, + mmCGTS_SA1_QUAD1_SM_CTRL_REG + }; + + if (adev->asic_type == CHIP_NAVI12) { + for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs_nv12); i++) { + reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] + + tcp_ctrl_regs_nv12[i]; + reg_data = RREG32(reg_idx); + reg_data |= CGTS_SA0_WGP00_CU0_TCP_CTRL_REG__TCPI_LS_OVERRIDE_MASK; + WREG32(reg_idx, reg_data); + } + } else { + for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs); i++) { + reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] + + tcp_ctrl_regs[i]; + reg_data = RREG32(reg_idx); + reg_data |= CGTS_SA0_WGP00_CU0_TCP_CTRL_REG__TCPI_LS_OVERRIDE_MASK; + WREG32(reg_idx, reg_data); + } + } + + for (i = 0; i < ARRAY_SIZE(sm_ctlr_regs); i++) { + reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_QUAD0_SM_CTRL_REG_BASE_IDX] + + sm_ctlr_regs[i]; + reg_data = RREG32(reg_idx); + reg_data &= ~CGTS_SA0_QUAD0_SM_CTRL_REG__SM_MODE_MASK; + reg_data |= 2 << CGTS_SA0_QUAD0_SM_CTRL_REG__SM_MODE__SHIFT; + WREG32(reg_idx, reg_data); + } +} + static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable) { @@ -7977,6 +8110,10 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, gfx_v10_0_update_3d_clock_gating(adev, enable); /* === CGCG + CGLS === */ gfx_v10_0_update_coarse_grain_clock_gating(adev, enable); + + if ((adev->asic_type >= CHIP_NAVI10) && + (adev->asic_type <= CHIP_NAVI12)) + gfx_v10_0_apply_medium_grain_clock_gating_workaround(adev); } else { /* CGCG/CGLS should be disabled before MGCG/MGLS * === CGCG + CGLS === @@ -9324,17 +9461,22 @@ static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device * static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev) { - u32 data, wgp_bitmask; - data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); - data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); + u32 disabled_mask = + ~amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1); + u32 efuse_setting = 0; + u32 vbios_setting = 0; + + efuse_setting = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); + efuse_setting &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; + efuse_setting >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; - data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; - data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; + vbios_setting = RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); + vbios_setting &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; + vbios_setting >>= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; - wgp_bitmask = - amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1); + disabled_mask |= efuse_setting | vbios_setting; - return (~data) & wgp_bitmask; + return (~disabled_mask); } static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index 7a15e669b68d..5793977953cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -90,45 +90,56 @@ static void hdp_v5_0_update_mem_power_gating(struct amdgpu_device *adev, RC_MEM_POWER_SD_EN, 0); WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); - /* only one clock gating mode (LS/DS/SD) can be enabled */ - if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) { - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - IPH_MEM_POWER_LS_EN, enable); - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - RC_MEM_POWER_LS_EN, enable); - } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) { - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - IPH_MEM_POWER_DS_EN, enable); - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - RC_MEM_POWER_DS_EN, enable); - } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) { - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - IPH_MEM_POWER_SD_EN, enable); - /* RC should not use shut down mode, fallback to ds */ - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - RC_MEM_POWER_DS_EN, enable); - } - - /* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to - * be set for SRAM LS/DS/SD */ - if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS | - AMD_CG_SUPPORT_HDP_SD)) { - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, - IPH_MEM_POWER_CTRL_EN, 1); - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, - RC_MEM_POWER_CTRL_EN, 1); + /* Already disabled above. The actions below are for "enabled" only */ + if (enable) { + /* only one clock gating mode (LS/DS/SD) can be enabled */ + if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_LS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_DS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_SD_EN, 1); + /* RC should not use shut down mode, fallback to ds or ls if allowed */ + if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 1); + else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 1); + } + + /* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to + * be set for SRAM LS/DS/SD */ + if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD)) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_CTRL_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 1); + WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + } } - WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); - - /* restore IPH & RC clock override after clock/power mode changing */ - WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl1); + /* disable IPH & RC clock override after clock/power mode changing */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + IPH_MEM_CLK_SOFT_OVERRIDE, 0); + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 0); + WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl); } static void hdp_v5_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index f7e93bbc4e15..7ded6b2f058e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -568,6 +568,9 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad { uint32_t def, data, def1, data1; + if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + return; + switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: @@ -582,7 +585,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad break; } - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { + if (enable) { data |= MM_ATC_L2_MISC_CG__ENABLE_MASK; data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | @@ -627,6 +630,9 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade { uint32_t def, data; + if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) + return; + switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: @@ -639,7 +645,7 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade break; } - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) + if (enable) data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; else data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 05ddec7ba7e2..7b79eeaa88aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -51,6 +51,8 @@ #define mmBIF_MMSCH1_DOORBELL_RANGE 0x01d8 #define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX 2 +#define smnPCIE_LC_LINK_WIDTH_CNTL 0x11140288 + static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev) { WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, @@ -218,8 +220,11 @@ static void nbio_v2_3_update_medium_grain_clock_gating(struct amdgpu_device *ade { uint32_t def, data; + if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) + return; + def = data = RREG32_PCIE(smnCPM_CONTROL); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) { + if (enable) { data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | @@ -244,8 +249,11 @@ static void nbio_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev { uint32_t def, data; + if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) + return; + def = data = RREG32_PCIE(smnPCIE_CNTL2); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) { + if (enable) { data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK | PCIE_CNTL2__MST_MEM_LS_EN_MASK | PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); @@ -463,6 +471,43 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) WREG32_PCIE(smnPCIE_LC_CNTL3, data); } +static void nbio_v2_3_apply_lc_spc_mode_wa(struct amdgpu_device *adev) +{ + uint32_t reg_data = 0; + uint32_t link_width = 0; + + if (!((adev->asic_type >= CHIP_NAVI10) && + (adev->asic_type <= CHIP_NAVI12))) + return; + + reg_data = RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL); + link_width = (reg_data & PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK) + >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT; + + /* + * Program PCIE_LC_CNTL6.LC_SPC_MODE_8GT to 0x2 (4 symbols per clock data) + * if link_width is 0x3 (x4) + */ + if (0x3 == link_width) { + reg_data = RREG32_PCIE(smnPCIE_LC_CNTL6); + reg_data &= ~PCIE_LC_CNTL6__LC_SPC_MODE_8GT_MASK; + reg_data |= (0x2 << PCIE_LC_CNTL6__LC_SPC_MODE_8GT__SHIFT); + WREG32_PCIE(smnPCIE_LC_CNTL6, reg_data); + } +} + +static void nbio_v2_3_apply_l1_link_width_reconfig_wa(struct amdgpu_device *adev) +{ + uint32_t reg_data = 0; + + if (adev->asic_type != CHIP_NAVI10) + return; + + reg_data = RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL); + reg_data |= PCIE_LC_LINK_WIDTH_CNTL__LC_L1_RECONFIG_EN_MASK; + WREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL, reg_data); +} + const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { .get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset, @@ -484,4 +529,6 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { .remap_hdp_registers = nbio_v2_3_remap_hdp_registers, .enable_aspm = nbio_v2_3_enable_aspm, .program_aspm = nbio_v2_3_program_aspm, + .apply_lc_spc_mode_wa = nbio_v2_3_apply_lc_spc_mode_wa, + .apply_l1_link_width_reconfig_wa = nbio_v2_3_apply_l1_link_width_reconfig_wa, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 455d0425787c..94a2c0742ee5 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -64,6 +64,13 @@ #include "smuio_v11_0.h" #include "smuio_v11_0_6.h" +#define codec_info_build(type, width, height, level) \ + .codec_type = type,\ + .max_width = width,\ + .max_height = height,\ + .max_pixels_per_frame = height * width,\ + .max_level = level, + static const struct amd_ip_funcs nv_common_ip_funcs; /* Navi */ @@ -309,6 +316,23 @@ static struct amdgpu_video_codecs sriov_sc_video_codecs_decode = .codec_array = sriov_sc_video_codecs_decode_array, }; +/* Beige Goby*/ +static const struct amdgpu_video_codec_info bg_video_codecs_decode_array[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, +}; + +static const struct amdgpu_video_codecs bg_video_codecs_decode = { + .codec_count = ARRAY_SIZE(bg_video_codecs_decode_array), + .codec_array = bg_video_codecs_decode_array, +}; + +static const struct amdgpu_video_codecs bg_video_codecs_encode = { + .codec_count = 0, + .codec_array = NULL, +}; + static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode, const struct amdgpu_video_codecs **codecs) { @@ -335,6 +359,12 @@ static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode, else *codecs = &sc_video_codecs_decode; return 0; + case CHIP_BEIGE_GOBY: + if (encode) + *codecs = &bg_video_codecs_encode; + else + *codecs = &bg_video_codecs_decode; + return 0; case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: @@ -1275,7 +1305,6 @@ static int nv_common_early_init(void *handle) break; case CHIP_VANGOGH: - adev->apu_flags |= AMD_APU_IS_VANGOGH; adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS | AMD_CG_SUPPORT_GFX_CP_LS | @@ -1411,6 +1440,12 @@ static int nv_common_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->nbio.funcs->apply_lc_spc_mode_wa) + adev->nbio.funcs->apply_lc_spc_mode_wa(adev); + + if (adev->nbio.funcs->apply_l1_link_width_reconfig_wa) + adev->nbio.funcs->apply_l1_link_width_reconfig_wa(adev); + /* enable pcie gen2/3 link */ nv_pcie_gen3_enable(adev); /* enable aspm */ diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index ae5464e2535a..8931000dcd41 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -144,7 +144,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4_1[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), - SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003e0), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000) }; @@ -288,7 +288,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4_3[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003fff07, 0x40000051), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), - SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003e0), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x03fbe1fe) }; @@ -1896,8 +1896,11 @@ static int sdma_v4_0_late_init(void *handle) sdma_v4_0_setup_ulv(adev); - if (adev->sdma.funcs && adev->sdma.funcs->reset_ras_error_count) - adev->sdma.funcs->reset_ras_error_count(adev); + if (!amdgpu_persistent_edc_harvesting_supported(adev)) { + if (adev->sdma.funcs && + adev->sdma.funcs->reset_ras_error_count) + adev->sdma.funcs->reset_ras_error_count(adev); + } if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init) return adev->sdma.funcs->ras_late_init(adev, &ih_info); diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c index e9c474c217ec..b6f1322f908c 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c @@ -43,9 +43,12 @@ static void smuio_v11_0_update_rom_clock_gating(struct amdgpu_device *adev, bool if (adev->flags & AMD_IS_APU) return; + if (!(adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG)) + return; + def = data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG)) + if (enable) data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK); else diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index de85577c9cfd..b02436401d46 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1360,10 +1360,7 @@ static int soc15_common_early_init(void *handle) break; case CHIP_RAVEN: adev->asic_funcs = &soc15_asic_funcs; - if (adev->pdev->device == 0x15dd) - adev->apu_flags |= AMD_APU_IS_RAVEN; - if (adev->pdev->device == 0x15d8) - adev->apu_flags |= AMD_APU_IS_PICASSO; + if (adev->rev_id >= 0x8) adev->apu_flags |= AMD_APU_IS_RAVEN2; @@ -1455,11 +1452,6 @@ static int soc15_common_early_init(void *handle) break; case CHIP_RENOIR: adev->asic_funcs = &soc15_asic_funcs; - if ((adev->pdev->device == 0x1636) || - (adev->pdev->device == 0x164c)) - adev->apu_flags |= AMD_APU_IS_RENOIR; - else - adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE; if (adev->apu_flags & AMD_APU_IS_RENOIR) adev->external_rev_id = adev->rev_id + 0x91; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 2660f03e63a7..dab290a4d19d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -218,7 +218,8 @@ svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn) struct page *page; page = pfn_to_page(pfn); - page->zone_device_data = prange; + svm_range_bo_ref(prange->svm_bo); + page->zone_device_data = prange->svm_bo; get_page(page); lock_page(page); } @@ -293,15 +294,13 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, for (i = j = 0; i < npages; i++) { struct page *spage; - dst[i] = cursor.start + (j << PAGE_SHIFT); - migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]); - svm_migrate_get_vram_page(prange, migrate->dst[i]); - - migrate->dst[i] = migrate_pfn(migrate->dst[i]); - migrate->dst[i] |= MIGRATE_PFN_LOCKED; - - if (migrate->src[i] & MIGRATE_PFN_VALID) { - spage = migrate_pfn_to_page(migrate->src[i]); + spage = migrate_pfn_to_page(migrate->src[i]); + if (spage && !is_zone_device_page(spage)) { + dst[i] = cursor.start + (j << PAGE_SHIFT); + migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]); + svm_migrate_get_vram_page(prange, migrate->dst[i]); + migrate->dst[i] = migrate_pfn(migrate->dst[i]); + migrate->dst[i] |= MIGRATE_PFN_LOCKED; src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE, DMA_TO_DEVICE); r = dma_mapping_error(dev, src[i]); @@ -355,6 +354,20 @@ out_free_vram_pages: } } +#ifdef DEBUG_FORCE_MIXED_DOMAINS + for (i = 0, j = 0; i < npages; i += 4, j++) { + if (j & 1) + continue; + svm_migrate_put_vram_page(adev, dst[i]); + migrate->dst[i] = 0; + svm_migrate_put_vram_page(adev, dst[i + 1]); + migrate->dst[i + 1] = 0; + svm_migrate_put_vram_page(adev, dst[i + 2]); + migrate->dst[i + 2] = 0; + svm_migrate_put_vram_page(adev, dst[i + 3]); + migrate->dst[i + 3] = 0; + } +#endif out: return r; } @@ -365,20 +378,20 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, uint64_t end) { uint64_t npages = (end - start) >> PAGE_SHIFT; + struct kfd_process_device *pdd; struct dma_fence *mfence = NULL; struct migrate_vma migrate; dma_addr_t *scratch; size_t size; void *buf; int r = -ENOMEM; - int retry = 0; memset(&migrate, 0, sizeof(migrate)); migrate.vma = vma; migrate.start = start; migrate.end = end; migrate.flags = MIGRATE_VMA_SELECT_SYSTEM; - migrate.pgmap_owner = adev; + migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev); size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t); size *= npages; @@ -390,7 +403,6 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, migrate.dst = migrate.src + npages; scratch = (dma_addr_t *)(migrate.dst + npages); -retry: r = migrate_vma_setup(&migrate); if (r) { pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n", @@ -398,17 +410,9 @@ retry: goto out_free; } if (migrate.cpages != npages) { - pr_debug("collect 0x%lx/0x%llx pages, retry\n", migrate.cpages, + pr_debug("Partial migration. 0x%lx/0x%llx pages can be migrated\n", + migrate.cpages, npages); - migrate_vma_finalize(&migrate); - if (retry++ >= 3) { - r = -ENOMEM; - pr_debug("failed %d migrate svms 0x%p [0x%lx 0x%lx]\n", - r, prange->svms, prange->start, prange->last); - goto out_free; - } - - goto retry; } if (migrate.cpages) { @@ -425,6 +429,12 @@ retry: out_free: kvfree(buf); out: + if (!r) { + pdd = svm_range_get_pdd_by_adev(prange, adev); + if (pdd) + WRITE_ONCE(pdd->page_in, pdd->page_in + migrate.cpages); + } + return r; } @@ -464,7 +474,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, prange->start, prange->last, best_loc); /* FIXME: workaround for page locking bug with invalid pages */ - svm_range_prefault(prange, mm); + svm_range_prefault(prange, mm, SVM_ADEV_PGMAP_OWNER(adev)); start = prange->start << PAGE_SHIFT; end = (prange->last + 1) << PAGE_SHIFT; @@ -493,15 +503,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, static void svm_migrate_page_free(struct page *page) { - /* Keep this function to avoid warning */ + struct svm_range_bo *svm_bo = page->zone_device_data; + + if (svm_bo) { + pr_debug("svm_bo ref left: %d\n", kref_read(&svm_bo->kref)); + svm_range_bo_unref(svm_bo); + } } static int svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, struct migrate_vma *migrate, struct dma_fence **mfence, - dma_addr_t *scratch) + dma_addr_t *scratch, uint64_t npages) { - uint64_t npages = migrate->cpages; struct device *dev = adev->dev; uint64_t *src; dma_addr_t *dst; @@ -518,15 +532,23 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, src = (uint64_t *)(scratch + npages); dst = scratch; - for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) { + for (i = 0, j = 0; i < npages; i++, addr += PAGE_SIZE) { struct page *spage; spage = migrate_pfn_to_page(migrate->src[i]); - if (!spage) { - pr_debug("failed get spage svms 0x%p [0x%lx 0x%lx]\n", + if (!spage || !is_zone_device_page(spage)) { + pr_debug("invalid page. Could be in CPU already svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, prange->last); - r = -ENOMEM; - goto out_oom; + if (j) { + r = svm_migrate_copy_memory_gart(adev, dst + i - j, + src + i - j, j, + FROM_VRAM_TO_RAM, + mfence); + if (r) + goto out_oom; + j = 0; + } + continue; } src[i] = svm_migrate_addr(adev, spage); if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) { @@ -559,6 +581,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, migrate->dst[i] = migrate_pfn(page_to_pfn(dpage)); migrate->dst[i] |= MIGRATE_PFN_LOCKED; + j++; } r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j, @@ -581,6 +604,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, struct vm_area_struct *vma, uint64_t start, uint64_t end) { uint64_t npages = (end - start) >> PAGE_SHIFT; + struct kfd_process_device *pdd; struct dma_fence *mfence = NULL; struct migrate_vma migrate; dma_addr_t *scratch; @@ -593,7 +617,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, migrate.start = start; migrate.end = end; migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; - migrate.pgmap_owner = adev; + migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev); size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t); size *= npages; @@ -616,7 +640,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, if (migrate.cpages) { r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence, - scratch); + scratch, npages); migrate_vma_pages(&migrate); svm_migrate_copy_done(adev, mfence); migrate_vma_finalize(&migrate); @@ -630,6 +654,12 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, out_free: kvfree(buf); out: + if (!r) { + pdd = svm_range_get_pdd_by_adev(prange, adev); + if (pdd) + WRITE_ONCE(pdd->page_out, + pdd->page_out + migrate.cpages); + } return r; } @@ -859,7 +889,7 @@ int svm_migrate_init(struct amdgpu_device *adev) pgmap->range.start = res->start; pgmap->range.end = res->end; pgmap->ops = &svm_migrate_pgmap_ops; - pgmap->owner = adev; + pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev); pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; r = devm_memremap_pages(adev->dev, pgmap); if (IS_ERR(r)) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 6dc22fa1e555..3426743ed228 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -730,6 +730,15 @@ struct kfd_process_device { * number of CU's a device has along with number of other competing processes */ struct attribute attr_cu_occupancy; + + /* sysfs counters for GPU retry fault and page migration tracking */ + struct kobject *kobj_counters; + struct attribute attr_faults; + struct attribute attr_page_in; + struct attribute attr_page_out; + uint64_t faults; + uint64_t page_in; + uint64_t page_out; }; #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 09b98a83f670..21ec8a18cad2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -416,6 +416,29 @@ static ssize_t kfd_procfs_stats_show(struct kobject *kobj, return 0; } +static ssize_t kfd_sysfs_counters_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct kfd_process_device *pdd; + + if (!strcmp(attr->name, "faults")) { + pdd = container_of(attr, struct kfd_process_device, + attr_faults); + return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->faults)); + } + if (!strcmp(attr->name, "page_in")) { + pdd = container_of(attr, struct kfd_process_device, + attr_page_in); + return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_in)); + } + if (!strcmp(attr->name, "page_out")) { + pdd = container_of(attr, struct kfd_process_device, + attr_page_out); + return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_out)); + } + return 0; +} + static struct attribute attr_queue_size = { .name = "size", .mode = KFD_SYSFS_FILE_MODE @@ -451,13 +474,18 @@ static const struct sysfs_ops procfs_stats_ops = { .show = kfd_procfs_stats_show, }; -static struct attribute *procfs_stats_attrs[] = { - NULL -}; - static struct kobj_type procfs_stats_type = { .sysfs_ops = &procfs_stats_ops, - .default_attrs = procfs_stats_attrs, + .release = kfd_procfs_kobj_release, +}; + +static const struct sysfs_ops sysfs_counters_ops = { + .show = kfd_sysfs_counters_show, +}; + +static struct kobj_type sysfs_counters_type = { + .sysfs_ops = &sysfs_counters_ops, + .release = kfd_procfs_kobj_release, }; int kfd_procfs_add_queue(struct queue *q) @@ -484,34 +512,31 @@ int kfd_procfs_add_queue(struct queue *q) return 0; } -static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr, +static void kfd_sysfs_create_file(struct kobject *kobj, struct attribute *attr, char *name) { - int ret = 0; + int ret; - if (!p || !attr || !name) - return -EINVAL; + if (!kobj || !attr || !name) + return; attr->name = name; attr->mode = KFD_SYSFS_FILE_MODE; sysfs_attr_init(attr); - ret = sysfs_create_file(p->kobj, attr); - - return ret; + ret = sysfs_create_file(kobj, attr); + if (ret) + pr_warn("Create sysfs %s/%s failed %d", kobj->name, name, ret); } -static int kfd_procfs_add_sysfs_stats(struct kfd_process *p) +static void kfd_procfs_add_sysfs_stats(struct kfd_process *p) { - int ret = 0; + int ret; int i; char stats_dir_filename[MAX_SYSFS_FILENAME_LEN]; - if (!p) - return -EINVAL; - - if (!p->kobj) - return -EFAULT; + if (!p || !p->kobj) + return; /* * Create sysfs files for each GPU: @@ -521,63 +546,87 @@ static int kfd_procfs_add_sysfs_stats(struct kfd_process *p) */ for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *pdd = p->pdds[i]; - struct kobject *kobj_stats; snprintf(stats_dir_filename, MAX_SYSFS_FILENAME_LEN, "stats_%u", pdd->dev->id); - kobj_stats = kfd_alloc_struct(kobj_stats); - if (!kobj_stats) - return -ENOMEM; + pdd->kobj_stats = kfd_alloc_struct(pdd->kobj_stats); + if (!pdd->kobj_stats) + return; - ret = kobject_init_and_add(kobj_stats, - &procfs_stats_type, - p->kobj, - stats_dir_filename); + ret = kobject_init_and_add(pdd->kobj_stats, + &procfs_stats_type, + p->kobj, + stats_dir_filename); if (ret) { pr_warn("Creating KFD proc/stats_%s folder failed", - stats_dir_filename); - kobject_put(kobj_stats); - goto err; + stats_dir_filename); + kobject_put(pdd->kobj_stats); + pdd->kobj_stats = NULL; + return; } - pdd->kobj_stats = kobj_stats; - pdd->attr_evict.name = "evicted_ms"; - pdd->attr_evict.mode = KFD_SYSFS_FILE_MODE; - sysfs_attr_init(&pdd->attr_evict); - ret = sysfs_create_file(kobj_stats, &pdd->attr_evict); - if (ret) - pr_warn("Creating eviction stats for gpuid %d failed", - (int)pdd->dev->id); - + kfd_sysfs_create_file(pdd->kobj_stats, &pdd->attr_evict, + "evicted_ms"); /* Add sysfs file to report compute unit occupancy */ - if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL) { - pdd->attr_cu_occupancy.name = "cu_occupancy"; - pdd->attr_cu_occupancy.mode = KFD_SYSFS_FILE_MODE; - sysfs_attr_init(&pdd->attr_cu_occupancy); - ret = sysfs_create_file(kobj_stats, - &pdd->attr_cu_occupancy); - if (ret) - pr_warn("Creating %s failed for gpuid: %d", - pdd->attr_cu_occupancy.name, - (int)pdd->dev->id); - } + if (pdd->dev->kfd2kgd->get_cu_occupancy) + kfd_sysfs_create_file(pdd->kobj_stats, + &pdd->attr_cu_occupancy, + "cu_occupancy"); } -err: - return ret; } - -static int kfd_procfs_add_sysfs_files(struct kfd_process *p) +static void kfd_procfs_add_sysfs_counters(struct kfd_process *p) { int ret = 0; int i; + char counters_dir_filename[MAX_SYSFS_FILENAME_LEN]; - if (!p) - return -EINVAL; + if (!p || !p->kobj) + return; - if (!p->kobj) - return -EFAULT; + /* + * Create sysfs files for each GPU which supports SVM + * - proc/<pid>/counters_<gpuid>/ + * - proc/<pid>/counters_<gpuid>/faults + * - proc/<pid>/counters_<gpuid>/page_in + * - proc/<pid>/counters_<gpuid>/page_out + */ + for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) { + struct kfd_process_device *pdd = p->pdds[i]; + struct kobject *kobj_counters; + + snprintf(counters_dir_filename, MAX_SYSFS_FILENAME_LEN, + "counters_%u", pdd->dev->id); + kobj_counters = kfd_alloc_struct(kobj_counters); + if (!kobj_counters) + return; + + ret = kobject_init_and_add(kobj_counters, &sysfs_counters_type, + p->kobj, counters_dir_filename); + if (ret) { + pr_warn("Creating KFD proc/%s folder failed", + counters_dir_filename); + kobject_put(kobj_counters); + return; + } + + pdd->kobj_counters = kobj_counters; + kfd_sysfs_create_file(kobj_counters, &pdd->attr_faults, + "faults"); + kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_in, + "page_in"); + kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_out, + "page_out"); + } +} + +static void kfd_procfs_add_sysfs_files(struct kfd_process *p) +{ + int i; + + if (!p || !p->kobj) + return; /* * Create sysfs files for each GPU: @@ -589,20 +638,14 @@ static int kfd_procfs_add_sysfs_files(struct kfd_process *p) snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u", pdd->dev->id); - ret = kfd_sysfs_create_file(p, &pdd->attr_vram, pdd->vram_filename); - if (ret) - pr_warn("Creating vram usage for gpu id %d failed", - (int)pdd->dev->id); + kfd_sysfs_create_file(p->kobj, &pdd->attr_vram, + pdd->vram_filename); snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u", pdd->dev->id); - ret = kfd_sysfs_create_file(p, &pdd->attr_sdma, pdd->sdma_filename); - if (ret) - pr_warn("Creating sdma usage for gpu id %d failed", - (int)pdd->dev->id); + kfd_sysfs_create_file(p->kobj, &pdd->attr_sdma, + pdd->sdma_filename); } - - return ret; } void kfd_procfs_del_queue(struct queue *q) @@ -800,28 +843,17 @@ struct kfd_process *kfd_create_process(struct file *filep) goto out; } - process->attr_pasid.name = "pasid"; - process->attr_pasid.mode = KFD_SYSFS_FILE_MODE; - sysfs_attr_init(&process->attr_pasid); - ret = sysfs_create_file(process->kobj, &process->attr_pasid); - if (ret) - pr_warn("Creating pasid for pid %d failed", - (int)process->lead_thread->pid); + kfd_sysfs_create_file(process->kobj, &process->attr_pasid, + "pasid"); process->kobj_queues = kobject_create_and_add("queues", process->kobj); if (!process->kobj_queues) pr_warn("Creating KFD proc/queues folder failed"); - ret = kfd_procfs_add_sysfs_stats(process); - if (ret) - pr_warn("Creating sysfs stats dir for pid %d failed", - (int)process->lead_thread->pid); - - ret = kfd_procfs_add_sysfs_files(process); - if (ret) - pr_warn("Creating sysfs usage file for pid %d failed", - (int)process->lead_thread->pid); + kfd_procfs_add_sysfs_stats(process); + kfd_procfs_add_sysfs_files(process); + kfd_procfs_add_sysfs_counters(process); } out: if (!IS_ERR(process)) @@ -964,6 +996,50 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) p->n_pdds = 0; } +static void kfd_process_remove_sysfs(struct kfd_process *p) +{ + struct kfd_process_device *pdd; + int i; + + if (!p->kobj) + return; + + sysfs_remove_file(p->kobj, &p->attr_pasid); + kobject_del(p->kobj_queues); + kobject_put(p->kobj_queues); + p->kobj_queues = NULL; + + for (i = 0; i < p->n_pdds; i++) { + pdd = p->pdds[i]; + + sysfs_remove_file(p->kobj, &pdd->attr_vram); + sysfs_remove_file(p->kobj, &pdd->attr_sdma); + + sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict); + if (pdd->dev->kfd2kgd->get_cu_occupancy) + sysfs_remove_file(pdd->kobj_stats, + &pdd->attr_cu_occupancy); + kobject_del(pdd->kobj_stats); + kobject_put(pdd->kobj_stats); + pdd->kobj_stats = NULL; + } + + for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) { + pdd = p->pdds[i]; + + sysfs_remove_file(pdd->kobj_counters, &pdd->attr_faults); + sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_in); + sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_out); + kobject_del(pdd->kobj_counters); + kobject_put(pdd->kobj_counters); + pdd->kobj_counters = NULL; + } + + kobject_del(p->kobj); + kobject_put(p->kobj); + p->kobj = NULL; +} + /* No process locking is needed in this function, because the process * is not findable any more. We must assume that no other thread is * using it any more, otherwise we couldn't safely free the process @@ -973,33 +1049,7 @@ static void kfd_process_wq_release(struct work_struct *work) { struct kfd_process *p = container_of(work, struct kfd_process, release_work); - int i; - - /* Remove the procfs files */ - if (p->kobj) { - sysfs_remove_file(p->kobj, &p->attr_pasid); - kobject_del(p->kobj_queues); - kobject_put(p->kobj_queues); - p->kobj_queues = NULL; - - for (i = 0; i < p->n_pdds; i++) { - struct kfd_process_device *pdd = p->pdds[i]; - - sysfs_remove_file(p->kobj, &pdd->attr_vram); - sysfs_remove_file(p->kobj, &pdd->attr_sdma); - sysfs_remove_file(p->kobj, &pdd->attr_evict); - if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL) - sysfs_remove_file(p->kobj, &pdd->attr_cu_occupancy); - kobject_del(pdd->kobj_stats); - kobject_put(pdd->kobj_stats); - pdd->kobj_stats = NULL; - } - - kobject_del(p->kobj); - kobject_put(p->kobj); - p->kobj = NULL; - } - + kfd_process_remove_sysfs(p); kfd_iommu_unbind_process(p); kfd_process_free_outstanding_kfd_bos(p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 95a6c36cea4c..243dd1efcdbf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -153,6 +153,7 @@ void pqm_uninit(struct process_queue_manager *pqm) if (pqn->q && pqn->q->gws) amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, pqn->q->gws); + kfd_procfs_del_queue(pqn->q); uninit_queue(pqn->q); list_del(&pqn->process_queue_list); kfree(pqn); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index dff1011dd7ee..9a71d8919bd6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -119,28 +119,40 @@ static void svm_range_remove_notifier(struct svm_range *prange) } static int -svm_range_dma_map_dev(struct device *dev, dma_addr_t **dma_addr, - unsigned long *hmm_pfns, uint64_t npages) +svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, + unsigned long *hmm_pfns, uint32_t gpuidx) { enum dma_data_direction dir = DMA_BIDIRECTIONAL; - dma_addr_t *addr = *dma_addr; + dma_addr_t *addr = prange->dma_addr[gpuidx]; + struct device *dev = adev->dev; struct page *page; int i, r; if (!addr) { - addr = kvmalloc_array(npages, sizeof(*addr), + addr = kvmalloc_array(prange->npages, sizeof(*addr), GFP_KERNEL | __GFP_ZERO); if (!addr) return -ENOMEM; - *dma_addr = addr; + prange->dma_addr[gpuidx] = addr; } - for (i = 0; i < npages; i++) { + for (i = 0; i < prange->npages; i++) { if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]), "leaking dma mapping\n")) dma_unmap_page(dev, addr[i], PAGE_SIZE, dir); page = hmm_pfn_to_page(hmm_pfns[i]); + if (is_zone_device_page(page)) { + struct amdgpu_device *bo_adev = + amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); + + addr[i] = (hmm_pfns[i] << PAGE_SHIFT) + + bo_adev->vm_manager.vram_base_offset - + bo_adev->kfd.dev->pgmap.range.start; + addr[i] |= SVM_RANGE_VRAM_DOMAIN; + pr_debug("vram address detected: 0x%llx\n", addr[i]); + continue; + } addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir); r = dma_mapping_error(dev, addr[i]); if (r) { @@ -175,8 +187,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, } adev = (struct amdgpu_device *)pdd->dev->kgd; - r = svm_range_dma_map_dev(adev->dev, &prange->dma_addr[gpuidx], - hmm_pfns, prange->npages); + r = svm_range_dma_map_dev(adev, prange, hmm_pfns, gpuidx); if (r) break; } @@ -301,14 +312,6 @@ static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo) return true; } -static struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo) -{ - if (svm_bo) - kref_get(&svm_bo->kref); - - return svm_bo; -} - static void svm_range_bo_release(struct kref *kref) { struct svm_range_bo *svm_bo; @@ -347,7 +350,7 @@ static void svm_range_bo_release(struct kref *kref) kfree(svm_bo); } -static void svm_range_bo_unref(struct svm_range_bo *svm_bo) +void svm_range_bo_unref(struct svm_range_bo *svm_bo) { if (!svm_bo) return; @@ -564,6 +567,24 @@ svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id) return (struct amdgpu_device *)pdd->dev->kgd; } +struct kfd_process_device * +svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev) +{ + struct kfd_process *p; + int32_t gpu_idx, gpuid; + int r; + + p = container_of(prange->svms, struct kfd_process, svms); + + r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpu_idx); + if (r) { + pr_debug("failed to get device id by adev %p\n", adev); + return NULL; + } + + return kfd_process_device_from_gpuidx(p, gpu_idx); +} + static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo) { struct ttm_operation_ctx ctx = { false, false }; @@ -1002,21 +1023,22 @@ svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, } static uint64_t -svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange) +svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange, + int domain) { struct amdgpu_device *bo_adev; uint32_t flags = prange->flags; uint32_t mapping_flags = 0; uint64_t pte_flags; - bool snoop = !prange->ttm_res; + bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN); bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT; - if (prange->svm_bo && prange->ttm_res) + if (domain == SVM_RANGE_VRAM_DOMAIN) bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); switch (adev->asic_type) { case CHIP_ARCTURUS: - if (prange->svm_bo && prange->ttm_res) { + if (domain == SVM_RANGE_VRAM_DOMAIN) { if (bo_adev == adev) { mapping_flags |= coherent ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; @@ -1032,7 +1054,7 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange) } break; case CHIP_ALDEBARAN: - if (prange->svm_bo && prange->ttm_res) { + if (domain == SVM_RANGE_VRAM_DOMAIN) { if (bo_adev == adev) { mapping_flags |= coherent ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; @@ -1062,14 +1084,14 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange) mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; pte_flags = AMDGPU_PTE_VALID; - pte_flags |= prange->ttm_res ? 0 : AMDGPU_PTE_SYSTEM; + pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM; pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0; pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags); pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n", prange->svms, prange->start, prange->last, - prange->ttm_res ? 1:0, pte_flags, mapping_flags); + (domain == SVM_RANGE_VRAM_DOMAIN) ? 1:0, pte_flags, mapping_flags); return pte_flags; } @@ -1140,31 +1162,41 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va bo_va; bool table_freed = false; uint64_t pte_flags; + unsigned long last_start; + int last_domain; int r = 0; + int64_t i; pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, prange->last); - if (prange->svm_bo && prange->ttm_res) { + if (prange->svm_bo && prange->ttm_res) bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev); - prange->mapping.bo_va = &bo_va; - } - prange->mapping.start = prange->start; - prange->mapping.last = prange->last; - prange->mapping.offset = prange->ttm_res ? prange->offset : 0; - pte_flags = svm_range_get_pte_flags(adev, prange); + last_start = prange->start; + for (i = 0; i < prange->npages; i++) { + last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN; + dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN; + if ((prange->start + i) < prange->last && + last_domain == (dma_addr[i + 1] & SVM_RANGE_VRAM_DOMAIN)) + continue; - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL, - prange->mapping.start, - prange->mapping.last, pte_flags, - prange->mapping.offset, - prange->ttm_res, - dma_addr, &vm->last_update, - &table_freed); - if (r) { - pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start); - goto out; + pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n", + last_start, prange->start + i, last_domain ? "GPU" : "CPU"); + pte_flags = svm_range_get_pte_flags(adev, prange, last_domain); + r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL, + last_start, + prange->start + i, pte_flags, + last_start - prange->start, + NULL, + dma_addr, + &vm->last_update, + &table_freed); + if (r) { + pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start); + goto out; + } + last_start = prange->start + i + 1; } r = amdgpu_vm_update_pdes(adev, vm, false); @@ -1185,7 +1217,6 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, p->pasid, TLB_FLUSH_LEGACY); } out: - prange->mapping.bo_va = NULL; return r; } @@ -1319,6 +1350,17 @@ static void svm_range_unreserve_bos(struct svm_validate_context *ctx) ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list); } +static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx) +{ + struct kfd_process_device *pdd; + struct amdgpu_device *adev; + + pdd = kfd_process_device_from_gpuidx(p, gpuidx); + adev = (struct amdgpu_device *)pdd->dev->kgd; + + return SVM_ADEV_PGMAP_OWNER(adev); +} + /* * Validation+GPU mapping with concurrent invalidation (MMU notifiers) * @@ -1349,6 +1391,9 @@ static int svm_range_validate_and_map(struct mm_struct *mm, { struct svm_validate_context ctx; struct hmm_range *hmm_range; + struct kfd_process *p; + void *owner; + int32_t idx; int r = 0; ctx.process = container_of(prange->svms, struct kfd_process, svms); @@ -1394,33 +1439,38 @@ static int svm_range_validate_and_map(struct mm_struct *mm, svm_range_reserve_bos(&ctx); - if (!prange->actual_loc) { - r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, - prange->start << PAGE_SHIFT, - prange->npages, &hmm_range, - false, true); - if (r) { - pr_debug("failed %d to get svm range pages\n", r); - goto unreserve_out; - } - - r = svm_range_dma_map(prange, ctx.bitmap, - hmm_range->hmm_pfns); - if (r) { - pr_debug("failed %d to dma map range\n", r); - goto unreserve_out; + p = container_of(prange->svms, struct kfd_process, svms); + owner = kfd_svm_page_owner(p, find_first_bit(ctx.bitmap, + MAX_GPU_INSTANCE)); + for_each_set_bit(idx, ctx.bitmap, MAX_GPU_INSTANCE) { + if (kfd_svm_page_owner(p, idx) != owner) { + owner = NULL; + break; } + } + r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, + prange->start << PAGE_SHIFT, + prange->npages, &hmm_range, + false, true, owner); + if (r) { + pr_debug("failed %d to get svm range pages\n", r); + goto unreserve_out; + } - prange->validated_once = true; + r = svm_range_dma_map(prange, ctx.bitmap, + hmm_range->hmm_pfns); + if (r) { + pr_debug("failed %d to dma map range\n", r); + goto unreserve_out; } + prange->validated_once = true; + svm_range_lock(prange); - if (!prange->actual_loc) { - if (amdgpu_hmm_range_get_pages_done(hmm_range)) { - pr_debug("hmm update the range, need validate again\n"); - r = -EAGAIN; - goto unlock_out; - } + if (amdgpu_hmm_range_get_pages_done(hmm_range)) { + pr_debug("hmm update the range, need validate again\n"); + r = -EAGAIN; + goto unlock_out; } if (!list_empty(&prange->child_list)) { pr_debug("range split by unmap in parallel, validate again\n"); @@ -1572,6 +1622,7 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, unsigned long start, unsigned long last) { struct svm_range_list *svms = prange->svms; + struct svm_range *pchild; struct kfd_process *p; int r = 0; @@ -1583,7 +1634,19 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, if (!p->xnack_enabled) { int evicted_ranges; - atomic_inc(&prange->invalid); + list_for_each_entry(pchild, &prange->child_list, child_list) { + mutex_lock_nested(&pchild->lock, 1); + if (pchild->start <= last && pchild->last >= start) { + pr_debug("increment pchild invalid [0x%lx 0x%lx]\n", + pchild->start, pchild->last); + atomic_inc(&pchild->invalid); + } + mutex_unlock(&pchild->lock); + } + + if (prange->start <= last && prange->last >= start) + atomic_inc(&prange->invalid); + evicted_ranges = atomic_inc_return(&svms->evicted_ranges); if (evicted_ranges != 1) return r; @@ -1600,7 +1663,6 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, schedule_delayed_work(&svms->restore_work, msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); } else { - struct svm_range *pchild; unsigned long s, l; pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n", @@ -2311,6 +2373,27 @@ static bool svm_range_skip_recover(struct svm_range *prange) return false; } +static void +svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p, + struct svm_range *prange, int32_t gpuidx) +{ + struct kfd_process_device *pdd; + + if (gpuidx == MAX_GPU_INSTANCE) + /* fault is on different page of same range + * or fault is skipped to recover later + */ + pdd = svm_range_get_pdd_by_adev(prange, adev); + else + /* fault recovered + * or fault cannot recover because GPU no access on the range + */ + pdd = kfd_process_device_from_gpuidx(p, gpuidx); + + if (pdd) + WRITE_ONCE(pdd->faults, pdd->faults + 1); +} + int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint64_t addr) @@ -2320,7 +2403,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, struct svm_range *prange; struct kfd_process *p; uint64_t timestamp; - int32_t best_loc, gpuidx; + int32_t best_loc; + int32_t gpuidx = MAX_GPU_INSTANCE; bool write_locked = false; int r = 0; @@ -2440,6 +2524,9 @@ out_unlock_range: out_unlock_svms: mutex_unlock(&svms->lock); mmap_read_unlock(mm); + + svm_range_count_fault(adev, p, prange, gpuidx); + mmput(mm); out: kfd_unref_process(p); @@ -2662,7 +2749,8 @@ out: /* FIXME: This is a workaround for page locking bug when some pages are * invalid during migration to VRAM */ -void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm) +void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm, + void *owner) { struct hmm_range *hmm_range; int r; @@ -2673,7 +2761,7 @@ void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm) r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, prange->start << PAGE_SHIFT, prange->npages, &hmm_range, - false, true); + false, true, owner); if (!r) { amdgpu_hmm_range_get_pages_done(hmm_range); prange->validated_once = true; @@ -2718,16 +2806,6 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, best_loc == prange->actual_loc) return 0; - /* - * Prefetch to GPU without host access flag, set actual_loc to gpu, then - * validate on gpu and map to gpus will be handled afterwards. - */ - if (best_loc && !prange->actual_loc && - !(prange->flags & KFD_IOCTL_SVM_FLAG_HOST_ACCESS)) { - prange->actual_loc = best_loc; - return 0; - } - if (!best_loc) { r = svm_migrate_vram_to_ram(prange, mm); *migrated = !r; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 0c0fc399395e..3fc1fd8b4fbc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -35,6 +35,10 @@ #include "amdgpu.h" #include "kfd_priv.h" +#define SVM_RANGE_VRAM_DOMAIN (1UL << 0) +#define SVM_ADEV_PGMAP_OWNER(adev)\ + ((adev)->hive ? (void *)(adev)->hive : (void *)(adev)) + struct svm_range_bo { struct amdgpu_bo *bo; struct kref kref; @@ -110,7 +114,6 @@ struct svm_range { struct list_head update_list; struct list_head remove_list; struct list_head insert_list; - struct amdgpu_bo_va_mapping mapping; uint64_t npages; dma_addr_t *dma_addr[MAX_GPU_INSTANCE]; struct ttm_resource *ttm_res; @@ -147,6 +150,14 @@ static inline void svm_range_unlock(struct svm_range *prange) mutex_unlock(&prange->lock); } +static inline struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo) +{ + if (svm_bo) + kref_get(&svm_bo->kref); + + return svm_bo; +} + int svm_range_list_init(struct kfd_process *p); void svm_range_list_fini(struct kfd_process *p); int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start, @@ -173,13 +184,17 @@ void schedule_deferred_list_work(struct svm_range_list *svms); void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, unsigned long offset, unsigned long npages); void svm_range_free_dma_mappings(struct svm_range *prange); -void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm); +void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm, + void *owner); +struct kfd_process_device * +svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev); /* SVM API and HMM page migration work together, device memory type * is initialized to not 0 when page migration register device memory. */ #define KFD_IS_SVM_API_SUPPORTED(dev) ((dev)->pgmap.type != 0) +void svm_range_bo_unref(struct svm_range_bo *svm_bo); #else struct kfd_process; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b5b5ccf0ed71..01e1062dc235 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1160,6 +1160,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_dc_feature_mask & DC_DISABLE_FRACTIONAL_PWM_MASK) init_data.flags.disable_fractional_pwm = true; + if (amdgpu_dc_feature_mask & DC_EDP_NO_POWER_SEQUENCING) + init_data.flags.edp_no_power_sequencing = true; + init_data.flags.power_down_display_on_boot = true; INIT_LIST_HEAD(&adev->dm.da_list); @@ -10118,7 +10121,7 @@ static int validate_overlay(struct drm_atomic_state *state) int i; struct drm_plane *plane; struct drm_plane_state *new_plane_state; - struct drm_plane_state *primary_state, *cursor_state, *overlay_state = NULL; + struct drm_plane_state *primary_state, *overlay_state = NULL; /* Check if primary plane is contained inside overlay */ for_each_new_plane_in_state_reverse(state, plane, new_plane_state, i) { @@ -10148,14 +10151,6 @@ static int validate_overlay(struct drm_atomic_state *state) if (!primary_state->crtc) return 0; - /* check if cursor plane is enabled */ - cursor_state = drm_atomic_get_plane_state(state, overlay_state->crtc->cursor); - if (IS_ERR(cursor_state)) - return PTR_ERR(cursor_state); - - if (drm_atomic_plane_disabling(plane->state, cursor_state)) - return 0; - /* Perform the bounds check to ensure the overlay plane covers the primary */ if (primary_state->crtc_x < overlay_state->crtc_x || primary_state->crtc_y < overlay_state->crtc_y || diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 5101a4f8f69f..45640f1c26c4 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -297,6 +297,7 @@ struct dc_config { bool allow_seamless_boot_optimization; bool power_down_display_on_boot; bool edp_not_connected; + bool edp_no_power_sequencing; bool force_enum_edp; bool forced_clocks; bool allow_lttpr_non_transparent_mode; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 2938caaa2299..62d595ded866 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1022,8 +1022,20 @@ void dce110_edp_backlight_control( /* dc_service_sleep_in_milliseconds(50); */ /*edp 1.2*/ panel_instance = link->panel_cntl->inst; - if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) - edp_receiver_ready_T7(link); + + if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_ON) { + if (!link->dc->config.edp_no_power_sequencing) + /* + * Sometimes, DP receiver chip power-controlled externally by an + * Embedded Controller could be treated and used as eDP, + * if it drives mobile display. In this case, + * we shouldn't be doing power-sequencing, hence we can skip + * waiting for T7-ready. + */ + edp_receiver_ready_T7(link); + else + DC_LOG_DC("edp_receiver_ready_T7 skipped\n"); + } if (ctx->dc->ctx->dmub_srv && ctx->dc->debug.dmub_command_table) { @@ -1048,8 +1060,19 @@ void dce110_edp_backlight_control( dc_link_backlight_enable_aux(link, enable); /*edp 1.2*/ - if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_OFF) - edp_add_delay_for_T9(link); + if (cntl.action == TRANSMITTER_CONTROL_BACKLIGHT_OFF) { + if (!link->dc->config.edp_no_power_sequencing) + /* + * Sometimes, DP receiver chip power-controlled externally by an + * Embedded Controller could be treated and used as eDP, + * if it drives mobile display. In this case, + * we shouldn't be doing power-sequencing, hence we can skip + * waiting for T9-ready. + */ + edp_add_delay_for_T9(link); + else + DC_LOG_DC("edp_receiver_ready_T9 skipped\n"); + } if (!enable && link->dpcd_sink_ext_caps.bits.oled) msleep(OLED_PRE_T11_DELAY); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile index 5dcdc5a858fe..4bab97acb155 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile @@ -28,6 +28,7 @@ endif CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -mhard-float endif +ifdef CONFIG_X86 ifdef IS_OLD_GCC # Stack alignment mismatch, proceed with caution. # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 @@ -36,6 +37,7 @@ CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -mpreferred-stack-boundary=4 else CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -msse2 endif +endif AMD_DAL_DCN31 = $(addprefix $(AMDDALPATH)/dc/dcn31/,$(DCN31)) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index d34024fd798a..45862167e6ce 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -50,6 +50,10 @@ dml_ccflags += -msse2 endif endif +ifneq ($(CONFIG_FRAME_WARN),0) +frame_warn_flag := -Wframe-larger-than=2048 +endif + CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) ifdef CONFIG_DRM_AMD_DC_DCN @@ -60,9 +64,9 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) -Wframe-larger-than=2048 +CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) -Wframe-larger-than=2048 +CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags) diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c index 5f245bde54ff..a2a4fbeb83f8 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c +++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c @@ -119,7 +119,7 @@ bool dal_irq_service_set( dal_irq_service_ack(irq_service, source); - if (info->funcs->set) + if (info->funcs && info->funcs->set) return info->funcs->set(irq_service, info, enable); dal_irq_service_set_generic(irq_service, info, enable); @@ -153,7 +153,7 @@ bool dal_irq_service_ack( return false; } - if (info->funcs->ack) + if (info->funcs && info->funcs->ack) return info->funcs->ack(irq_service, info); dal_irq_service_ack_generic(irq_service, info); diff --git a/drivers/gpu/drm/amd/display/dc/irq_types.h b/drivers/gpu/drm/amd/display/dc/irq_types.h index 5f9346622301..1139b9eb9f6f 100644 --- a/drivers/gpu/drm/amd/display/dc/irq_types.h +++ b/drivers/gpu/drm/amd/display/dc/irq_types.h @@ -165,7 +165,7 @@ enum irq_type }; #define DAL_VALID_IRQ_SRC_NUM(src) \ - ((src) <= DAL_IRQ_SOURCES_NUMBER && (src) > DC_IRQ_SOURCE_INVALID) + ((src) < DAL_IRQ_SOURCES_NUMBER && (src) > DC_IRQ_SOURCE_INVALID) /* Number of Page Flip IRQ Sources. */ #define DAL_PFLIP_IRQ_SRC_NUM \ diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c index 8c886ece71f6..973de346410d 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c @@ -352,3 +352,63 @@ uint32_t dmub_dcn31_get_current_time(struct dmub_srv *dmub) { return REG_READ(DMCUB_TIMER_CURRENT); } + +void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data) +{ + uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset; + uint32_t is_traceport_enabled, is_cw0_enabled, is_cw6_enabled; + + if (!dmub || !diag_data) + return; + + memset(diag_data, 0, sizeof(*diag_data)); + + diag_data->dmcub_version = dmub->fw_version; + + diag_data->scratch[0] = REG_READ(DMCUB_SCRATCH0); + diag_data->scratch[1] = REG_READ(DMCUB_SCRATCH1); + diag_data->scratch[2] = REG_READ(DMCUB_SCRATCH2); + diag_data->scratch[3] = REG_READ(DMCUB_SCRATCH3); + diag_data->scratch[4] = REG_READ(DMCUB_SCRATCH4); + diag_data->scratch[5] = REG_READ(DMCUB_SCRATCH5); + diag_data->scratch[6] = REG_READ(DMCUB_SCRATCH6); + diag_data->scratch[7] = REG_READ(DMCUB_SCRATCH7); + diag_data->scratch[8] = REG_READ(DMCUB_SCRATCH8); + diag_data->scratch[9] = REG_READ(DMCUB_SCRATCH9); + diag_data->scratch[10] = REG_READ(DMCUB_SCRATCH10); + diag_data->scratch[11] = REG_READ(DMCUB_SCRATCH11); + diag_data->scratch[12] = REG_READ(DMCUB_SCRATCH12); + diag_data->scratch[13] = REG_READ(DMCUB_SCRATCH13); + diag_data->scratch[14] = REG_READ(DMCUB_SCRATCH14); + diag_data->scratch[15] = REG_READ(DMCUB_SCRATCH15); + + diag_data->undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR); + diag_data->inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR); + diag_data->data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR); + + diag_data->inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR); + diag_data->inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR); + diag_data->inbox1_size = REG_READ(DMCUB_INBOX1_SIZE); + + diag_data->inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR); + diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); + diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); + diag_data->is_dmcub_enabled = is_dmub_enabled; + + REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset); + diag_data->is_dmcub_soft_reset = is_soft_reset; + + REG_GET(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS, &is_sec_reset); + diag_data->is_dmcub_secure_reset = is_sec_reset; + + REG_GET(DMCUB_CNTL, DMCUB_TRACEPORT_EN, &is_traceport_enabled); + diag_data->is_traceport_en = is_traceport_enabled; + + REG_GET(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE, &is_cw0_enabled); + diag_data->is_cw0_enabled = is_cw0_enabled; + + REG_GET(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, &is_cw6_enabled); + diag_data->is_cw6_enabled = is_cw6_enabled; +} diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h index 2829c3e9a310..9456a6a2d518 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h @@ -36,6 +36,9 @@ struct dmub_srv; DMUB_SR(DMCUB_CNTL) \ DMUB_SR(DMCUB_CNTL2) \ DMUB_SR(DMCUB_SEC_CNTL) \ + DMUB_SR(DMCUB_INBOX0_SIZE) \ + DMUB_SR(DMCUB_INBOX0_RPTR) \ + DMUB_SR(DMCUB_INBOX0_WPTR) \ DMUB_SR(DMCUB_INBOX1_BASE_ADDRESS) \ DMUB_SR(DMCUB_INBOX1_SIZE) \ DMUB_SR(DMCUB_INBOX1_RPTR) \ @@ -103,11 +106,15 @@ struct dmub_srv; DMUB_SR(DMCUB_SCRATCH14) \ DMUB_SR(DMCUB_SCRATCH15) \ DMUB_SR(DMCUB_GPINT_DATAIN1) \ + DMUB_SR(DMCUB_GPINT_DATAOUT) \ DMUB_SR(CC_DC_PIPE_DIS) \ DMUB_SR(MMHUBBUB_SOFT_RESET) \ DMUB_SR(DCN_VM_FB_LOCATION_BASE) \ DMUB_SR(DCN_VM_FB_OFFSET) \ - DMUB_SR(DMCUB_TIMER_CURRENT) + DMUB_SR(DMCUB_TIMER_CURRENT) \ + DMUB_SR(DMCUB_INST_FETCH_FAULT_ADDR) \ + DMUB_SR(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR) \ + DMUB_SR(DMCUB_DATA_WRITE_FAULT_ADDR) #define DMUB_DCN31_FIELDS() \ DMUB_SF(DMCUB_CNTL, DMCUB_ENABLE) \ @@ -115,6 +122,7 @@ struct dmub_srv; DMUB_SF(DMCUB_CNTL2, DMCUB_SOFT_RESET) \ DMUB_SF(DMCUB_SEC_CNTL, DMCUB_SEC_RESET) \ DMUB_SF(DMCUB_SEC_CNTL, DMCUB_MEM_UNIT_ID) \ + DMUB_SF(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS) \ DMUB_SF(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_TOP_ADDRESS) \ DMUB_SF(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE) \ DMUB_SF(DMCUB_REGION3_CW1_TOP_ADDRESS, DMCUB_REGION3_CW1_TOP_ADDRESS) \ @@ -138,11 +146,13 @@ struct dmub_srv; DMUB_SF(CC_DC_PIPE_DIS, DC_DMCUB_ENABLE) \ DMUB_SF(MMHUBBUB_SOFT_RESET, DMUIF_SOFT_RESET) \ DMUB_SF(DCN_VM_FB_LOCATION_BASE, FB_BASE) \ - DMUB_SF(DCN_VM_FB_OFFSET, FB_OFFSET) + DMUB_SF(DCN_VM_FB_OFFSET, FB_OFFSET) \ + DMUB_SF(DMCUB_INBOX0_WPTR, DMCUB_INBOX0_WPTR) struct dmub_srv_dcn31_reg_offset { #define DMUB_SR(reg) uint32_t reg; DMUB_DCN31_REGS() + DMCUB_INTERNAL_REGS() #undef DMUB_SR }; @@ -227,4 +237,6 @@ void dmub_dcn31_set_outbox0_rptr(struct dmub_srv *dmub, uint32_t rptr_offset); uint32_t dmub_dcn31_get_current_time(struct dmub_srv *dmub); +void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data); + #endif /* _DMUB_DCN31_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index fd7e996ab1d7..2bdbd7406f56 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -208,6 +208,7 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) break; case DMUB_ASIC_DCN31: + dmub->regs_dcn31 = &dmub_srv_dcn31_regs; funcs->reset = dmub_dcn31_reset; funcs->reset_release = dmub_dcn31_reset_release; funcs->backdoor_load = dmub_dcn31_backdoor_load; @@ -231,9 +232,7 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) funcs->get_outbox0_wptr = dmub_dcn31_get_outbox0_wptr; funcs->set_outbox0_rptr = dmub_dcn31_set_outbox0_rptr; - if (asic == DMUB_ASIC_DCN31) { - dmub->regs_dcn31 = &dmub_srv_dcn31_regs; - } + funcs->get_diagnostic_data = dmub_dcn31_get_diagnostic_data; funcs->get_current_time = dmub_dcn31_get_current_time; diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 332b0df53e52..ff1d3d4a6488 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -223,10 +223,12 @@ enum amd_harvest_ip_mask { }; enum DC_FEATURE_MASK { - DC_FBC_MASK = 0x1, - DC_MULTI_MON_PP_MCLK_SWITCH_MASK = 0x2, - DC_DISABLE_FRACTIONAL_PWM_MASK = 0x4, - DC_PSR_MASK = 0x8, + //Default value can be found at "uint amdgpu_dc_feature_mask" + DC_FBC_MASK = (1 << 0), //0x1, disabled by default + DC_MULTI_MON_PP_MCLK_SWITCH_MASK = (1 << 1), //0x2, enabled by default + DC_DISABLE_FRACTIONAL_PWM_MASK = (1 << 2), //0x4, disabled by default + DC_PSR_MASK = (1 << 3), //0x8, disabled by default + DC_EDP_NO_POWER_SEQUENCING = (1 << 4), //0x10, disabled by default }; enum DC_DEBUG_MASK { diff --git a/drivers/gpu/drm/amd/include/navi10_enum.h b/drivers/gpu/drm/amd/include/navi10_enum.h index d5ead9680c6e..84bcb96f76ea 100644 --- a/drivers/gpu/drm/amd/include/navi10_enum.h +++ b/drivers/gpu/drm/amd/include/navi10_enum.h @@ -430,7 +430,7 @@ ARRAY_2D_DEPTH = 0x00000001, */ typedef enum ENUM_NUM_SIMD_PER_CU { -NUM_SIMD_PER_CU = 0x00000004, +NUM_SIMD_PER_CU = 0x00000002, } ENUM_NUM_SIMD_PER_CU; /* diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index a276ebad47e6..769f58d5ae1a 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2902,14 +2902,15 @@ static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev, return sprintf(buf, "%i\n", 0); } -static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, - struct device_attribute *attr, - char *buf) + +static ssize_t amdgpu_hwmon_show_power_cap_generic(struct device *dev, + struct device_attribute *attr, + char *buf, + enum pp_power_limit_level pp_limit_level) { struct amdgpu_device *adev = dev_get_drvdata(dev); const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; enum pp_power_type power_type = to_sensor_dev_attr(attr)->index; - enum pp_power_limit_level pp_limit_level = PP_PWR_LIMIT_MAX; uint32_t limit; ssize_t size; int r; @@ -2919,17 +2920,17 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, if (adev->in_suspend && !adev->in_runpm) return -EPERM; + if ( !(pp_funcs && pp_funcs->get_power_limit)) + return -ENODATA; + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (r < 0) { pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); return r; } - if (pp_funcs && pp_funcs->get_power_limit) - r = pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, - pp_limit_level, power_type); - else - r = -ENODATA; + r = pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, + pp_limit_level, power_type); if (!r) size = sysfs_emit(buf, "%u\n", limit * 1000000); @@ -2942,85 +2943,31 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, return size; } -static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, + +static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, struct device_attribute *attr, char *buf) { - struct amdgpu_device *adev = dev_get_drvdata(dev); - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - enum pp_power_type power_type = to_sensor_dev_attr(attr)->index; - enum pp_power_limit_level pp_limit_level = PP_PWR_LIMIT_CURRENT; - uint32_t limit; - ssize_t size; - int r; - - if (amdgpu_in_reset(adev)) - return -EPERM; - if (adev->in_suspend && !adev->in_runpm) - return -EPERM; - - r = pm_runtime_get_sync(adev_to_drm(adev)->dev); - if (r < 0) { - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - return r; - } - - if (pp_funcs && pp_funcs->get_power_limit) - r = pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, - pp_limit_level, power_type); - else - r = -ENODATA; + return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_MAX); - if (!r) - size = sysfs_emit(buf, "%u\n", limit * 1000000); - else - size = sysfs_emit(buf, "\n"); +} - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); +static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_CURRENT); - return size; } static ssize_t amdgpu_hwmon_show_power_cap_default(struct device *dev, struct device_attribute *attr, char *buf) { - struct amdgpu_device *adev = dev_get_drvdata(dev); - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - enum pp_power_type power_type = to_sensor_dev_attr(attr)->index; - enum pp_power_limit_level pp_limit_level = PP_PWR_LIMIT_DEFAULT; - uint32_t limit; - ssize_t size; - int r; + return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_DEFAULT); - if (amdgpu_in_reset(adev)) - return -EPERM; - if (adev->in_suspend && !adev->in_runpm) - return -EPERM; - - r = pm_runtime_get_sync(adev_to_drm(adev)->dev); - if (r < 0) { - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - return r; - } - - if (pp_funcs && pp_funcs->get_power_limit) - r = pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, - pp_limit_level, power_type); - else - r = -ENODATA; - - if (!r) - size = sysfs_emit(buf, "%u\n", limit * 1000000); - else - size = sysfs_emit(buf, "\n"); - - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - - return size; } + static ssize_t amdgpu_hwmon_show_power_label(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index cb375f1beebd..ebe672142808 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1453,10 +1453,14 @@ static int smu_hw_fini(void *handle) if (smu->is_apu) { smu_powergate_sdma(&adev->smu, true); - smu_dpm_set_vcn_enable(smu, false); - smu_dpm_set_jpeg_enable(smu, false); } + smu_dpm_set_vcn_enable(smu, false); + smu_dpm_set_jpeg_enable(smu, false); + + adev->vcn.cur_state = AMD_PG_STATE_GATE; + adev->jpeg.cur_state = AMD_PG_STATE_GATE; + if (!smu->pm_enabled) return 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c index 7664334d8144..18a1ffdca227 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c @@ -189,10 +189,11 @@ err0_out: static int yellow_carp_system_features_control(struct smu_context *smu, bool en) { struct smu_feature *feature = &smu->smu_feature; + struct amdgpu_device *adev = smu->adev; uint32_t feature_mask[2]; int ret = 0; - if (!en) + if (!en && !adev->in_s0ix) ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL); bitmap_zero(feature->enabled, feature->feature_num); diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c index ebe9dccf2d83..0b8648396fb2 100644 --- a/drivers/gpu/drm/gma500/framebuffer.c +++ b/drivers/gpu/drm/gma500/framebuffer.c @@ -352,6 +352,7 @@ static struct drm_framebuffer *psb_user_framebuffer_create const struct drm_mode_fb_cmd2 *cmd) { struct drm_gem_object *obj; + struct drm_framebuffer *fb; /* * Find the GEM object and thus the gtt range object that is @@ -362,7 +363,11 @@ static struct drm_framebuffer *psb_user_framebuffer_create return ERR_PTR(-ENOENT); /* Let the core code do all the work */ - return psb_framebuffer_create(dev, cmd, obj); + fb = psb_framebuffer_create(dev, cmd, obj); + if (IS_ERR(fb)) + drm_gem_object_put(obj); + + return fb; } static int psbfb_probe(struct drm_fb_helper *fb_helper, diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 390869bd6b63..be716b56e8e0 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -1791,10 +1791,23 @@ static struct intel_shared_dpll *dg1_ddi_get_pll(struct intel_encoder *encoder) { struct drm_i915_private *i915 = to_i915(encoder->base.dev); enum phy phy = intel_port_to_phy(i915, encoder->port); + enum intel_dpll_id id; + u32 val; - return _cnl_ddi_get_pll(i915, DG1_DPCLKA_CFGCR0(phy), - DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy), - DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)); + val = intel_de_read(i915, DG1_DPCLKA_CFGCR0(phy)); + val &= DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy); + val >>= DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy); + id = val; + + /* + * _DG1_DPCLKA0_CFGCR0 maps between DPLL 0 and 1 with one bit for phy A + * and B while _DG1_DPCLKA1_CFGCR0 maps between DPLL 2 and 3 with one + * bit for phy C and D. + */ + if (phy >= PHY_C) + id += DPLL_ID_DG1_DPLL2; + + return intel_get_shared_dpll_by_id(i915, id); } static void icl_ddi_combo_enable_clock(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 5c9222283044..6cc03b9e4321 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2868,7 +2868,7 @@ static int intel_dp_vsc_sdp_unpack(struct drm_dp_vsc_sdp *vsc, if (size < sizeof(struct dp_sdp)) return -EINVAL; - memset(vsc, 0, size); + memset(vsc, 0, sizeof(*vsc)); if (sdp->sdp_header.HB0 != 0) return -EINVAL; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 9ceddfbb1687..7f03df236613 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1279,7 +1279,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) return true; /* Waiting to drain ELSP? */ - synchronize_hardirq(to_pci_dev(engine->i915->drm.dev)->irq); + intel_synchronize_hardirq(engine->i915); intel_engine_flush_submission(engine); /* ELSP is empty, but there are ready requests? E.g. after reset */ diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 0c423f096e2b..37d74d4ed59b 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -184,8 +184,11 @@ static int xcs_resume(struct intel_engine_cs *engine) ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n", ring->head, ring->tail); - /* Double check the ring is empty & disabled before we resume */ - synchronize_hardirq(engine->i915->drm.irq); + /* + * Double check the ring is empty & disabled before we resume. Called + * from atomic context during PCI probe, so _hardirq(). + */ + intel_synchronize_hardirq(engine->i915); if (!stop_ring(engine)) goto err; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 850b499c71c8..73de45472f60 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -42,7 +42,6 @@ #include <drm/drm_aperture.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_ioctl.h> -#include <drm/drm_irq.h> #include <drm/drm_managed.h> #include <drm/drm_probe_helper.h> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index a11bdb667241..c03943198089 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -33,7 +33,6 @@ #include <linux/sysrq.h> #include <drm/drm_drv.h> -#include <drm/drm_irq.h> #include "display/intel_de.h" #include "display/intel_display_types.h" @@ -4564,10 +4563,6 @@ void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv) bool intel_irqs_enabled(struct drm_i915_private *dev_priv) { - /* - * We only use drm_irq_uninstall() at unload and VT switch, so - * this is the only thing we need to check. - */ return dev_priv->runtime_pm.irqs_enabled; } @@ -4575,3 +4570,8 @@ void intel_synchronize_irq(struct drm_i915_private *i915) { synchronize_irq(to_pci_dev(i915->drm.dev)->irq); } + +void intel_synchronize_hardirq(struct drm_i915_private *i915) +{ + synchronize_hardirq(to_pci_dev(i915->drm.dev)->irq); +} diff --git a/drivers/gpu/drm/i915/i915_irq.h b/drivers/gpu/drm/i915/i915_irq.h index db34d5dbe402..e43b6734f21b 100644 --- a/drivers/gpu/drm/i915/i915_irq.h +++ b/drivers/gpu/drm/i915/i915_irq.h @@ -94,6 +94,7 @@ void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv); void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv); bool intel_irqs_enabled(struct drm_i915_private *dev_priv); void intel_synchronize_irq(struct drm_i915_private *i915); +void intel_synchronize_hardirq(struct drm_i915_private *i915); int intel_get_crtc_scanline(struct intel_crtc *crtc); void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e915ec034c98..94fde5ca26ae 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -10513,7 +10513,6 @@ enum skl_power_gate { #define _DG1_DPCLKA1_CFGCR0 0x16C280 #define _DG1_DPCLKA_PHY_IDX(phy) ((phy) % 2) #define _DG1_DPCLKA_PLL_IDX(pll) ((pll) % 2) -#define _DG1_PHY_DPLL_MAP(phy) ((phy) >= PHY_C ? DPLL_ID_DG1_DPLL2 : DPLL_ID_DG1_DPLL0) #define DG1_DPCLKA_CFGCR0(phy) _MMIO_PHY((phy) / 2, \ _DG1_DPCLKA_CFGCR0, \ _DG1_DPCLKA1_CFGCR0) @@ -10521,8 +10520,6 @@ enum skl_power_gate { #define DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy) (_DG1_DPCLKA_PHY_IDX(phy) * 2) #define DG1_DPCLKA_CFGCR0_DDI_CLK_SEL(pll, phy) (_DG1_DPCLKA_PLL_IDX(pll) << DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)) #define DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(phy) (0x3 << DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)) -#define DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_DPLL_MAP(clk_sel, phy) \ - (((clk_sel) >> DG1_DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(phy)) + _DG1_PHY_DPLL_MAP(phy)) /* ADLS Clocks */ #define _ADLS_DPCLKA_CFGCR0 0x164280 diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 406681317419..573154268d43 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -1325,6 +1325,7 @@ radeon_user_framebuffer_create(struct drm_device *dev, /* Handle is imported dma-buf, so cannot be migrated to VRAM for scanout */ if (obj->import_attach) { DRM_DEBUG_KMS("Cannot create framebuffer from imported dma_buf\n"); + drm_gem_object_put(obj); return ERR_PTR(-EINVAL); } diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 8cd135fa6dcd..5c23b77cb81a 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -374,13 +374,13 @@ radeon_pci_shutdown(struct pci_dev *pdev) if (radeon_device_is_virtual()) radeon_pci_remove(pdev); -#ifdef CONFIG_PPC64 +#if defined(CONFIG_PPC64) || defined(CONFIG_MACH_LOONGSON64) /* * Some adapters need to be suspended before a * shutdown occurs in order to prevent an error - * during kexec. - * Make this power specific becauase it breaks - * some non-power boards. + * during kexec, shutdown or reboot. + * Make this power and Loongson specific because + * it breaks some other boards. */ radeon_suspend_kms(pci_get_drvdata(pdev), true, true, false); #endif diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index bfaaa3c969a3..56ede9d63b12 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -49,23 +49,23 @@ static void radeon_bo_clear_surface_reg(struct radeon_bo *bo); * function are calling it. */ -static void radeon_update_memory_usage(struct radeon_bo *bo, - unsigned mem_type, int sign) +static void radeon_update_memory_usage(struct ttm_buffer_object *bo, + unsigned int mem_type, int sign) { - struct radeon_device *rdev = bo->rdev; + struct radeon_device *rdev = radeon_get_rdev(bo->bdev); switch (mem_type) { case TTM_PL_TT: if (sign > 0) - atomic64_add(bo->tbo.base.size, &rdev->gtt_usage); + atomic64_add(bo->base.size, &rdev->gtt_usage); else - atomic64_sub(bo->tbo.base.size, &rdev->gtt_usage); + atomic64_sub(bo->base.size, &rdev->gtt_usage); break; case TTM_PL_VRAM: if (sign > 0) - atomic64_add(bo->tbo.base.size, &rdev->vram_usage); + atomic64_add(bo->base.size, &rdev->vram_usage); else - atomic64_sub(bo->tbo.base.size, &rdev->vram_usage); + atomic64_sub(bo->base.size, &rdev->vram_usage); break; } } @@ -76,8 +76,6 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) bo = container_of(tbo, struct radeon_bo, tbo); - radeon_update_memory_usage(bo, bo->tbo.resource->mem_type, -1); - mutex_lock(&bo->rdev->gem.mutex); list_del_init(&bo->list); mutex_unlock(&bo->rdev->gem.mutex); @@ -727,24 +725,21 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved, } void radeon_bo_move_notify(struct ttm_buffer_object *bo, - bool evict, + unsigned int old_type, struct ttm_resource *new_mem) { struct radeon_bo *rbo; + radeon_update_memory_usage(bo, old_type, -1); + if (new_mem) + radeon_update_memory_usage(bo, new_mem->mem_type, 1); + if (!radeon_ttm_bo_is_radeon_bo(bo)) return; rbo = container_of(bo, struct radeon_bo, tbo); radeon_bo_check_tiling(rbo, 0, 1); radeon_vm_bo_invalidate(rbo->rdev, rbo); - - /* update statistics */ - if (!new_mem) - return; - - radeon_update_memory_usage(rbo, bo->resource->mem_type, -1); - radeon_update_memory_usage(rbo, new_mem->mem_type, 1); } vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index 1739c6a142cd..1afc7992ef91 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -161,7 +161,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo, extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved, bool force_drop); extern void radeon_bo_move_notify(struct ttm_buffer_object *bo, - bool evict, + unsigned int old_type, struct ttm_resource *new_mem); extern vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo); extern int radeon_bo_get_surface_reg(struct radeon_bo *bo); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index ad2a5a791bba..a06d4cc2fb1c 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -199,7 +199,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict, struct ttm_resource *old_mem = bo->resource; struct radeon_device *rdev; struct radeon_bo *rbo; - int r; + int r, old_type; if (new_mem->mem_type == TTM_PL_TT) { r = radeon_ttm_tt_bind(bo->bdev, bo->ttm, new_mem); @@ -216,6 +216,9 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict, if (WARN_ON_ONCE(rbo->tbo.pin_count > 0)) return -EINVAL; + /* Save old type for statistics update */ + old_type = old_mem->mem_type; + rdev = radeon_get_rdev(bo->bdev); if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) { ttm_bo_move_null(bo, new_mem); @@ -261,7 +264,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict, out: /* update statistics */ atomic64_add(bo->base.size, &rdev->num_bytes_moved); - radeon_bo_move_notify(bo, evict, new_mem); + radeon_bo_move_notify(bo, old_type, new_mem); return 0; } @@ -682,7 +685,11 @@ bool radeon_ttm_tt_is_readonly(struct radeon_device *rdev, static void radeon_bo_delete_mem_notify(struct ttm_buffer_object *bo) { - radeon_bo_move_notify(bo, false, NULL); + unsigned int old_type = TTM_PL_SYSTEM; + + if (bo->resource) + old_type = bo->resource->mem_type; + radeon_bo_move_notify(bo, old_type, NULL); } static struct ttm_device_funcs radeon_bo_driver = { |