diff options
Diffstat (limited to 'drivers/gpu/drm/msm/adreno/a6xx_gpu.c')
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 117 |
1 files changed, 57 insertions, 60 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index fdc578016e0b..36c8fb699b56 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -247,8 +247,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_RING(ring, submit->seqno); trace_msm_gpu_submit_flush(submit, - gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO, - REG_A6XX_CP_ALWAYS_ON_COUNTER_HI)); + gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO)); a6xx_flush(gpu, ring); } @@ -947,8 +946,7 @@ static int a6xx_ucode_init(struct msm_gpu *gpu) } } - gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, - REG_A6XX_CP_SQE_INSTR_BASE+1, a6xx_gpu->sqe_iova); + gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova); return 0; } @@ -999,8 +997,7 @@ static int hw_init(struct msm_gpu *gpu) * memory rendering at this point in time and we don't want to block off * part of the virtual memory space. */ - gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, - REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000); + gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000); gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000); /* Turn on 64 bit addressing for all blocks */ @@ -1049,11 +1046,9 @@ static int hw_init(struct msm_gpu *gpu) if (!adreno_is_a650_family(adreno_gpu)) { /* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */ - gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO, - REG_A6XX_UCHE_GMEM_RANGE_MIN_HI, 0x00100000); + gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000); gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX_LO, - REG_A6XX_UCHE_GMEM_RANGE_MAX_HI, 0x00100000 + adreno_gpu->gmem - 1); } @@ -1145,8 +1140,7 @@ static int hw_init(struct msm_gpu *gpu) goto out; /* Set the ringbuffer address */ - gpu_write64(gpu, REG_A6XX_CP_RB_BASE, REG_A6XX_CP_RB_BASE_HI, - gpu->rb[0]->iova); + gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova); /* Targets that support extended APRIV can use the RPTR shadow from * hardware but all the other ones need to disable the feature. Targets @@ -1178,7 +1172,6 @@ static int hw_init(struct msm_gpu *gpu) } gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR_LO, - REG_A6XX_CP_RB_RPTR_ADDR_HI, shadowptr(a6xx_gpu, gpu->rb[0])); } @@ -1499,9 +1492,9 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu) gpu_read(gpu, REG_A6XX_RBBM_STATUS), gpu_read(gpu, REG_A6XX_CP_RB_RPTR), gpu_read(gpu, REG_A6XX_CP_RB_WPTR), - gpu_read64(gpu, REG_A6XX_CP_IB1_BASE, REG_A6XX_CP_IB1_BASE_HI), + gpu_read64(gpu, REG_A6XX_CP_IB1_BASE), gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE), - gpu_read64(gpu, REG_A6XX_CP_IB2_BASE, REG_A6XX_CP_IB2_BASE_HI), + gpu_read64(gpu, REG_A6XX_CP_IB2_BASE), gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE)); /* Turn off the hangcheck timer to keep it from bothering us */ @@ -1712,8 +1705,7 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) /* Force the GPU power on so we can read this register */ a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); - *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO, - REG_A6XX_CP_ALWAYS_ON_COUNTER_HI); + *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO); a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); @@ -1786,43 +1778,16 @@ a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); - struct iommu_domain *iommu; - struct msm_mmu *mmu; - struct msm_gem_address_space *aspace; - u64 start, size; - - iommu = iommu_domain_alloc(&platform_bus_type); - if (!iommu) - return NULL; + unsigned long quirks = 0; /* * This allows GPU to set the bus attributes required to use system * cache on behalf of the iommu page table walker. */ if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice)) - adreno_set_llc_attributes(iommu); - - mmu = msm_iommu_new(&pdev->dev, iommu); - if (IS_ERR(mmu)) { - iommu_domain_free(iommu); - return ERR_CAST(mmu); - } - - /* - * Use the aperture start or SZ_16M, whichever is greater. This will - * ensure that we align with the allocated pagetable range while still - * allowing room in the lower 32 bits for GMEM and whatnot - */ - start = max_t(u64, SZ_16M, iommu->geometry.aperture_start); - size = iommu->geometry.aperture_end - start + 1; - - aspace = msm_gem_address_space_create(mmu, "gpu", - start & GENMASK_ULL(48, 0), size); + quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA; - if (IS_ERR(aspace) && !IS_ERR(mmu)) - mmu->funcs->destroy(mmu); - - return aspace; + return adreno_iommu_create_address_space(gpu, pdev, quirks); } static struct msm_gem_address_space * @@ -1851,6 +1816,39 @@ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR); } +static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + struct msm_cp_state cp_state = { + .ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE), + .ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE), + .ib1_rem = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE), + .ib2_rem = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE), + }; + bool progress; + + /* + * Adjust the remaining data to account for what has already been + * fetched from memory, but not yet consumed by the SQE. + * + * This is not *technically* correct, the amount buffered could + * exceed the IB size due to hw prefetching ahead, but: + * + * (1) We aren't trying to find the exact position, just whether + * progress has been made + * (2) The CP_REG_TO_MEM at the end of a submit should be enough + * to prevent prefetching into an unrelated submit. (And + * either way, at some point the ROQ will be full.) + */ + cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB1_STAT) >> 16; + cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB2_STAT) >> 16; + + progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state)); + + ring->last_cp_state = cp_state; + + return progress; +} + static u32 a618_get_speed_bin(u32 fuse) { if (fuse == 0) @@ -1906,7 +1904,7 @@ static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse) if (val == UINT_MAX) { DRM_DEV_ERROR(dev, - "missing support for speed-bin: %u. Some OPPs may not be supported by hardware", + "missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n", fuse); return UINT_MAX; } @@ -1916,7 +1914,7 @@ static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse) static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev) { - u32 supp_hw = UINT_MAX; + u32 supp_hw; u32 speedbin; int ret; @@ -1928,15 +1926,13 @@ static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev) if (ret == -ENOENT) { return 0; } else if (ret) { - DRM_DEV_ERROR(dev, - "failed to read speed-bin (%d). Some OPPs may not be supported by hardware", - ret); - goto done; + dev_err_probe(dev, ret, + "failed to read speed-bin. Some OPPs may not be supported by hardware\n"); + return ret; } supp_hw = fuse_to_supp_hw(dev, rev, speedbin); -done: ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1); if (ret) return ret; @@ -1969,6 +1965,7 @@ static const struct adreno_gpu_funcs funcs = { .create_address_space = a6xx_create_address_space, .create_private_address_space = a6xx_create_private_address_space, .get_rptr = a6xx_get_rptr, + .progress = a6xx_progress, }, .get_timestamp = a6xx_get_timestamp, }; @@ -2005,13 +2002,6 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), info->rev))) adreno_gpu->base.hw_apriv = true; - /* - * For now only clamp to idle freq for devices where this is known not - * to cause power supply issues: - */ - if (info && (info->revn == 618)) - gpu->clamp_to_idle = true; - a6xx_llc_slices_init(pdev, a6xx_gpu); ret = a6xx_set_supported_hw(&pdev->dev, config->rev); @@ -2026,6 +2016,13 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) return ERR_PTR(ret); } + /* + * For now only clamp to idle freq for devices where this is known not + * to cause power supply issues: + */ + if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu)) + gpu->clamp_to_idle = true; + /* Check if there is a GMU phandle and set it up */ node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0); |