author     Linus Torvalds <torvalds@linux-foundation.org>  2017-11-15 20:42:10 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-11-15 20:42:10 -0800
commit     e60e1ee60630cafef5e430c2ae364877e061d980
tree       816aeef8fe8d4a2c6a1ebbc7a350839bac8dd4c2 /drivers/gpu/drm/msm
parent     5d352e69c60e54b5f04d6e337a1d2bf0dbf3d94a
parent     f150891fd9878ef0d9197c4e8451ce67c3bdd014
Merge tag 'drm-for-v4.15' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie:
"This is the main drm pull request for v4.15.
Core:
- Atomic object lifetime fixes
- Atomic iterator improvements
- Sparse/smatch fixes
- Legacy kms ioctls to be interruptible
- EDID override improvements
- fb/gem helper cleanups
- Simple Outreachy patches
- Documentation improvements
- Fix dma-buf rcu races
- DRM mode object leasing for improving VR use cases.
- vgaarb improvements for non-x86 platforms.
New driver:
- tve200: Faraday Technology TVE200 block.
This "TV Encoder" encodes a ITU-T BT.656 stream and can be found in
the StorLink SL3516 (later Cortina Systems CS3516) as well as the
Grain Media GM8180.
New bridges:
- SiI9234 support
New panels:
- S6E63J0X03, OTM8009A, Seiko 43WVF1G, 7" rpi touch panel, Toshiba
LT089AC19000, Innolux AT043TN24
i915:
- Remove Coffeelake from alpha support
- Cannonlake workarounds
- Infoframe refactoring for DisplayPort
- VBT updates
- DisplayPort vswing/emph/buffer translation refactoring
- CCS fixes
- Restore GPU clock boost on missed vblanks
- Scatter list updates for userptr allocations
- Gen9+ transition watermarks
- Display IPC (Isochronous Priority Control)
- Private PAT management
- GVT: improved error handling and pci config sanitizing
- Execlist refactoring
- Transparent Huge Page support
- User defined priorities support
- HuC/GuC firmware refactoring
- DP MST fixes
- eDP power sequencing fixes
- Use RCU instead of stop_machine
- PSR state tracking support
- Eviction fixes
- BDW DP aux channel timeout fixes
- LSPCON fixes
- Cannonlake PLL fixes
amdgpu:
- Per VM BO support
- Powerplay cleanups
- CI powerplay support
- PASID mgr for kfd
- SR-IOV fixes
- initial GPU reset for vega10
- Prime mmap support
- TTM updates
- Clock query interface for Raven
- Fence to handle ioctl
- UVD encode ring support on Polaris
- Transparent huge page DMA support
- Compute LRU pipe tweaks
- BO flag to allow buffers to opt out of implicit sync
- CTX priority setting API
- VRAM lost infrastructure plumbing
qxl:
- fix flicker since atomic rework
amdkfd:
- Further improvements from internal AMD tree
- Usermode events
- Drop radeon support
nouveau:
- Pascal temperature sensor support
- Improved BAR2 handling
- MMU rework to support Pascal MMU
exynos:
- Improved HDMI/mixer support
- HDMI audio interface support
tegra:
- Prep work for tegra186
- Cleanup/fixes
msm:
- Preemption support for a5xx (see the state machine sketch just before the diff below)
- Display fixes for 8x96 (Snapdragon 820)
- Async cursor plane fixes
- FW loading rework (sketched after this message)
- GPU debugging improvements
vc4:
- Prep for DSI panels
- fix T-format tiling scanout
- New madvise ioctl
Rockchip:
- LVDS support
omapdrm:
- omap4 HDMI CEC support
etnaviv:
- GPU performance counters groundwork
sun4i:
- refactor driver load + TCON backend
- HDMI improvements
- A31 support
- Misc fixes
udl:
- Probe/EDID read fixes.
tilcdc:
- Misc fixes.
pl111:
- Support more variants
adv7511:
- Improve EDID handling.
- HDMI CEC support
sii8620:
- Add remote control support"
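The msm "FW loading rework" called out above centralizes firmware lookup in a new adreno_request_fw() helper (see the adreno_gpu.c hunks in the diff). Below is a minimal userspace analogue of its fallback order, a sketch in which only the search order is taken from the patch; the filesystem paths and the firmware name are illustrative:

```c
#include <stdio.h>

enum fw_location { FW_LOCATION_UNKNOWN, FW_LOCATION_NEW, FW_LOCATION_LEGACY };

static enum fw_location fwloc = FW_LOCATION_UNKNOWN;

static FILE *request_fw(const char *fwname)
{
	char path[256];
	FILE *fw;

	/* Preferred: the qcom/ subdirectory used by linux-firmware */
	if (fwloc == FW_LOCATION_UNKNOWN || fwloc == FW_LOCATION_NEW) {
		snprintf(path, sizeof(path), "/lib/firmware/qcom/%s", fwname);
		fw = fopen(path, "rb");
		if (fw) {
			fwloc = FW_LOCATION_NEW;	/* remember for later loads */
			return fw;
		}
	}

	/* Fallback: the legacy flat location, following qcom's android kernel */
	if (fwloc == FW_LOCATION_UNKNOWN || fwloc == FW_LOCATION_LEGACY) {
		snprintf(path, sizeof(path), "/lib/firmware/%s", fwname);
		fw = fopen(path, "rb");
		if (fw) {
			fwloc = FW_LOCATION_LEGACY;
			return fw;
		}
	}

	return NULL;	/* the kernel helper returns ERR_PTR(-ENOENT) here */
}

int main(void)
{
	FILE *fw = request_fw("a530_pm4.fw");

	puts(fw ? "loaded" : "not found");
	if (fw)
		fclose(fw);
	return 0;
}
```

The kernel helper adds a third tier that retries via request_firmware() so the usermode helper can still satisfy the request (mainly for Android), and it caches the winning location in adreno_gpu->fwloc so subsequent loads skip the probing.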
* tag 'drm-for-v4.15' of git://people.freedesktop.org/~airlied/linux: (1480 commits)
drm/rockchip: analogix_dp: Use mutex rather than spinlock
drm/mode_object: fix documentation for object lookups.
drm/i915: Reorder context-close to avoid calling i915_vma_close() under RCU
drm/i915: Move init_clock_gating() back to where it was
drm/i915: Prune the reservation shared fence array
drm/i915: Idle the GPU before shinking everything
drm/i915: Lock llist_del_first() vs llist_del_all()
drm/i915: Calculate ironlake intermediate watermarks correctly, v2.
drm/i915: Disable lazy PPGTT page table optimization for vGPU
drm/i915/execlists: Remove the priority "optimisation"
drm/i915: Filter out spurious execlists context-switch interrupts
drm/amdgpu: use irq-safe lock for kiq->ring_lock
drm/amdgpu: bypass lru touch for KIQ ring submission
drm/amdgpu: Potential uninitialized variable in amdgpu_vm_update_directories()
drm/amdgpu: potential uninitialized variable in amdgpu_vce_ring_parse_cs()
drm/amd/powerplay: initialize a variable before using it
drm/amd/powerplay: suppress KASAN out of bounds warning in vega10_populate_all_memory_levels
drm/amd/amdgpu: fix evicted VRAM bo adjudgement condition
drm/vblank: Tune drm_crtc_accurate_vblank_count() WARN down to a debug
drm/rockchip: add CONFIG_OF dependency for lvds
...
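Many of the hunks below reroute the CP packet helpers (OUT_PKT0/3/4/7) from the GPU to a specific ringbuffer. For orientation, this is roughly how the type-7 packet header those helpers emit is assembled. The bit layout is taken verbatim from the OUT_PKT7() hunk in adreno_gpu.h; the numeric CP_TYPE7_PKT value is an assumption based on the driver's generated headers, and parity() is a generic fold standing in for the driver's PM4_PARITY():

```c
#include <stdint.h>
#include <stdio.h>

#define CP_TYPE7_PKT 0x70000000u	/* assumed value; cf. adreno_pm4.xml.h */

/* Generic parity fold: returns 1 if an odd number of bits are set */
static uint32_t parity(uint32_t val)
{
	val ^= val >> 16;
	val ^= val >> 8;
	val ^= val >> 4;
	val ^= val >> 2;
	val ^= val >> 1;
	return val & 1;
}

/* Same bit layout as OUT_PKT7() in the adreno_gpu.h hunk below */
static uint32_t pkt7_header(uint8_t opcode, uint16_t cnt)
{
	return CP_TYPE7_PKT | (cnt << 0) | (parity(cnt) << 15) |
	       ((opcode & 0x7F) << 16) | (parity(opcode) << 23);
}

int main(void)
{
	/* hypothetical opcode and payload length */
	printf("0x%08x\n", pkt7_header(0x21, 4));
	return 0;
}
```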
Diffstat (limited to 'drivers/gpu/drm/msm')
42 files changed, 1926 insertions, 685 deletions
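The largest functional change in this diff is a5xx preemption (the new a5xx_preempt.c below), which coordinates the submit and IRQ paths through a small lockless state machine instead of a lock. Here is a standalone sketch of that pattern, recast with C11 atomics in place of the kernel's atomic_t and with all hardware programming stubbed out:

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum preempt_state {
	PREEMPT_NONE = 0,	/* no preemption in progress */
	PREEMPT_START,		/* trigger is evaluating if preemption is possible */
	PREEMPT_ABORT,		/* intermediate state on the way back to NONE */
	PREEMPT_TRIGGERED,	/* preemption executed on the hardware */
	PREEMPT_FAULTED,	/* preemption timed out; recovery will run */
	PREEMPT_PENDING,	/* completion IRQ fired; checking success */
};

static _Atomic int preempt_state = PREEMPT_NONE;

/* Move old -> new atomically; fails if someone else got there first,
 * which is what makes the trigger safe from both submit and IRQ paths. */
static bool try_preempt_state(enum preempt_state old, enum preempt_state new)
{
	int expected = old;

	return atomic_compare_exchange_strong(&preempt_state, &expected, new);
}

static void preempt_trigger(bool have_higher_prio_ring)
{
	if (!try_preempt_state(PREEMPT_NONE, PREEMPT_START))
		return;		/* a preemption is already in flight */

	if (!have_higher_prio_ring) {
		/* Nothing to switch to: publish ABORT so a racing submit
		 * knows the current ring will not change, then go idle. */
		atomic_store(&preempt_state, PREEMPT_ABORT);
		/* ... update wptr for the current ring ... */
		atomic_store(&preempt_state, PREEMPT_NONE);
		return;
	}

	/* ... program CP_CONTEXT_SWITCH_* registers, arm the watchdog ... */
	atomic_store(&preempt_state, PREEMPT_TRIGGERED);
}

int main(void)
{
	preempt_trigger(true);
	printf("state after trigger: %d\n", atomic_load(&preempt_state));
	return 0;
}
```

The PREEMPT_ABORT state exists purely to close a race described in the patch: a submit that sees any state other than NONE will not touch the wptr register, so when the trigger finds nothing to switch to it publishes ABORT, updates the wptr itself, and only then drops back to NONE.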
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index ced70783b44e..92b3844202d2 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -9,6 +9,7 @@ msm-y := \ adreno/a4xx_gpu.o \ adreno/a5xx_gpu.o \ adreno/a5xx_power.o \ + adreno/a5xx_preempt.o \ hdmi/hdmi.o \ hdmi/hdmi_audio.o \ hdmi/hdmi_bridge.o \ @@ -58,7 +59,8 @@ msm-y := \ msm_iommu.o \ msm_perf.o \ msm_rd.o \ - msm_ringbuffer.o + msm_ringbuffer.o \ + msm_submitqueue.o msm-$(CONFIG_DRM_FBDEV_EMULATION) += msm_fbdev.o msm-$(CONFIG_COMMON_CLK) += mdp/mdp4/mdp4_lvds_pll.o diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 7791313405b5..4baef2738178 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -44,7 +44,7 @@ static bool a3xx_idle(struct msm_gpu *gpu); static bool a3xx_me_init(struct msm_gpu *gpu) { - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT3(ring, CP_ME_INIT, 17); OUT_RING(ring, 0x000003f7); @@ -65,7 +65,7 @@ static bool a3xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); return a3xx_idle(gpu); } @@ -339,7 +339,7 @@ static void a3xx_destroy(struct msm_gpu *gpu) static bool a3xx_idle(struct msm_gpu *gpu) { /* wait for ringbuffer to drain: */ - if (!adreno_idle(gpu)) + if (!adreno_idle(gpu, gpu->rb[0])) return false; /* then wait for GPU to finish: */ @@ -444,9 +444,9 @@ static const struct adreno_gpu_funcs funcs = { .pm_suspend = msm_gpu_pm_suspend, .pm_resume = msm_gpu_pm_resume, .recover = a3xx_recover, - .last_fence = adreno_last_fence, .submit = adreno_submit, .flush = adreno_flush, + .active_ring = adreno_active_ring, .irq = a3xx_irq, .destroy = a3xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -492,7 +492,7 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) adreno_gpu->registers = a3xx_registers; adreno_gpu->reg_offsets = a3xx_register_offsets; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index 58341ef6f15b..8199a4b9f2fa 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -116,7 +116,7 @@ static void a4xx_enable_hwcg(struct msm_gpu *gpu) static bool a4xx_me_init(struct msm_gpu *gpu) { - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT3(ring, CP_ME_INIT, 17); OUT_RING(ring, 0x000003f7); @@ -137,7 +137,7 @@ static bool a4xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); return a4xx_idle(gpu); } @@ -337,7 +337,7 @@ static void a4xx_destroy(struct msm_gpu *gpu) static bool a4xx_idle(struct msm_gpu *gpu) { /* wait for ringbuffer to drain: */ - if (!adreno_idle(gpu)) + if (!adreno_idle(gpu, gpu->rb[0])) return false; /* then wait for GPU to finish: */ @@ -532,9 +532,9 @@ static const struct adreno_gpu_funcs funcs = { .pm_suspend = a4xx_pm_suspend, .pm_resume = a4xx_pm_resume, .recover = a4xx_recover, - .last_fence = adreno_last_fence, .submit = adreno_submit, .flush = adreno_flush, + .active_ring = adreno_active_ring, .irq = a4xx_irq, .destroy = a4xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -574,7 +574,7 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) adreno_gpu->registers = a4xx_registers; 
adreno_gpu->reg_offsets = a4xx_register_offsets; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 17c59d839e6f..a1f4eeeb73e2 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -26,8 +26,9 @@ static void a5xx_dump(struct msm_gpu *gpu); #define GPU_PAS_ID 13 -static int zap_shader_load_mdt(struct device *dev, const char *fwname) +static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname) { + struct device *dev = &gpu->pdev->dev; const struct firmware *fw; struct device_node *np; struct resource r; @@ -55,10 +56,10 @@ static int zap_shader_load_mdt(struct device *dev, const char *fwname) mem_size = resource_size(&r); /* Request the MDT file for the firmware */ - ret = request_firmware(&fw, fwname, dev); - if (ret) { + fw = adreno_request_fw(to_adreno_gpu(gpu), fwname); + if (IS_ERR(fw)) { DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname); - return ret; + return PTR_ERR(fw); } /* Figure out how much memory we need */ @@ -75,9 +76,26 @@ static int zap_shader_load_mdt(struct device *dev, const char *fwname) goto out; } - /* Load the rest of the MDT */ - ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID, mem_region, mem_phys, - mem_size); + /* + * Load the rest of the MDT + * + * Note that we could be dealing with two different paths, since + * with upstream linux-firmware it would be in a qcom/ subdir.. + * adreno_request_fw() handles this, but qcom_mdt_load() does + * not. But since we've already gotten thru adreno_request_fw() + * we know which of the two cases it is: + */ + if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) { + ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID, + mem_region, mem_phys, mem_size); + } else { + char newname[strlen("qcom/") + strlen(fwname) + 1]; + + sprintf(newname, "qcom/%s", fwname); + + ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID, + mem_region, mem_phys, mem_size); + } if (ret) goto out; @@ -95,14 +113,65 @@ out: return ret; } +static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + uint32_t wptr; + unsigned long flags; + + spin_lock_irqsave(&ring->lock, flags); + + /* Copy the shadow to the actual register */ + ring->cur = ring->next; + + /* Make sure to wrap wptr if we need to */ + wptr = get_wptr(ring); + + spin_unlock_irqrestore(&ring->lock, flags); + + /* Make sure everything is posted before making a decision */ + mb(); + + /* Update HW if this is the current ring and we are not in preempt */ + if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu)) + gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr); +} + static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); struct msm_drm_private *priv = gpu->dev->dev_private; - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; + OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x02); + + /* Turn off protected mode to write to special registers */ + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); + OUT_RING(ring, 0); + + /* Set the save preemption record for the ring/command */ + OUT_PKT4(ring, 
REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2); + OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id])); + OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id])); + + /* Turn back on protected mode */ + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); + OUT_RING(ring, 1); + + /* Enable local preemption for finegrain preemption */ + OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x02); + + /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */ + OUT_PKT7(ring, CP_YIELD_ENABLE, 1); + OUT_RING(ring, 0x02); + + /* Submit the commands */ for (i = 0; i < submit->nr_cmds; i++) { switch (submit->cmd[i].type) { case MSM_SUBMIT_CMD_IB_TARGET_BUF: @@ -120,16 +189,54 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, } } + /* + * Write the render mode to NULL (0) to indicate to the CP that the IBs + * are done rendering - otherwise a lucky preemption would start + * replaying from the last checkpoint + */ + OUT_PKT7(ring, CP_SET_RENDER_MODE, 5); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + + /* Turn off IB level preemptions */ + OUT_PKT7(ring, CP_YIELD_ENABLE, 1); + OUT_RING(ring, 0x01); + + /* Write the fence to the scratch register */ OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1); - OUT_RING(ring, submit->fence->seqno); + OUT_RING(ring, submit->seqno); + /* + * Execute a CACHE_FLUSH_TS event. This will ensure that the + * timestamp is written to the memory and then triggers the interrupt + */ OUT_PKT7(ring, CP_EVENT_WRITE, 4); OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31)); - OUT_RING(ring, lower_32_bits(rbmemptr(adreno_gpu, fence))); - OUT_RING(ring, upper_32_bits(rbmemptr(adreno_gpu, fence))); - OUT_RING(ring, submit->fence->seqno); + OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); + OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); + OUT_RING(ring, submit->seqno); - gpu->funcs->flush(gpu); + /* Yield the floor on command completion */ + OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); + /* + * If dword[2:1] are non zero, they specify an address for the CP to + * write the value of dword[3] to on preemption complete. Write 0 to + * skip the write + */ + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x00); + /* Data value - not used if the address above is 0 */ + OUT_RING(ring, 0x01); + /* Set bit 0 to trigger an interrupt on preempt complete */ + OUT_RING(ring, 0x01); + + a5xx_flush(gpu, ring); + + /* Check to see if we need to start preemption */ + a5xx_preempt_trigger(gpu); } static const struct { @@ -245,7 +352,7 @@ void a5xx_set_hwcg(struct msm_gpu *gpu, bool state) static int a5xx_me_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT7(ring, CP_ME_INIT, 8); @@ -276,11 +383,54 @@ static int a5xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); + return a5xx_idle(gpu, ring) ? 
0 : -EINVAL; +} + +static int a5xx_preempt_start(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + struct msm_ringbuffer *ring = gpu->rb[0]; + + if (gpu->nr_rings == 1) + return 0; + + /* Turn off protected mode to write to special registers */ + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); + OUT_RING(ring, 0); + + /* Set the save preemption record for the ring/command */ + OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2); + OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id])); + OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id])); + + /* Turn back on protected mode */ + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); + OUT_RING(ring, 1); + + OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x00); + + OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1); + OUT_RING(ring, 0x01); - return a5xx_idle(gpu) ? 0 : -EINVAL; + OUT_PKT7(ring, CP_YIELD_ENABLE, 1); + OUT_RING(ring, 0x01); + + /* Yield the floor on command completion */ + OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4); + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x00); + OUT_RING(ring, 0x01); + OUT_RING(ring, 0x01); + + gpu->funcs->flush(gpu, ring); + + return a5xx_idle(gpu, ring) ? 0 : -EINVAL; } + static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu, const struct firmware *fw, u64 *iova) { @@ -381,7 +531,7 @@ static int a5xx_zap_shader_init(struct msm_gpu *gpu) return -ENODEV; } - ret = zap_shader_load_mdt(&pdev->dev, adreno_gpu->info->zapfw); + ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw); loaded = !ret; @@ -396,6 +546,7 @@ static int a5xx_zap_shader_init(struct msm_gpu *gpu) A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \ A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \ A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \ + A5XX_RBBM_INT_0_MASK_CP_SW | \ A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \ A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \ A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP) @@ -536,13 +687,14 @@ static int a5xx_hw_init(struct msm_gpu *gpu) REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000); gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000); - /* Load the GPMU firmware before starting the HW init */ - a5xx_gpmu_ucode_init(gpu); - ret = adreno_hw_init(gpu); if (ret) return ret; + a5xx_preempt_hw_init(gpu); + + a5xx_gpmu_ucode_init(gpu); + ret = a5xx_ucode_init(gpu); if (ret) return ret; @@ -565,11 +717,11 @@ static int a5xx_hw_init(struct msm_gpu *gpu) * ticking correctly */ if (adreno_is_a530(adreno_gpu)) { - OUT_PKT7(gpu->rb, CP_EVENT_WRITE, 1); - OUT_RING(gpu->rb, 0x0F); + OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1); + OUT_RING(gpu->rb[0], 0x0F); - gpu->funcs->flush(gpu); - if (!a5xx_idle(gpu)) + gpu->funcs->flush(gpu, gpu->rb[0]); + if (!a5xx_idle(gpu, gpu->rb[0])) return -EINVAL; } @@ -582,11 +734,11 @@ static int a5xx_hw_init(struct msm_gpu *gpu) */ ret = a5xx_zap_shader_init(gpu); if (!ret) { - OUT_PKT7(gpu->rb, CP_SET_SECURE_MODE, 1); - OUT_RING(gpu->rb, 0x00000000); + OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1); + OUT_RING(gpu->rb[0], 0x00000000); - gpu->funcs->flush(gpu); - if (!a5xx_idle(gpu)) + gpu->funcs->flush(gpu, gpu->rb[0]); + if (!a5xx_idle(gpu, gpu->rb[0])) return -EINVAL; } else { /* Print a warning so if we die, we know why */ @@ -595,6 +747,9 @@ static int a5xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0); } + /* Last step - yield the ringbuffer */ + a5xx_preempt_start(gpu); + return 0; } @@ -625,6 +780,8 @@ static void a5xx_destroy(struct msm_gpu 
*gpu) DBG("%s", gpu->name); + a5xx_preempt_fini(gpu); + if (a5xx_gpu->pm4_bo) { if (a5xx_gpu->pm4_iova) msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace); @@ -660,18 +817,27 @@ static inline bool _a5xx_check_idle(struct msm_gpu *gpu) A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT); } -bool a5xx_idle(struct msm_gpu *gpu) +bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + + if (ring != a5xx_gpu->cur_ring) { + WARN(1, "Tried to idle a non-current ringbuffer\n"); + return false; + } + /* wait for CP to drain ringbuffer: */ - if (!adreno_idle(gpu)) + if (!adreno_idle(gpu, ring)) return false; if (spin_until(_a5xx_check_idle(gpu))) { - DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X\n", + DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n", gpu->name, __builtin_return_address(0), gpu_read(gpu, REG_A5XX_RBBM_STATUS), - gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS)); - + gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS), + gpu_read(gpu, REG_A5XX_CP_RB_RPTR), + gpu_read(gpu, REG_A5XX_CP_RB_WPTR)); return false; } @@ -802,9 +968,10 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu) { struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; + struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); - dev_err(dev->dev, "gpu fault fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", - gpu->funcs->last_fence(gpu), + dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", + ring ? ring->id : -1, ring ? ring->seqno : 0, gpu_read(gpu, REG_A5XX_RBBM_STATUS), gpu_read(gpu, REG_A5XX_CP_RB_RPTR), gpu_read(gpu, REG_A5XX_CP_RB_WPTR), @@ -854,8 +1021,13 @@ static irqreturn_t a5xx_irq(struct msm_gpu *gpu) if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP) a5xx_gpmu_err_irq(gpu); - if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) + if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) { + a5xx_preempt_trigger(gpu); msm_gpu_retire(gpu); + } + + if (status & A5XX_RBBM_INT_0_MASK_CP_SW) + a5xx_preempt_irq(gpu); return IRQ_HANDLED; } @@ -985,6 +1157,14 @@ static void a5xx_show(struct msm_gpu *gpu, struct seq_file *m) } #endif +static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + + return a5xx_gpu->cur_ring; +} + static const struct adreno_gpu_funcs funcs = { .base = { .get_param = adreno_get_param, @@ -992,9 +1172,9 @@ static const struct adreno_gpu_funcs funcs = { .pm_suspend = a5xx_pm_suspend, .pm_resume = a5xx_pm_resume, .recover = a5xx_recover, - .last_fence = adreno_last_fence, .submit = a5xx_submit, - .flush = adreno_flush, + .flush = a5xx_flush, + .active_ring = a5xx_active_ring, .irq = a5xx_irq, .destroy = a5xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -1030,7 +1210,7 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) a5xx_gpu->lm_leakage = 0x4E001A; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4); if (ret) { a5xx_destroy(&(a5xx_gpu->base.base)); return ERR_PTR(ret); @@ -1039,5 +1219,8 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) if (gpu->aspace) msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler); + /* Set up the preemption specific bits and pieces for each ringbuffer */ + 
a5xx_preempt_init(gpu); + return gpu; } diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h index e94451685bf8..6fb8c2f9b9e4 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2016 The Linux Foundation. All rights reserved. +/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -35,10 +35,100 @@ struct a5xx_gpu { uint32_t gpmu_dwords; uint32_t lm_leakage; + + struct msm_ringbuffer *cur_ring; + struct msm_ringbuffer *next_ring; + + struct drm_gem_object *preempt_bo[MSM_GPU_MAX_RINGS]; + struct a5xx_preempt_record *preempt[MSM_GPU_MAX_RINGS]; + uint64_t preempt_iova[MSM_GPU_MAX_RINGS]; + + atomic_t preempt_state; + struct timer_list preempt_timer; }; #define to_a5xx_gpu(x) container_of(x, struct a5xx_gpu, base) +/* + * In order to do lockless preemption we use a simple state machine to progress + * through the process. + * + * PREEMPT_NONE - no preemption in progress. Next state START. + * PREEMPT_START - The trigger is evaulating if preemption is possible. Next + * states: TRIGGERED, NONE + * PREEMPT_ABORT - An intermediate state before moving back to NONE. Next + * state: NONE. + * PREEMPT_TRIGGERED: A preemption has been executed on the hardware. Next + * states: FAULTED, PENDING + * PREEMPT_FAULTED: A preemption timed out (never completed). This will trigger + * recovery. Next state: N/A + * PREEMPT_PENDING: Preemption complete interrupt fired - the callback is + * checking the success of the operation. Next state: FAULTED, NONE. + */ + +enum preempt_state { + PREEMPT_NONE = 0, + PREEMPT_START, + PREEMPT_ABORT, + PREEMPT_TRIGGERED, + PREEMPT_FAULTED, + PREEMPT_PENDING, +}; + +/* + * struct a5xx_preempt_record is a shared buffer between the microcode and the + * CPU to store the state for preemption. The record itself is much larger + * (64k) but most of that is used by the CP for storage. + * + * There is a preemption record assigned per ringbuffer. When the CPU triggers a + * preemption, it fills out the record with the useful information (wptr, ring + * base, etc) and the microcode uses that information to set up the CP following + * the preemption. When a ring is switched out, the CP will save the ringbuffer + * state back to the record. In this way, once the records are properly set up + * the CPU can quickly switch back and forth between ringbuffers by only + * updating a few registers (often only the wptr). + * + * These are the CPU aware registers in the record: + * @magic: Must always be 0x27C4BAFC + * @info: Type of the record - written 0 by the CPU, updated by the CP + * @data: Data field from SET_RENDER_MODE or a checkpoint. 
Written and used by + * the CP + * @cntl: Value of RB_CNTL written by CPU, save/restored by CP + * @rptr: Value of RB_RPTR written by CPU, save/restored by CP + * @wptr: Value of RB_WPTR written by CPU, save/restored by CP + * @rptr_addr: Value of RB_RPTR_ADDR written by CPU, save/restored by CP + * @rbase: Value of RB_BASE written by CPU, save/restored by CP + * @counter: GPU address of the storage area for the performance counters + */ +struct a5xx_preempt_record { + uint32_t magic; + uint32_t info; + uint32_t data; + uint32_t cntl; + uint32_t rptr; + uint32_t wptr; + uint64_t rptr_addr; + uint64_t rbase; + uint64_t counter; +}; + +/* Magic identifier for the preemption record */ +#define A5XX_PREEMPT_RECORD_MAGIC 0x27C4BAFCUL + +/* + * Even though the structure above is only a few bytes, we need a full 64k to + * store the entire preemption record from the CP + */ +#define A5XX_PREEMPT_RECORD_SIZE (64 * 1024) + +/* + * The preemption counter block is a storage area for the value of the + * preemption counters that are saved immediately before context switch. We + * append it on to the end of the allocation for the preemption record. + */ +#define A5XX_PREEMPT_COUNTER_SIZE (16 * 4) + + int a5xx_power_init(struct msm_gpu *gpu); void a5xx_gpmu_ucode_init(struct msm_gpu *gpu); @@ -55,7 +145,22 @@ static inline int spin_usecs(struct msm_gpu *gpu, uint32_t usecs, return -ETIMEDOUT; } -bool a5xx_idle(struct msm_gpu *gpu); +bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring); void a5xx_set_hwcg(struct msm_gpu *gpu, bool state); +void a5xx_preempt_init(struct msm_gpu *gpu); +void a5xx_preempt_hw_init(struct msm_gpu *gpu); +void a5xx_preempt_trigger(struct msm_gpu *gpu); +void a5xx_preempt_irq(struct msm_gpu *gpu); +void a5xx_preempt_fini(struct msm_gpu *gpu); + +/* Return true if we are in a preempt state */ +static inline bool a5xx_in_preempt(struct a5xx_gpu *a5xx_gpu) +{ + int preempt_state = atomic_read(&a5xx_gpu->preempt_state); + + return !(preempt_state == PREEMPT_NONE || + preempt_state == PREEMPT_ABORT); +} + #endif /* __A5XX_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c index 04aab1dcae2b..e5700bbf09dd 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_power.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c @@ -173,7 +173,7 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; if (!a5xx_gpu->gpmu_dwords) return 0; @@ -192,9 +192,9 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu) OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); OUT_RING(ring, 1); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); - if (!a5xx_idle(gpu)) { + if (!a5xx_idle(gpu, ring)) { DRM_ERROR("%s: Unable to load GPMU firmware. GPMU will not be active\n", gpu->name); return -EINVAL; @@ -264,7 +264,8 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu) return; /* Get the firmware */ - if (request_firmware(&fw, adreno_gpu->info->gpmufw, drm->dev)) { + fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->gpmufw); + if (IS_ERR(fw)) { DRM_ERROR("%s: Could not get GPMU firmware. 
GPMU will not be active\n", gpu->name); return; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c new file mode 100644 index 000000000000..40f4840ef98e --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c @@ -0,0 +1,305 @@ +/* Copyright (c) 2017 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "msm_gem.h" +#include "a5xx_gpu.h" + +/* + * Try to transition the preemption state from old to new. Return + * true on success or false if the original state wasn't 'old' + */ +static inline bool try_preempt_state(struct a5xx_gpu *a5xx_gpu, + enum preempt_state old, enum preempt_state new) +{ + enum preempt_state cur = atomic_cmpxchg(&a5xx_gpu->preempt_state, + old, new); + + return (cur == old); +} + +/* + * Force the preemption state to the specified state. This is used in cases + * where the current state is known and won't change + */ +static inline void set_preempt_state(struct a5xx_gpu *gpu, + enum preempt_state new) +{ + /* + * preempt_state may be read by other cores trying to trigger a + * preemption or in the interrupt handler so barriers are needed + * before... + */ + smp_mb__before_atomic(); + atomic_set(&gpu->preempt_state, new); + /* ... and after*/ + smp_mb__after_atomic(); +} + +/* Write the most recent wptr for the given ring into the hardware */ +static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) +{ + unsigned long flags; + uint32_t wptr; + + if (!ring) + return; + + spin_lock_irqsave(&ring->lock, flags); + wptr = get_wptr(ring); + spin_unlock_irqrestore(&ring->lock, flags); + + gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr); +} + +/* Return the highest priority ringbuffer with something in it */ +static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu) +{ + unsigned long flags; + int i; + + for (i = 0; i < gpu->nr_rings; i++) { + bool empty; + struct msm_ringbuffer *ring = gpu->rb[i]; + + spin_lock_irqsave(&ring->lock, flags); + empty = (get_wptr(ring) == ring->memptrs->rptr); + spin_unlock_irqrestore(&ring->lock, flags); + + if (!empty) + return ring; + } + + return NULL; +} + +static void a5xx_preempt_timer(unsigned long data) +{ + struct a5xx_gpu *a5xx_gpu = (struct a5xx_gpu *) data; + struct msm_gpu *gpu = &a5xx_gpu->base.base; + struct drm_device *dev = gpu->dev; + struct msm_drm_private *priv = dev->dev_private; + + if (!try_preempt_state(a5xx_gpu, PREEMPT_TRIGGERED, PREEMPT_FAULTED)) + return; + + dev_err(dev->dev, "%s: preemption timed out\n", gpu->name); + queue_work(priv->wq, &gpu->recover_work); +} + +/* Try to trigger a preemption switch */ +void a5xx_preempt_trigger(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + unsigned long flags; + struct msm_ringbuffer *ring; + + if (gpu->nr_rings == 1) + return; + + /* + * Try to start preemption by moving from NONE to START. 
If + * unsuccessful, a preemption is already in flight + */ + if (!try_preempt_state(a5xx_gpu, PREEMPT_NONE, PREEMPT_START)) + return; + + /* Get the next ring to preempt to */ + ring = get_next_ring(gpu); + + /* + * If no ring is populated or the highest priority ring is the current + * one do nothing except to update the wptr to the latest and greatest + */ + if (!ring || (a5xx_gpu->cur_ring == ring)) { + /* + * Its possible that while a preemption request is in progress + * from an irq context, a user context trying to submit might + * fail to update the write pointer, because it determines + * that the preempt state is not PREEMPT_NONE. + * + * Close the race by introducing an intermediate + * state PREEMPT_ABORT to let the submit path + * know that the ringbuffer is not going to change + * and can safely update the write pointer. + */ + + set_preempt_state(a5xx_gpu, PREEMPT_ABORT); + update_wptr(gpu, a5xx_gpu->cur_ring); + set_preempt_state(a5xx_gpu, PREEMPT_NONE); + return; + } + + /* Make sure the wptr doesn't update while we're in motion */ + spin_lock_irqsave(&ring->lock, flags); + a5xx_gpu->preempt[ring->id]->wptr = get_wptr(ring); + spin_unlock_irqrestore(&ring->lock, flags); + + /* Set the address of the incoming preemption record */ + gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO, + REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI, + a5xx_gpu->preempt_iova[ring->id]); + + a5xx_gpu->next_ring = ring; + + /* Start a timer to catch a stuck preemption */ + mod_timer(&a5xx_gpu->preempt_timer, jiffies + msecs_to_jiffies(10000)); + + /* Set the preemption state to triggered */ + set_preempt_state(a5xx_gpu, PREEMPT_TRIGGERED); + + /* Make sure everything is written before hitting the button */ + wmb(); + + /* And actually start the preemption */ + gpu_write(gpu, REG_A5XX_CP_CONTEXT_SWITCH_CNTL, 1); +} + +void a5xx_preempt_irq(struct msm_gpu *gpu) +{ + uint32_t status; + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + struct drm_device *dev = gpu->dev; + struct msm_drm_private *priv = dev->dev_private; + + if (!try_preempt_state(a5xx_gpu, PREEMPT_TRIGGERED, PREEMPT_PENDING)) + return; + + /* Delete the preemption watchdog timer */ + del_timer(&a5xx_gpu->preempt_timer); + + /* + * The hardware should be setting CP_CONTEXT_SWITCH_CNTL to zero before + * firing the interrupt, but there is a non zero chance of a hardware + * condition or a software race that could set it again before we have a + * chance to finish. 
If that happens, log and go for recovery + */ + status = gpu_read(gpu, REG_A5XX_CP_CONTEXT_SWITCH_CNTL); + if (unlikely(status)) { + set_preempt_state(a5xx_gpu, PREEMPT_FAULTED); + dev_err(dev->dev, "%s: Preemption failed to complete\n", + gpu->name); + queue_work(priv->wq, &gpu->recover_work); + return; + } + + a5xx_gpu->cur_ring = a5xx_gpu->next_ring; + a5xx_gpu->next_ring = NULL; + + update_wptr(gpu, a5xx_gpu->cur_ring); + + set_preempt_state(a5xx_gpu, PREEMPT_NONE); +} + +void a5xx_preempt_hw_init(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + int i; + + for (i = 0; i < gpu->nr_rings; i++) { + a5xx_gpu->preempt[i]->wptr = 0; + a5xx_gpu->preempt[i]->rptr = 0; + a5xx_gpu->preempt[i]->rbase = gpu->rb[i]->iova; + } + + /* Write a 0 to signal that we aren't switching pagetables */ + gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO, + REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI, 0); + + /* Reset the preemption state */ + set_preempt_state(a5xx_gpu, PREEMPT_NONE); + + /* Always come up on rb 0 */ + a5xx_gpu->cur_ring = gpu->rb[0]; +} + +static int preempt_init_ring(struct a5xx_gpu *a5xx_gpu, + struct msm_ringbuffer *ring) +{ + struct adreno_gpu *adreno_gpu = &a5xx_gpu->base; + struct msm_gpu *gpu = &adreno_gpu->base; + struct a5xx_preempt_record *ptr; + struct drm_gem_object *bo = NULL; + u64 iova = 0; + + ptr = msm_gem_kernel_new(gpu->dev, + A5XX_PREEMPT_RECORD_SIZE + A5XX_PREEMPT_COUNTER_SIZE, + MSM_BO_UNCACHED, gpu->aspace, &bo, &iova); + + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + + a5xx_gpu->preempt_bo[ring->id] = bo; + a5xx_gpu->preempt_iova[ring->id] = iova; + a5xx_gpu->preempt[ring->id] = ptr; + + /* Set up the defaults on the preemption record */ + + ptr->magic = A5XX_PREEMPT_RECORD_MAGIC; + ptr->info = 0; + ptr->data = 0; + ptr->cntl = MSM_GPU_RB_CNTL_DEFAULT; + ptr->rptr_addr = rbmemptr(ring, rptr); + ptr->counter = iova + A5XX_PREEMPT_RECORD_SIZE; + + return 0; +} + +void a5xx_preempt_fini(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + int i; + + for (i = 0; i < gpu->nr_rings; i++) { + if (!a5xx_gpu->preempt_bo[i]) + continue; + + msm_gem_put_vaddr(a5xx_gpu->preempt_bo[i]); + + if (a5xx_gpu->preempt_iova[i]) + msm_gem_put_iova(a5xx_gpu->preempt_bo[i], gpu->aspace); + + drm_gem_object_unreference(a5xx_gpu->preempt_bo[i]); + a5xx_gpu->preempt_bo[i] = NULL; + } +} + +void a5xx_preempt_init(struct msm_gpu *gpu) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); + int i; + + /* No preemption if we only have one ring */ + if (gpu->nr_rings <= 1) + return; + + for (i = 0; i < gpu->nr_rings; i++) { + if (preempt_init_ring(a5xx_gpu, gpu->rb[i])) { + /* + * On any failure our adventure is over. 
Clean up and + * set nr_rings to 1 to force preemption off + */ + a5xx_preempt_fini(gpu); + gpu->nr_rings = 1; + + return; + } + } + + setup_timer(&a5xx_gpu->preempt_timer, a5xx_preempt_timer, + (unsigned long) a5xx_gpu); +} diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index c75c4df4bc39..05022ea2a007 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -125,51 +125,24 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev) { struct msm_drm_private *priv = dev->dev_private; struct platform_device *pdev = priv->gpu_pdev; - struct adreno_platform_config *config; - struct adreno_rev rev; - const struct adreno_info *info; - struct msm_gpu *gpu = NULL; + struct msm_gpu *gpu = platform_get_drvdata(priv->gpu_pdev); + int ret; - if (!pdev) { + if (!gpu) { dev_err(dev->dev, "no adreno device\n"); return NULL; } - config = pdev->dev.platform_data; - rev = config->rev; - info = adreno_info(config->rev); - - if (!info) { - dev_warn(dev->dev, "Unknown GPU revision: %u.%u.%u.%u\n", - rev.core, rev.major, rev.minor, rev.patchid); + pm_runtime_get_sync(&pdev->dev); + mutex_lock(&dev->struct_mutex); + ret = msm_gpu_hw_init(gpu); + mutex_unlock(&dev->struct_mutex); + pm_runtime_put_sync(&pdev->dev); + if (ret) { + dev_err(dev->dev, "gpu hw init failed: %d\n", ret); return NULL; } - DBG("Found GPU: %u.%u.%u.%u", rev.core, rev.major, - rev.minor, rev.patchid); - - gpu = info->init(dev); - if (IS_ERR(gpu)) { - dev_warn(dev->dev, "failed to load adreno gpu\n"); - gpu = NULL; - /* not fatal */ - } - - if (gpu) { - int ret; - - pm_runtime_get_sync(&pdev->dev); - mutex_lock(&dev->struct_mutex); - ret = msm_gpu_hw_init(gpu); - mutex_unlock(&dev->struct_mutex); - pm_runtime_put_sync(&pdev->dev); - if (ret) { - dev_err(dev->dev, "gpu hw init failed: %d\n", ret); - gpu->funcs->destroy(gpu); - gpu = NULL; - } - } - return gpu; } @@ -282,6 +255,9 @@ static int adreno_get_pwrlevels(struct device *dev, static int adreno_bind(struct device *dev, struct device *master, void *data) { static struct adreno_platform_config config = {}; + const struct adreno_info *info; + struct drm_device *drm = dev_get_drvdata(master); + struct msm_gpu *gpu; u32 val; int ret; @@ -302,13 +278,39 @@ static int adreno_bind(struct device *dev, struct device *master, void *data) return ret; dev->platform_data = &config; - set_gpu_pdev(dev_get_drvdata(master), to_platform_device(dev)); + set_gpu_pdev(drm, to_platform_device(dev)); + + info = adreno_info(config.rev); + + if (!info) { + dev_warn(drm->dev, "Unknown GPU revision: %u.%u.%u.%u\n", + config.rev.core, config.rev.major, + config.rev.minor, config.rev.patchid); + return -ENXIO; + } + + DBG("Found GPU: %u.%u.%u.%u", config.rev.core, config.rev.major, + config.rev.minor, config.rev.patchid); + + gpu = info->init(drm); + if (IS_ERR(gpu)) { + dev_warn(drm->dev, "failed to load adreno gpu\n"); + return PTR_ERR(gpu); + } + + dev_set_drvdata(dev, gpu); + return 0; } static void adreno_unbind(struct device *dev, struct device *master, void *data) { + struct msm_gpu *gpu = dev_get_drvdata(dev); + + gpu->funcs->pm_suspend(gpu); + gpu->funcs->destroy(gpu); + set_gpu_pdev(dev_get_drvdata(master), NULL); } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index c8b4ac254bb5..e2ffecce59a3 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -21,8 +21,6 @@ #include "msm_gem.h" #include "msm_mmu.h" 
-#define RB_SIZE SZ_32K -#define RB_BLKSIZE 32 int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) { @@ -58,72 +56,181 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) return ret; } return -EINVAL; + case MSM_PARAM_NR_RINGS: + *value = gpu->nr_rings; + return 0; default: DBG("%s: invalid param: %u", gpu->name, param); return -EINVAL; } } +const struct firmware * +adreno_request_fw(struct adreno_gpu *adreno_gpu, const char *fwname) +{ + struct drm_device *drm = adreno_gpu->base.dev; + const struct firmware *fw = NULL; + char newname[strlen("qcom/") + strlen(fwname) + 1]; + int ret; + + sprintf(newname, "qcom/%s", fwname); + + /* + * Try first to load from qcom/$fwfile using a direct load (to avoid + * a potential timeout waiting for usermode helper) + */ + if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) || + (adreno_gpu->fwloc == FW_LOCATION_NEW)) { + + ret = request_firmware_direct(&fw, newname, drm->dev); + if (!ret) { + dev_info(drm->dev, "loaded %s from new location\n", + newname); + adreno_gpu->fwloc = FW_LOCATION_NEW; + return fw; + } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { + dev_err(drm->dev, "failed to load %s: %d\n", + newname, ret); + return ERR_PTR(ret); + } + } + + /* + * Then try the legacy location without qcom/ prefix + */ + if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) || + (adreno_gpu->fwloc == FW_LOCATION_LEGACY)) { + + ret = request_firmware_direct(&fw, fwname, drm->dev); + if (!ret) { + dev_info(drm->dev, "loaded %s from legacy location\n", + newname); + adreno_gpu->fwloc = FW_LOCATION_LEGACY; + return fw; + } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { + dev_err(drm->dev, "failed to load %s: %d\n", + fwname, ret); + return ERR_PTR(ret); + } + } + + /* + * Finally fall back to request_firmware() for cases where the + * usermode helper is needed (I think mainly android) + */ + if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) || + (adreno_gpu->fwloc == FW_LOCATION_HELPER)) { + + ret = request_firmware(&fw, newname, drm->dev); + if (!ret) { + dev_info(drm->dev, "loaded %s with helper\n", + newname); + adreno_gpu->fwloc = FW_LOCATION_HELPER; + return fw; + } else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) { + dev_err(drm->dev, "failed to load %s: %d\n", + newname, ret); + return ERR_PTR(ret); + } + } + + dev_err(drm->dev, "failed to load %s\n", fwname); + return ERR_PTR(-ENOENT); +} + +static int adreno_load_fw(struct adreno_gpu *adreno_gpu) +{ + const struct firmware *fw; + + if (adreno_gpu->pm4) + return 0; + + fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pm4fw); + if (IS_ERR(fw)) + return PTR_ERR(fw); + adreno_gpu->pm4 = fw; + + fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->pfpfw); + if (IS_ERR(fw)) { + release_firmware(adreno_gpu->pm4); + adreno_gpu->pm4 = NULL; + return PTR_ERR(fw); + } + adreno_gpu->pfp = fw; + + return 0; +} + int adreno_hw_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - int ret; + int ret, i; DBG("%s", gpu->name); - ret = msm_gem_get_iova(gpu->rb->bo, gpu->aspace, &gpu->rb_iova); - if (ret) { - gpu->rb_iova = 0; - dev_err(gpu->dev->dev, "could not map ringbuffer: %d\n", ret); + ret = adreno_load_fw(adreno_gpu); + if (ret) return ret; - } - /* reset ringbuffer: */ - gpu->rb->cur = gpu->rb->start; + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + if (!ring) + continue; + + ret = msm_gem_get_iova(ring->bo, gpu->aspace, &ring->iova); + if (ret) { + ring->iova = 0; + dev_err(gpu->dev->dev, + 
"could not map ringbuffer %d: %d\n", i, ret); + return ret; + } + + ring->cur = ring->start; + ring->next = ring->start; - /* reset completed fence seqno: */ - adreno_gpu->memptrs->fence = gpu->fctx->completed_fence; - adreno_gpu->memptrs->rptr = 0; + /* reset completed fence seqno: */ + ring->memptrs->fence = ring->seqno; + ring->memptrs->rptr = 0; + } - /* Setup REG_CP_RB_CNTL: */ + /* + * Setup REG_CP_RB_CNTL. The same value is used across targets (with + * the excpetion of A430 that disables the RPTR shadow) - the cacluation + * for the ringbuffer size and block size is moved to msm_gpu.h for the + * pre-processor to deal with and the A430 variant is ORed in here + */ adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL, - /* size is log2(quad-words): */ - AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) | - AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) | - (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0)); + MSM_GPU_RB_CNTL_DEFAULT | + (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0)); - /* Setup ringbuffer address: */ + /* Setup ringbuffer address - use ringbuffer[0] for GPU init */ adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_BASE, - REG_ADRENO_CP_RB_BASE_HI, gpu->rb_iova); + REG_ADRENO_CP_RB_BASE_HI, gpu->rb[0]->iova); if (!adreno_is_a430(adreno_gpu)) { adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR, REG_ADRENO_CP_RB_RPTR_ADDR_HI, - rbmemptr(adreno_gpu, rptr)); + rbmemptr(gpu->rb[0], rptr)); } return 0; } -static uint32_t get_wptr(struct msm_ringbuffer *ring) -{ - return ring->cur - ring->start; -} - /* Use this helper to read rptr, since a430 doesn't update rptr in memory */ -static uint32_t get_rptr(struct adreno_gpu *adreno_gpu) +static uint32_t get_rptr(struct adreno_gpu *adreno_gpu, + struct msm_ringbuffer *ring) { if (adreno_is_a430(adreno_gpu)) - return adreno_gpu->memptrs->rptr = adreno_gpu_read( + return ring->memptrs->rptr = adreno_gpu_read( adreno_gpu, REG_ADRENO_CP_RB_RPTR); else - return adreno_gpu->memptrs->rptr; + return ring->memptrs->rptr; } -uint32_t adreno_last_fence(struct msm_gpu *gpu) +struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu) { - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - return adreno_gpu->memptrs->fence; + return gpu->rb[0]; } void adreno_recover(struct msm_gpu *gpu) @@ -149,7 +256,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct msm_drm_private *priv = gpu->dev->dev_private; - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = submit->ring; unsigned i; for (i = 0; i < submit->nr_cmds; i++) { @@ -164,7 +271,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, case MSM_SUBMIT_CMD_BUF: OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ? 
CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2); - OUT_RING(ring, submit->cmd[i].iova); + OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); OUT_RING(ring, submit->cmd[i].size); OUT_PKT2(ring); break; @@ -172,7 +279,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, } OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1); - OUT_RING(ring, submit->fence->seqno); + OUT_RING(ring, submit->seqno); if (adreno_is_a3xx(adreno_gpu) || adreno_is_a4xx(adreno_gpu)) { /* Flush HLSQ lazy updates to make sure there is nothing @@ -188,8 +295,8 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, OUT_PKT3(ring, CP_EVENT_WRITE, 3); OUT_RING(ring, CACHE_FLUSH_TS); - OUT_RING(ring, rbmemptr(adreno_gpu, fence)); - OUT_RING(ring, submit->fence->seqno); + OUT_RING(ring, rbmemptr(ring, fence)); + OUT_RING(ring, submit->seqno); /* we could maybe be clever and only CP_COND_EXEC the interrupt: */ OUT_PKT3(ring, CP_INTERRUPT, 1); @@ -215,20 +322,23 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, } #endif - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); } -void adreno_flush(struct msm_gpu *gpu) +void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); uint32_t wptr; + /* Copy the shadow to the actual register */ + ring->cur = ring->next; + /* * Mask wptr value that we calculate to fit in the HW range. This is * to account for the possibility that the last command fit exactly into * the ringbuffer and rb->next hasn't wrapped to zero yet */ - wptr = get_wptr(gpu->rb) & ((gpu->rb->size / 4) - 1); + wptr = get_wptr(ring); /* ensure writes to ringbuffer have hit system memory: */ mb(); @@ -236,17 +346,19 @@ void adreno_flush(struct msm_gpu *gpu) adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_WPTR, wptr); } -bool adreno_idle(struct msm_gpu *gpu) +bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - uint32_t wptr = get_wptr(gpu->rb); + uint32_t wptr = get_wptr(ring); /* wait for CP to drain ringbuffer: */ - if (!spin_until(get_rptr(adreno_gpu) == wptr)) + if (!spin_until(get_rptr(adreno_gpu, ring) == wptr)) return true; /* TODO maybe we need to reset GPU here to recover from hang? 
*/ - DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name); + DRM_ERROR("%s: timeout waiting to drain ringbuffer %d rptr/wptr = %X/%X\n", + gpu->name, ring->id, get_rptr(adreno_gpu, ring), wptr); + return false; } @@ -261,10 +373,16 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m) adreno_gpu->rev.major, adreno_gpu->rev.minor, adreno_gpu->rev.patchid); - seq_printf(m, "fence: %d/%d\n", adreno_gpu->memptrs->fence, - gpu->fctx->last_fence); - seq_printf(m, "rptr: %d\n", get_rptr(adreno_gpu)); - seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb)); + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + seq_printf(m, "rb %d: fence: %d/%d\n", i, + ring->memptrs->fence, ring->seqno); + + seq_printf(m, " rptr: %d\n", + get_rptr(adreno_gpu, ring)); + seq_printf(m, "rb wptr: %d\n", get_wptr(ring)); + } /* dump these out in a form that can be parsed by demsm: */ seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name); @@ -290,16 +408,23 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m) void adreno_dump_info(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + int i; printk("revision: %d (%d.%d.%d.%d)\n", adreno_gpu->info->revn, adreno_gpu->rev.core, adreno_gpu->rev.major, adreno_gpu->rev.minor, adreno_gpu->rev.patchid); - printk("fence: %d/%d\n", adreno_gpu->memptrs->fence, - gpu->fctx->last_fence); - printk("rptr: %d\n", get_rptr(adreno_gpu)); - printk("rb wptr: %d\n", get_wptr(gpu->rb)); + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + printk("rb %d: fence: %d/%d\n", i, + ring->memptrs->fence, + ring->seqno); + + printk("rptr: %d\n", get_rptr(adreno_gpu, ring)); + printk("rb wptr: %d\n", get_wptr(ring)); + } } /* would be nice to not have to duplicate the _show() stuff with printk(): */ @@ -322,28 +447,31 @@ void adreno_dump(struct msm_gpu *gpu) } } -static uint32_t ring_freewords(struct msm_gpu *gpu) +static uint32_t ring_freewords(struct msm_ringbuffer *ring) { - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - uint32_t size = gpu->rb->size / 4; - uint32_t wptr = get_wptr(gpu->rb); - uint32_t rptr = get_rptr(adreno_gpu); + struct adreno_gpu *adreno_gpu = to_adreno_gpu(ring->gpu); + uint32_t size = MSM_GPU_RINGBUFFER_SZ >> 2; + /* Use ring->next to calculate free size */ + uint32_t wptr = ring->next - ring->start; + uint32_t rptr = get_rptr(adreno_gpu, ring); return (rptr + (size - 1) - wptr) % size; } -void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords) +void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords) { - if (spin_until(ring_freewords(gpu) >= ndwords)) - DRM_ERROR("%s: timeout waiting for ringbuffer space\n", gpu->name); + if (spin_until(ring_freewords(ring) >= ndwords)) + DRM_DEV_ERROR(ring->gpu->dev->dev, + "timeout waiting for space in ringubffer %d\n", + ring->id); } int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, - struct adreno_gpu *adreno_gpu, const struct adreno_gpu_funcs *funcs) + struct adreno_gpu *adreno_gpu, + const struct adreno_gpu_funcs *funcs, int nr_rings) { struct adreno_platform_config *config = pdev->dev.platform_data; struct msm_gpu_config adreno_gpu_config = { 0 }; struct msm_gpu *gpu = &adreno_gpu->base; - int ret; adreno_gpu->funcs = funcs; adreno_gpu->info = adreno_info(config->rev); @@ -366,59 +494,20 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, adreno_gpu_config.va_start = SZ_16M; adreno_gpu_config.va_end = 0xffffffff; - adreno_gpu_config.ringsz = 
RB_SIZE; + adreno_gpu_config.nr_rings = nr_rings; pm_runtime_set_autosuspend_delay(&pdev->dev, DRM_MSM_INACTIVE_PERIOD); pm_runtime_use_autosuspend(&pdev->dev); pm_runtime_enable(&pdev->dev); - ret = msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base, + return msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base, adreno_gpu->info->name, &adreno_gpu_config); - if (ret) - return ret; - - ret = request_firmware(&adreno_gpu->pm4, adreno_gpu->info->pm4fw, drm->dev); - if (ret) { - dev_err(drm->dev, "failed to load %s PM4 firmware: %d\n", - adreno_gpu->info->pm4fw, ret); - return ret; - } - - ret = request_firmware(&adreno_gpu->pfp, adreno_gpu->info->pfpfw, drm->dev); - if (ret) { - dev_err(drm->dev, "failed to load %s PFP firmware: %d\n", - adreno_gpu->info->pfpfw, ret); - return ret; - } - - adreno_gpu->memptrs = msm_gem_kernel_new(drm, - sizeof(*adreno_gpu->memptrs), MSM_BO_UNCACHED, gpu->aspace, - &adreno_gpu->memptrs_bo, &adreno_gpu->memptrs_iova); - - if (IS_ERR(adreno_gpu->memptrs)) { - ret = PTR_ERR(adreno_gpu->memptrs); - adreno_gpu->memptrs = NULL; - dev_err(drm->dev, "could not allocate memptrs: %d\n", ret); - } - - return ret; } void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu) { - struct msm_gpu *gpu = &adreno_gpu->base; - - if (adreno_gpu->memptrs_bo) { - if (adreno_gpu->memptrs) - msm_gem_put_vaddr(adreno_gpu->memptrs_bo); - - if (adreno_gpu->memptrs_iova) - msm_gem_put_iova(adreno_gpu->memptrs_bo, gpu->aspace); - - drm_gem_object_unreference_unlocked(adreno_gpu->memptrs_bo); - } release_firmware(adreno_gpu->pm4); release_firmware(adreno_gpu->pfp); - msm_gpu_cleanup(gpu); + msm_gpu_cleanup(&adreno_gpu->base); } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 4d9165f29f43..28e3de6e5f94 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -2,7 +2,7 @@ * Copyright (C) 2013 Red Hat * Author: Rob Clark <robdclark@gmail.com> * - * Copyright (c) 2014 The Linux Foundation. All rights reserved. + * Copyright (c) 2014,2017 The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published by @@ -82,14 +82,6 @@ struct adreno_info { const struct adreno_info *adreno_info(struct adreno_rev rev); -#define rbmemptr(adreno_gpu, member) \ - ((adreno_gpu)->memptrs_iova + offsetof(struct adreno_rbmemptrs, member)) - -struct adreno_rbmemptrs { - volatile uint32_t rptr; - volatile uint32_t fence; -}; - struct adreno_gpu { struct msm_gpu base; struct adreno_rev rev; @@ -101,16 +93,30 @@ struct adreno_gpu { /* interesting register offsets to dump: */ const unsigned int *registers; + /* + * Are we loading fw from legacy path? Prior to addition + * of gpu firmware to linux-firmware, the fw files were + * placed in toplevel firmware directory, following qcom's + * android kernel. But linux-firmware preferred they be + * placed in a 'qcom' subdirectory. + * + * For backwards compatibility, we try first to load from + * the new path, using request_firmware_direct() to avoid + * any potential timeout waiting for usermode helper, then + * fall back to the old path (with direct load). And + * finally fall back to request_firmware() with the new + * path to allow the usermode helper. 
+ */ + enum { + FW_LOCATION_UNKNOWN = 0, + FW_LOCATION_NEW, /* /lib/firmware/qcom/$fwfile */ + FW_LOCATION_LEGACY, /* /lib/firmware/$fwfile */ + FW_LOCATION_HELPER, + } fwloc; + /* firmware: */ const struct firmware *pm4, *pfp; - /* ringbuffer rptr/wptr: */ - // TODO should this be in msm_ringbuffer? I think it would be - // different for z180.. - struct adreno_rbmemptrs *memptrs; - struct drm_gem_object *memptrs_bo; - uint64_t memptrs_iova; - /* * Register offsets are different between some GPUs. * GPU specific offsets will be exported by GPU specific @@ -196,22 +202,25 @@ static inline int adreno_is_a530(struct adreno_gpu *gpu) } int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value); +const struct firmware *adreno_request_fw(struct adreno_gpu *adreno_gpu, + const char *fwname); int adreno_hw_init(struct msm_gpu *gpu); -uint32_t adreno_last_fence(struct msm_gpu *gpu); void adreno_recover(struct msm_gpu *gpu); void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx); -void adreno_flush(struct msm_gpu *gpu); -bool adreno_idle(struct msm_gpu *gpu); +void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring); +bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring); #ifdef CONFIG_DEBUG_FS void adreno_show(struct msm_gpu *gpu, struct seq_file *m); #endif void adreno_dump_info(struct msm_gpu *gpu); void adreno_dump(struct msm_gpu *gpu); -void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords); +void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords); +struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu); int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, - struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs); + struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs, + int nr_rings); void adreno_gpu_cleanup(struct adreno_gpu *gpu); @@ -220,7 +229,7 @@ void adreno_gpu_cleanup(struct adreno_gpu *gpu); static inline void OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt+1); + adreno_wait_ring(ring, cnt+1); OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF)); } @@ -228,14 +237,14 @@ OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) static inline void OUT_PKT2(struct msm_ringbuffer *ring) { - adreno_wait_ring(ring->gpu, 1); + adreno_wait_ring(ring, 1); OUT_RING(ring, CP_TYPE2_PKT); } static inline void OUT_PKT3(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt+1); + adreno_wait_ring(ring, cnt+1); OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8)); } @@ -257,14 +266,14 @@ static inline u32 PM4_PARITY(u32 val) static inline void OUT_PKT4(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt + 1); + adreno_wait_ring(ring, cnt + 1); OUT_RING(ring, PKT4(regindx, cnt)); } static inline void OUT_PKT7(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt + 1); + adreno_wait_ring(ring, cnt + 1); OUT_RING(ring, CP_TYPE7_PKT | (cnt << 0) | (PM4_PARITY(cnt) << 15) | ((opcode & 0x7F) << 16) | (PM4_PARITY(opcode) << 23)); } @@ -323,6 +332,11 @@ static inline void adreno_gpu_write64(struct adreno_gpu *gpu, adreno_gpu_write(gpu, hi, upper_32_bits(data)); } +static inline uint32_t get_wptr(struct msm_ringbuffer *ring) +{ + return (ring->cur - ring->start) % (MSM_GPU_RINGBUFFER_SZ >> 2); +} + /* * Given a register and a count, 
return a value to program into * REG_CP_PROTECT_REG(n) - this will block both reads and writes for _len diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c index a5d75c9b3a73..65c1dfbbe019 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c @@ -14,7 +14,7 @@ #include "dsi_cfg.h" static const char * const dsi_v2_bus_clk_names[] = { - "core_mmss_clk", "iface_clk", "bus_clk", + "core_mmss", "iface", "bus", }; static const struct msm_dsi_config apq8064_dsi_cfg = { @@ -34,7 +34,7 @@ static const struct msm_dsi_config apq8064_dsi_cfg = { }; static const char * const dsi_6g_bus_clk_names[] = { - "mdp_core_clk", "iface_clk", "bus_clk", "core_mmss_clk", + "mdp_core", "iface", "bus", "core_mmss", }; static const struct msm_dsi_config msm8974_apq8084_dsi_cfg = { @@ -55,7 +55,7 @@ static const struct msm_dsi_config msm8974_apq8084_dsi_cfg = { }; static const char * const dsi_8916_bus_clk_names[] = { - "mdp_core_clk", "iface_clk", "bus_clk", + "mdp_core", "iface", "bus", }; static const struct msm_dsi_config msm8916_dsi_cfg = { @@ -99,7 +99,7 @@ static const struct msm_dsi_config msm8994_dsi_cfg = { * without it too. Figure out why it doesn't enable and uncomment below */ static const char * const dsi_8996_bus_clk_names[] = { - "mdp_core_clk", "iface_clk", "bus_clk", /* "core_mmss_clk", */ + "mdp_core", "iface", "bus", /* "core_mmss", */ }; static const struct msm_dsi_config msm8996_dsi_cfg = { diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index deaf869374ea..0f7324a686ca 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -334,46 +334,46 @@ static int dsi_regulator_init(struct msm_dsi_host *msm_host) static int dsi_clk_init(struct msm_dsi_host *msm_host) { - struct device *dev = &msm_host->pdev->dev; + struct platform_device *pdev = msm_host->pdev; const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd; const struct msm_dsi_config *cfg = cfg_hnd->cfg; int i, ret = 0; /* get bus clocks */ for (i = 0; i < cfg->num_bus_clks; i++) { - msm_host->bus_clks[i] = devm_clk_get(dev, + msm_host->bus_clks[i] = msm_clk_get(pdev, cfg->bus_clk_names[i]); if (IS_ERR(msm_host->bus_clks[i])) { ret = PTR_ERR(msm_host->bus_clks[i]); - pr_err("%s: Unable to get %s, ret = %d\n", + pr_err("%s: Unable to get %s clock, ret = %d\n", __func__, cfg->bus_clk_names[i], ret); goto exit; } } /* get link and source clocks */ - msm_host->byte_clk = devm_clk_get(dev, "byte_clk"); + msm_host->byte_clk = msm_clk_get(pdev, "byte"); if (IS_ERR(msm_host->byte_clk)) { ret = PTR_ERR(msm_host->byte_clk); - pr_err("%s: can't find dsi_byte_clk. ret=%d\n", + pr_err("%s: can't find dsi_byte clock. ret=%d\n", __func__, ret); msm_host->byte_clk = NULL; goto exit; } - msm_host->pixel_clk = devm_clk_get(dev, "pixel_clk"); + msm_host->pixel_clk = msm_clk_get(pdev, "pixel"); if (IS_ERR(msm_host->pixel_clk)) { ret = PTR_ERR(msm_host->pixel_clk); - pr_err("%s: can't find dsi_pixel_clk. ret=%d\n", + pr_err("%s: can't find dsi_pixel clock. ret=%d\n", __func__, ret); msm_host->pixel_clk = NULL; goto exit; } - msm_host->esc_clk = devm_clk_get(dev, "core_clk"); + msm_host->esc_clk = msm_clk_get(pdev, "core"); if (IS_ERR(msm_host->esc_clk)) { ret = PTR_ERR(msm_host->esc_clk); - pr_err("%s: can't find dsi_esc_clk. ret=%d\n", + pr_err("%s: can't find dsi_esc clock. 
ret=%d\n", __func__, ret); msm_host->esc_clk = NULL; goto exit; @@ -382,22 +382,22 @@ static int dsi_clk_init(struct msm_dsi_host *msm_host) msm_host->byte_clk_src = clk_get_parent(msm_host->byte_clk); if (!msm_host->byte_clk_src) { ret = -ENODEV; - pr_err("%s: can't find byte_clk_src. ret=%d\n", __func__, ret); + pr_err("%s: can't find byte_clk clock. ret=%d\n", __func__, ret); goto exit; } msm_host->pixel_clk_src = clk_get_parent(msm_host->pixel_clk); if (!msm_host->pixel_clk_src) { ret = -ENODEV; - pr_err("%s: can't find pixel_clk_src. ret=%d\n", __func__, ret); + pr_err("%s: can't find pixel_clk clock. ret=%d\n", __func__, ret); goto exit; } if (cfg_hnd->major == MSM_DSI_VER_MAJOR_V2) { - msm_host->src_clk = devm_clk_get(dev, "src_clk"); + msm_host->src_clk = msm_clk_get(pdev, "src"); if (IS_ERR(msm_host->src_clk)) { ret = PTR_ERR(msm_host->src_clk); - pr_err("%s: can't find dsi_src_clk. ret=%d\n", + pr_err("%s: can't find src clock. ret=%d\n", __func__, ret); msm_host->src_clk = NULL; goto exit; @@ -406,7 +406,7 @@ static int dsi_clk_init(struct msm_dsi_host *msm_host) msm_host->esc_clk_src = clk_get_parent(msm_host->esc_clk); if (!msm_host->esc_clk_src) { ret = -ENODEV; - pr_err("%s: can't get esc_clk_src. ret=%d\n", + pr_err("%s: can't get esc clock parent. ret=%d\n", __func__, ret); goto exit; } @@ -414,7 +414,7 @@ static int dsi_clk_init(struct msm_dsi_host *msm_host) msm_host->dsi_clk_src = clk_get_parent(msm_host->src_clk); if (!msm_host->dsi_clk_src) { ret = -ENODEV; - pr_err("%s: can't get dsi_clk_src. ret=%d\n", + pr_err("%s: can't get src clock parent. ret=%d\n", __func__, ret); } } diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 7c9bf91bc22b..790ca280cbfd 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -482,7 +482,7 @@ static int dsi_phy_driver_probe(struct platform_device *pdev) goto fail; } - phy->ahb_clk = devm_clk_get(dev, "iface_clk"); + phy->ahb_clk = msm_clk_get(pdev, "iface"); if (IS_ERR(phy->ahb_clk)) { dev_err(dev, "%s: Unable to get ahb clk\n", __func__); ret = PTR_ERR(phy->ahb_clk); diff --git a/drivers/gpu/drm/msm/edp/edp_ctrl.c b/drivers/gpu/drm/msm/edp/edp_ctrl.c index e32a4a4f3797..7c72264101ff 100644 --- a/drivers/gpu/drm/msm/edp/edp_ctrl.c +++ b/drivers/gpu/drm/msm/edp/edp_ctrl.c @@ -150,46 +150,46 @@ static const struct edp_pixel_clk_div clk_divs[2][EDP_PIXEL_CLK_NUM] = { static int edp_clk_init(struct edp_ctrl *ctrl) { - struct device *dev = &ctrl->pdev->dev; + struct platform_device *pdev = ctrl->pdev; int ret; - ctrl->aux_clk = devm_clk_get(dev, "core_clk"); + ctrl->aux_clk = msm_clk_get(pdev, "core"); if (IS_ERR(ctrl->aux_clk)) { ret = PTR_ERR(ctrl->aux_clk); - pr_err("%s: Can't find aux_clk, %d\n", __func__, ret); + pr_err("%s: Can't find core clock, %d\n", __func__, ret); ctrl->aux_clk = NULL; return ret; } - ctrl->pixel_clk = devm_clk_get(dev, "pixel_clk"); + ctrl->pixel_clk = msm_clk_get(pdev, "pixel"); if (IS_ERR(ctrl->pixel_clk)) { ret = PTR_ERR(ctrl->pixel_clk); - pr_err("%s: Can't find pixel_clk, %d\n", __func__, ret); + pr_err("%s: Can't find pixel clock, %d\n", __func__, ret); ctrl->pixel_clk = NULL; return ret; } - ctrl->ahb_clk = devm_clk_get(dev, "iface_clk"); + ctrl->ahb_clk = msm_clk_get(pdev, "iface"); if (IS_ERR(ctrl->ahb_clk)) { ret = PTR_ERR(ctrl->ahb_clk); - pr_err("%s: Can't find ahb_clk, %d\n", __func__, ret); + pr_err("%s: Can't find iface clock, %d\n", __func__, ret); ctrl->ahb_clk = NULL; return ret; } - ctrl->link_clk = 
devm_clk_get(dev, "link_clk"); + ctrl->link_clk = msm_clk_get(pdev, "link"); if (IS_ERR(ctrl->link_clk)) { ret = PTR_ERR(ctrl->link_clk); - pr_err("%s: Can't find link_clk, %d\n", __func__, ret); + pr_err("%s: Can't find link clock, %d\n", __func__, ret); ctrl->link_clk = NULL; return ret; } /* need mdp core clock to receive irq */ - ctrl->mdp_core_clk = devm_clk_get(dev, "mdp_core_clk"); + ctrl->mdp_core_clk = msm_clk_get(pdev, "mdp_core"); if (IS_ERR(ctrl->mdp_core_clk)) { ret = PTR_ERR(ctrl->mdp_core_clk); - pr_err("%s: Can't find mdp_core_clk, %d\n", __func__, ret); + pr_err("%s: Can't find mdp_core clock, %d\n", __func__, ret); ctrl->mdp_core_clk = NULL; return ret; } diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c index 17e069a133a4..e63dc0fb55f8 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi.c @@ -208,7 +208,7 @@ static struct hdmi *msm_hdmi_init(struct platform_device *pdev) for (i = 0; i < config->hpd_clk_cnt; i++) { struct clk *clk; - clk = devm_clk_get(&pdev->dev, config->hpd_clk_names[i]); + clk = msm_clk_get(pdev, config->hpd_clk_names[i]); if (IS_ERR(clk)) { ret = PTR_ERR(clk); dev_err(&pdev->dev, "failed to get hpd clk: %s (%d)\n", @@ -228,7 +228,7 @@ static struct hdmi *msm_hdmi_init(struct platform_device *pdev) for (i = 0; i < config->pwr_clk_cnt; i++) { struct clk *clk; - clk = devm_clk_get(&pdev->dev, config->pwr_clk_names[i]); + clk = msm_clk_get(pdev, config->pwr_clk_names[i]); if (IS_ERR(clk)) { ret = PTR_ERR(clk); dev_err(&pdev->dev, "failed to get pwr clk: %s (%d)\n", @@ -361,7 +361,7 @@ static const char *hpd_reg_names_none[] = {}; static struct hdmi_platform_config hdmi_tx_8660_config; static const char *hpd_reg_names_8960[] = {"core-vdda", "hdmi-mux"}; -static const char *hpd_clk_names_8960[] = {"core_clk", "master_iface_clk", "slave_iface_clk"}; +static const char *hpd_clk_names_8960[] = {"core", "master_iface", "slave_iface"}; static struct hdmi_platform_config hdmi_tx_8960_config = { HDMI_CFG(hpd_reg, 8960), @@ -370,8 +370,8 @@ static struct hdmi_platform_config hdmi_tx_8960_config = { static const char *pwr_reg_names_8x74[] = {"core-vdda", "core-vcc"}; static const char *hpd_reg_names_8x74[] = {"hpd-gdsc", "hpd-5v"}; -static const char *pwr_clk_names_8x74[] = {"extp_clk", "alt_iface_clk"}; -static const char *hpd_clk_names_8x74[] = {"iface_clk", "core_clk", "mdp_core_clk"}; +static const char *pwr_clk_names_8x74[] = {"extp", "alt_iface"}; +static const char *hpd_clk_names_8x74[] = {"iface", "core", "mdp_core"}; static unsigned long hpd_clk_freq_8x74[] = {0, 19200000, 0}; static struct hdmi_platform_config hdmi_tx_8974_config = { diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy.c index 534ce5b49781..5e631392dc85 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy.c @@ -48,7 +48,7 @@ static int msm_hdmi_phy_resource_init(struct hdmi_phy *phy) for (i = 0; i < cfg->num_clks; i++) { struct clk *clk; - clk = devm_clk_get(dev, cfg->clk_names[i]); + clk = msm_clk_get(phy->pdev, cfg->clk_names[i]); if (IS_ERR(clk)) { ret = PTR_ERR(clk); dev_err(dev, "failed to get phy clock: %s (%d)\n", diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c index e6ee6b745ab7..0980da8ec966 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c @@ -48,7 +48,7 @@ static const char * const hdmi_phy_8960_reg_names[] = { }; static const char * const 
hdmi_phy_8960_clk_names[] = { - "slave_iface_clk", + "slave_iface", }; const struct hdmi_phy_cfg msm_hdmi_phy_8960_cfg = { diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c index 1fb7645cc721..0df504c61833 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c @@ -758,9 +758,7 @@ static const char * const hdmi_phy_8996_reg_names[] = { }; static const char * const hdmi_phy_8996_clk_names[] = { - "mmagic_iface_clk", - "iface_clk", - "ref_clk", + "iface", "ref", }; const struct hdmi_phy_cfg msm_hdmi_phy_8996_cfg = { diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x74.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x74.c index c4a61e537851..4a8b8468586a 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x74.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x74.c @@ -41,8 +41,7 @@ static const char * const hdmi_phy_8x74_reg_names[] = { }; static const char * const hdmi_phy_8x74_clk_names[] = { - "iface_clk", - "alt_iface_clk" + "iface", "alt_iface" }; const struct hdmi_phy_cfg msm_hdmi_phy_8x74_cfg = { diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c index 47fa2aba1983..14bd3bd3e040 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c @@ -290,6 +290,9 @@ static void mdp4_crtc_atomic_disable(struct drm_crtc *crtc, if (WARN_ON(!mdp4_crtc->enabled)) return; + /* Disable/save vblank irq handling before power is disabled */ + drm_crtc_vblank_off(crtc); + mdp_irq_unregister(&mdp4_kms->base, &mdp4_crtc->err); mdp4_disable(mdp4_kms); @@ -308,6 +311,10 @@ static void mdp4_crtc_atomic_enable(struct drm_crtc *crtc, return; mdp4_enable(mdp4_kms); + + /* Restore vblank irq handling after power is enabled */ + drm_crtc_vblank_on(crtc); + mdp_irq_register(&mdp4_kms->base, &mdp4_crtc->err); crtc_flush(crtc); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c index 60790df91bfa..1abc7f5c345c 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c @@ -224,7 +224,7 @@ int mdp5_cmd_encoder_set_split_display(struct drm_encoder *encoder, mdp5_write(mdp5_kms, REG_MDP5_SPLIT_DPL_LOWER, MDP5_SPLIT_DPL_LOWER_SMART_PANEL); mdp5_write(mdp5_kms, REG_MDP5_SPLIT_DPL_EN, 1); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); return 0; } diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index 440977677001..e414850dbbda 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -55,18 +55,23 @@ struct mdp5_crtc { struct completion pp_completion; + bool lm_cursor_enabled; + struct { /* protect REG_MDP5_LM_CURSOR* registers and cursor scanout_bo*/ spinlock_t lock; /* current cursor being scanned out: */ struct drm_gem_object *scanout_bo; + uint64_t iova; uint32_t width, height; uint32_t x, y; } cursor; }; #define to_mdp5_crtc(x) container_of(x, struct mdp5_crtc, base) +static void mdp5_crtc_restore_cursor(struct drm_crtc *crtc); + static struct mdp5_kms *get_kms(struct drm_crtc *crtc) { struct msm_drm_private *priv = crtc->dev->dev_private; @@ -114,6 +119,8 @@ static u32 crtc_flush_all(struct drm_crtc *crtc) return 0; drm_atomic_crtc_for_each_plane(plane, crtc) { + if (!plane->state->visible) + continue; flush_mask |= mdp5_plane_get_flush(plane); } @@ -242,6 +249,9 @@ static void blend_setup(struct drm_crtc *crtc) 
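Stepping back for a moment: every clock lookup converted in the DSI, eDP, and HDMI hunks above now passes a bare name to msm_clk_get() instead of calling devm_clk_get() with a "_clk"-suffixed name. A plausible sketch of that helper, assuming it falls back to the legacy suffixed binding so existing device trees keep working (the real helper is added elsewhere in this series and may differ):

    #include <linux/clk.h>
    #include <linux/platform_device.h>

    struct clk *msm_clk_get(struct platform_device *pdev, const char *name)
    {
            struct clk *clk;
            char name2[32];

            clk = devm_clk_get(&pdev->dev, name);
            if (!IS_ERR(clk) || PTR_ERR(clk) == -EPROBE_DEFER)
                    return clk;

            /* Fall back to the legacy "<name>_clk" DT binding: */
            snprintf(name2, sizeof(name2), "%s_clk", name);

            clk = devm_clk_get(&pdev->dev, name2);
            if (!IS_ERR(clk))
                    dev_warn(&pdev->dev,
                             "Using legacy clk name binding; update DT to use \"%s\"\n",
                             name);

            return clk;
    }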
drm_atomic_crtc_for_each_plane(plane, crtc) { enum mdp5_pipe right_pipe; + if (!plane->state->visible) + continue; + pstate = to_mdp5_plane_state(plane->state); pstates[pstate->stage] = pstate; stage[pstate->stage][PIPE_LEFT] = mdp5_plane_pipe(plane); @@ -422,11 +432,14 @@ static void mdp5_crtc_atomic_disable(struct drm_crtc *crtc, if (WARN_ON(!mdp5_crtc->enabled)) return; + /* Disable/save vblank irq handling before power is disabled */ + drm_crtc_vblank_off(crtc); + if (mdp5_cstate->cmd_mode) mdp_irq_unregister(&mdp5_kms->base, &mdp5_crtc->pp_done); mdp_irq_unregister(&mdp5_kms->base, &mdp5_crtc->err); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); mdp5_crtc->enabled = false; } @@ -446,6 +459,29 @@ static void mdp5_crtc_atomic_enable(struct drm_crtc *crtc, pm_runtime_get_sync(dev); + if (mdp5_crtc->lm_cursor_enabled) { + /* + * Restore LM cursor state, as it might have been lost + * with suspend: + */ + if (mdp5_crtc->cursor.iova) { + unsigned long flags; + + spin_lock_irqsave(&mdp5_crtc->cursor.lock, flags); + mdp5_crtc_restore_cursor(crtc); + spin_unlock_irqrestore(&mdp5_crtc->cursor.lock, flags); + + mdp5_ctl_set_cursor(mdp5_cstate->ctl, + &mdp5_cstate->pipeline, 0, true); + } else { + mdp5_ctl_set_cursor(mdp5_cstate->ctl, + &mdp5_cstate->pipeline, 0, false); + } + } + + /* Restore vblank irq handling after power is enabled */ + drm_crtc_vblank_on(crtc); + mdp5_crtc_mode_set_nofb(crtc); mdp_irq_register(&mdp5_kms->base, &mdp5_crtc->err); @@ -580,6 +616,9 @@ static int mdp5_crtc_atomic_check(struct drm_crtc *crtc, DBG("%s: check", crtc->name); drm_atomic_crtc_state_for_each_plane_state(plane, pstate, state) { + if (!pstate->visible) + continue; + pstates[cnt].plane = plane; pstates[cnt].state = to_mdp5_plane_state(pstate); @@ -723,6 +762,50 @@ static void get_roi(struct drm_crtc *crtc, uint32_t *roi_w, uint32_t *roi_h) mdp5_crtc->cursor.y); } +static void mdp5_crtc_restore_cursor(struct drm_crtc *crtc) +{ + struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state); + struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc); + struct mdp5_kms *mdp5_kms = get_kms(crtc); + const enum mdp5_cursor_alpha cur_alpha = CURSOR_ALPHA_PER_PIXEL; + uint32_t blendcfg, stride; + uint32_t x, y, width, height; + uint32_t roi_w, roi_h; + int lm; + + assert_spin_locked(&mdp5_crtc->cursor.lock); + + lm = mdp5_cstate->pipeline.mixer->lm; + + x = mdp5_crtc->cursor.x; + y = mdp5_crtc->cursor.y; + width = mdp5_crtc->cursor.width; + height = mdp5_crtc->cursor.height; + + stride = width * drm_format_plane_cpp(DRM_FORMAT_ARGB8888, 0); + + get_roi(crtc, &roi_w, &roi_h); + + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_STRIDE(lm), stride); + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_FORMAT(lm), + MDP5_LM_CURSOR_FORMAT_FORMAT(CURSOR_FMT_ARGB8888)); + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_IMG_SIZE(lm), + MDP5_LM_CURSOR_IMG_SIZE_SRC_H(height) | + MDP5_LM_CURSOR_IMG_SIZE_SRC_W(width)); + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_SIZE(lm), + MDP5_LM_CURSOR_SIZE_ROI_H(roi_h) | + MDP5_LM_CURSOR_SIZE_ROI_W(roi_w)); + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_START_XY(lm), + MDP5_LM_CURSOR_START_XY_Y_START(y) | + MDP5_LM_CURSOR_START_XY_X_START(x)); + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_BASE_ADDR(lm), + mdp5_crtc->cursor.iova); + + blendcfg = MDP5_LM_CURSOR_BLEND_CONFIG_BLEND_EN; + blendcfg |= MDP5_LM_CURSOR_BLEND_CONFIG_BLEND_ALPHA_SEL(cur_alpha); + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_BLEND_CONFIG(lm), blendcfg); +} + static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file, uint32_t 
handle, uint32_t width, uint32_t height) @@ -735,16 +818,18 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, struct platform_device *pdev = mdp5_kms->pdev; struct msm_kms *kms = &mdp5_kms->base.base; struct drm_gem_object *cursor_bo, *old_bo = NULL; - uint32_t blendcfg, stride; - uint64_t cursor_addr; struct mdp5_ctl *ctl; - int ret, lm; - enum mdp5_cursor_alpha cur_alpha = CURSOR_ALPHA_PER_PIXEL; + int ret; uint32_t flush_mask = mdp_ctl_flush_mask_cursor(0); - uint32_t roi_w, roi_h; bool cursor_enable = true; unsigned long flags; + if (!mdp5_crtc->lm_cursor_enabled) { + dev_warn(dev->dev, + "cursor_set is deprecated with cursor planes\n"); + return -EINVAL; + } + if ((width > CURSOR_WIDTH) || (height > CURSOR_HEIGHT)) { dev_err(dev->dev, "bad cursor size: %dx%d\n", width, height); return -EINVAL; @@ -761,6 +846,7 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, if (!handle) { DBG("Cursor off"); cursor_enable = false; + mdp5_crtc->cursor.iova = 0; pm_runtime_get_sync(&pdev->dev); goto set_cursor; } @@ -769,13 +855,11 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, if (!cursor_bo) return -ENOENT; - ret = msm_gem_get_iova(cursor_bo, kms->aspace, &cursor_addr); + ret = msm_gem_get_iova(cursor_bo, kms->aspace, + &mdp5_crtc->cursor.iova); if (ret) return -EINVAL; - lm = mdp5_cstate->pipeline.mixer->lm; - stride = width * drm_format_plane_cpp(DRM_FORMAT_ARGB8888, 0); - pm_runtime_get_sync(&pdev->dev); spin_lock_irqsave(&mdp5_crtc->cursor.lock, flags); @@ -785,22 +869,7 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, mdp5_crtc->cursor.width = width; mdp5_crtc->cursor.height = height; - get_roi(crtc, &roi_w, &roi_h); - - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_STRIDE(lm), stride); - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_FORMAT(lm), - MDP5_LM_CURSOR_FORMAT_FORMAT(CURSOR_FMT_ARGB8888)); - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_IMG_SIZE(lm), - MDP5_LM_CURSOR_IMG_SIZE_SRC_H(height) | - MDP5_LM_CURSOR_IMG_SIZE_SRC_W(width)); - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_SIZE(lm), - MDP5_LM_CURSOR_SIZE_ROI_H(roi_h) | - MDP5_LM_CURSOR_SIZE_ROI_W(roi_w)); - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_BASE_ADDR(lm), cursor_addr); - - blendcfg = MDP5_LM_CURSOR_BLEND_CONFIG_BLEND_EN; - blendcfg |= MDP5_LM_CURSOR_BLEND_CONFIG_BLEND_ALPHA_SEL(cur_alpha); - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_BLEND_CONFIG(lm), blendcfg); + mdp5_crtc_restore_cursor(crtc); spin_unlock_irqrestore(&mdp5_crtc->cursor.lock, flags); @@ -815,7 +884,7 @@ set_cursor: crtc_flush(crtc, flush_mask); end: - pm_runtime_put_autosuspend(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); if (old_bo) { drm_flip_work_queue(&mdp5_crtc->unref_cursor_work, old_bo); /* enable vblank to complete cursor work: */ @@ -829,12 +898,18 @@ static int mdp5_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) struct mdp5_kms *mdp5_kms = get_kms(crtc); struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc); struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state); - uint32_t lm = mdp5_cstate->pipeline.mixer->lm; uint32_t flush_mask = mdp_ctl_flush_mask_cursor(0); + struct drm_device *dev = crtc->dev; uint32_t roi_w; uint32_t roi_h; unsigned long flags; + if (!mdp5_crtc->lm_cursor_enabled) { + dev_warn(dev->dev, + "cursor_move is deprecated with cursor planes\n"); + return -EINVAL; + } + /* don't support LM cursors when we have source split enabled */ if (mdp5_cstate->pipeline.r_mixer) return -EINVAL; @@ -851,17 +926,12 @@ static int mdp5_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) 
pm_runtime_get_sync(&mdp5_kms->pdev->dev); spin_lock_irqsave(&mdp5_crtc->cursor.lock, flags); - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_SIZE(lm), - MDP5_LM_CURSOR_SIZE_ROI_H(roi_h) | - MDP5_LM_CURSOR_SIZE_ROI_W(roi_w)); - mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_START_XY(lm), - MDP5_LM_CURSOR_START_XY_Y_START(y) | - MDP5_LM_CURSOR_START_XY_X_START(x)); + mdp5_crtc_restore_cursor(crtc); spin_unlock_irqrestore(&mdp5_crtc->cursor.lock, flags); crtc_flush(crtc, flush_mask); - pm_runtime_put_autosuspend(&mdp5_kms->pdev->dev); + pm_runtime_put_sync(&mdp5_kms->pdev->dev); return 0; } @@ -941,16 +1011,6 @@ static const struct drm_crtc_funcs mdp5_crtc_funcs = { .atomic_print_state = mdp5_crtc_atomic_print_state, }; -static const struct drm_crtc_funcs mdp5_crtc_no_lm_cursor_funcs = { - .set_config = drm_atomic_helper_set_config, - .destroy = mdp5_crtc_destroy, - .page_flip = drm_atomic_helper_page_flip, - .reset = mdp5_crtc_reset, - .atomic_duplicate_state = mdp5_crtc_duplicate_state, - .atomic_destroy_state = mdp5_crtc_destroy_state, - .atomic_print_state = mdp5_crtc_atomic_print_state, -}; - static const struct drm_crtc_helper_funcs mdp5_crtc_helper_funcs = { .mode_set_nofb = mdp5_crtc_mode_set_nofb, .atomic_check = mdp5_crtc_atomic_check, @@ -1119,12 +1179,10 @@ struct drm_crtc *mdp5_crtc_init(struct drm_device *dev, mdp5_crtc->err.irq = mdp5_crtc_err_irq; mdp5_crtc->pp_done.irq = mdp5_crtc_pp_done_irq; - if (cursor_plane) - drm_crtc_init_with_planes(dev, crtc, plane, cursor_plane, - &mdp5_crtc_no_lm_cursor_funcs, NULL); - else - drm_crtc_init_with_planes(dev, crtc, plane, NULL, - &mdp5_crtc_funcs, NULL); + mdp5_crtc->lm_cursor_enabled = cursor_plane ? false : true; + + drm_crtc_init_with_planes(dev, crtc, plane, cursor_plane, + &mdp5_crtc_funcs, NULL); drm_flip_work_init(&mdp5_crtc->unref_cursor_work, "unref cursor", unref_cursor_worker); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c index 5b851380d3f2..36ad3cbe5f79 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c @@ -384,7 +384,7 @@ int mdp5_vid_encoder_set_split_display(struct drm_encoder *encoder, mdp5_ctl_pair(mdp5_encoder->ctl, mdp5_slave_enc->ctl, true); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); return 0; } diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c index bb5deb00c899..280e368bc9bb 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c @@ -54,7 +54,7 @@ void mdp5_irq_preinstall(struct msm_kms *kms) pm_runtime_get_sync(dev); mdp5_write(mdp5_kms, REG_MDP5_INTR_CLEAR, 0xffffffff); mdp5_write(mdp5_kms, REG_MDP5_INTR_EN, 0x00000000); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); } int mdp5_irq_postinstall(struct msm_kms *kms) @@ -72,7 +72,7 @@ int mdp5_irq_postinstall(struct msm_kms *kms) pm_runtime_get_sync(dev); mdp_irq_register(mdp_kms, error_handler); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); return 0; } @@ -84,7 +84,7 @@ void mdp5_irq_uninstall(struct msm_kms *kms) pm_runtime_get_sync(dev); mdp5_write(mdp5_kms, REG_MDP5_INTR_EN, 0x00000000); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); } irqreturn_t mdp5_irq(struct msm_kms *kms) @@ -119,7 +119,7 @@ int mdp5_enable_vblank(struct msm_kms *kms, struct drm_crtc *crtc) pm_runtime_get_sync(dev); mdp_update_vblank_mask(to_mdp_kms(kms), mdp5_crtc_vblank(crtc), true); - pm_runtime_put_autosuspend(dev); + 
pm_runtime_put_sync(dev); return 0; } @@ -132,5 +132,5 @@ void mdp5_disable_vblank(struct msm_kms *kms, struct drm_crtc *crtc) pm_runtime_get_sync(dev); mdp_update_vblank_mask(to_mdp_kms(kms), mdp5_crtc_vblank(crtc), false); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); } diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c index f7c0698fec40..3e9bba4d6624 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c @@ -125,7 +125,7 @@ static void mdp5_complete_commit(struct msm_kms *kms, struct drm_atomic_state *s if (mdp5_kms->smp) mdp5_smp_complete_commit(mdp5_kms->smp, &mdp5_kms->state->smp); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); } static void mdp5_wait_for_crtc_commit_done(struct msm_kms *kms, @@ -496,12 +496,12 @@ static void read_mdp_hw_revision(struct mdp5_kms *mdp5_kms, pm_runtime_get_sync(dev); version = mdp5_read(mdp5_kms, REG_MDP5_HW_VERSION); - pm_runtime_put_autosuspend(dev); + pm_runtime_put_sync(dev); *major = FIELD(version, MDP5_HW_VERSION_MAJOR); *minor = FIELD(version, MDP5_HW_VERSION_MINOR); - DBG("MDP5 version v%d.%d", *major, *minor); + dev_info(dev, "MDP5 version v%d.%d\n", *major, *minor); } static int get_clk(struct platform_device *pdev, struct clk **clkp, @@ -599,7 +599,7 @@ static u32 mdp5_get_vblank_counter(struct drm_device *dev, unsigned int pipe) struct drm_crtc *crtc; struct drm_encoder *encoder; - if (pipe < 0 || pipe >= priv->num_crtcs) + if (pipe >= priv->num_crtcs) return 0; crtc = priv->crtcs[pipe]; @@ -683,7 +683,7 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) aspace = NULL; } - pm_runtime_put_autosuspend(&pdev->dev); + pm_runtime_put_sync(&pdev->dev); ret = modeset_init(mdp5_kms); if (ret) { diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.c index 2bfac3712685..ff52c49095f9 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.c @@ -17,19 +17,20 @@ #include "mdp5_kms.h" -struct mdp5_hw_pipe *mdp5_pipe_assign(struct drm_atomic_state *s, - struct drm_plane *plane, uint32_t caps, uint32_t blkcfg) +int mdp5_pipe_assign(struct drm_atomic_state *s, struct drm_plane *plane, + uint32_t caps, uint32_t blkcfg, + struct mdp5_hw_pipe **hwpipe, + struct mdp5_hw_pipe **r_hwpipe) { struct msm_drm_private *priv = s->dev->dev_private; struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(priv->kms)); struct mdp5_state *state; struct mdp5_hw_pipe_state *old_state, *new_state; - struct mdp5_hw_pipe *hwpipe = NULL; - int i; + int i, j; state = mdp5_get_state(s); if (IS_ERR(state)) - return ERR_CAST(state); + return PTR_ERR(state); /* grab old_state after mdp5_get_state(), since now we hold lock: */ old_state = &mdp5_kms->state->hwpipe; @@ -64,31 +65,67 @@ struct mdp5_hw_pipe *mdp5_pipe_assign(struct drm_atomic_state *s, /* possible candidate, take the one with the * fewest unneeded caps bits set: */ - if (!hwpipe || (hweight_long(cur->caps & ~caps) < - hweight_long(hwpipe->caps & ~caps))) - hwpipe = cur; + if (!(*hwpipe) || (hweight_long(cur->caps & ~caps) < + hweight_long((*hwpipe)->caps & ~caps))) { + bool r_found = false; + + if (r_hwpipe) { + for (j = i + 1; j < mdp5_kms->num_hwpipes; + j++) { + struct mdp5_hw_pipe *r_cur = + mdp5_kms->hwpipes[j]; + + /* reject different types of hwpipes */ + if (r_cur->caps != cur->caps) + continue; + + /* respect priority, e.g. 
VIG0 > VIG1 */ + if (cur->pipe > r_cur->pipe) + continue; + + *r_hwpipe = r_cur; + r_found = true; + break; + } + } + + if (!r_hwpipe || r_found) + *hwpipe = cur; + } } - if (!hwpipe) - return ERR_PTR(-ENOMEM); + if (!(*hwpipe)) + return -ENOMEM; + + if (r_hwpipe && !(*r_hwpipe)) + return -ENOMEM; if (mdp5_kms->smp) { int ret; - DBG("%s: alloc SMP blocks", hwpipe->name); + /* We don't support SMP and 2 hwpipes/plane together */ + WARN_ON(r_hwpipe); + + DBG("%s: alloc SMP blocks", (*hwpipe)->name); ret = mdp5_smp_assign(mdp5_kms->smp, &state->smp, - hwpipe->pipe, blkcfg); + (*hwpipe)->pipe, blkcfg); if (ret) - return ERR_PTR(-ENOMEM); + return -ENOMEM; - hwpipe->blkcfg = blkcfg; + (*hwpipe)->blkcfg = blkcfg; } DBG("%s: assign to plane %s for caps %x", - hwpipe->name, plane->name, caps); - new_state->hwpipe_to_plane[hwpipe->idx] = plane; + (*hwpipe)->name, plane->name, caps); + new_state->hwpipe_to_plane[(*hwpipe)->idx] = plane; - return hwpipe; + if (r_hwpipe) { + DBG("%s: assign to right of plane %s for caps %x", + (*r_hwpipe)->name, plane->name, caps); + new_state->hwpipe_to_plane[(*r_hwpipe)->idx] = plane; + } + + return 0; } void mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h index 924c3e6f9517..bb2b0ac7aa2b 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h @@ -44,9 +44,10 @@ struct mdp5_hw_pipe_state { struct drm_plane *hwpipe_to_plane[SSPP_MAX]; }; -struct mdp5_hw_pipe *__must_check -mdp5_pipe_assign(struct drm_atomic_state *s, struct drm_plane *plane, - uint32_t caps, uint32_t blkcfg); +int mdp5_pipe_assign(struct drm_atomic_state *s, struct drm_plane *plane, + uint32_t caps, uint32_t blkcfg, + struct mdp5_hw_pipe **hwpipe, + struct mdp5_hw_pipe **r_hwpipe); void mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe); struct mdp5_hw_pipe *mdp5_pipe_init(enum mdp5_pipe pipe, diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index 4b22ac3413a1..be50445f9901 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -31,15 +31,6 @@ static int mdp5_plane_mode_set(struct drm_plane *plane, struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_rect *src, struct drm_rect *dest); -static int mdp5_update_cursor_plane_legacy(struct drm_plane *plane, - struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int crtc_x, int crtc_y, - unsigned int crtc_w, unsigned int crtc_h, - uint32_t src_x, uint32_t src_y, - uint32_t src_w, uint32_t src_h, - struct drm_modeset_acquire_ctx *ctx); - static struct mdp5_kms *get_kms(struct drm_plane *plane) { struct msm_drm_private *priv = plane->dev->dev_private; @@ -254,18 +245,6 @@ static const struct drm_plane_funcs mdp5_plane_funcs = { .atomic_print_state = mdp5_plane_atomic_print_state, }; -static const struct drm_plane_funcs mdp5_cursor_plane_funcs = { - .update_plane = mdp5_update_cursor_plane_legacy, - .disable_plane = drm_atomic_helper_disable_plane, - .destroy = mdp5_plane_destroy, - .atomic_set_property = mdp5_plane_atomic_set_property, - .atomic_get_property = mdp5_plane_atomic_get_property, - .reset = mdp5_plane_reset, - .atomic_duplicate_state = mdp5_plane_duplicate_state, - .atomic_destroy_state = mdp5_plane_destroy_state, - .atomic_print_state = mdp5_plane_atomic_print_state, -}; - static int mdp5_plane_prepare_fb(struct drm_plane *plane, struct 
drm_plane_state *new_state) { @@ -414,31 +393,30 @@ static int mdp5_plane_atomic_check_with_state(struct drm_crtc_state *crtc_state, struct mdp5_hw_pipe *old_hwpipe = mdp5_state->hwpipe; struct mdp5_hw_pipe *old_right_hwpipe = mdp5_state->r_hwpipe; - - mdp5_state->hwpipe = mdp5_pipe_assign(state->state, - plane, caps, blkcfg); - if (IS_ERR(mdp5_state->hwpipe)) { - DBG("%s: failed to assign hwpipe!", plane->name); - return PTR_ERR(mdp5_state->hwpipe); + struct mdp5_hw_pipe *new_hwpipe = NULL; + struct mdp5_hw_pipe *new_right_hwpipe = NULL; + + ret = mdp5_pipe_assign(state->state, plane, caps, + blkcfg, &new_hwpipe, + need_right_hwpipe ? + &new_right_hwpipe : NULL); + if (ret) { + DBG("%s: failed to assign hwpipe(s)!", + plane->name); + return ret; } - if (need_right_hwpipe) { - mdp5_state->r_hwpipe = - mdp5_pipe_assign(state->state, plane, - caps, blkcfg); - if (IS_ERR(mdp5_state->r_hwpipe)) { - DBG("%s: failed to assign right hwpipe", - plane->name); - return PTR_ERR(mdp5_state->r_hwpipe); - } - } else { + mdp5_state->hwpipe = new_hwpipe; + if (need_right_hwpipe) + mdp5_state->r_hwpipe = new_right_hwpipe; + else /* * set it to NULL so that the driver knows we * don't have a right hwpipe when committing a * new state */ mdp5_state->r_hwpipe = NULL; - } + mdp5_pipe_release(state->state, old_hwpipe); mdp5_pipe_release(state->state, old_right_hwpipe); @@ -487,11 +465,98 @@ static void mdp5_plane_atomic_update(struct drm_plane *plane, } } +static int mdp5_plane_atomic_async_check(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct mdp5_plane_state *mdp5_state = to_mdp5_plane_state(state); + struct drm_crtc_state *crtc_state; + struct drm_rect clip; + int min_scale, max_scale; + int ret; + + crtc_state = drm_atomic_get_existing_crtc_state(state->state, + state->crtc); + if (WARN_ON(!crtc_state)) + return -EINVAL; + + if (!crtc_state->active) + return -EINVAL; + + mdp5_state = to_mdp5_plane_state(state); + + /* don't use fast path if we don't have a hwpipe allocated yet */ + if (!mdp5_state->hwpipe) + return -EINVAL; + + /* only allow changing of position (crtc x/y or src x/y) in fast path */ + if (plane->state->crtc != state->crtc || + plane->state->src_w != state->src_w || + plane->state->src_h != state->src_h || + plane->state->crtc_w != state->crtc_w || + plane->state->crtc_h != state->crtc_h || + !plane->state->fb || + plane->state->fb != state->fb) + return -EINVAL; + + clip.x1 = 0; + clip.y1 = 0; + clip.x2 = crtc_state->adjusted_mode.hdisplay; + clip.y2 = crtc_state->adjusted_mode.vdisplay; + min_scale = FRAC_16_16(1, 8); + max_scale = FRAC_16_16(8, 1); + + ret = drm_plane_helper_check_state(state, &clip, min_scale, + max_scale, true, true); + if (ret) + return ret; + + /* + * if the visibility of the plane changes (i.e., if the cursor is + * clipped out completely), we can't take the async path because + * we need to stage/unstage the plane from the Layer Mixer(s). We + * also assign/unassign the hwpipe(s) tied to the plane. We avoid + * taking the fast path for both these reasons. 
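In practice, the only updates that survive all of these checks are pure position changes, which is exactly what legacy cursor updates generate. A hypothetical userspace view (libdrm calls, for illustration only; fd, crtc_id and bo_handle are assumed to be set up already):

    #include <stdint.h>
    #include <xf86drmMode.h>

    void cursor_example(int fd, uint32_t crtc_id, uint32_t bo_handle,
                    int x, int y)
    {
            /* Position-only change: passes atomic_async_check() and
             * takes the new async path. */
            drmModeMoveCursor(fd, crtc_id, x, y);

            /* New cursor image: the fb changes, atomic_async_check()
             * returns -EINVAL, and this falls back to a regular
             * synchronous atomic commit. */
            drmModeSetCursor(fd, crtc_id, bo_handle, 64, 64);
    }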
+ */ + if (state->visible != plane->state->visible) + return -EINVAL; + + return 0; +} + +static void mdp5_plane_atomic_async_update(struct drm_plane *plane, + struct drm_plane_state *new_state) +{ + plane->state->src_x = new_state->src_x; + plane->state->src_y = new_state->src_y; + plane->state->crtc_x = new_state->crtc_x; + plane->state->crtc_y = new_state->crtc_y; + + if (plane_enabled(new_state)) { + struct mdp5_ctl *ctl; + struct mdp5_pipeline *pipeline = + mdp5_crtc_get_pipeline(plane->crtc); + int ret; + + ret = mdp5_plane_mode_set(plane, new_state->crtc, new_state->fb, + &new_state->src, &new_state->dst); + WARN_ON(ret < 0); + + ctl = mdp5_crtc_get_ctl(new_state->crtc); + + mdp5_ctl_commit(ctl, pipeline, mdp5_plane_get_flush(plane)); + } + + *to_mdp5_plane_state(plane->state) = + *to_mdp5_plane_state(new_state); +} + static const struct drm_plane_helper_funcs mdp5_plane_helper_funcs = { .prepare_fb = mdp5_plane_prepare_fb, .cleanup_fb = mdp5_plane_cleanup_fb, .atomic_check = mdp5_plane_atomic_check, .atomic_update = mdp5_plane_atomic_update, + .atomic_async_check = mdp5_plane_atomic_async_check, + .atomic_async_update = mdp5_plane_atomic_async_update, }; static void set_scanout_locked(struct mdp5_kms *mdp5_kms, @@ -996,84 +1061,6 @@ static int mdp5_plane_mode_set(struct drm_plane *plane, return ret; } -static int mdp5_update_cursor_plane_legacy(struct drm_plane *plane, - struct drm_crtc *crtc, struct drm_framebuffer *fb, - int crtc_x, int crtc_y, - unsigned int crtc_w, unsigned int crtc_h, - uint32_t src_x, uint32_t src_y, - uint32_t src_w, uint32_t src_h, - struct drm_modeset_acquire_ctx *ctx) -{ - struct drm_plane_state *plane_state, *new_plane_state; - struct mdp5_plane_state *mdp5_pstate; - struct drm_crtc_state *crtc_state = crtc->state; - int ret; - - if (!crtc_state->active || drm_atomic_crtc_needs_modeset(crtc_state)) - goto slow; - - plane_state = plane->state; - mdp5_pstate = to_mdp5_plane_state(plane_state); - - /* don't use fast path if we don't have a hwpipe allocated yet */ - if (!mdp5_pstate->hwpipe) - goto slow; - - /* only allow changing of position(crtc x/y or src x/y) in fast path */ - if (plane_state->crtc != crtc || - plane_state->src_w != src_w || - plane_state->src_h != src_h || - plane_state->crtc_w != crtc_w || - plane_state->crtc_h != crtc_h || - !plane_state->fb || - plane_state->fb != fb) - goto slow; - - new_plane_state = mdp5_plane_duplicate_state(plane); - if (!new_plane_state) - return -ENOMEM; - - new_plane_state->src_x = src_x; - new_plane_state->src_y = src_y; - new_plane_state->src_w = src_w; - new_plane_state->src_h = src_h; - new_plane_state->crtc_x = crtc_x; - new_plane_state->crtc_y = crtc_y; - new_plane_state->crtc_w = crtc_w; - new_plane_state->crtc_h = crtc_h; - - ret = mdp5_plane_atomic_check_with_state(crtc_state, new_plane_state); - if (ret) - goto slow_free; - - if (new_plane_state->visible) { - struct mdp5_ctl *ctl; - struct mdp5_pipeline *pipeline = mdp5_crtc_get_pipeline(crtc); - - ret = mdp5_plane_mode_set(plane, crtc, fb, - &new_plane_state->src, - &new_plane_state->dst); - WARN_ON(ret < 0); - - ctl = mdp5_crtc_get_ctl(crtc); - - mdp5_ctl_commit(ctl, pipeline, mdp5_plane_get_flush(plane)); - } - - *to_mdp5_plane_state(plane_state) = - *to_mdp5_plane_state(new_plane_state); - - mdp5_plane_destroy_state(plane, new_plane_state); - - return 0; -slow_free: - mdp5_plane_destroy_state(plane, new_plane_state); -slow: - return drm_atomic_helper_update_plane(plane, crtc, fb, - crtc_x, crtc_y, crtc_w, crtc_h, - src_x, src_y, src_w, src_h, 
ctx); -} - /* * Use this func and the one below only after the atomic state has been * successfully swapped @@ -1133,16 +1120,9 @@ struct drm_plane *mdp5_plane_init(struct drm_device *dev, mdp5_plane->nformats = mdp_get_formats(mdp5_plane->formats, ARRAY_SIZE(mdp5_plane->formats), false); - if (type == DRM_PLANE_TYPE_CURSOR) - ret = drm_universal_plane_init(dev, plane, 0xff, - &mdp5_cursor_plane_funcs, - mdp5_plane->formats, mdp5_plane->nformats, - NULL, type, NULL); - else - ret = drm_universal_plane_init(dev, plane, 0xff, - &mdp5_plane_funcs, - mdp5_plane->formats, mdp5_plane->nformats, - NULL, type, NULL); + ret = drm_universal_plane_init(dev, plane, 0xff, &mdp5_plane_funcs, + mdp5_plane->formats, mdp5_plane->nformats, + NULL, type, NULL); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c index 025d454163b0..bf5f8c39f34d 100644 --- a/drivers/gpu/drm/msm/msm_atomic.c +++ b/drivers/gpu/drm/msm/msm_atomic.c @@ -146,35 +146,6 @@ static void commit_worker(struct work_struct *work) complete_commit(container_of(work, struct msm_commit, work), true); } -/* - * this func is identical to the drm_atomic_helper_check, but we keep this - * because we might eventually need to have a more finegrained check - * sequence without using the atomic helpers. - * - * In the past, we first called drm_atomic_helper_check_planes, and then - * drm_atomic_helper_check_modeset. We needed this because the MDP5 plane's - * ->atomic_check could update ->mode_changed for pixel format changes. - * This, however isn't needed now because if there is a pixel format change, - * we just assign a new hwpipe for it with a new SMP allocation. We might - * eventually hit a condition where we would need to do a full modeset if - * we run out of planes. There, we'd probably need to set mode_changed. - */ -int msm_atomic_check(struct drm_device *dev, - struct drm_atomic_state *state) -{ - int ret; - - ret = drm_atomic_helper_check_modeset(dev, state); - if (ret) - return ret; - - ret = drm_atomic_helper_check_planes(dev, state); - if (ret) - return ret; - - return ret; -} - /** * drm_atomic_helper_commit - commit validated state object * @dev: DRM device @@ -202,6 +173,18 @@ int msm_atomic_commit(struct drm_device *dev, if (ret) return ret; + /* + * Note that plane->atomic_async_check() should fail if we need + * to re-assign hwpipe or anything that touches global atomic + * state, so we'll never go down the async update path in those + * cases. + */ + if (state->async_update) { + drm_atomic_helper_async_commit(dev, state); + drm_atomic_helper_cleanup_planes(dev, state); + return 0; + } + c = commit_init(state); if (!c) { ret = -ENOMEM; diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 606df7bea97b..0a3ea3034e39 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -29,9 +29,12 @@ * - 1.0.0 - initial interface * - 1.1.0 - adds madvise, and support for submits with > 4 cmd buffers * - 1.2.0 - adds explicit fence support for submit ioctl + * - 1.3.0 - adds GMEM_BASE + NR_RINGS params, SUBMITQUEUE_NEW + + * SUBMITQUEUE_CLOSE ioctls, and MSM_INFO_IOVA flag for + * MSM_GEM_INFO ioctl. 
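The 1.3.0 additions listed above can be exercised from userspace roughly as follows (hypothetical libdrm-style snippet; the struct and ioctl names follow the new uapi in this series, everything else is illustrative):

    #include <xf86drm.h>
    #include "msm_drm.h"

    int submit_on_prio_queue(int fd, struct drm_msm_gem_submit *submit)
    {
            struct drm_msm_submitqueue req = { .flags = 0, .prio = 1 };
            int ret;

            ret = drmCommandWriteRead(fd, DRM_MSM_SUBMITQUEUE_NEW,
                            &req, sizeof(req));
            if (ret)
                    return ret;

            /* queueid 0 would address the default queue instead: */
            submit->queueid = req.id;
            ret = drmCommandWriteRead(fd, DRM_MSM_GEM_SUBMIT,
                            submit, sizeof(*submit));

            drmCommandWrite(fd, DRM_MSM_SUBMITQUEUE_CLOSE,
                            &req.id, sizeof(req.id));
            return ret;
    }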
*/ #define MSM_VERSION_MAJOR 1 -#define MSM_VERSION_MINOR 2 +#define MSM_VERSION_MINOR 3 #define MSM_VERSION_PATCHLEVEL 0 static void msm_fb_output_poll_changed(struct drm_device *dev) @@ -44,7 +47,7 @@ static void msm_fb_output_poll_changed(struct drm_device *dev) static const struct drm_mode_config_funcs mode_config_funcs = { .fb_create = msm_framebuffer_create, .output_poll_changed = msm_fb_output_poll_changed, - .atomic_check = msm_atomic_check, + .atomic_check = drm_atomic_helper_check, .atomic_commit = msm_atomic_commit, .atomic_state_alloc = msm_atomic_state_alloc, .atomic_state_clear = msm_atomic_state_clear, @@ -211,7 +214,6 @@ static int msm_drm_uninit(struct device *dev) struct drm_device *ddev = platform_get_drvdata(pdev); struct msm_drm_private *priv = ddev->dev_private; struct msm_kms *kms = priv->kms; - struct msm_gpu *gpu = priv->gpu; struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl; struct vblank_event *vbl_ev, *tmp; @@ -253,15 +255,6 @@ static int msm_drm_uninit(struct device *dev) if (kms && kms->funcs) kms->funcs->destroy(kms); - if (gpu) { - mutex_lock(&ddev->struct_mutex); - // XXX what do we do here? - //pm_runtime_enable(&pdev->dev); - gpu->funcs->pm_suspend(gpu); - mutex_unlock(&ddev->struct_mutex); - gpu->funcs->destroy(gpu); - } - if (priv->vram.paddr) { unsigned long attrs = DMA_ATTR_NO_KERNEL_MAPPING; drm_mm_takedown(&priv->vram.mm); @@ -514,24 +507,37 @@ static void load_gpu(struct drm_device *dev) mutex_unlock(&init_lock); } -static int msm_open(struct drm_device *dev, struct drm_file *file) +static int context_init(struct drm_device *dev, struct drm_file *file) { struct msm_file_private *ctx; - /* For now, load gpu on open.. to avoid the requirement of having - * firmware in the initrd. - */ - load_gpu(dev); - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; + msm_submitqueue_init(dev, ctx); + file->driver_priv = ctx; return 0; } +static int msm_open(struct drm_device *dev, struct drm_file *file) +{ + /* For now, load gpu on open.. to avoid the requirement of having + * firmware in the initrd. 
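context_init() above hands each file its own submitqueue list via msm_submitqueue_init(). A sketch of what that setup plausibly does, creating a default queue so userspace that never calls SUBMITQUEUE_NEW keeps working; the real version lives in the new msm_submitqueue.c, and the default priority chosen here is an assumption:

    #include "msm_drv.h"
    #include "msm_gpu.h"

    int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx)
    {
            struct msm_drm_private *priv = drm->dev_private;
            int default_prio;

            if (!ctx)
                    return 0;

            rwlock_init(&ctx->queuelock);
            INIT_LIST_HEAD(&ctx->submitqueues);

            /* Assume the lowest-priority ring as a safe default: */
            default_prio = priv->gpu ? priv->gpu->nr_rings - 1 : 0;

            /* The first queue created gets id 0, the implicit default: */
            return msm_submitqueue_create(drm, ctx, default_prio, 0, NULL);
    }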
+ */ + load_gpu(dev); + + return context_init(dev, file); +} + +static void context_close(struct msm_file_private *ctx) +{ + msm_submitqueue_close(ctx); + kfree(ctx); +} + static void msm_postclose(struct drm_device *dev, struct drm_file *file) { struct msm_drm_private *priv = dev->dev_private; @@ -542,7 +548,7 @@ static void msm_postclose(struct drm_device *dev, struct drm_file *file) priv->lastctx = NULL; mutex_unlock(&dev->struct_mutex); - kfree(ctx); + context_close(ctx); } static void msm_lastclose(struct drm_device *dev) @@ -737,16 +743,27 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data, struct msm_drm_private *priv = dev->dev_private; struct drm_msm_wait_fence *args = data; ktime_t timeout = to_ktime(args->timeout); + struct msm_gpu_submitqueue *queue; + struct msm_gpu *gpu = priv->gpu; + int ret; if (args->pad) { DRM_ERROR("invalid pad: %08x\n", args->pad); return -EINVAL; } - if (!priv->gpu) + if (!gpu) return 0; - return msm_wait_fence(priv->gpu->fctx, args->fence, &timeout, true); + queue = msm_submitqueue_get(file->driver_priv, args->queueid); + if (!queue) + return -ENOENT; + + ret = msm_wait_fence(gpu->rb[queue->prio]->fctx, args->fence, &timeout, + true); + + msm_submitqueue_put(queue); + return ret; } static int msm_ioctl_gem_madvise(struct drm_device *dev, void *data, @@ -787,6 +804,28 @@ unlock: return ret; } + +static int msm_ioctl_submitqueue_new(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_msm_submitqueue *args = data; + + if (args->flags & ~MSM_SUBMITQUEUE_FLAGS) + return -EINVAL; + + return msm_submitqueue_create(dev, file->driver_priv, args->prio, + args->flags, &args->id); +} + + +static int msm_ioctl_submitqueue_close(struct drm_device *dev, void *data, + struct drm_file *file) +{ + u32 id = *(u32 *) data; + + return msm_submitqueue_remove(file->driver_priv, id); +} + static const struct drm_ioctl_desc msm_ioctls[] = { DRM_IOCTL_DEF_DRV(MSM_GET_PARAM, msm_ioctl_get_param, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(MSM_GEM_NEW, msm_ioctl_gem_new, DRM_AUTH|DRM_RENDER_ALLOW), @@ -796,6 +835,8 @@ static const struct drm_ioctl_desc msm_ioctls[] = { DRM_IOCTL_DEF_DRV(MSM_GEM_SUBMIT, msm_ioctl_gem_submit, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(MSM_WAIT_FENCE, msm_ioctl_wait_fence, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(MSM_GEM_MADVISE, msm_ioctl_gem_madvise, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_NEW, msm_ioctl_submitqueue_new, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_CLOSE, msm_ioctl_submitqueue_close, DRM_AUTH|DRM_RENDER_ALLOW), }; static const struct vm_operations_struct vm_ops = { diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 5e8109c07560..c646843d8822 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -56,11 +56,9 @@ struct msm_gem_address_space; struct msm_gem_vma; struct msm_file_private { - /* currently we don't do anything useful with this.. but when - * per-context address spaces are supported we'd keep track of - * the context's page-tables here. 
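On the lookup side, the ioctl handlers above resolve a queueid through msm_submitqueue_get(), against the per-file list declared just below (protected by queuelock and reference-counted). A sketch, assuming each msm_gpu_submitqueue carries a list node, an id, and a kref named ref:

    #include "msm_drv.h"
    #include "msm_gpu.h"

    struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx,
                    u32 id)
    {
            struct msm_gpu_submitqueue *entry;

            if (!ctx)
                    return NULL;

            read_lock(&ctx->queuelock);

            list_for_each_entry(entry, &ctx->submitqueues, node) {
                    if (entry->id == id) {
                            kref_get(&entry->ref);
                            read_unlock(&ctx->queuelock);
                            return entry;
                    }
            }

            read_unlock(&ctx->queuelock);
            return NULL;
    }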
- */ - int dummy; + rwlock_t queuelock; + struct list_head submitqueues; + int queueid; }; enum msm_mdp_plane_property { @@ -76,6 +74,8 @@ struct msm_vblank_ctrl { spinlock_t lock; }; +#define MSM_GPU_MAX_RINGS 4 + struct msm_drm_private { struct drm_device *dev; @@ -108,7 +108,8 @@ struct msm_drm_private { struct drm_fb_helper *fbdev; - struct msm_rd_state *rd; + struct msm_rd_state *rd; /* debugfs to dump all submits */ + struct msm_rd_state *hangrd; /* debugfs to dump hanging submits */ struct msm_perf_state *perf; /* list of GEM objects: */ @@ -154,20 +155,12 @@ struct msm_drm_private { struct shrinker shrinker; struct msm_vblank_ctrl vblank_ctrl; - - /* task holding struct_mutex.. currently only used in submit path - * to detect and reject faults from copy_from_user() for submit - * ioctl. - */ - struct task_struct *struct_mutex_task; }; struct msm_format { uint32_t pixel_format; }; -int msm_atomic_check(struct drm_device *dev, - struct drm_atomic_state *state); int msm_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state, bool nonblock); struct drm_atomic_state *msm_atomic_state_alloc(struct drm_device *dev); @@ -219,6 +212,7 @@ struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, int msm_gem_prime_pin(struct drm_gem_object *obj); void msm_gem_prime_unpin(struct drm_gem_object *obj); void *msm_gem_get_vaddr(struct drm_gem_object *obj); +void *msm_gem_get_vaddr_active(struct drm_gem_object *obj); void msm_gem_put_vaddr(struct drm_gem_object *obj); int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv); int msm_gem_sync_object(struct drm_gem_object *obj, @@ -303,7 +297,8 @@ void msm_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m); int msm_debugfs_late_init(struct drm_device *dev); int msm_rd_debugfs_init(struct drm_minor *minor); void msm_rd_debugfs_cleanup(struct msm_drm_private *priv); -void msm_rd_dump_submit(struct msm_gem_submit *submit); +void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit, + const char *fmt, ...); int msm_perf_debugfs_init(struct drm_minor *minor); void msm_perf_debugfs_cleanup(struct msm_drm_private *priv); #else @@ -319,6 +314,18 @@ void __iomem *msm_ioremap(struct platform_device *pdev, const char *name, void msm_writel(u32 data, void __iomem *addr); u32 msm_readl(const void __iomem *addr); +struct msm_gpu_submitqueue; +int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx); +struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx, + u32 id); +int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx, + u32 prio, u32 flags, u32 *id); +int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id); +void msm_submitqueue_close(struct msm_file_private *ctx); + +void msm_submitqueue_destroy(struct kref *kref); + + #define DBG(fmt, ...) DRM_DEBUG_DRIVER(fmt"\n", ##__VA_ARGS__) #define VERB(fmt, ...) 
if (0) DRM_DEBUG_DRIVER(fmt"\n", ##__VA_ARGS__) diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c index a2f89bac9c16..349c12f670eb 100644 --- a/drivers/gpu/drm/msm/msm_fence.c +++ b/drivers/gpu/drm/msm/msm_fence.c @@ -31,7 +31,7 @@ msm_fence_context_alloc(struct drm_device *dev, const char *name) return ERR_PTR(-ENOMEM); fctx->dev = dev; - fctx->name = name; + strncpy(fctx->name, name, sizeof(fctx->name)); fctx->context = dma_fence_context_alloc(1); init_waitqueue_head(&fctx->event); spin_lock_init(&fctx->spinlock); diff --git a/drivers/gpu/drm/msm/msm_fence.h b/drivers/gpu/drm/msm/msm_fence.h index 56061aa1959d..1aa6a4c6530c 100644 --- a/drivers/gpu/drm/msm/msm_fence.h +++ b/drivers/gpu/drm/msm/msm_fence.h @@ -22,7 +22,7 @@ struct msm_fence_context { struct drm_device *dev; - const char *name; + char name[32]; unsigned context; /* last_fence == completed_fence --> no pending work */ uint32_t last_fence; /* last assigned fence */ diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index ea5bb0e1632c..81fe6d6740ce 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -470,14 +470,16 @@ fail: return ret; } -void *msm_gem_get_vaddr(struct drm_gem_object *obj) +static void *get_vaddr(struct drm_gem_object *obj, unsigned madv) { struct msm_gem_object *msm_obj = to_msm_bo(obj); int ret = 0; mutex_lock(&msm_obj->lock); - if (WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED)) { + if (WARN_ON(msm_obj->madv > madv)) { + dev_err(obj->dev->dev, "Invalid madv state: %u vs %u\n", + msm_obj->madv, madv); mutex_unlock(&msm_obj->lock); return ERR_PTR(-EBUSY); } @@ -513,6 +515,22 @@ fail: return ERR_PTR(ret); } +void *msm_gem_get_vaddr(struct drm_gem_object *obj) +{ + return get_vaddr(obj, MSM_MADV_WILLNEED); +} + +/* + * Don't use this! It is for the very special case of dumping + * submits from GPU hangs or faults, where the bo may already + * be MSM_MADV_DONTNEED, but we know the buffer is still on the + * active list. 
+ */ +void *msm_gem_get_vaddr_active(struct drm_gem_object *obj) +{ + return get_vaddr(obj, __MSM_MADV_PURGED); +} + void msm_gem_put_vaddr(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 91c210d2359c..9320e184b48d 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -138,12 +138,15 @@ void msm_gem_vunmap(struct drm_gem_object *obj, enum msm_gem_lock subclass); struct msm_gem_submit { struct drm_device *dev; struct msm_gpu *gpu; - struct list_head node; /* node in gpu submit_list */ + struct list_head node; /* node in ring submit list */ struct list_head bo_list; struct ww_acquire_ctx ticket; + uint32_t seqno; /* Sequence number of the submit on the ring */ struct dma_fence *fence; + struct msm_gpu_submitqueue *queue; struct pid *pid; /* submitting process */ bool valid; /* true if no cmdstream patching needed */ + struct msm_ringbuffer *ring; unsigned int nr_cmds; unsigned int nr_bos; struct { diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 93535cac0676..b8dc8f96caf2 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -31,7 +31,8 @@ #define BO_PINNED 0x2000 static struct msm_gem_submit *submit_create(struct drm_device *dev, - struct msm_gpu *gpu, uint32_t nr_bos, uint32_t nr_cmds) + struct msm_gpu *gpu, struct msm_gpu_submitqueue *queue, + uint32_t nr_bos, uint32_t nr_cmds) { struct msm_gem_submit *submit; uint64_t sz = sizeof(*submit) + ((u64)nr_bos * sizeof(submit->bos[0])) + @@ -49,6 +50,8 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, submit->fence = NULL; submit->pid = get_pid(task_pid(current)); submit->cmd = (void *)&submit->bos[nr_bos]; + submit->queue = queue; + submit->ring = gpu->rb[queue->prio]; /* initially, until copy_from_user() and bo lookup succeeds: */ submit->nr_bos = 0; @@ -66,6 +69,8 @@ void msm_gem_submit_free(struct msm_gem_submit *submit) dma_fence_put(submit->fence); list_del(&submit->node); put_pid(submit->pid); + msm_submitqueue_put(submit->queue); + kfree(submit); } @@ -156,7 +161,8 @@ out: return ret; } -static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i) +static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, + int i, bool backoff) { struct msm_gem_object *msm_obj = submit->bos[i].obj; @@ -166,7 +172,7 @@ static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i) if (submit->bos[i].flags & BO_LOCKED) ww_mutex_unlock(&msm_obj->resv->lock); - if (!(submit->bos[i].flags & BO_VALID)) + if (backoff && !(submit->bos[i].flags & BO_VALID)) submit->bos[i].iova = 0; submit->bos[i].flags &= ~(BO_LOCKED | BO_PINNED); @@ -201,10 +207,10 @@ retry: fail: for (; i >= 0; i--) - submit_unlock_unpin_bo(submit, i); + submit_unlock_unpin_bo(submit, i, true); if (slow_locked > 0) - submit_unlock_unpin_bo(submit, slow_locked); + submit_unlock_unpin_bo(submit, slow_locked, true); if (ret == -EDEADLK) { struct msm_gem_object *msm_obj = submit->bos[contended].obj; @@ -243,7 +249,8 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) if (no_implicit) continue; - ret = msm_gem_sync_object(&msm_obj->base, submit->gpu->fctx, write); + ret = msm_gem_sync_object(&msm_obj->base, submit->ring->fctx, + write); if (ret) break; } @@ -387,7 +394,7 @@ static void submit_cleanup(struct msm_gem_submit *submit) for (i = 0; i < submit->nr_bos; i++) { struct msm_gem_object 
*msm_obj = submit->bos[i].obj; - submit_unlock_unpin_bo(submit, i); + submit_unlock_unpin_bo(submit, i, false); list_del_init(&msm_obj->submit_entry); drm_gem_object_unreference(&msm_obj->base); } @@ -405,6 +412,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct msm_gpu *gpu = priv->gpu; struct dma_fence *in_fence = NULL; struct sync_file *sync_file = NULL; + struct msm_gpu_submitqueue *queue; + struct msm_ringbuffer *ring; int out_fence_fd = -1; unsigned i; int ret; @@ -421,6 +430,12 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (MSM_PIPE_FLAGS(args->flags) & ~MSM_SUBMIT_FLAGS) return -EINVAL; + queue = msm_submitqueue_get(ctx, args->queueid); + if (!queue) + return -ENOENT; + + ring = gpu->rb[queue->prio]; + if (args->flags & MSM_SUBMIT_FENCE_FD_IN) { in_fence = sync_file_get_fence(args->fence_fd); @@ -431,7 +446,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, * Wait if the fence is from a foreign context, or if the fence * array contains any fence from a foreign context. */ - if (!dma_fence_match_context(in_fence, gpu->fctx->context)) { + if (!dma_fence_match_context(in_fence, ring->fctx->context)) { ret = dma_fence_wait(in_fence, true); if (ret) return ret; @@ -449,9 +464,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, goto out_unlock; } } - priv->struct_mutex_task = current; - submit = submit_create(dev, gpu, args->nr_bos, args->nr_cmds); + submit = submit_create(dev, gpu, queue, args->nr_bos, args->nr_cmds); if (!submit) { ret = -ENOMEM; goto out_unlock; @@ -534,7 +548,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, submit->nr_cmds = i; - submit->fence = msm_fence_alloc(gpu->fctx); + submit->fence = msm_fence_alloc(ring->fctx); if (IS_ERR(submit->fence)) { ret = PTR_ERR(submit->fence); submit->fence = NULL; @@ -567,7 +581,6 @@ out: out_unlock: if (ret && (out_fence_fd >= 0)) put_unused_fd(out_fence_fd); - priv->struct_mutex_task = NULL; mutex_unlock(&dev->struct_mutex); return ret; } diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 6a887032c66a..8d4477818ec2 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -20,6 +20,8 @@ #include "msm_mmu.h" #include "msm_fence.h" +#include <linux/string_helpers.h> + /* * Power Management: @@ -221,33 +223,102 @@ int msm_gpu_hw_init(struct msm_gpu *gpu) * Hangcheck detection for locked gpu: */ +static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring, + uint32_t fence) +{ + struct msm_gem_submit *submit; + + list_for_each_entry(submit, &ring->submits, node) { + if (submit->seqno > fence) + break; + + msm_update_fence(submit->ring->fctx, + submit->fence->seqno); + } +} + +static struct msm_gem_submit * +find_submit(struct msm_ringbuffer *ring, uint32_t fence) +{ + struct msm_gem_submit *submit; + + WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex)); + + list_for_each_entry(submit, &ring->submits, node) + if (submit->seqno == fence) + return submit; + + return NULL; +} + static void retire_submits(struct msm_gpu *gpu); static void recover_worker(struct work_struct *work) { struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work); struct drm_device *dev = gpu->dev; + struct msm_drm_private *priv = dev->dev_private; struct msm_gem_submit *submit; - uint32_t fence = gpu->funcs->last_fence(gpu); - - msm_update_fence(gpu->fctx, fence + 1); + struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu); + int i; mutex_lock(&dev->struct_mutex); dev_err(dev->dev, 
"%s: hangcheck recover!\n", gpu->name); - list_for_each_entry(submit, &gpu->submit_list, node) { - if (submit->fence->seqno == (fence + 1)) { - struct task_struct *task; - - rcu_read_lock(); - task = pid_task(submit->pid, PIDTYPE_PID); - if (task) { - dev_err(dev->dev, "%s: offending task: %s\n", - gpu->name, task->comm); - } - rcu_read_unlock(); - break; + + submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1); + if (submit) { + struct task_struct *task; + + rcu_read_lock(); + task = pid_task(submit->pid, PIDTYPE_PID); + if (task) { + char *cmd; + + /* + * So slightly annoying, in other paths like + * mmap'ing gem buffers, mmap_sem is acquired + * before struct_mutex, which means we can't + * hold struct_mutex across the call to + * get_cmdline(). But submits are retired + * from the same in-order workqueue, so we can + * safely drop the lock here without worrying + * about the submit going away. + */ + mutex_unlock(&dev->struct_mutex); + cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL); + mutex_lock(&dev->struct_mutex); + + dev_err(dev->dev, "%s: offending task: %s (%s)\n", + gpu->name, task->comm, cmd); + + msm_rd_dump_submit(priv->hangrd, submit, + "offending task: %s (%s)", task->comm, cmd); + } else { + msm_rd_dump_submit(priv->hangrd, submit, NULL); } + rcu_read_unlock(); + } + + + /* + * Update all the rings with the latest and greatest fence.. this + * needs to happen after msm_rd_dump_submit() to ensure that the + * bo's referenced by the offending submit are still around. + */ + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + uint32_t fence = ring->memptrs->fence; + + /* + * For the current (faulting?) ring/submit advance the fence by + * one more to clear the faulting submit + */ + if (ring == cur_ring) + fence++; + + update_fences(gpu, ring, fence); } if (msm_gpu_active(gpu)) { @@ -258,9 +329,15 @@ static void recover_worker(struct work_struct *work) gpu->funcs->recover(gpu); pm_runtime_put_sync(&gpu->pdev->dev); - /* replay the remaining submits after the one that hung: */ - list_for_each_entry(submit, &gpu->submit_list, node) { - gpu->funcs->submit(gpu, submit, NULL); + /* + * Replay all remaining submits starting with highest priority + * ring + */ + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + list_for_each_entry(submit, &ring->submits, node) + gpu->funcs->submit(gpu, submit, NULL); } } @@ -281,25 +358,27 @@ static void hangcheck_handler(unsigned long data) struct msm_gpu *gpu = (struct msm_gpu *)data; struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; - uint32_t fence = gpu->funcs->last_fence(gpu); + struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); + uint32_t fence = ring->memptrs->fence; - if (fence != gpu->hangcheck_fence) { + if (fence != ring->hangcheck_fence) { /* some progress has been made.. ya! */ - gpu->hangcheck_fence = fence; - } else if (fence < gpu->fctx->last_fence) { + ring->hangcheck_fence = fence; + } else if (fence < ring->seqno) { /* no progress and not done.. hung! 
*/ - gpu->hangcheck_fence = fence; - dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n", - gpu->name); + ring->hangcheck_fence = fence; + dev_err(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n", + gpu->name, ring->id); dev_err(dev->dev, "%s: completed fence: %u\n", gpu->name, fence); dev_err(dev->dev, "%s: submitted fence: %u\n", - gpu->name, gpu->fctx->last_fence); + gpu->name, ring->seqno); + queue_work(priv->wq, &gpu->recover_work); } /* if still more pending work, reset the hangcheck timer: */ - if (gpu->fctx->last_fence > gpu->hangcheck_fence) + if (ring->seqno > ring->hangcheck_fence) hangcheck_timer_reset(gpu); /* workaround for missing irq: */ @@ -428,19 +507,18 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) static void retire_submits(struct msm_gpu *gpu) { struct drm_device *dev = gpu->dev; + struct msm_gem_submit *submit, *tmp; + int i; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - while (!list_empty(&gpu->submit_list)) { - struct msm_gem_submit *submit; - - submit = list_first_entry(&gpu->submit_list, - struct msm_gem_submit, node); + /* Retire the commits starting with highest priority */ + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; - if (dma_fence_is_signaled(submit->fence)) { - retire_submit(gpu, submit); - } else { - break; + list_for_each_entry_safe(submit, tmp, &ring->submits, node) { + if (dma_fence_is_signaled(submit->fence)) + retire_submit(gpu, submit); } } } @@ -449,9 +527,10 @@ static void retire_worker(struct work_struct *work) { struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work); struct drm_device *dev = gpu->dev; - uint32_t fence = gpu->funcs->last_fence(gpu); + int i; - msm_update_fence(gpu->fctx, fence); + for (i = 0; i < gpu->nr_rings; i++) + update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence); mutex_lock(&dev->struct_mutex); retire_submits(gpu); @@ -472,6 +551,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, { struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; + struct msm_ringbuffer *ring = submit->ring; int i; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); @@ -480,9 +560,11 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, msm_gpu_hw_init(gpu); - list_add_tail(&submit->node, &gpu->submit_list); + submit->seqno = ++ring->seqno; + + list_add_tail(&submit->node, &ring->submits); - msm_rd_dump_submit(submit); + msm_rd_dump_submit(priv->rd, submit, NULL); update_sw_cntrs(gpu); @@ -605,7 +687,9 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, const char *name, struct msm_gpu_config *config) { - int ret; + int i, ret, nr_rings = config->nr_rings; + void *memptrs; + uint64_t memptrs_iova; if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs))) gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs); @@ -613,18 +697,11 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, gpu->dev = drm; gpu->funcs = funcs; gpu->name = name; - gpu->fctx = msm_fence_context_alloc(drm, name); - if (IS_ERR(gpu->fctx)) { - ret = PTR_ERR(gpu->fctx); - gpu->fctx = NULL; - goto fail; - } INIT_LIST_HEAD(&gpu->active_list); INIT_WORK(&gpu->retire_work, retire_worker); INIT_WORK(&gpu->recover_work, recover_worker); - INIT_LIST_HEAD(&gpu->submit_list); setup_timer(&gpu->hangcheck_timer, hangcheck_handler, (unsigned long)gpu); @@ -689,34 +766,76 @@ int msm_gpu_init(struct drm_device *drm, struct 
platform_device *pdev, goto fail; } - /* Create ringbuffer: */ - gpu->rb = msm_ringbuffer_new(gpu, config->ringsz); - if (IS_ERR(gpu->rb)) { - ret = PTR_ERR(gpu->rb); - gpu->rb = NULL; - dev_err(drm->dev, "could not create ringbuffer: %d\n", ret); + memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo), + MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo, + &memptrs_iova); + + if (IS_ERR(memptrs)) { + ret = PTR_ERR(memptrs); + dev_err(drm->dev, "could not allocate memptrs: %d\n", ret); goto fail; } + if (nr_rings > ARRAY_SIZE(gpu->rb)) { + DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n", + ARRAY_SIZE(gpu->rb)); + nr_rings = ARRAY_SIZE(gpu->rb); + } + + /* Create ringbuffer(s): */ + for (i = 0; i < nr_rings; i++) { + gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova); + + if (IS_ERR(gpu->rb[i])) { + ret = PTR_ERR(gpu->rb[i]); + dev_err(drm->dev, + "could not create ringbuffer %d: %d\n", i, ret); + goto fail; + } + + memptrs += sizeof(struct msm_rbmemptrs); + memptrs_iova += sizeof(struct msm_rbmemptrs); + } + + gpu->nr_rings = nr_rings; + return 0; fail: + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + msm_ringbuffer_destroy(gpu->rb[i]); + gpu->rb[i] = NULL; + } + + if (gpu->memptrs_bo) { + msm_gem_put_vaddr(gpu->memptrs_bo); + msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace); + drm_gem_object_unreference_unlocked(gpu->memptrs_bo); + } + platform_set_drvdata(pdev, NULL); return ret; } void msm_gpu_cleanup(struct msm_gpu *gpu) { + int i; + DBG("%s", gpu->name); WARN_ON(!list_empty(&gpu->active_list)); bs_fini(gpu); - if (gpu->rb) { - if (gpu->rb_iova) - msm_gem_put_iova(gpu->rb->bo, gpu->aspace); - msm_ringbuffer_destroy(gpu->rb); + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + msm_ringbuffer_destroy(gpu->rb[i]); + gpu->rb[i] = NULL; + } + + if (gpu->memptrs_bo) { + msm_gem_put_vaddr(gpu->memptrs_bo); + msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace); + drm_gem_object_unreference_unlocked(gpu->memptrs_bo); } if (!IS_ERR_OR_NULL(gpu->aspace)) { diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index df4e2771fb85..e113d64574d3 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -33,7 +33,7 @@ struct msm_gpu_config { const char *irqname; uint64_t va_start; uint64_t va_end; - unsigned int ringsz; + unsigned int nr_rings; }; /* So far, with hardware that I've seen to date, we can have: @@ -57,9 +57,9 @@ struct msm_gpu_funcs { int (*pm_resume)(struct msm_gpu *gpu); void (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx); - void (*flush)(struct msm_gpu *gpu); + void (*flush)(struct msm_gpu *gpu, struct msm_ringbuffer *ring); irqreturn_t (*irq)(struct msm_gpu *irq); - uint32_t (*last_fence)(struct msm_gpu *gpu); + struct msm_ringbuffer *(*active_ring)(struct msm_gpu *gpu); void (*recover)(struct msm_gpu *gpu); void (*destroy)(struct msm_gpu *gpu); #ifdef CONFIG_DEBUG_FS @@ -86,16 +86,12 @@ struct msm_gpu { const struct msm_gpu_perfcntr *perfcntrs; uint32_t num_perfcntrs; - /* ringbuffer: */ - struct msm_ringbuffer *rb; - uint64_t rb_iova; + struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS]; + int nr_rings; /* list of GEM active objects: */ struct list_head active_list; - /* fencing: */ - struct msm_fence_context *fctx; - /* does gpu need hw_init? 
*/ bool needs_hw_init; @@ -126,15 +122,31 @@ struct msm_gpu { #define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */ #define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD) struct timer_list hangcheck_timer; - uint32_t hangcheck_fence; struct work_struct recover_work; - struct list_head submit_list; + struct drm_gem_object *memptrs_bo; }; +/* It turns out that all targets use the same ringbuffer size */ +#define MSM_GPU_RINGBUFFER_SZ SZ_32K +#define MSM_GPU_RINGBUFFER_BLKSIZE 32 + +#define MSM_GPU_RB_CNTL_DEFAULT \ + (AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) | \ + AXXX_CP_RB_CNTL_BLKSZ(ilog2(MSM_GPU_RINGBUFFER_BLKSIZE / 8))) + static inline bool msm_gpu_active(struct msm_gpu *gpu) { - return gpu->fctx->last_fence > gpu->funcs->last_fence(gpu); + int i; + + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + if (ring->seqno > ring->memptrs->fence) + return true; + } + + return false; } /* Perf-Counters: @@ -150,6 +162,15 @@ struct msm_gpu_perfcntr { const char *name; }; +struct msm_gpu_submitqueue { + int id; + u32 flags; + u32 prio; + int faults; + struct list_head node; + struct kref ref; +}; + static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data) { msm_writel(data, gpu->mmio + (reg << 2)); @@ -223,4 +244,10 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev); void __init adreno_register(void); void __exit adreno_unregister(void); +static inline void msm_submitqueue_put(struct msm_gpu_submitqueue *queue) +{ + if (queue) + kref_put(&queue->ref, msm_submitqueue_destroy); +} + #endif /* __MSM_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index ec56794ad039..3aa8a8576abe 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -19,11 +19,17 @@ * * tail -f /sys/kernel/debug/dri/<minor>/rd > logfile.rd * - * To log the cmdstream in a format that is understood by freedreno/cffdump + * to log the cmdstream in a format that is understood by freedreno/cffdump * utility. By comparing the last successfully completed fence #, to the * cmdstream for the next fence, you can narrow down which process and submit * caused the gpu crash/lockup. * + * Additionally: + * + * tail -f /sys/kernel/debug/dri/<minor>/hangrd > logfile.rd + * + * will capture just the cmdstream from submits which triggered a GPU hang. + * * This bypasses drm_debugfs_create_files() mainly because we need to use * our own fops for a bit more control. In particular, we don't want to * do anything if userspace doesn't have the debugfs file open. 
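A minimal capture sketch to go with the comment above (not part of the patch): it assumes debugfs is mounted at /sys/kernel/debug and DRM minor 0; only the "rd" and "hangrd" node names come from this series. It is just an open-coded tail -f, holding the file open so the fops will produce data:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* hangrd only emits data while a reader holds the file open */
	int in = open("/sys/kernel/debug/dri/0/hangrd", O_RDONLY);
	int out = open("logfile.rd", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	char buf[4096];
	ssize_t n;

	if (in < 0 || out < 0) {
		perror("open");
		return 1;
	}

	/* blocking reads: each GPU hang appends another dump */
	while ((n = read(in, buf, sizeof(buf))) > 0)
		if (write(out, buf, n) != n)
			break;

	close(in);
	close(out);
	return 0;
}

The resulting logfile.rd can then be fed to the freedreno cffdump tool, just like a capture from the existing rd node.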
@@ -220,53 +226,89 @@ static const struct file_operations rd_debugfs_fops = { .release = rd_release, }; -int msm_rd_debugfs_init(struct drm_minor *minor) + +static void rd_cleanup(struct msm_rd_state *rd) +{ + if (!rd) + return; + + mutex_destroy(&rd->read_lock); + kfree(rd); +} + +static struct msm_rd_state *rd_init(struct drm_minor *minor, const char *name) { - struct msm_drm_private *priv = minor->dev->dev_private; struct msm_rd_state *rd; struct dentry *ent; - - /* only create on first minor: */ - if (priv->rd) - return 0; + int ret = 0; rd = kzalloc(sizeof(*rd), GFP_KERNEL); if (!rd) - return -ENOMEM; + return ERR_PTR(-ENOMEM); rd->dev = minor->dev; rd->fifo.buf = rd->buf; mutex_init(&rd->read_lock); - priv->rd = rd; init_waitqueue_head(&rd->fifo_event); - ent = debugfs_create_file("rd", S_IFREG | S_IRUGO, + ent = debugfs_create_file(name, S_IFREG | S_IRUGO, minor->debugfs_root, rd, &rd_debugfs_fops); if (!ent) { - DRM_ERROR("Cannot create /sys/kernel/debug/dri/%pd/rd\n", - minor->debugfs_root); + DRM_ERROR("Cannot create /sys/kernel/debug/dri/%pd/%s\n", + minor->debugfs_root, name); + ret = -ENOMEM; goto fail; } + return rd; + +fail: + rd_cleanup(rd); + return ERR_PTR(ret); +} + +int msm_rd_debugfs_init(struct drm_minor *minor) +{ + struct msm_drm_private *priv = minor->dev->dev_private; + struct msm_rd_state *rd; + int ret; + + /* only create on first minor: */ + if (priv->rd) + return 0; + + rd = rd_init(minor, "rd"); + if (IS_ERR(rd)) { + ret = PTR_ERR(rd); + goto fail; + } + + priv->rd = rd; + + rd = rd_init(minor, "hangrd"); + if (IS_ERR(rd)) { + ret = PTR_ERR(rd); + goto fail; + } + + priv->hangrd = rd; + return 0; fail: msm_rd_debugfs_cleanup(priv); - return -1; + return ret; } void msm_rd_debugfs_cleanup(struct msm_drm_private *priv) { - struct msm_rd_state *rd = priv->rd; - - if (!rd) - return; - + rd_cleanup(priv->rd); priv->rd = NULL; - mutex_destroy(&rd->read_lock); - kfree(rd); + + rd_cleanup(priv->hangrd); + priv->hangrd = NULL; } static void snapshot_buf(struct msm_rd_state *rd, @@ -276,10 +318,6 @@ static void snapshot_buf(struct msm_rd_state *rd, struct msm_gem_object *obj = submit->bos[idx].obj; const char *buf; - buf = msm_gem_get_vaddr(&obj->base); - if (IS_ERR(buf)) - return; - if (iova) { buf += iova - submit->bos[idx].iova; } else { @@ -287,20 +325,33 @@ static void snapshot_buf(struct msm_rd_state *rd, size = obj->base.size; } + /* + * Always write the GPUADDR header so we can get a complete list of all + * the buffers in the cmd + */ rd_write_section(rd, RD_GPUADDR, (uint32_t[3]){ iova, size, iova >> 32 }, 12); + + /* But only dump the contents of buffers marked READ */ + if (!(submit->bos[idx].flags & MSM_SUBMIT_BO_READ)) + return; + + buf = msm_gem_get_vaddr_active(&obj->base); + if (IS_ERR(buf)) + return; + rd_write_section(rd, RD_BUFFER_CONTENTS, buf, size); msm_gem_put_vaddr(&obj->base); } /* called under struct_mutex */ -void msm_rd_dump_submit(struct msm_gem_submit *submit) +void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit, + const char *fmt, ...)
{ struct drm_device *dev = submit->dev; - struct msm_drm_private *priv = dev->dev_private; - struct msm_rd_state *rd = priv->rd; - char msg[128]; + struct task_struct *task; + char msg[256]; int i, n; if (!rd->open) @@ -311,23 +362,32 @@ void msm_rd_dump_submit(struct msm_gem_submit *submit) */ WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - n = snprintf(msg, sizeof(msg), "%.*s/%d: fence=%u", - TASK_COMM_LEN, current->comm, task_pid_nr(current), - submit->fence->seqno); + if (fmt) { + va_list args; - rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4)); + va_start(args, fmt); + n = vsnprintf(msg, sizeof(msg), fmt, args); + va_end(args); - if (rd_full) { - for (i = 0; i < submit->nr_bos; i++) { - /* buffers that are written to probably don't start out - * with anything interesting: - */ - if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE) - continue; + rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4)); + } - snapshot_buf(rd, submit, i, 0, 0); - } + rcu_read_lock(); + task = pid_task(submit->pid, PIDTYPE_PID); + if (task) { + n = snprintf(msg, sizeof(msg), "%.*s/%d: fence=%u", + TASK_COMM_LEN, task->comm, + pid_nr(submit->pid), submit->seqno); + } else { + n = snprintf(msg, sizeof(msg), "???/%d: fence=%u", + pid_nr(submit->pid), submit->seqno); } + rcu_read_unlock(); + + rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4)); + + for (i = 0; rd_full && i < submit->nr_bos; i++) + snapshot_buf(rd, submit, i, 0, 0); for (i = 0; i < submit->nr_cmds; i++) { uint64_t iova = submit->cmd[i].iova; diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index bf065a540130..6ca98da35f63 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -18,13 +18,15 @@ #include "msm_ringbuffer.h" #include "msm_gpu.h" -struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) +struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, + void *memptrs, uint64_t memptrs_iova) { struct msm_ringbuffer *ring; + char name[32]; int ret; - if (WARN_ON(!is_power_of_2(size))) - return ERR_PTR(-EINVAL); + /* We assume everywhere that MSM_GPU_RINGBUFFER_SZ is a power of 2 */ + BUILD_BUG_ON(!is_power_of_2(MSM_GPU_RINGBUFFER_SZ)); ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (!ring) { @@ -33,32 +35,46 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) } ring->gpu = gpu; - + ring->id = id; /* Pass NULL for the iova pointer - we will map it later */ - ring->start = msm_gem_kernel_new(gpu->dev, size, MSM_BO_WC, - gpu->aspace, &ring->bo, NULL); + ring->start = msm_gem_kernel_new(gpu->dev, MSM_GPU_RINGBUFFER_SZ, + MSM_BO_WC, gpu->aspace, &ring->bo, NULL); if (IS_ERR(ring->start)) { ret = PTR_ERR(ring->start); ring->start = 0; goto fail; } - ring->end = ring->start + (size / 4); + ring->end = ring->start + (MSM_GPU_RINGBUFFER_SZ >> 2); + ring->next = ring->start; ring->cur = ring->start; - ring->size = size; + ring->memptrs = memptrs; + ring->memptrs_iova = memptrs_iova; + + INIT_LIST_HEAD(&ring->submits); + spin_lock_init(&ring->lock); + + snprintf(name, sizeof(name), "gpu-ring-%d", ring->id); + + ring->fctx = msm_fence_context_alloc(gpu->dev, name); return ring; fail: - if (ring) - msm_ringbuffer_destroy(ring); + msm_ringbuffer_destroy(ring); return ERR_PTR(ret); } void msm_ringbuffer_destroy(struct msm_ringbuffer *ring) { + if (IS_ERR_OR_NULL(ring)) + return; + + msm_fence_context_free(ring->fctx); + if (ring->bo) { + msm_gem_put_iova(ring->bo, ring->gpu->aspace); msm_gem_put_vaddr(ring->bo);
drm_gem_object_unreference_unlocked(ring->bo); } diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h index 6e0e1049fa4f..cffce094aecb 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.h +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h @@ -20,14 +20,31 @@ #include "msm_drv.h" +#define rbmemptr(ring, member) \ + ((ring)->memptrs_iova + offsetof(struct msm_rbmemptrs, member)) + +struct msm_rbmemptrs { + volatile uint32_t rptr; + volatile uint32_t fence; +}; + struct msm_ringbuffer { struct msm_gpu *gpu; - int size; + int id; struct drm_gem_object *bo; - uint32_t *start, *end, *cur; + uint32_t *start, *end, *cur, *next; + struct list_head submits; + uint64_t iova; + uint32_t seqno; + uint32_t hangcheck_fence; + struct msm_rbmemptrs *memptrs; + uint64_t memptrs_iova; + struct msm_fence_context *fctx; + spinlock_t lock; }; -struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size); +struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, + void *memptrs, uint64_t memptrs_iova); void msm_ringbuffer_destroy(struct msm_ringbuffer *ring); /* ringbuffer helpers (the parts that are same for a3xx/a2xx/z180..) */ @@ -35,9 +52,13 @@ void msm_ringbuffer_destroy(struct msm_ringbuffer *ring); static inline void OUT_RING(struct msm_ringbuffer *ring, uint32_t data) { - if (ring->cur == ring->end) - ring->cur = ring->start; - *(ring->cur++) = data; + /* + * ring->next points to the current command being written - it won't be + * committed as ring->cur until the flush + */ + if (ring->next == ring->end) + ring->next = ring->start; + *(ring->next++) = data; } #endif /* __MSM_RINGBUFFER_H__ */ diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c new file mode 100644 index 000000000000..5115f75b5b7f --- /dev/null +++ b/drivers/gpu/drm/msm/msm_submitqueue.c @@ -0,0 +1,152 @@ +/* Copyright (c) 2017 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include <linux/kref.h> +#include "msm_gpu.h" + +void msm_submitqueue_destroy(struct kref *kref) +{ + struct msm_gpu_submitqueue *queue = container_of(kref, + struct msm_gpu_submitqueue, ref); + + kfree(queue); +} + +struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx, + u32 id) +{ + struct msm_gpu_submitqueue *entry; + + if (!ctx) + return NULL; + + read_lock(&ctx->queuelock); + + list_for_each_entry(entry, &ctx->submitqueues, node) { + if (entry->id == id) { + kref_get(&entry->ref); + read_unlock(&ctx->queuelock); + + return entry; + } + } + + read_unlock(&ctx->queuelock); + return NULL; +} + +void msm_submitqueue_close(struct msm_file_private *ctx) +{ + struct msm_gpu_submitqueue *entry, *tmp; + + if (!ctx) + return; + + /* + * No lock needed in close and there won't + * be any more user ioctls coming our way + */ + list_for_each_entry_safe(entry, tmp, &ctx->submitqueues, node) + msm_submitqueue_put(entry); +} + +int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx, + u32 prio, u32 flags, u32 *id) +{ + struct msm_drm_private *priv = drm->dev_private; + struct msm_gpu_submitqueue *queue; + + if (!ctx) + return -ENODEV; + + queue = kzalloc(sizeof(*queue), GFP_KERNEL); + + if (!queue) + return -ENOMEM; + + kref_init(&queue->ref); + queue->flags = flags; + + if (priv->gpu) { + if (prio >= priv->gpu->nr_rings) { + /* don't leak the queue we just allocated */ + kfree(queue); + return -EINVAL; + } + + queue->prio = prio; + } + + write_lock(&ctx->queuelock); + + queue->id = ctx->queueid++; + + if (id) + *id = queue->id; + + list_add_tail(&queue->node, &ctx->submitqueues); + + write_unlock(&ctx->queuelock); + + return 0; +} + +int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx) +{ + struct msm_drm_private *priv = drm->dev_private; + int default_prio; + + if (!ctx) + return 0; + + /* + * Select priority 2 as the "default priority" unless nr_rings is less + * than 2, in which case pick the lowest priority + */ + default_prio = priv->gpu ? + clamp_t(uint32_t, 2, 0, priv->gpu->nr_rings - 1) : 0; + + INIT_LIST_HEAD(&ctx->submitqueues); + + rwlock_init(&ctx->queuelock); + + return msm_submitqueue_create(drm, ctx, default_prio, 0, NULL); +} + +int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id) +{ + struct msm_gpu_submitqueue *entry; + + if (!ctx) + return 0; + + /* + * id 0 is the "default" queue and can't be destroyed + * by the user + */ + if (!id) + return -ENOENT; + + write_lock(&ctx->queuelock); + + list_for_each_entry(entry, &ctx->submitqueues, node) { + if (entry->id == id) { + list_del(&entry->node); + write_unlock(&ctx->queuelock); + + msm_submitqueue_put(entry); + return 0; + } + } + + write_unlock(&ctx->queuelock); + return -ENOENT; +}
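To close the loop on how the new submitqueue API is consumed, here is a hypothetical sketch (ring_for_submit() does not exist in the patch; it just condenses the msm_ioctl_gem_submit() hunk near the top of this diff): a queueid from userspace resolves to a submitqueue, and the queue's prio selects the ringbuffer to submit on.

#include "msm_gpu.h"

/*
 * Illustrative helper, not part of the patch: map a userspace queueid
 * to the ring it targets.  msm_submitqueue_get() takes a reference on
 * the queue and msm_submitqueue_put() drops it; queue->prio was already
 * validated against gpu->nr_rings in msm_submitqueue_create(), so the
 * gpu->rb[] index is in range.
 */
static struct msm_ringbuffer *ring_for_submit(struct msm_gpu *gpu,
		struct msm_file_private *ctx, u32 queueid)
{
	struct msm_gpu_submitqueue *queue;
	struct msm_ringbuffer *ring;

	queue = msm_submitqueue_get(ctx, queueid);
	if (!queue)
		return ERR_PTR(-ENOENT);

	ring = gpu->rb[queue->prio];

	msm_submitqueue_put(queue);
	return ring;
}

In the real submit path the reference obtained in msm_ioctl_gem_submit() is handed to the submit rather than dropped immediately; the early put here only keeps the sketch short.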