diff options
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 9 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 9 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 54 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a5xx_gpu.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a5xx_power.c | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/adreno_gpu.c | 136 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/adreno_gpu.h | 20 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_drv.c | 23 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_drv.h | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_fence.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_fence.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_gem.h | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_gem_submit.c | 12 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_gpu.c | 163 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_gpu.h | 42 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_ringbuffer.c | 34 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_ringbuffer.h | 20 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_submitqueue.c | 27 |
18 files changed, 363 insertions, 210 deletions
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 789f7fb86cba..4baef2738178 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -44,7 +44,7 @@ static bool a3xx_idle(struct msm_gpu *gpu); static bool a3xx_me_init(struct msm_gpu *gpu) { - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT3(ring, CP_ME_INIT, 17); OUT_RING(ring, 0x000003f7); @@ -65,7 +65,7 @@ static bool a3xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); return a3xx_idle(gpu); } @@ -339,7 +339,7 @@ static void a3xx_destroy(struct msm_gpu *gpu) static bool a3xx_idle(struct msm_gpu *gpu) { /* wait for ringbuffer to drain: */ - if (!adreno_idle(gpu)) + if (!adreno_idle(gpu, gpu->rb[0])) return false; /* then wait for GPU to finish: */ @@ -446,6 +446,7 @@ static const struct adreno_gpu_funcs funcs = { .recover = a3xx_recover, .submit = adreno_submit, .flush = adreno_flush, + .active_ring = adreno_active_ring, .irq = a3xx_irq, .destroy = a3xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -491,7 +492,7 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) adreno_gpu->registers = a3xx_registers; adreno_gpu->reg_offsets = a3xx_register_offsets; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index f87c43124099..8199a4b9f2fa 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -116,7 +116,7 @@ static void a4xx_enable_hwcg(struct msm_gpu *gpu) static bool a4xx_me_init(struct msm_gpu *gpu) { - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT3(ring, CP_ME_INIT, 17); OUT_RING(ring, 0x000003f7); @@ -137,7 +137,7 @@ static bool 
a4xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); return a4xx_idle(gpu); } @@ -337,7 +337,7 @@ static void a4xx_destroy(struct msm_gpu *gpu) static bool a4xx_idle(struct msm_gpu *gpu) { /* wait for ringbuffer to drain: */ - if (!adreno_idle(gpu)) + if (!adreno_idle(gpu, gpu->rb[0])) return false; /* then wait for GPU to finish: */ @@ -534,6 +534,7 @@ static const struct adreno_gpu_funcs funcs = { .recover = a4xx_recover, .submit = adreno_submit, .flush = adreno_flush, + .active_ring = adreno_active_ring, .irq = a4xx_irq, .destroy = a4xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -573,7 +574,7 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) adreno_gpu->registers = a4xx_registers; adreno_gpu->reg_offsets = a4xx_register_offsets; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index e34835c3b55d..32252f8ac30c 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -117,7 +117,7 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx) { struct msm_drm_private *priv = gpu->dev->dev_private; - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; for (i = 0; i < submit->nr_cmds; i++) { @@ -138,15 +138,15 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, } OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1); - OUT_RING(ring, submit->fence->seqno); + OUT_RING(ring, submit->seqno); OUT_PKT7(ring, CP_EVENT_WRITE, 4); OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31)); - OUT_RING(ring, lower_32_bits(rbmemptr(gpu, fence))); - OUT_RING(ring, upper_32_bits(rbmemptr(gpu, fence))); - OUT_RING(ring, submit->fence->seqno); + OUT_RING(ring, 
lower_32_bits(rbmemptr(ring, fence))); + OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); + OUT_RING(ring, submit->seqno); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); } static const struct { @@ -262,7 +262,7 @@ void a5xx_set_hwcg(struct msm_gpu *gpu, bool state) static int a5xx_me_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT7(ring, CP_ME_INIT, 8); @@ -293,9 +293,8 @@ static int a5xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); - - return a5xx_idle(gpu) ? 0 : -EINVAL; + gpu->funcs->flush(gpu, ring); + return a5xx_idle(gpu, ring) ? 0 : -EINVAL; } static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu, @@ -581,11 +580,11 @@ static int a5xx_hw_init(struct msm_gpu *gpu) * ticking correctly */ if (adreno_is_a530(adreno_gpu)) { - OUT_PKT7(gpu->rb, CP_EVENT_WRITE, 1); - OUT_RING(gpu->rb, 0x0F); + OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1); + OUT_RING(gpu->rb[0], 0x0F); - gpu->funcs->flush(gpu); - if (!a5xx_idle(gpu)) + gpu->funcs->flush(gpu, gpu->rb[0]); + if (!a5xx_idle(gpu, gpu->rb[0])) return -EINVAL; } @@ -598,11 +597,11 @@ static int a5xx_hw_init(struct msm_gpu *gpu) */ ret = a5xx_zap_shader_init(gpu); if (!ret) { - OUT_PKT7(gpu->rb, CP_SET_SECURE_MODE, 1); - OUT_RING(gpu->rb, 0x00000000); + OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1); + OUT_RING(gpu->rb[0], 0x00000000); - gpu->funcs->flush(gpu); - if (!a5xx_idle(gpu)) + gpu->funcs->flush(gpu, gpu->rb[0]); + if (!a5xx_idle(gpu, gpu->rb[0])) return -EINVAL; } else { /* Print a warning so if we die, we know why */ @@ -676,18 +675,19 @@ static inline bool _a5xx_check_idle(struct msm_gpu *gpu) A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT); } -bool a5xx_idle(struct msm_gpu *gpu) +bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { /* wait for CP to drain ringbuffer: */ - if (!adreno_idle(gpu)) + 
if (!adreno_idle(gpu, ring)) return false; if (spin_until(_a5xx_check_idle(gpu))) { - DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X\n", + DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n", gpu->name, __builtin_return_address(0), gpu_read(gpu, REG_A5XX_RBBM_STATUS), - gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS)); - + gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS), + gpu_read(gpu, REG_A5XX_CP_RB_RPTR), + gpu_read(gpu, REG_A5XX_CP_RB_WPTR)); return false; } @@ -818,9 +818,10 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu) { struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; + struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); - dev_err(dev->dev, "gpu fault fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", - gpu->memptrs->fence, + dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n", + ring ? ring->id : -1, ring ? 
ring->seqno : 0, gpu_read(gpu, REG_A5XX_RBBM_STATUS), gpu_read(gpu, REG_A5XX_CP_RB_RPTR), gpu_read(gpu, REG_A5XX_CP_RB_WPTR), @@ -1010,6 +1011,7 @@ static const struct adreno_gpu_funcs funcs = { .recover = a5xx_recover, .submit = a5xx_submit, .flush = adreno_flush, + .active_ring = adreno_active_ring, .irq = a5xx_irq, .destroy = a5xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -1045,7 +1047,7 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) a5xx_gpu->lm_leakage = 0x4E001A; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) { a5xx_destroy(&(a5xx_gpu->base.base)); return ERR_PTR(ret); diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h index e94451685bf8..44db48d86202 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h @@ -55,7 +55,7 @@ static inline int spin_usecs(struct msm_gpu *gpu, uint32_t usecs, return -ETIMEDOUT; } -bool a5xx_idle(struct msm_gpu *gpu); +bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring); void a5xx_set_hwcg(struct msm_gpu *gpu, bool state); #endif /* __A5XX_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c index b5de2be67732..e5700bbf09dd 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_power.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c @@ -173,7 +173,7 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; if (!a5xx_gpu->gpmu_dwords) return 0; @@ -192,9 +192,9 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu) OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); OUT_RING(ring, 1); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); - if (!a5xx_idle(gpu)) { + if (!a5xx_idle(gpu, ring)) { DRM_ERROR("%s: Unable to load GPMU firmware. 
GPMU will not be active\n", gpu->name); return -EINVAL; diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 5f2501c2cd3f..fd0fb0568dd3 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -21,7 +21,6 @@ #include "msm_gem.h" #include "msm_mmu.h" -#define RB_SIZE SZ_32K #define RB_BLKSIZE 32 int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) @@ -163,7 +162,7 @@ static int adreno_load_fw(struct adreno_gpu *adreno_gpu) int adreno_hw_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - int ret; + int ret, i; DBG("%s", gpu->name); @@ -171,34 +170,42 @@ int adreno_hw_init(struct msm_gpu *gpu) if (ret) return ret; - ret = msm_gem_get_iova(gpu->rb->bo, gpu->aspace, &gpu->rb_iova); - if (ret) { - gpu->rb_iova = 0; - dev_err(gpu->dev->dev, "could not map ringbuffer: %d\n", ret); - return ret; - } + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; - /* reset ringbuffer: */ - gpu->rb->cur = gpu->rb->start; + if (!ring) + continue; - /* reset completed fence seqno: */ - gpu->memptrs->fence = gpu->fctx->completed_fence; - gpu->memptrs->rptr = 0; + ret = msm_gem_get_iova(ring->bo, gpu->aspace, &ring->iova); + if (ret) { + ring->iova = 0; + dev_err(gpu->dev->dev, + "could not map ringbuffer %d: %d\n", i, ret); + return ret; + } + + ring->cur = ring->start; + + /* reset completed fence seqno: */ + ring->memptrs->fence = ring->seqno; + ring->memptrs->rptr = 0; + } /* Setup REG_CP_RB_CNTL: */ adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL, - /* size is log2(quad-words): */ - AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) | - AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) | - (adreno_is_a430(adreno_gpu) ? 
AXXX_CP_RB_CNTL_NO_UPDATE : 0)); + /* size is log2(quad-words): */ + AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) | + AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) | + (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0)); - /* Setup ringbuffer address: */ + /* Setup ringbuffer address - use ringbuffer[0] for GPU init */ adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_BASE, - REG_ADRENO_CP_RB_BASE_HI, gpu->rb_iova); + REG_ADRENO_CP_RB_BASE_HI, gpu->rb[0]->iova); if (!adreno_is_a430(adreno_gpu)) { adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR, - REG_ADRENO_CP_RB_RPTR_ADDR_HI, rbmemptr(gpu, rptr)); + REG_ADRENO_CP_RB_RPTR_ADDR_HI, + rbmemptr(gpu->rb[0], rptr)); } return 0; @@ -210,15 +217,19 @@ static uint32_t get_wptr(struct msm_ringbuffer *ring) } /* Use this helper to read rptr, since a430 doesn't update rptr in memory */ -static uint32_t get_rptr(struct adreno_gpu *adreno_gpu) +static uint32_t get_rptr(struct adreno_gpu *adreno_gpu, + struct msm_ringbuffer *ring) { - struct msm_gpu *gpu = &adreno_gpu->base; - if (adreno_is_a430(adreno_gpu)) - return gpu->memptrs->rptr = adreno_gpu_read( + return ring->memptrs->rptr = adreno_gpu_read( adreno_gpu, REG_ADRENO_CP_RB_RPTR); else - return gpu->memptrs->rptr; + return ring->memptrs->rptr; +} + +struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu) +{ + return gpu->rb[0]; } void adreno_recover(struct msm_gpu *gpu) @@ -244,7 +255,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct msm_drm_private *priv = gpu->dev->dev_private; - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = submit->ring; unsigned i; for (i = 0; i < submit->nr_cmds; i++) { @@ -267,7 +278,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, } OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1); - OUT_RING(ring, submit->fence->seqno); + OUT_RING(ring, submit->seqno); if (adreno_is_a3xx(adreno_gpu) 
|| adreno_is_a4xx(adreno_gpu)) { /* Flush HLSQ lazy updates to make sure there is nothing @@ -283,8 +294,8 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, OUT_PKT3(ring, CP_EVENT_WRITE, 3); OUT_RING(ring, CACHE_FLUSH_TS); - OUT_RING(ring, rbmemptr(gpu, fence)); - OUT_RING(ring, submit->fence->seqno); + OUT_RING(ring, rbmemptr(ring, fence)); + OUT_RING(ring, submit->seqno); /* we could maybe be clever and only CP_COND_EXEC the interrupt: */ OUT_PKT3(ring, CP_INTERRUPT, 1); @@ -310,10 +321,10 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, } #endif - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); } -void adreno_flush(struct msm_gpu *gpu) +void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); uint32_t wptr; @@ -323,7 +334,7 @@ void adreno_flush(struct msm_gpu *gpu) * to account for the possibility that the last command fit exactly into * the ringbuffer and rb->next hasn't wrapped to zero yet */ - wptr = get_wptr(gpu->rb) & ((gpu->rb->size / 4) - 1); + wptr = get_wptr(ring) % (MSM_GPU_RINGBUFFER_SZ >> 2); /* ensure writes to ringbuffer have hit system memory: */ mb(); @@ -331,17 +342,18 @@ void adreno_flush(struct msm_gpu *gpu) adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_WPTR, wptr); } -bool adreno_idle(struct msm_gpu *gpu) +bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - uint32_t wptr = get_wptr(gpu->rb); + uint32_t wptr = get_wptr(ring); /* wait for CP to drain ringbuffer: */ - if (!spin_until(get_rptr(adreno_gpu) == wptr)) + if (!spin_until(get_rptr(adreno_gpu, ring) == wptr)) return true; /* TODO maybe we need to reset GPU here to recover from hang? 
*/ - DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name); + DRM_ERROR("%s: timeout waiting to drain ringbuffer %d!\n", gpu->name, + ring->id); return false; } @@ -356,10 +368,16 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m) adreno_gpu->rev.major, adreno_gpu->rev.minor, adreno_gpu->rev.patchid); - seq_printf(m, "fence: %d/%d\n", gpu->memptrs->fence, - gpu->fctx->last_fence); - seq_printf(m, "rptr: %d\n", get_rptr(adreno_gpu)); - seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb)); + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + seq_printf(m, "rb %d: fence: %d/%d\n", i, + ring->memptrs->fence, ring->seqno); + + seq_printf(m, " rptr: %d\n", + get_rptr(adreno_gpu, ring)); + seq_printf(m, "rb wptr: %d\n", get_wptr(ring)); + } /* dump these out in a form that can be parsed by demsm: */ seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name); @@ -385,16 +403,23 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m) void adreno_dump_info(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + int i; printk("revision: %d (%d.%d.%d.%d)\n", adreno_gpu->info->revn, adreno_gpu->rev.core, adreno_gpu->rev.major, adreno_gpu->rev.minor, adreno_gpu->rev.patchid); - printk("fence: %d/%d\n", gpu->memptrs->fence, - gpu->fctx->last_fence); - printk("rptr: %d\n", get_rptr(adreno_gpu)); - printk("rb wptr: %d\n", get_wptr(gpu->rb)); + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + printk("rb %d: fence: %d/%d\n", i, + ring->memptrs->fence, + ring->seqno); + + printk("rptr: %d\n", get_rptr(adreno_gpu, ring)); + printk("rb wptr: %d\n", get_wptr(ring)); + } } /* would be nice to not have to duplicate the _show() stuff with printk(): */ @@ -417,23 +442,26 @@ void adreno_dump(struct msm_gpu *gpu) } } -static uint32_t ring_freewords(struct msm_gpu *gpu) +static uint32_t ring_freewords(struct msm_ringbuffer *ring) { - struct adreno_gpu *adreno_gpu = 
to_adreno_gpu(gpu); - uint32_t size = gpu->rb->size / 4; - uint32_t wptr = get_wptr(gpu->rb); - uint32_t rptr = get_rptr(adreno_gpu); + struct adreno_gpu *adreno_gpu = to_adreno_gpu(ring->gpu); + uint32_t size = MSM_GPU_RINGBUFFER_SZ >> 2; + uint32_t wptr = get_wptr(ring); + uint32_t rptr = get_rptr(adreno_gpu, ring); return (rptr + (size - 1) - wptr) % size; } -void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords) +void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords) { - if (spin_until(ring_freewords(gpu) >= ndwords)) - DRM_ERROR("%s: timeout waiting for ringbuffer space\n", gpu->name); + if (spin_until(ring_freewords(ring) >= ndwords)) + DRM_DEV_ERROR(ring->gpu->dev->dev, + "timeout waiting for space in ringubffer %d\n", + ring->id); } int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, - struct adreno_gpu *adreno_gpu, const struct adreno_gpu_funcs *funcs) + struct adreno_gpu *adreno_gpu, + const struct adreno_gpu_funcs *funcs, int nr_rings) { struct adreno_platform_config *config = pdev->dev.platform_data; struct msm_gpu_config adreno_gpu_config = { 0 }; @@ -460,7 +488,7 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, adreno_gpu_config.va_start = SZ_16M; adreno_gpu_config.va_end = 0xffffffff; - adreno_gpu_config.ringsz = RB_SIZE; + adreno_gpu_config.nr_rings = nr_rings; pm_runtime_set_autosuspend_delay(&pdev->dev, DRM_MSM_INACTIVE_PERIOD); pm_runtime_use_autosuspend(&pdev->dev); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 1676282948d5..3e9a1743f476 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -208,17 +208,19 @@ int adreno_hw_init(struct msm_gpu *gpu); void adreno_recover(struct msm_gpu *gpu); void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx); -void adreno_flush(struct msm_gpu *gpu); -bool adreno_idle(struct msm_gpu *gpu); +void 
adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring); +bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring); #ifdef CONFIG_DEBUG_FS void adreno_show(struct msm_gpu *gpu, struct seq_file *m); #endif void adreno_dump_info(struct msm_gpu *gpu); void adreno_dump(struct msm_gpu *gpu); -void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords); +void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords); +struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu); int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, - struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs); + struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs, + int nr_rings); void adreno_gpu_cleanup(struct adreno_gpu *gpu); @@ -227,7 +229,7 @@ void adreno_gpu_cleanup(struct adreno_gpu *gpu); static inline void OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt+1); + adreno_wait_ring(ring, cnt+1); OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF)); } @@ -235,14 +237,14 @@ OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) static inline void OUT_PKT2(struct msm_ringbuffer *ring) { - adreno_wait_ring(ring->gpu, 1); + adreno_wait_ring(ring, 1); OUT_RING(ring, CP_TYPE2_PKT); } static inline void OUT_PKT3(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt+1); + adreno_wait_ring(ring, cnt+1); OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8)); } @@ -264,14 +266,14 @@ static inline u32 PM4_PARITY(u32 val) static inline void OUT_PKT4(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt + 1); + adreno_wait_ring(ring, cnt + 1); OUT_RING(ring, PKT4(regindx, cnt)); } static inline void OUT_PKT7(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt + 1); + adreno_wait_ring(ring, cnt + 1); OUT_RING(ring, 
CP_TYPE7_PKT | (cnt << 0) | (PM4_PARITY(cnt) << 15) | ((opcode & 0x7F) << 16) | (PM4_PARITY(opcode) << 23)); } diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 2585bebfcf3e..bfb8c7cf800a 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -507,7 +507,7 @@ static void load_gpu(struct drm_device *dev) mutex_unlock(&init_lock); } -static int context_init(struct drm_file *file) +static int context_init(struct drm_device *dev, struct drm_file *file) { struct msm_file_private *ctx; @@ -515,7 +515,7 @@ static int context_init(struct drm_file *file) if (!ctx) return -ENOMEM; - msm_submitqueue_init(ctx); + msm_submitqueue_init(dev, ctx); file->driver_priv = ctx; @@ -529,7 +529,7 @@ static int msm_open(struct drm_device *dev, struct drm_file *file) */ load_gpu(dev); - return context_init(file); + return context_init(dev, file); } static void context_close(struct msm_file_private *ctx) @@ -743,16 +743,27 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data, struct msm_drm_private *priv = dev->dev_private; struct drm_msm_wait_fence *args = data; ktime_t timeout = to_ktime(args->timeout); + struct msm_gpu_submitqueue *queue; + struct msm_gpu *gpu = priv->gpu; + int ret; if (args->pad) { DRM_ERROR("invalid pad: %08x\n", args->pad); return -EINVAL; } - if (!priv->gpu) + if (!gpu) return 0; - return msm_wait_fence(priv->gpu->fctx, args->fence, &timeout, true); + queue = msm_submitqueue_get(file->driver_priv, args->queueid); + if (!queue) + return -ENOENT; + + ret = msm_wait_fence(gpu->rb[queue->prio]->fctx, args->fence, &timeout, + true); + + msm_submitqueue_put(queue); + return ret; } static int msm_ioctl_gem_madvise(struct drm_device *dev, void *data, @@ -802,7 +813,7 @@ static int msm_ioctl_submitqueue_new(struct drm_device *dev, void *data, if (args->flags & ~MSM_SUBMITQUEUE_FLAGS) return -EINVAL; - return msm_submitqueue_create(file->driver_priv, args->prio, + return msm_submitqueue_create(dev, 
file->driver_priv, args->prio, args->flags, &args->id); } diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index b3b8f20f58db..2821f572ecd8 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -74,6 +74,8 @@ struct msm_vblank_ctrl { spinlock_t lock; }; +#define MSM_GPU_MAX_RINGS 1 + struct msm_drm_private { struct drm_device *dev; @@ -318,11 +320,11 @@ void msm_writel(u32 data, void __iomem *addr); u32 msm_readl(const void __iomem *addr); struct msm_gpu_submitqueue; -int msm_submitqueue_init(struct msm_file_private *ctx); +int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx); struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx, u32 id); -int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio, - u32 flags, u32 *id); +int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx, + u32 prio, u32 flags, u32 *id); int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id); void msm_submitqueue_close(struct msm_file_private *ctx); diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c index a2f89bac9c16..349c12f670eb 100644 --- a/drivers/gpu/drm/msm/msm_fence.c +++ b/drivers/gpu/drm/msm/msm_fence.c @@ -31,7 +31,7 @@ msm_fence_context_alloc(struct drm_device *dev, const char *name) return ERR_PTR(-ENOMEM); fctx->dev = dev; - fctx->name = name; + strncpy(fctx->name, name, sizeof(fctx->name)); fctx->context = dma_fence_context_alloc(1); init_waitqueue_head(&fctx->event); spin_lock_init(&fctx->spinlock); diff --git a/drivers/gpu/drm/msm/msm_fence.h b/drivers/gpu/drm/msm/msm_fence.h index 56061aa1959d..1aa6a4c6530c 100644 --- a/drivers/gpu/drm/msm/msm_fence.h +++ b/drivers/gpu/drm/msm/msm_fence.h @@ -22,7 +22,7 @@ struct msm_fence_context { struct drm_device *dev; - const char *name; + char name[32]; unsigned context; /* last_fence == completed_fence --> no pending work */ uint32_t last_fence; /* last assigned 
fence */ diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 17f8a6c23464..9320e184b48d 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -138,13 +138,15 @@ void msm_gem_vunmap(struct drm_gem_object *obj, enum msm_gem_lock subclass); struct msm_gem_submit { struct drm_device *dev; struct msm_gpu *gpu; - struct list_head node; /* node in gpu submit_list */ + struct list_head node; /* node in ring submit list */ struct list_head bo_list; struct ww_acquire_ctx ticket; + uint32_t seqno; /* Sequence number of the submit on the ring */ struct dma_fence *fence; struct msm_gpu_submitqueue *queue; struct pid *pid; /* submitting process */ bool valid; /* true if no cmdstream patching needed */ + struct msm_ringbuffer *ring; unsigned int nr_cmds; unsigned int nr_bos; struct { diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index c196cc615e77..d3551aa130fb 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -51,6 +51,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, submit->pid = get_pid(task_pid(current)); submit->cmd = (void *)&submit->bos[nr_bos]; submit->queue = queue; + submit->ring = gpu->rb[queue->prio]; /* initially, until copy_from_user() and bo lookup succeeds: */ submit->nr_bos = 0; @@ -247,7 +248,8 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) if (no_implicit) continue; - ret = msm_gem_sync_object(&msm_obj->base, submit->gpu->fctx, write); + ret = msm_gem_sync_object(&msm_obj->base, submit->ring->fctx, + write); if (ret) break; } @@ -410,6 +412,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct dma_fence *in_fence = NULL; struct sync_file *sync_file = NULL; struct msm_gpu_submitqueue *queue; + struct msm_ringbuffer *ring; int out_fence_fd = -1; unsigned i; int ret; @@ -430,6 +433,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void 
*data, if (!queue) return -ENOENT; + ring = gpu->rb[queue->prio]; + if (args->flags & MSM_SUBMIT_FENCE_FD_IN) { in_fence = sync_file_get_fence(args->fence_fd); @@ -440,7 +445,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, * Wait if the fence is from a foreign context, or if the fence * array contains any fence from a foreign context. */ - if (!dma_fence_match_context(in_fence, gpu->fctx->context)) { + if (!dma_fence_match_context(in_fence, ring->fctx->context)) { ret = dma_fence_wait(in_fence, true); if (ret) return ret; @@ -543,8 +548,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, submit->nr_cmds = i; - submit->fence = msm_fence_alloc(gpu->fctx); - + submit->fence = msm_fence_alloc(ring->fctx); if (IS_ERR(submit->fence)) { ret = PTR_ERR(submit->fence); submit->fence = NULL; diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 0744837ed70f..ec28c99ee36e 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -221,6 +221,20 @@ int msm_gpu_hw_init(struct msm_gpu *gpu) * Hangcheck detection for locked gpu: */ +static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring, + uint32_t fence) +{ + struct msm_gem_submit *submit; + + list_for_each_entry(submit, &ring->submits, node) { + if (submit->seqno > fence) + break; + + msm_update_fence(submit->ring->fctx, + submit->fence->seqno); + } +} + static void retire_submits(struct msm_gpu *gpu); static void recover_worker(struct work_struct *work) @@ -228,15 +242,34 @@ static void recover_worker(struct work_struct *work) struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work); struct drm_device *dev = gpu->dev; struct msm_gem_submit *submit; - uint32_t fence = gpu->memptrs->fence; + struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu); + uint64_t fence; + int i; + + /* Update all the rings with the latest and greatest fence */ + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + struct msm_ringbuffer 
*ring = gpu->rb[i]; - msm_update_fence(gpu->fctx, fence + 1); + fence = ring->memptrs->fence; + + /* + * For the current (faulting?) ring/submit advance the fence by + * one more to clear the faulting submit + */ + if (ring == cur_ring) + fence = fence + 1; + + update_fences(gpu, ring, fence); + } mutex_lock(&dev->struct_mutex); + dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name); - list_for_each_entry(submit, &gpu->submit_list, node) { - if (submit->fence->seqno == (fence + 1)) { + fence = cur_ring->memptrs->fence + 1; + + list_for_each_entry(submit, &cur_ring->submits, node) { + if (submit->seqno == fence) { struct task_struct *task; rcu_read_lock(); @@ -258,9 +291,16 @@ static void recover_worker(struct work_struct *work) gpu->funcs->recover(gpu); pm_runtime_put_sync(&gpu->pdev->dev); - /* replay the remaining submits after the one that hung: */ - list_for_each_entry(submit, &gpu->submit_list, node) { - gpu->funcs->submit(gpu, submit, NULL); + /* + * Replay all remaining submits starting with highest priority + * ring + */ + + for (i = gpu->nr_rings - 1; i >= 0; i--) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + list_for_each_entry(submit, &ring->submits, node) + gpu->funcs->submit(gpu, submit, NULL); } } @@ -281,25 +321,27 @@ static void hangcheck_handler(unsigned long data) struct msm_gpu *gpu = (struct msm_gpu *)data; struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; - uint32_t fence = gpu->memptrs->fence; + struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); + uint32_t fence = ring->memptrs->fence; - if (fence != gpu->hangcheck_fence) { + if (fence != ring->hangcheck_fence) { /* some progress has been made.. ya! */ - gpu->hangcheck_fence = fence; - } else if (fence < gpu->fctx->last_fence) { + ring->hangcheck_fence = fence; + } else if (fence < ring->seqno) { /* no progress and not done.. hung! 
*/ - gpu->hangcheck_fence = fence; - dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n", - gpu->name); + ring->hangcheck_fence = fence; + dev_err(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n", + gpu->name, ring->id); dev_err(dev->dev, "%s: completed fence: %u\n", gpu->name, fence); dev_err(dev->dev, "%s: submitted fence: %u\n", - gpu->name, gpu->fctx->last_fence); + gpu->name, ring->seqno); + queue_work(priv->wq, &gpu->recover_work); } /* if still more pending work, reset the hangcheck timer: */ - if (gpu->fctx->last_fence > gpu->hangcheck_fence) + if (ring->seqno > ring->hangcheck_fence) hangcheck_timer_reset(gpu); /* workaround for missing irq: */ @@ -428,19 +470,18 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) static void retire_submits(struct msm_gpu *gpu) { struct drm_device *dev = gpu->dev; + struct msm_gem_submit *submit, *tmp; + int i; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - while (!list_empty(&gpu->submit_list)) { - struct msm_gem_submit *submit; - - submit = list_first_entry(&gpu->submit_list, - struct msm_gem_submit, node); + /* Retire the commits starting with highest priority */ + for (i = gpu->nr_rings - 1; i >= 0; i--) { + struct msm_ringbuffer *ring = gpu->rb[i]; - if (dma_fence_is_signaled(submit->fence)) { - retire_submit(gpu, submit); - } else { - break; + list_for_each_entry_safe(submit, tmp, &ring->submits, node) { + if (dma_fence_is_signaled(submit->fence)) + retire_submit(gpu, submit); } } } @@ -449,9 +490,10 @@ static void retire_worker(struct work_struct *work) { struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work); struct drm_device *dev = gpu->dev; - uint32_t fence = gpu->memptrs->fence; + int i; - msm_update_fence(gpu->fctx, fence); + for (i = 0; i < gpu->nr_rings; i++) + update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence); mutex_lock(&dev->struct_mutex); retire_submits(gpu); @@ -472,6 +514,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct 
msm_gem_submit *submit, { struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; + struct msm_ringbuffer *ring = submit->ring; int i; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); @@ -480,7 +523,9 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, msm_gpu_hw_init(gpu); - list_add_tail(&submit->node, &gpu->submit_list); + submit->seqno = ++ring->seqno; + + list_add_tail(&submit->node, &ring->submits); msm_rd_dump_submit(submit); @@ -605,7 +650,9 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, const char *name, struct msm_gpu_config *config) { - int ret; + int i, ret, nr_rings = config->nr_rings; + void *memptrs; + uint64_t memptrs_iova; if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs))) gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs); @@ -613,18 +660,11 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, gpu->dev = drm; gpu->funcs = funcs; gpu->name = name; - gpu->fctx = msm_fence_context_alloc(drm, name); - if (IS_ERR(gpu->fctx)) { - ret = PTR_ERR(gpu->fctx); - gpu->fctx = NULL; - goto fail; - } INIT_LIST_HEAD(&gpu->active_list); INIT_WORK(&gpu->retire_work, retire_worker); INIT_WORK(&gpu->recover_work, recover_worker); - INIT_LIST_HEAD(&gpu->submit_list); setup_timer(&gpu->hangcheck_timer, hangcheck_handler, (unsigned long)gpu); @@ -689,29 +729,47 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, goto fail; } - gpu->memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo), + memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo), MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo, - &gpu->memptrs_iova); + &memptrs_iova); - if (IS_ERR(gpu->memptrs)) { - ret = PTR_ERR(gpu->memptrs); - gpu->memptrs = NULL; + if (IS_ERR(memptrs)) { + ret = PTR_ERR(memptrs); dev_err(drm->dev, "could not allocate memptrs: %d\n", ret); goto fail; } - /* Create ringbuffer: */ - gpu->rb = 
msm_ringbuffer_new(gpu, config->ringsz); - if (IS_ERR(gpu->rb)) { - ret = PTR_ERR(gpu->rb); - gpu->rb = NULL; - dev_err(drm->dev, "could not create ringbuffer: %d\n", ret); - goto fail; + if (nr_rings > ARRAY_SIZE(gpu->rb)) { + DRM_DEV_INFO_ONCE(drm->dev, "Only creating %lu ringbuffers\n", + ARRAY_SIZE(gpu->rb)); + nr_rings = ARRAY_SIZE(gpu->rb); } + /* Create ringbuffer(s): */ + for (i = 0; i < nr_rings; i++) { + gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova); + + if (IS_ERR(gpu->rb[i])) { + ret = PTR_ERR(gpu->rb[i]); + dev_err(drm->dev, + "could not create ringbuffer %d: %d\n", i, ret); + goto fail; + } + + memptrs += sizeof(struct msm_rbmemptrs); + memptrs_iova += sizeof(struct msm_rbmemptrs); + } + + gpu->nr_rings = nr_rings; + return 0; fail: + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + msm_ringbuffer_destroy(gpu->rb[i]); + gpu->rb[i] = NULL; + } + if (gpu->memptrs_bo) { msm_gem_put_vaddr(gpu->memptrs_bo); msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace); @@ -724,16 +782,17 @@ fail: void msm_gpu_cleanup(struct msm_gpu *gpu) { + int i; + DBG("%s", gpu->name); WARN_ON(!list_empty(&gpu->active_list)); bs_fini(gpu); - if (gpu->rb) { - if (gpu->rb_iova) - msm_gem_put_iova(gpu->rb->bo, gpu->aspace); - msm_ringbuffer_destroy(gpu->rb); + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + msm_ringbuffer_destroy(gpu->rb[i]); + gpu->rb[i] = NULL; } if (gpu->memptrs_bo) { diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 8ddda059de34..1be0317bb2c0 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -33,7 +33,7 @@ struct msm_gpu_config { const char *irqname; uint64_t va_start; uint64_t va_end; - unsigned int ringsz; + unsigned int nr_rings; }; /* So far, with hardware that I've seen to date, we can have: @@ -57,8 +57,9 @@ struct msm_gpu_funcs { int (*pm_resume)(struct msm_gpu *gpu); void (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx); - void (*flush)(struct 
msm_gpu *gpu); + void (*flush)(struct msm_gpu *gpu, struct msm_ringbuffer *ring); irqreturn_t (*irq)(struct msm_gpu *irq); + struct msm_ringbuffer *(*active_ring)(struct msm_gpu *gpu); void (*recover)(struct msm_gpu *gpu); void (*destroy)(struct msm_gpu *gpu); #ifdef CONFIG_DEBUG_FS @@ -67,14 +68,6 @@ struct msm_gpu_funcs { #endif }; -#define rbmemptr(gpu, member) \ - ((gpu)->memptrs_iova + offsetof(struct msm_rbmemptrs, member)) - -struct msm_rbmemptrs { - volatile uint32_t rptr; - volatile uint32_t fence; -}; - struct msm_gpu { const char *name; struct drm_device *dev; @@ -93,16 +86,12 @@ struct msm_gpu { const struct msm_gpu_perfcntr *perfcntrs; uint32_t num_perfcntrs; - /* ringbuffer: */ - struct msm_ringbuffer *rb; - uint64_t rb_iova; + struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS]; + int nr_rings; /* list of GEM active objects: */ struct list_head active_list; - /* fencing: */ - struct msm_fence_context *fctx; - /* does gpu need hw_init? */ bool needs_hw_init; @@ -133,21 +122,26 @@ struct msm_gpu { #define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */ #define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD) struct timer_list hangcheck_timer; - uint32_t hangcheck_fence; struct work_struct recover_work; - struct list_head submit_list; - - struct msm_rbmemptrs *memptrs; struct drm_gem_object *memptrs_bo; - uint64_t memptrs_iova; - - }; +/* It turns out that all targets use the same ringbuffer size */ +#define MSM_GPU_RINGBUFFER_SZ SZ_32K + static inline bool msm_gpu_active(struct msm_gpu *gpu) { - return gpu->fctx->last_fence > gpu->memptrs->fence; + int i; + + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + if (ring->seqno > ring->memptrs->fence) + return true; + } + + return false; } /* Perf-Counters: diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index bf065a540130..4db6ca719706 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ 
b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -18,13 +18,15 @@ #include "msm_ringbuffer.h" #include "msm_gpu.h" -struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) +struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, + void *memptrs, uint64_t memptrs_iova) { struct msm_ringbuffer *ring; + char name[32]; int ret; - if (WARN_ON(!is_power_of_2(size))) - return ERR_PTR(-EINVAL); + /* We assume everywhere that MSM_GPU_RINGBUFFER_SZ is a power of 2 */ + BUILD_BUG_ON(!is_power_of_2(MSM_GPU_RINGBUFFER_SZ)); ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (!ring) { @@ -33,32 +35,44 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) } ring->gpu = gpu; - + ring->id = id; /* Pass NULL for the iova pointer - we will map it later */ - ring->start = msm_gem_kernel_new(gpu->dev, size, MSM_BO_WC, - gpu->aspace, &ring->bo, NULL); + ring->start = msm_gem_kernel_new(gpu->dev, MSM_GPU_RINGBUFFER_SZ, + MSM_BO_WC, gpu->aspace, &ring->bo, NULL); if (IS_ERR(ring->start)) { ret = PTR_ERR(ring->start); ring->start = 0; goto fail; } - ring->end = ring->start + (size / 4); + ring->end = ring->start + (MSM_GPU_RINGBUFFER_SZ >> 2); ring->cur = ring->start; - ring->size = size; + ring->memptrs = memptrs; + ring->memptrs_iova = memptrs_iova; + + INIT_LIST_HEAD(&ring->submits); + + snprintf(name, sizeof(name), "gpu-ring-%d", ring->id); + + ring->fctx = msm_fence_context_alloc(gpu->dev, name); return ring; fail: - if (ring) - msm_ringbuffer_destroy(ring); + msm_ringbuffer_destroy(ring); return ERR_PTR(ret); } void msm_ringbuffer_destroy(struct msm_ringbuffer *ring) { + if (IS_ERR_OR_NULL(ring)) + return; + + msm_fence_context_free(ring->fctx); + if (ring->bo) { + msm_gem_put_iova(ring->bo, ring->gpu->aspace); msm_gem_put_vaddr(ring->bo); drm_gem_object_unreference_unlocked(ring->bo); } diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h index 6e0e1049fa4f..ec44251ef9f2 100644 ---
a/drivers/gpu/drm/msm/msm_ringbuffer.h +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h @@ -20,14 +20,30 @@ #include "msm_drv.h" +#define rbmemptr(ring, member) \ + ((ring)->memptrs_iova + offsetof(struct msm_rbmemptrs, member)) + +struct msm_rbmemptrs { + volatile uint32_t rptr; + volatile uint32_t fence; +}; + struct msm_ringbuffer { struct msm_gpu *gpu; - int size; + int id; struct drm_gem_object *bo; uint32_t *start, *end, *cur; + struct list_head submits; + uint64_t iova; + uint32_t seqno; + uint32_t hangcheck_fence; + struct msm_rbmemptrs *memptrs; + uint64_t memptrs_iova; + struct msm_fence_context *fctx; }; -struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size); +struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, + void *memptrs, uint64_t memptrs_iova); void msm_ringbuffer_destroy(struct msm_ringbuffer *ring); /* ringbuffer helpers (the parts that are same for a3xx/a2xx/z180..) */ diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c index 593c3b5f44cd..5115f75b5b7f 100644 --- a/drivers/gpu/drm/msm/msm_submitqueue.c +++ b/drivers/gpu/drm/msm/msm_submitqueue.c @@ -60,9 +60,10 @@ void msm_submitqueue_close(struct msm_file_private *ctx) msm_submitqueue_put(entry); } -int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio, u32 flags, - u32 *id) +int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx, + u32 prio, u32 flags, u32 *id) { + struct msm_drm_private *priv = drm->dev_private; struct msm_gpu_submitqueue *queue; if (!ctx) @@ -75,7 +76,13 @@ int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio, u32 flags, kref_init(&queue->ref); queue->flags = flags; - queue->prio = prio; + + if (priv->gpu) { + if (prio >= priv->gpu->nr_rings) + return -EINVAL; + + queue->prio = prio; + } write_lock(&ctx->queuelock); @@ -91,16 +98,26 @@ int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio, u32 flags, return 0; } -int 
msm_submitqueue_init(struct msm_file_private *ctx) +int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx) { + struct msm_drm_private *priv = drm->dev_private; + int default_prio; + if (!ctx) return 0; + /* + * Select priority 2 as the "default priority" unless nr_rings is less + * than 2 and then pick the lowest priority + */ + default_prio = priv->gpu ? + clamp_t(uint32_t, 2, 0, priv->gpu->nr_rings - 1) : 0; + INIT_LIST_HEAD(&ctx->submitqueues); rwlock_init(&ctx->queuelock); - return msm_submitqueue_create(ctx, 2, 0, NULL); + return msm_submitqueue_create(drm, ctx, default_prio, 0, NULL); } int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id) |