diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 278 |
1 files changed, 213 insertions, 65 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index b61d1ba1aa9d..10c2d0839e26 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -34,6 +34,8 @@ #include "vcn/vcn_3_0_0_sh_mask.h" #include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" +#include <drm/drm_drv.h> + #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f #define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x10 @@ -50,6 +52,9 @@ #define VCN_INSTANCES_SIENNA_CICHLID 2 #define DEC_SW_RING_ENABLED FALSE +#define RDECODE_MSG_CREATE 0x00000000 +#define RDECODE_MESSAGE_CREATE 0x00000001 + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -88,6 +93,11 @@ static int vcn_v3_0_early_init(void *handle) adev->vcn.harvest_config = 0; adev->vcn.num_enc_rings = 1; + if (adev->asic_type == CHIP_BEIGE_GOBY) { + adev->vcn.num_vcn_inst = 1; + adev->vcn.num_enc_rings = 0; + } + } else { if (adev->asic_type == CHIP_SIENNA_CICHLID) { u32 harvest; @@ -107,7 +117,10 @@ static int vcn_v3_0_early_init(void *handle) } else adev->vcn.num_vcn_inst = 1; - adev->vcn.num_enc_rings = 2; + if (adev->asic_type == CHIP_BEIGE_GOBY) + adev->vcn.num_enc_rings = 0; + else + adev->vcn.num_enc_rings = 2; } vcn_v3_0_set_dec_ring_funcs(adev); @@ -171,6 +184,7 @@ static int vcn_v3_0_sw_init(void *handle) for (i = 0; i < adev->vcn.num_vcn_inst; i++) { volatile struct amdgpu_fw_shared *fw_shared; + if (adev->vcn.harvest_config & (1 << i)) continue; @@ -198,6 +212,8 @@ static int vcn_v3_0_sw_init(void *handle) if (r) return r; + atomic_set(&adev->vcn.inst[i].sched_score, 0); + ring = &adev->vcn.inst[i].ring_dec; ring->use_doorbell = true; if (amdgpu_sriov_vf(adev)) { @@ -205,11 +221,10 @@ static int vcn_v3_0_sw_init(void *handle) } else { ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i; } - if (adev->asic_type == CHIP_SIENNA_CICHLID && i != 0) - ring->no_scheduler = true; sprintf(ring->name, "vcn_dec_%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, - AMDGPU_RING_PRIO_DEFAULT); + AMDGPU_RING_PRIO_DEFAULT, + &adev->vcn.inst[i].sched_score); if (r) return r; @@ -227,11 +242,10 @@ static int vcn_v3_0_sw_init(void *handle) } else { ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i; } - if (adev->asic_type == CHIP_SIENNA_CICHLID && i != 1) - ring->no_scheduler = true; sprintf(ring->name, "vcn_enc_%d.%d", i, j); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, - AMDGPU_RING_PRIO_DEFAULT); + AMDGPU_RING_PRIO_DEFAULT, + &adev->vcn.inst[i].sched_score); if (r) return r; } @@ -264,16 +278,20 @@ static int vcn_v3_0_sw_init(void *handle) static int vcn_v3_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i, r; + int i, r, idx; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_fw_shared *fw_shared; + if (drm_dev_enter(&adev->ddev, &idx)) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_fw_shared *fw_shared; - if (adev->vcn.harvest_config & (1 << i)) - continue; - fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; - fw_shared->present_flag_0 = 0; - fw_shared->sw_ring.is_enabled = false; + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; + fw_shared->present_flag_0 = 0; + fw_shared->sw_ring.is_enabled = false; + } + + drm_dev_exit(idx); } if (amdgpu_sriov_vf(adev)) @@ -369,7 +387,7 @@ static int vcn_v3_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; - int i, j; + int i; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) @@ -384,12 +402,6 @@ static int vcn_v3_0_hw_fini(void *handle) vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE); } } - ring->sched.ready = false; - - for (j = 0; j < adev->vcn.num_enc_rings; ++j) { - ring = &adev->vcn.inst[i].ring_enc[j]; - ring->sched.ready = false; - } } return 0; @@ -585,6 +597,10 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0), AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect); + + /* VCN global tiling registers */ + WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( + UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); } static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst) @@ -1253,23 +1269,25 @@ static int vcn_v3_0_start(struct amdgpu_device *adev) fw_shared->rb.wptr = lower_32_bits(ring->wptr); fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); - fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); - ring = &adev->vcn.inst[i].ring_enc[0]; - WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4); - fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); - - fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); - ring = &adev->vcn.inst[i].ring_enc[1]; - WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); - WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4); - fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + if (adev->asic_type != CHIP_BEIGE_GOBY) { + fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); + ring = &adev->vcn.inst[i].ring_enc[0]; + WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4); + fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + + fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); + ring = &adev->vcn.inst[i].ring_enc[1]; + WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4); + fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + } } return 0; @@ -1649,31 +1667,33 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); - /* Restore */ - fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr; - fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); - ring = &adev->vcn.inst[inst_idx].ring_enc[0]; - ring->wptr = 0; - WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4); - WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); - fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); - - fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); - ring = &adev->vcn.inst[inst_idx].ring_enc[1]; - ring->wptr = 0; - WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr); - WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4); - WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); - - /* restore wptr/rptr with pointers saved in FW shared memory*/ - WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, fw_shared->rb.rptr); - WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, fw_shared->rb.wptr); + if (adev->asic_type != CHIP_BEIGE_GOBY) { + /* Restore */ + fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr; + fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + ring->wptr = 0; + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + + fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); + ring = &adev->vcn.inst[inst_idx].ring_enc[1]; + ring->wptr = 0; + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + + /* restore wptr/rptr with pointers saved in FW shared memory*/ + WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, fw_shared->rb.rptr); + WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, fw_shared->rb.wptr); + } /* Unstall DPG */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), @@ -1844,6 +1864,132 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = { .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; +static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p) +{ + struct drm_gpu_scheduler **scheds; + + /* The create msg must be in the first IB submitted */ + if (atomic_read(&p->entity->fence_seq)) + return -EINVAL; + + scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC] + [AMDGPU_RING_PRIO_DEFAULT].sched; + drm_sched_entity_modify_sched(p->entity, scheds, 1); + return 0; +} + +static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) +{ + struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_bo_va_mapping *map; + uint32_t *msg, num_buffers; + struct amdgpu_bo *bo; + uint64_t start, end; + unsigned int i; + void * ptr; + int r; + + addr &= AMDGPU_GMC_HOLE_MASK; + r = amdgpu_cs_find_mapping(p, addr, &bo, &map); + if (r) { + DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr); + return r; + } + + start = map->start * AMDGPU_GPU_PAGE_SIZE; + end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE; + if (addr & 0x7) { + DRM_ERROR("VCN messages must be 8 byte aligned!\n"); + return -EINVAL; + } + + bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) { + DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r); + return r; + } + + r = amdgpu_bo_kmap(bo, &ptr); + if (r) { + DRM_ERROR("Failed mapping the VCN message (%d)!\n", r); + return r; + } + + msg = ptr + addr - start; + + /* Check length */ + if (msg[1] > end - addr) { + r = -EINVAL; + goto out; + } + + if (msg[3] != RDECODE_MSG_CREATE) + goto out; + + num_buffers = msg[2]; + for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) { + uint32_t offset, size, *create; + + if (msg[0] != RDECODE_MESSAGE_CREATE) + continue; + + offset = msg[1]; + size = msg[2]; + + if (offset + size > end) { + r = -EINVAL; + goto out; + } + + create = ptr + addr + offset - start; + + /* H246, HEVC and VP9 can run on any instance */ + if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) + continue; + + r = vcn_v3_0_limit_sched(p); + if (r) + goto out; + } + +out: + amdgpu_bo_kunmap(bo); + return r; +} + +static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, + uint32_t ib_idx) +{ + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); + struct amdgpu_ib *ib = &p->job->ibs[ib_idx]; + uint32_t msg_lo = 0, msg_hi = 0; + unsigned i; + int r; + + /* The first instance can decode anything */ + if (!ring->me) + return 0; + + for (i = 0; i < ib->length_dw; i += 2) { + uint32_t reg = amdgpu_get_ib_value(p, ib_idx, i); + uint32_t val = amdgpu_get_ib_value(p, ib_idx, i + 1); + + if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) { + msg_lo = val; + } else if (reg == PACKET0(p->adev->vcn.internal.data1, 0)) { + msg_hi = val; + } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) && + val == 0) { + r = vcn_v3_0_dec_msg(p, ((u64)msg_hi) << 32 | msg_lo); + if (r) + return r; + } + } + return 0; +} + static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, @@ -1851,6 +1997,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = { .get_rptr = vcn_v3_0_dec_ring_get_rptr, .get_wptr = vcn_v3_0_dec_ring_get_wptr, .set_wptr = vcn_v3_0_dec_ring_set_wptr, + .patch_cs_in_place = vcn_v3_0_ring_patch_cs_in_place, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -2003,7 +2150,8 @@ static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev) adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v3_0_enc_ring_vm_funcs; adev->vcn.inst[i].ring_enc[j].me = i; } - DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i); + if (adev->vcn.num_enc_rings > 0) + DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i); } } |