Diffstat (limited to 'drivers/gpu/drm/amd')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Kconfig1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aldebaran.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c26
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c215
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c80
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c102
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c212
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c60
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c345
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c77
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c81
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c113
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c229
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h26
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_ih.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_sdma.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cz_ih.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v10_0.c31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.c31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v6_0.c31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v8_0.c30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c47
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h56
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm124
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c47
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c56
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c59
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c59
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/iceland_ih.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_0.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_1.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v7_0.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v11_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v12_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c104
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c708
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v12_0.c136
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_ih.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c81
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c116
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c299
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dma.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_ih.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc24.c27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ta_ras_if.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/tonga_ih.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v12_0.c267
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v12_0.h17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_14.c160
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_14.h51
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v2_0.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v4_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c121
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c79
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c1118
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_ih.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega20_ih.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h491
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm202
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm1126
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm58
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c31
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.c17
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.h1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c138
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c18
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c11
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c64
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_migrate.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c13
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c41
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h14
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c31
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c25
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_queue.c10
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c22
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c5
-rw-r--r--drivers/gpu/drm/amd/display/Kconfig2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c223
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h16
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c564
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h55
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c4
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c136
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c19
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h4
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c59
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c69
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h5
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c35
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c140
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c243
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h41
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c311
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c37
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c141
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c38
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_resource.c58
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_state.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_stream.c13
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_surface.c53
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h52
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c37
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dp_types.h16
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_dsc.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_hw_types.h179
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_plane.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_spl_translate.c19
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_stream.h17
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_types.h35
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c44
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c201
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c124
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c34
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c80
-rw-r--r--drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h18
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_helpers.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c132
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/Makefile19
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c42
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h109
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c162
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c209
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h401
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h52
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h77
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c42
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c1412
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h135
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c225
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c59
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c12
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c428
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c50
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c10
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.h9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c307
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h33
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c1178
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.h (renamed from drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h)19
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c549
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h46
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h58
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c54
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c49
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.c12
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c45
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c13
-rw-r--r--drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c19
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c19
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c21
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c131
-rw-r--r--drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h64
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c257
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h17
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c12
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c16
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c53
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c841
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h11
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c11
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h22
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/core_types.h11
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h59
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h16
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h8
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h6
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/optc.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/link.h4
-rw-r--r--drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c61
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_detection.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_dpms.c50
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_factory.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_validation.c179
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/link_validation.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c36
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c18
-rw-r--r--drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h7
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c120
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h19
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c101
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c43
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c11
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c12
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c62
-rw-r--r--drivers/gpu/drm/amd/display/dc/spl/dc_spl.c177
-rw-r--r--drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h16
-rw-r--r--drivers/gpu/drm/amd/display/dmub/dmub_srv.h3
-rw-r--r--drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h219
-rw-r--r--drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c4
-rw-r--r--drivers/gpu/drm/amd/display/modules/freesync/freesync.c13
-rw-r--r--drivers/gpu/drm/amd/display/modules/power/power_helpers.c9
-rw-r--r--drivers/gpu/drm/amd/display/modules/power/power_helpers.h3
-rw-r--r--drivers/gpu/drm/amd/include/amd_shared.h9
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_offset.h (renamed from drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_offset.h)4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_sh_mask.h (renamed from drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h)4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_offset.h29
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_sh_mask.h37
-rw-r--r--drivers/gpu/drm/amd/include/atomfirmware.h17
-rw-r--r--drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h47
-rw-r--r--drivers/gpu/drm/amd/include/kgd_pp_interface.h5
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_dpm.c108
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_pm.c6
-rw-r--r--drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h4
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c8
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c7
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c8
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c2
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c5
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c267
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h25
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h14
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h4
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h4
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c167
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c170
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c191
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c3
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c41
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c43
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c31
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c187
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c64
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c141
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c38
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c167
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c33
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h6
424 files changed, 16669 insertions, 6499 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 41fa3377d9cf..1a11cab741ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -26,6 +26,7 @@ config DRM_AMDGPU
select DRM_BUDDY
select DRM_SUBALLOC_HELPER
select DRM_EXEC
+ select DRM_PANEL_BACKLIGHT_QUIRKS
# amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select to work
# ACPI_VIDEO's dependencies must also be selected.
select INPUT if ACPI
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index c7b18c52825d..5b21674b07fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -1,5 +1,5 @@
#
-# Copyright 2017 Advanced Micro Devices, Inc.
+# Copyright 2017-2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
@@ -105,7 +105,7 @@ amdgpu-y += \
# add UMC block
amdgpu-y += \
- umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o umc_v12_0.o
+ umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o umc_v12_0.o umc_v8_14.o
# add IH block
amdgpu-y += \
@@ -200,6 +200,7 @@ amdgpu-y += \
vcn_v4_0_3.o \
vcn_v4_0_5.o \
vcn_v5_0_0.o \
+ vcn_v5_0_1.o \
amdgpu_jpeg.o \
jpeg_v1_0.o \
jpeg_v2_0.o \
@@ -208,7 +209,8 @@ amdgpu-y += \
jpeg_v4_0.o \
jpeg_v4_0_3.o \
jpeg_v4_0_5.o \
- jpeg_v5_0_0.o
+ jpeg_v5_0_0.o \
+ jpeg_v5_0_1.o
# add VPE block
amdgpu-y += \
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
index f44de9d4b6a1..e13fbd974141 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -334,6 +334,8 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
AMDGPU_INIT_LEVEL_RESET_RECOVERY);
dev_info(tmp_adev->dev,
"GPU reset succeeded, trying to resume\n");
+ /*TBD: Ideally should clear only GFX, SDMA blocks*/
+ amdgpu_ras_clear_err_state(tmp_adev);
r = aldebaran_mode2_restore_ip(tmp_adev);
if (r)
goto end;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4653a8d2823a..69895fccb474 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -880,6 +880,7 @@ struct amdgpu_device {
bool need_swiotlb;
bool accel_working;
struct notifier_block acpi_nb;
+ struct notifier_block pm_nb;
struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS];
struct debugfs_blob_wrapper debugfs_vbios_blob;
struct debugfs_blob_wrapper debugfs_discovery_blob;
@@ -1174,7 +1175,6 @@ struct amdgpu_device {
struct work_struct reset_work;
- bool job_hang;
bool dc_enabled;
/* Mask of active clusters */
uint32_t aid_mask;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
index 5ef6b745f222..f3289d289913 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
@@ -71,6 +71,11 @@ struct ras_query_context;
#define ACA_ERROR_CE_MASK BIT_MASK(ACA_ERROR_TYPE_CE)
#define ACA_ERROR_DEFERRED_MASK BIT_MASK(ACA_ERROR_TYPE_DEFERRED)
+#define mmSMNAID_AID0_MCA_SMU 0x03b30400 /* SMN AID AID0 */
+#define mmSMNAID_XCD0_MCA_SMU 0x36430400 /* SMN AID XCD0 */
+#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */
+#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */
+
enum aca_reg_idx {
ACA_REG_IDX_CTL = 0,
ACA_REG_IDX_STATUS = 1,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index ec5e0dcf8613..deb0785350e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -140,7 +140,7 @@ static int acp_poweroff(struct generic_pm_domain *genpd)
* 2. power off the acp tiles
* 3. check and enter ulv state
*/
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0);
return 0;
}
@@ -157,7 +157,7 @@ static int acp_poweron(struct generic_pm_domain *genpd)
* 2. turn on acp clock
* 3. power on acp tiles
*/
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0);
return 0;
}
@@ -236,7 +236,7 @@ static int acp_hw_init(struct amdgpu_ip_block *ip_block)
ip_block->version->major, ip_block->version->minor);
/* -ENODEV means board uses AZ rather than ACP */
if (r == -ENODEV) {
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0);
return 0;
} else if (r) {
return r;
@@ -508,7 +508,7 @@ static int acp_hw_fini(struct amdgpu_ip_block *ip_block)
/* return early if no ACP */
if (!adev->acp.acp_genpd) {
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0);
return 0;
}
@@ -565,7 +565,7 @@ static int acp_suspend(struct amdgpu_ip_block *ip_block)
/* power up on suspend */
if (!adev->acp.acp_cell)
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0);
return 0;
}
@@ -575,7 +575,7 @@ static int acp_resume(struct amdgpu_ip_block *ip_block)
/* power down again on resume */
if (!adev->acp.acp_cell)
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0);
return 0;
}
@@ -584,19 +584,19 @@ static bool acp_is_idle(void *handle)
return true;
}
-static int acp_set_clockgating_state(void *handle,
+static int acp_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int acp_set_powergating_state(void *handle,
+static int acp_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable, 0);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 3afcd1e8aa54..2c1b38c5cfc6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -368,7 +368,7 @@ void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj)
{
struct amdgpu_bo **bo = (struct amdgpu_bo **) mem_obj;
- amdgpu_bo_reserve(*bo, true);
+ (void)amdgpu_bo_reserve(*bo, true);
amdgpu_bo_kunmap(*bo);
amdgpu_bo_unpin(*bo);
amdgpu_bo_unreserve(*bo);
@@ -715,8 +715,9 @@ err:
void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
{
enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE;
- if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 &&
- ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) {
+ if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 &&
+ ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) ||
+ (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 12)) {
pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled");
amdgpu_gfx_off_ctrl(adev, idle);
} else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) &&
@@ -724,7 +725,9 @@ void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
/* Disable GFXOFF and PG. Temporary workaround
* to fix some compute applications issue on GFX9.
*/
- adev->ip_blocks[AMD_IP_BLOCK_TYPE_GFX].version->funcs->set_powergating_state((void *)adev, state);
+ struct amdgpu_ip_block *gfx_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+ if (gfx_block != NULL)
+ gfx_block->version->funcs->set_powergating_state((void *)gfx_block, state);
}
amdgpu_dpm_switch_power_profile(adev,
PP_SMC_POWER_PROFILE_COMPUTE,
@@ -834,7 +837,7 @@ int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
- if (!kiq_ring->sched.ready || adev->job_hang)
+ if (!kiq_ring->sched.ready || amdgpu_in_reset(adev))
return 0;
ring_funcs = kzalloc(sizeof(*ring_funcs), GFP_KERNEL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 4b80ad860639..8af67f18500a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -433,6 +433,9 @@ void kgd2kfd_unlock_kfd(void);
int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id);
int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id);
bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id);
+bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
+ bool retry_fault);
+
#else
static inline int kgd2kfd_init(void)
{
@@ -518,5 +521,12 @@ static inline bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id)
{
return false;
}
+
+static inline bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
+ bool retry_fault)
+{
+ return false;
+}
+
#endif
#endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index cc66ebb7bae1..441568163e20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -1131,6 +1131,9 @@ uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
uint32_t low, high;
uint64_t queue_addr = 0;
+ if (!amdgpu_gpu_recovery)
+ return 0;
+
kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
@@ -1179,6 +1182,9 @@ uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev,
uint32_t low, high, pipe_reset_data = 0;
uint64_t queue_addr = 0;
+ if (!amdgpu_gpu_recovery)
+ return 0;
+
kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index f30548f4c3b3..1e998f972c30 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -730,7 +730,7 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
return;
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
- ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ (void)ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
sg_free_table(ttm->sg);
@@ -779,7 +779,7 @@ kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem,
}
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
- ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ (void)ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
@@ -989,7 +989,7 @@ unwind:
if (!attachment[i])
continue;
if (attachment[i]->bo_va) {
- amdgpu_bo_reserve(bo[i], true);
+ (void)amdgpu_bo_reserve(bo[i], true);
if (--attachment[i]->bo_va->ref_count == 0)
amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
amdgpu_bo_unreserve(bo[i]);
@@ -1259,11 +1259,11 @@ static int unmap_bo_from_gpuvm(struct kgd_mem *mem,
return -EBUSY;
}
- amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
+ (void)amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
- amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
+ (void)amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
- amdgpu_sync_fence(sync, bo_va->last_pt_update);
+ (void)amdgpu_sync_fence(sync, bo_va->last_pt_update);
return 0;
}
@@ -2352,7 +2352,7 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
{
struct amdgpu_bo *bo = mem->bo;
- amdgpu_bo_reserve(bo, true);
+ (void)amdgpu_bo_reserve(bo, true);
amdgpu_bo_kunmap(bo);
amdgpu_bo_unpin(bo);
amdgpu_bo_unreserve(bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 45affc02548c..423fd2eebe1e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -47,35 +47,37 @@
/* Check if current bios is an ATOM BIOS.
* Return true if it is ATOM BIOS. Otherwise, return false.
*/
-static bool check_atom_bios(uint8_t *bios, size_t size)
+static bool check_atom_bios(struct amdgpu_device *adev, size_t size)
{
uint16_t tmp, bios_header_start;
+ uint8_t *bios = adev->bios;
if (!bios || size < 0x49) {
- DRM_INFO("vbios mem is null or mem size is wrong\n");
+ dev_dbg(adev->dev, "VBIOS mem is null or mem size is wrong\n");
return false;
}
if (!AMD_IS_VALID_VBIOS(bios)) {
- DRM_INFO("BIOS signature incorrect %x %x\n", bios[0], bios[1]);
+ dev_dbg(adev->dev, "VBIOS signature incorrect %x %x\n", bios[0],
+ bios[1]);
return false;
}
bios_header_start = bios[0x48] | (bios[0x49] << 8);
if (!bios_header_start) {
- DRM_INFO("Can't locate bios header\n");
+ dev_dbg(adev->dev, "Can't locate VBIOS header\n");
return false;
}
tmp = bios_header_start + 4;
if (size < tmp) {
- DRM_INFO("BIOS header is broken\n");
+ dev_dbg(adev->dev, "VBIOS header is broken\n");
return false;
}
if (!memcmp(bios + tmp, "ATOM", 4) ||
!memcmp(bios + tmp, "MOTA", 4)) {
- DRM_DEBUG("ATOMBIOS detected\n");
+ dev_dbg(adev->dev, "ATOMBIOS detected\n");
return true;
}
@@ -118,7 +120,7 @@ static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
memcpy_fromio(adev->bios, bios, size);
iounmap(bios);
- if (!check_atom_bios(adev->bios, size)) {
+ if (!check_atom_bios(adev, size)) {
kfree(adev->bios);
return false;
}
@@ -146,7 +148,7 @@ bool amdgpu_read_bios(struct amdgpu_device *adev)
memcpy_fromio(adev->bios, bios, size);
pci_unmap_rom(adev->pdev, bios);
- if (!check_atom_bios(adev->bios, size)) {
+ if (!check_atom_bios(adev, size)) {
kfree(adev->bios);
return false;
}
@@ -186,7 +188,7 @@ static bool amdgpu_read_bios_from_rom(struct amdgpu_device *adev)
/* read complete BIOS */
amdgpu_asic_read_bios_from_rom(adev, adev->bios, len);
- if (!check_atom_bios(adev->bios, len)) {
+ if (!check_atom_bios(adev, len)) {
kfree(adev->bios);
return false;
}
@@ -216,7 +218,7 @@ static bool amdgpu_read_platform_bios(struct amdgpu_device *adev)
memcpy_fromio(adev->bios, bios, romlen);
iounmap(bios);
- if (!check_atom_bios(adev->bios, romlen))
+ if (!check_atom_bios(adev, romlen))
goto free_bios;
adev->bios_size = romlen;
@@ -324,7 +326,7 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
break;
}
- if (!check_atom_bios(adev->bios, size)) {
+ if (!check_atom_bios(adev, size)) {
kfree(adev->bios);
return false;
}
@@ -389,7 +391,7 @@ static bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
vhdr->ImageLength,
GFP_KERNEL);
- if (!check_atom_bios(adev->bios, vhdr->ImageLength)) {
+ if (!check_atom_bios(adev, vhdr->ImageLength)) {
kfree(adev->bios);
return false;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 16153d275d7a..68bce6a6d09d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -414,7 +414,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
return -EINVAL;
}
- err = amdgpu_ucode_request(adev, &adev->pm.fw, "%s", fw_name);
+ err = amdgpu_ucode_request(adev, &adev->pm.fw,
+ AMDGPU_UCODE_REQUIRED,
+ "%s", fw_name);
if (err) {
DRM_ERROR("Failed to load firmware \"%s\"", fw_name);
amdgpu_ucode_release(&adev->pm.fw);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index d891ab779ca7..5cc5f59e3018 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1105,7 +1105,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
* We can't use gang submit on with reserved VMIDs when the VM changes
* can't be invalidated by more than one engine at the same time.
*/
- if (p->gang_size > 1 && !p->adev->vm_manager.concurrent_flush) {
+ if (p->gang_size > 1 && !adev->vm_manager.concurrent_flush) {
for (i = 0; i < p->gang_size; ++i) {
struct drm_sched_entity *entity = p->entities[i];
struct drm_gpu_scheduler *sched = entity->rq->sched;
@@ -1189,7 +1189,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (!bo)
continue;
- amdgpu_vm_bo_invalidate(adev, bo, false);
+ amdgpu_vm_bo_invalidate(bo, false);
}
}
@@ -1801,13 +1801,18 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket)
return -EINVAL;
+ /* Make sure VRAM is allocated contigiously */
(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
- amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
- for (i = 0; i < (*bo)->placement.num_placement; i++)
- (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
- r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
- if (r)
- return r;
+ if ((*bo)->tbo.resource->mem_type == TTM_PL_VRAM &&
+ !((*bo)->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {
+
+ amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
+ for (i = 0; i < (*bo)->placement.num_placement; i++)
+ (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
+ r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
+ if (r)
+ return r;
+ }
return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index a68338cb7b4a..49ca8c814455 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -2095,6 +2095,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
if (amdgpu_umsch_mm & amdgpu_umsch_mm_fwlog)
amdgpu_debugfs_umsch_fwlog_init(adev, &adev->umsch_mm);
+ amdgpu_debugfs_vcn_sched_mask_init(adev);
amdgpu_debugfs_jpeg_sched_mask_init(adev);
amdgpu_debugfs_gfx_sched_mask_init(adev);
amdgpu_debugfs_compute_sched_mask_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
index 946c48829f19..824f9da5b6ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
@@ -343,11 +343,10 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
coredump->skip_vram_check = skip_vram_check;
coredump->reset_vram_lost = vram_lost;
- if (job && job->vm) {
- struct amdgpu_vm *vm = job->vm;
+ if (job && job->pasid) {
struct amdgpu_task_info *ti;
- ti = amdgpu_vm_get_task_info_vm(vm);
+ ti = amdgpu_vm_get_task_info_pasid(adev, job->pasid);
if (ti) {
coredump->reset_task_info = *ti;
amdgpu_vm_put_task_info(ti);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9095c05e0269..36053b3d48b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -145,7 +145,7 @@ const char *amdgpu_asic_name[] = {
"LAST",
};
-#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMDGPU_MAX_IP_NUM - 1, 0)
+#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM - 1, 0)
/*
* Default init level where all blocks are expected to be initialized. This is
* the level of initialization expected by default and also after a full reset
@@ -199,14 +199,16 @@ void amdgpu_set_init_level(struct amdgpu_device *adev,
}
static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
+static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
+ void *data);
/**
* DOC: pcie_replay_count
*
* The amdgpu driver provides a sysfs API for reporting the total number
- * of PCIe replays (NAKs)
+ * of PCIe replays (NAKs).
* The file pcie_replay_count is used for this and returns the total
- * number of replays as a sum of the NAKs generated and NAKs received
+ * number of replays as a sum of the NAKs generated and NAKs received.
*/
static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
@@ -417,6 +419,9 @@ bool amdgpu_device_supports_boco(struct drm_device *dev)
{
struct amdgpu_device *adev = drm_to_adev(dev);
+ if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
+ return false;
+
if (adev->has_pr3 ||
((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
return true;
@@ -429,8 +434,8 @@ bool amdgpu_device_supports_boco(struct drm_device *dev)
* @dev: drm_device pointer
*
* Return:
- * 1 if the device supporte BACO;
- * 3 if the device support MACO (only works if BACO is supported)
+ * 1 if the device supports BACO;
+ * 3 if the device supports MACO (only works if BACO is supported)
* otherwise return 0.
*/
int amdgpu_device_supports_baco(struct drm_device *dev)
@@ -577,7 +582,7 @@ void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
}
/**
- * amdgpu_device_aper_access - access vram by vram aperature
+ * amdgpu_device_aper_access - access vram by vram aperture
*
* @adev: amdgpu_device pointer
* @pos: offset of the buffer in vram
@@ -668,7 +673,7 @@ bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
* here is that the GPU reset is not running on another thread in parallel.
*
* For this we trylock the read side of the reset semaphore, if that succeeds
- * we know that the reset is not running in paralell.
+ * we know that the reset is not running in parallel.
*
* If the trylock fails we assert that we are either already holding the read
* side of the lock or are the reset thread itself and hold the write side of
@@ -1399,6 +1404,7 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev)
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
amdgpu_psp_wait_for_bootloader(adev);
ret = amdgpu_atomfirmware_asic_init(adev, true);
@@ -1733,7 +1739,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
uint32_t fw_ver;
err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
- /* force vPost if error occured */
+ /* force vPost if error occurred */
if (err)
return true;
@@ -2165,7 +2171,7 @@ int amdgpu_device_ip_set_clockgating_state(void *dev,
if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
continue;
r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
- (void *)adev, state);
+ &adev->ip_blocks[i], state);
if (r)
DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
@@ -2199,7 +2205,7 @@ int amdgpu_device_ip_set_powergating_state(void *dev,
if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
continue;
r = adev->ip_blocks[i].version->funcs->set_powergating_state(
- (void *)adev, state);
+ &adev->ip_blocks[i], state);
if (r)
DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
@@ -2378,7 +2384,7 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
* the module parameter virtual_display. This feature provides a virtual
* display hardware on headless boards or in virtualized environments.
* This function parses and validates the configuration string specified by
- * the user and configues the virtual display configuration (number of
+ * the user and configures the virtual display configuration (number of
* virtual connectors, crtcs, etc.) specified.
*/
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
@@ -2441,7 +2447,7 @@ void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
* @adev: amdgpu_device pointer
*
* Parses the asic configuration parameters specified in the gpu info
- * firmware and makes them availale to the driver for use in configuring
+ * firmware and makes them available to the driver for use in configuring
* the asic.
* Returns 0 on success, -EINVAL on failure.
*/
@@ -2482,6 +2488,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
}
err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
+ AMDGPU_UCODE_OPTIONAL,
"amdgpu/%s_gpu_info.bin", chip_name);
if (err) {
dev_err(adev->dev,
@@ -2501,7 +2508,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
/*
- * Should be droped when DAL no longer needs it.
+ * Should be dropped when DAL no longer needs it.
*/
if (adev->asic_type == CHIP_NAVI12)
goto parse_soc_bounding_box;
@@ -3061,7 +3068,7 @@ init_failed:
*
* Writes a reset magic value to the gart pointer in VRAM. The driver calls
* this function before a GPU reset. If the value is retained after a
- * GPU reset, VRAM has not been lost. Some GPU resets may destry VRAM contents.
+ * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
*/
static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
{
@@ -3137,7 +3144,7 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* enable clockgating to save power */
- r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
+ r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i],
state);
if (r) {
DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
@@ -3174,7 +3181,7 @@ int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
adev->ip_blocks[i].version->funcs->set_powergating_state) {
/* enable powergating to save power */
- r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
+ r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i],
state);
if (r) {
DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
@@ -3376,7 +3383,7 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
amdgpu_amdkfd_suspend(adev, false);
- /* Workaroud for ASICs need to disable SMC first */
+ /* Workaround for ASICs need to disable SMC first */
amdgpu_device_smu_fini_early(adev);
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
@@ -3478,7 +3485,7 @@ static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
WARN_ON_ONCE(adev->gfx.gfx_off_state);
WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
- if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
+ if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0))
adev->gfx.gfx_off_state = true;
}
@@ -3670,9 +3677,11 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
continue;
r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
- DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
- if (r)
+ if (r) {
+ dev_err(adev->dev, "RE-INIT-early: %s failed\n",
+ block->version->funcs->name);
return r;
+ }
block->status.hw = true;
}
}
@@ -3682,7 +3691,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
{
- int i, r;
+ struct amdgpu_ip_block *block;
+ int i, r = 0;
static enum amd_ip_block_type ip_order[] = {
AMD_IP_BLOCK_TYPE_SMC,
@@ -3697,34 +3707,28 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
};
for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
- int j;
- struct amdgpu_ip_block *block;
+ block = amdgpu_device_ip_get_ip_block(adev, ip_order[i]);
- for (j = 0; j < adev->num_ip_blocks; j++) {
- block = &adev->ip_blocks[j];
-
- if (block->version->type != ip_order[i] ||
- !block->status.valid ||
- block->status.hw)
- continue;
+ if (!block)
+ continue;
+ if (block->status.valid && !block->status.hw) {
if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) {
- r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
- if (r)
- return r;
+ r = amdgpu_ip_block_resume(block);
} else {
- r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
- if (r) {
- DRM_ERROR("hw_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- return r;
- }
- block->status.hw = true;
+ r = block->version->funcs->hw_init(block);
}
+
+ if (r) {
+ dev_err(adev->dev, "RE-INIT-late: %s failed\n",
+ block->version->funcs->name);
+ break;
+ }
+ block->status.hw = true;
}
}
- return 0;
+ return r;
}
/**
@@ -3765,7 +3769,7 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
*
* @adev: amdgpu_device pointer
*
- * First resume function for hardware IPs. The list of all the hardware
+ * Second resume function for hardware IPs. The list of all the hardware
* IPs that make up the asic is walked and the resume callbacks are run for
* all blocks except COMMON, GMC, and IH. resume puts the hardware into a
* functional state after a suspend and updates the software state as
@@ -3783,6 +3787,7 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
continue;
r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
@@ -3794,6 +3799,36 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
}
/**
+ * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Third resume function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the resume callbacks are run for
+ * all DCE. resume puts the hardware into a functional state after a suspend
+ * and updates the software state as necessary. This function is also used
+ * for restoring the GPU after a GPU reset.
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
+ continue;
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
* amdgpu_device_ip_resume - run resume for hardware IPs
*
* @adev: amdgpu_device pointer
@@ -3822,6 +3857,13 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
if (adev->mman.buffer_funcs_ring->sched.ready)
amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ if (r)
+ return r;
+
+ amdgpu_fence_driver_hw_init(adev);
+
+ r = amdgpu_device_ip_resume_phase3(adev);
+
return r;
}
@@ -4271,7 +4313,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/*
* Reset domain needs to be present early, before XGMI hive discovered
- * (if any) and intitialized to use reset sem and in_gpu reset flag
+ * (if any) and initialized to use reset sem and in_gpu reset flag
* early on during init and before calling to RREG32.
*/
adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
@@ -4561,6 +4603,11 @@ fence_driver_init:
amdgpu_device_check_iommu_direct_map(adev);
+ adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
+ r = register_pm_notifier(&adev->pm_nb);
+ if (r)
+ goto failed;
+
return 0;
release_ras_con:
@@ -4625,6 +4672,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
drain_workqueue(adev->mman.bdev.wq);
adev->shutdown = true;
+ unregister_pm_notifier(&adev->pm_nb);
+
/* make sure IB test finished before entering exclusive mode
* to avoid preemption on IB test
*/
@@ -4743,8 +4792,8 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
{
int ret;
- /* No need to evict vram on APUs for suspend to ram or s2idle */
- if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
+ /* No need to evict vram on APUs unless going to S4 */
+ if (!adev->in_s4 && (adev->flags & AMD_IS_APU))
return 0;
ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
@@ -4757,6 +4806,41 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
* Suspend & resume.
*/
/**
+ * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events
+ * @nb: notifier block
+ * @mode: suspend mode
+ * @data: data
+ *
+ * This function is called when the system is about to suspend or hibernate.
+ * It is used to evict resources from the device before the system goes to
+ * sleep while there is still access to swap.
+ */
+static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
+ void *data)
+{
+ struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb);
+ int r;
+
+ switch (mode) {
+ case PM_HIBERNATION_PREPARE:
+ adev->in_s4 = true;
+ fallthrough;
+ case PM_SUSPEND_PREPARE:
+ r = amdgpu_device_evict_resources(adev);
+ /*
+ * This is considered non-fatal at this time because
+ * amdgpu_device_prepare() will also fatally evict resources.
+ * See https://gitlab.freedesktop.org/drm/amd/-/issues/3781
+ */
+ if (r)
+ drm_warn(adev_to_drm(adev), "Failed to evict resources, freeze active processes if problems occur: %d\n", r);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+/**
* amdgpu_device_prepare - prepare for device suspend
*
* @dev: drm dev pointer
@@ -4795,7 +4879,7 @@ int amdgpu_device_prepare(struct drm_device *dev)
return 0;
unprepare:
- adev->in_s0ix = adev->in_s3 = false;
+ adev->in_s0ix = adev->in_s3 = adev->in_s4 = false;
return r;
}
@@ -4902,7 +4986,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
goto exit;
}
- amdgpu_fence_driver_hw_init(adev);
if (!adev->in_s0ix) {
r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
@@ -5147,7 +5230,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
if (r)
return r;
- amdgpu_ras_set_fed(adev, false);
+ amdgpu_ras_clear_err_state(adev);
amdgpu_irq_gpu_reset_resume_helper(adev);
/* some sw clean up VF needs to do before recover */
@@ -5204,16 +5287,18 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
}
/**
- * amdgpu_device_has_job_running - check if there is any job in mirror list
+ * amdgpu_device_has_job_running - check if there is any unfinished job
*
* @adev: amdgpu_device pointer
*
- * check if there is any job in mirror list
+ * check if there is any job running on the device when the guest driver receives
+ * an FLR notification from the host driver. If there are still jobs running,
+ * the guest driver will not respond to the FLR reset. Instead, let the job hit
+ * the timeout, and the guest driver will then issue the reset request.
*/
bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
{
int i;
- struct drm_sched_job *job;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
@@ -5221,11 +5306,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
if (!amdgpu_ring_sched_ready(ring))
continue;
- spin_lock(&ring->sched.job_list_lock);
- job = list_first_entry_or_null(&ring->sched.pending_list,
- struct drm_sched_job, list);
- spin_unlock(&ring->sched.job_list_lock);
- if (job)
+ if (amdgpu_fence_count_emitted(ring))
return true;
}
return false;
@@ -5450,7 +5531,7 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
amdgpu_set_init_level(tmp_adev, init_level);
if (full_reset) {
/* post card */
- amdgpu_ras_set_fed(tmp_adev, false);
+ amdgpu_ras_clear_err_state(tmp_adev);
r = amdgpu_device_asic_init(tmp_adev);
if (r) {
dev_warn(tmp_adev->dev, "asic atom init failed!");
@@ -5487,6 +5568,10 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
+ r = amdgpu_device_ip_resume_phase3(tmp_adev);
+ if (r)
+ goto out;
+
if (vram_lost)
amdgpu_device_fill_reset_magic(tmp_adev);
@@ -5780,6 +5865,18 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
/*
+ * If it reaches here because of hang/timeout and a RAS error is
+ * detected at the same time, let RAS recovery take care of it.
+ */
+ if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) &&
+ !amdgpu_sriov_vf(adev) &&
+ reset_context->src != AMDGPU_RESET_SRC_RAS) {
+ dev_dbg(adev->dev,
+ "Gpu recovery from source: %d yielding to RAS error recovery handling",
+ reset_context->src);
+ return 0;
+ }
+ /*
* Special case: RAS triggered and full reset isn't supported
*/
need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
@@ -5862,7 +5959,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
/*
- * Mark these ASICs to be reseted as untracked first
+ * Mark these ASICs to be reset as untracked first
* And add them back after reset completed
*/
amdgpu_unregister_gpu_instance(tmp_adev);
@@ -6065,7 +6162,7 @@ static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
*
* @adev: amdgpu_device pointer
*
- * Fetchs and stores in the driver the PCIE capabilities (gen speed
+ * Fetches and stores in the driver the PCIE capabilities (gen speed
* and lanes) of the slot the device is in. Handles APUs and
* virtualized environments where PCIE config space may not be available.
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 1040204ac8b9..949d74eff294 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2018-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -104,7 +104,9 @@
#include "smuio_v13_0_6.h"
#include "smuio_v14_0_2.h"
#include "vcn_v5_0_0.h"
+#include "vcn_v5_0_1.h"
#include "jpeg_v5_0_0.h"
+#include "jpeg_v5_0_1.h"
#include "amdgpu_vpe.h"
#if defined(CONFIG_DRM_AMD_ISP)
@@ -1340,7 +1342,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
*/
if (adev->vcn.num_vcn_inst <
AMDGPU_MAX_VCN_INSTANCES) {
- adev->vcn.vcn_config[adev->vcn.num_vcn_inst] =
+ adev->vcn.inst[adev->vcn.num_vcn_inst].vcn_config =
ip->revision & 0xc0;
adev->vcn.num_vcn_inst++;
adev->vcn.inst_mask |=
@@ -1705,7 +1707,7 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
* so this won't overflow.
*/
for (v = 0; v < adev->vcn.num_vcn_inst; v++) {
- adev->vcn.vcn_codec_disable_mask[v] =
+ adev->vcn.inst[v].vcn_codec_disable_mask =
le32_to_cpu(vcn_info->v1.instance_info[v].fuse_data.all_bits);
}
break;
@@ -1836,6 +1838,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(9, 4, 2):
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
break;
case IP_VERSION(10, 1, 10):
@@ -1890,6 +1893,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(9, 4, 2):
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
break;
case IP_VERSION(10, 1, 10):
@@ -2013,6 +2017,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 8):
case IP_VERSION(13, 0, 10):
case IP_VERSION(13, 0, 11):
+ case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
case IP_VERSION(14, 0, 0):
case IP_VERSION(14, 0, 1):
@@ -2184,6 +2189,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
break;
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block);
break;
case IP_VERSION(10, 1, 10):
@@ -2238,6 +2244,7 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
break;
case IP_VERSION(4, 4, 2):
case IP_VERSION(4, 4, 5):
+ case IP_VERSION(4, 4, 4):
amdgpu_device_ip_block_add(adev, &sdma_v4_4_2_ip_block);
break;
case IP_VERSION(5, 0, 0):
@@ -2361,6 +2368,10 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vcn_v5_0_0_ip_block);
amdgpu_device_ip_block_add(adev, &jpeg_v5_0_0_ip_block);
break;
+ case IP_VERSION(5, 0, 1):
+ amdgpu_device_ip_block_add(adev, &vcn_v5_0_1_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v5_0_1_ip_block);
+ break;
default:
dev_err(adev->dev,
"Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n",
@@ -2405,6 +2416,7 @@ static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
aqua_vanjaram_init_soc_config(adev);
break;
default:
@@ -2652,6 +2664,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(9, 4, 2):
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
adev->family = AMDGPU_FAMILY_AI;
break;
case IP_VERSION(9, 1, 0):
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index b119d27271c1..35c778426a7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -33,6 +33,7 @@
#include "soc15_common.h"
#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
+#include "bif/bif_4_1_d.h"
#include <asm/div64.h>
#include <linux/pci.h>
@@ -1788,3 +1789,82 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev)
return 0;
}
+/* panic_abo is set in amdgpu_display_get_scanout_buffer() and only used in amdgpu_display_set_pixel();
+ * both are called from the panic handler and protected by the drm_panic spinlock.
+ */
+static struct amdgpu_bo *panic_abo;
+
+/* Use the indirect MMIO to write each pixel to the GPU VRAM.
+ * This is a simplified version of amdgpu_device_mm_access().
+ */
+static void amdgpu_display_set_pixel(struct drm_scanout_buffer *sb,
+ unsigned int x,
+ unsigned int y,
+ u32 color)
+{
+ struct amdgpu_res_cursor cursor;
+ unsigned long offset;
+ struct amdgpu_bo *abo = panic_abo;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+ uint32_t tmp;
+
+ offset = x * 4 + y * sb->pitch[0];
+ amdgpu_res_first(abo->tbo.resource, offset, 4, &cursor);
+
+ tmp = cursor.start >> 31;
+ WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t) cursor.start) | 0x80000000);
+ if (tmp != 0xffffffff)
+ WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
+ WREG32_NO_KIQ(mmMM_DATA, color);
+}
+
+int amdgpu_display_get_scanout_buffer(struct drm_plane *plane,
+ struct drm_scanout_buffer *sb)
+{
+ struct amdgpu_bo *abo;
+ struct drm_framebuffer *fb = plane->state->fb;
+
+ if (!fb)
+ return -EINVAL;
+
+ DRM_DEBUG_KMS("Framebuffer %dx%d %p4cc\n", fb->width, fb->height, &fb->format->format);
+
+ abo = gem_to_amdgpu_bo(fb->obj[0]);
+ if (!abo)
+ return -EINVAL;
+
+ sb->width = fb->width;
+ sb->height = fb->height;
+ /* Use the generic linear format, because tiling will be disabled in panic_flush() */
+ sb->format = drm_format_info(fb->format->format);
+ if (!sb->format)
+ return -EINVAL;
+
+ sb->pitch[0] = fb->pitches[0];
+
+ if (abo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) {
+ if (abo->tbo.resource->mem_type != TTM_PL_VRAM) {
+ drm_warn(plane->dev, "amdgpu panic, framebuffer not in VRAM\n");
+ return -EINVAL;
+ }
+ /* Only handle 32-bit formats, to simplify mmio access */
+ if (fb->format->cpp[0] != 4) {
+ drm_warn(plane->dev, "amdgpu panic, pixel format is not 32bits\n");
+ return -EINVAL;
+ }
+ sb->set_pixel = amdgpu_display_set_pixel;
+ panic_abo = abo;
+ return 0;
+ }
+ if (!abo->kmap.virtual &&
+ ttm_bo_kmap(&abo->tbo, 0, PFN_UP(abo->tbo.base.size), &abo->kmap)) {
+ drm_warn(plane->dev, "amdgpu bo map failed, panic won't be displayed\n");
+ return -ENOMEM;
+ }
+ if (abo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK)
+ iosys_map_set_vaddr_iomem(&sb->map[0], abo->kmap.virtual);
+ else
+ iosys_map_set_vaddr(&sb->map[0], abo->kmap.virtual);
+
+ return 0;
+}
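The new scanout-buffer hook above gives drm_panic either a CPU mapping of the framebuffer or, when the BO lives in CPU-inaccessible VRAM, a per-pixel callback that goes through the indirect MMIO window. For illustration only, a tiny drawing loop in the style of what the panic code does with the installed hook (the loop is a stand-in, not code from drm_panic):

#include <drm/drm_panic.h>

static void fill_rect(struct drm_scanout_buffer *sb,
		      unsigned int x0, unsigned int y0,
		      unsigned int w, unsigned int h, u32 color)
{
	unsigned int x, y;

	for (y = y0; y < y0 + h && y < sb->height; y++)
		for (x = x0; x < x0 + w && x < sb->width; x++)
			/* lands in amdgpu_display_set_pixel() for the VRAM case */
			sb->set_pixel(sb, x, y, color);
}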
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
index 9d19940f73c8..dfa0d642ac16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
@@ -23,6 +23,8 @@
#ifndef __AMDGPU_DISPLAY_H__
#define __AMDGPU_DISPLAY_H__
+#include <drm/drm_panic.h>
+
#define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
#define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
#define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e))
@@ -49,4 +51,7 @@ amdgpu_lookup_format_info(u32 format, uint64_t modifier);
int amdgpu_display_suspend_helper(struct amdgpu_device *adev);
int amdgpu_display_resume_helper(struct amdgpu_device *adev);
+int amdgpu_display_get_scanout_buffer(struct drm_plane *plane,
+ struct drm_scanout_buffer *sb);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 8e81a83d37d8..9f627caedc3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -36,6 +36,7 @@
#include "amdgpu_gem.h"
#include "amdgpu_dma_buf.h"
#include "amdgpu_xgmi.h"
+#include "amdgpu_vm.h"
#include <drm/amdgpu_drm.h>
#include <drm/ttm/ttm_tt.h>
#include <linux/dma-buf.h>
@@ -60,6 +61,8 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
attach->peer2peer = false;
+ amdgpu_vm_bo_update_shared(bo);
+
return 0;
}
@@ -345,7 +348,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
/* FIXME: This should be after the "if", but needs a fix to make sure
* DMABuf imports are initialized in the right VM list.
*/
- amdgpu_vm_bo_invalidate(adev, bo, false);
+ amdgpu_vm_bo_invalidate(bo, false);
if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 38686203bea6..492b09d84571 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -23,7 +23,7 @@
*/
#include <drm/amdgpu_drm.h>
-#include <drm/drm_client_setup.h>
+#include <drm/clients/drm_client_setup.h>
#include <drm/drm_drv.h>
#include <drm/drm_fbdev_ttm.h>
#include <drm/drm_gem.h>
@@ -2552,7 +2552,6 @@ static int amdgpu_pmops_freeze(struct device *dev)
struct amdgpu_device *adev = drm_to_adev(drm_dev);
int r;
- adev->in_s4 = true;
r = amdgpu_device_suspend(drm_dev, true);
adev->in_s4 = false;
if (r)
@@ -2916,7 +2915,6 @@ static const struct drm_driver amdgpu_kms_driver = {
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
- .date = DRIVER_DATE,
.major = KMS_DRIVER_MAJOR,
.minor = KMS_DRIVER_MINOR,
.patchlevel = KMS_DRIVER_PATCHLEVEL,
@@ -2940,7 +2938,6 @@ const struct drm_driver amdgpu_partition_driver = {
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
- .date = DRIVER_DATE,
.major = KMS_DRIVER_MAJOR,
.minor = KMS_DRIVER_MINOR,
.patchlevel = KMS_DRIVER_PATCHLEVEL,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
index 5bc2cb661af7..2d86cc6f7f4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
@@ -40,7 +40,6 @@
#define DRIVER_NAME "amdgpu"
#define DRIVER_DESC "AMD GPU"
-#define DRIVER_DATE "20150101"
extern const struct drm_driver amdgpu_partition_driver;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
index df2cf5c33925..91d638098889 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
@@ -60,7 +60,7 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
struct amdgpu_fpriv *fpriv = file->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
- struct amdgpu_mem_stats stats[__AMDGPU_PL_LAST + 1] = { };
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];
ktime_t usage[AMDGPU_HW_IP_NUM];
const char *pl_name[] = {
[TTM_PL_VRAM] = "vram",
@@ -72,15 +72,8 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
[AMDGPU_PL_DOORBELL] = "doorbell",
};
unsigned int hw_ip, i;
- int ret;
-
- ret = amdgpu_bo_reserve(vm->root.bo, false);
- if (ret)
- return;
-
- amdgpu_vm_get_memory(vm, stats, ARRAY_SIZE(stats));
- amdgpu_bo_unreserve(vm->root.bo);
+ amdgpu_vm_get_memory(vm, stats);
amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);
/*
@@ -114,9 +107,11 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
drm_printf(p, "amd-evicted-vram:\t%llu KiB\n",
stats[TTM_PL_VRAM].evicted/1024UL);
drm_printf(p, "amd-requested-vram:\t%llu KiB\n",
- stats[TTM_PL_VRAM].requested/1024UL);
+ (stats[TTM_PL_VRAM].drm.shared +
+ stats[TTM_PL_VRAM].drm.private) / 1024UL);
drm_printf(p, "amd-requested-gtt:\t%llu KiB\n",
- stats[TTM_PL_TT].requested/1024UL);
+ (stats[TTM_PL_TT].drm.shared +
+ stats[TTM_PL_TT].drm.private) / 1024UL);
for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
if (!usage[hw_ip])
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
index ceb5163480f4..09c9194d5bd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
@@ -384,7 +384,7 @@ int amdgpu_fru_sysfs_init(struct amdgpu_device *adev)
void amdgpu_fru_sysfs_fini(struct amdgpu_device *adev)
{
- if (!is_fru_eeprom_supported(adev, NULL) || !adev->fru_info)
+ if (!adev->fru_info)
return;
sysfs_remove_files(&adev->dev->kobj, amdgpu_fru_attributes);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
index bc58dca18035..98f3196599ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
@@ -32,7 +32,7 @@ struct amdgpu_fru_info {
char product_name[AMDGPU_PRODUCT_NAME_LEN];
char serial[20];
char manufacturer_name[32];
- char fru_id[32];
+ char fru_id[50];
};
int amdgpu_fru_get_product_info(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
index 2d4b67175b55..328a1b963548 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
@@ -122,6 +122,10 @@ static int amdgpu_is_fw_attestation_supported(struct amdgpu_device *adev)
if (adev->flags & AMD_IS_APU)
return 0;
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(14, 0, 2) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(14, 0, 3))
+ return 0;
+
if (adev->asic_type >= CHIP_SIENNA_CICHLID)
return 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 1a5df8b94661..69429df09477 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -42,6 +42,7 @@
#include "amdgpu_dma_buf.h"
#include "amdgpu_hmm.h"
#include "amdgpu_xgmi.h"
+#include "amdgpu_vm.h"
static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
{
@@ -87,10 +88,8 @@ static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
{
struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj);
- if (aobj) {
- amdgpu_hmm_unregister(aobj);
- ttm_bo_put(&aobj->tbo);
- }
+ amdgpu_hmm_unregister(aobj);
+ ttm_bo_put(&aobj->tbo);
}
int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
@@ -179,6 +178,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
if (r)
return r;
+ amdgpu_vm_bo_update_shared(abo);
bo_va = amdgpu_vm_bo_find(vm, abo);
if (!bo_va)
bo_va = amdgpu_vm_bo_add(adev, vm, abo);
@@ -252,6 +252,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
goto out_unlock;
amdgpu_vm_bo_del(adev, bo_va);
+ amdgpu_vm_bo_update_shared(bo);
if (!amdgpu_vm_ready(vm))
goto out_unlock;
@@ -839,7 +840,6 @@ error:
int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
struct drm_amdgpu_gem_op *args = data;
struct drm_gem_object *gobj;
struct amdgpu_vm_bo_base *base;
@@ -899,7 +899,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
if (robj->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
- amdgpu_vm_bo_invalidate(adev, robj, true);
+ amdgpu_vm_bo_invalidate(robj, true);
amdgpu_bo_unreserve(robj);
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 69a6b6dba0a5..784b03abb3a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -515,7 +515,7 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
- if (!kiq_ring->sched.ready || adev->job_hang || amdgpu_in_reset(adev))
+ if (!kiq_ring->sched.ready || amdgpu_in_reset(adev))
return 0;
spin_lock(&kiq->ring_lock);
@@ -567,7 +567,7 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
- if (!adev->gfx.kiq[0].ring.sched.ready || adev->job_hang)
+ if (!adev->gfx.kiq[0].ring.sched.ready || amdgpu_in_reset(adev))
return 0;
if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
@@ -806,7 +806,7 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
/* If going to s2idle, no need to wait */
if (adev->in_s0ix) {
if (!amdgpu_dpm_set_powergating_by_smu(adev,
- AMD_IP_BLOCK_TYPE_GFX, true))
+ AMD_IP_BLOCK_TYPE_GFX, true, 0))
adev->gfx.gfx_off_state = true;
} else {
schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
@@ -818,7 +818,7 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
if (adev->gfx.gfx_off_state &&
- !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
+ !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false, 0)) {
adev->gfx.gfx_off_state = false;
if (adev->gfx.funcs->init_spm_golden) {
@@ -1484,6 +1484,24 @@ static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id)
return 0;
}
+/**
+ * amdgpu_gfx_set_run_cleaner_shader - Execute the AMDGPU GFX Cleaner Shader
+ * @dev: The device structure
+ * @attr: The device attribute structure
+ * @buf: The buffer containing the input data
+ * @count: The size of the input data
+ *
+ * Provides the sysfs interface to manually run a cleaner shader, which is
+ * used to clear the GPU state between different tasks. Writing a value to the
+ * 'run_cleaner_shader' sysfs file triggers the cleaner shader execution.
+ * The value written corresponds to the partition index on multi-partition
+ * devices. On single-partition devices, the value should be '0'.
+ *
+ * The cleaner shader clears the Local Data Store (LDS) and General Purpose
+ * Registers (GPRs) to ensure data isolation between GPU workloads.
+ *
+ * Return: The number of bytes written to the sysfs file.
+ */
static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
struct device_attribute *attr,
const char *buf,
@@ -1532,6 +1550,19 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
return count;
}
+/**
+ * amdgpu_gfx_get_enforce_isolation - Query AMDGPU GFX Enforce Isolation Settings
+ * @dev: The device structure
+ * @attr: The device attribute structure
+ * @buf: The buffer to store the output data
+ *
+ * Provides the sysfs read interface to get the current settings of the 'enforce_isolation'
+ * feature for each GPU partition. Reading from the 'enforce_isolation'
+ * sysfs file returns the isolation settings for all partitions, where '0'
+ * indicates disabled and '1' indicates enabled.
+ *
+ * Return: The number of bytes read from the sysfs file.
+ */
static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -1555,6 +1586,20 @@ static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
return size;
}
+/**
+ * amdgpu_gfx_set_enforce_isolation - Control AMDGPU GFX Enforce Isolation
+ * @dev: The device structure
+ * @attr: The device attribute structure
+ * @buf: The buffer containing the input data
+ * @count: The size of the input data
+ *
+ * This function allows control over the 'enforce_isolation' feature, which
+ * serializes access to the graphics engine. Writing '1' or '0' to the
+ * 'enforce_isolation' sysfs file enables or disables process isolation for
+ * each partition. The input should specify the setting for all partitions.
+ *
+ * Return: The number of bytes written to the sysfs file.
+ */
static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
@@ -1940,6 +1985,17 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
mutex_unlock(&adev->enforce_isolation_mutex);
}
+/**
+ * amdgpu_gfx_enforce_isolation_wait_for_kfd - Manage KFD wait period for process isolation
+ * @adev: amdgpu_device pointer
+ * @idx: Index of the GPU partition
+ *
+ * When kernel submissions come in, the jobs are given a time slice and once
+ * that time slice is up, if there are KFD user queues active, kernel
+ * submissions are blocked until KFD has had its time slice. Once the KFD time
+ * slice is up, KFD user queues are preempted and kernel submissions are
+ * unblocked and allowed to run again.
+ */
static void
amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev,
u32 idx)
@@ -1985,10 +2041,20 @@ amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev,
msleep(GFX_SLICE_PERIOD_MS);
}
+/**
+ * amdgpu_gfx_enforce_isolation_ring_begin_use - Begin use of a ring with enforced isolation
+ * @ring: Pointer to the amdgpu_ring structure
+ *
+ * Ring begin_use helper implementation for gfx which serializes access to the
+ * gfx IP between kernel submission IOCTLs and KFD user queues when isolation
+ * enforcement is enabled. The kernel submission IOCTLs and KFD user queues
+ * each get a time slice when both are active.
+ */
void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 idx;
+ bool sched_work = false;
if (!adev->gfx.enable_cleaner_shader)
return;
@@ -2007,15 +2073,28 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
mutex_lock(&adev->enforce_isolation_mutex);
if (adev->enforce_isolation[idx]) {
if (adev->kfd.init_complete)
- amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);
+ sched_work = true;
}
mutex_unlock(&adev->enforce_isolation_mutex);
+
+ if (sched_work)
+ amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);
}
+/**
+ * amdgpu_gfx_enforce_isolation_ring_end_use - End use of a ring with enforced isolation
+ * @ring: Pointer to the amdgpu_ring structure
+ *
+ * Ring end_use helper implementation for gfx which serializes access to the
+ * gfx IP between kernel submission IOCTLs and KFD user queues when isolation
+ * enforcement is enabled. The kernel submission IOCTLs and KFD user queues
+ * each get a time slice when both are active.
+ */
void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 idx;
+ bool sched_work = false;
if (!adev->gfx.enable_cleaner_shader)
return;
@@ -2031,9 +2110,12 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
mutex_lock(&adev->enforce_isolation_mutex);
if (adev->enforce_isolation[idx]) {
if (adev->kfd.init_complete)
- amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);
+ sched_work = true;
}
mutex_unlock(&adev->enforce_isolation_mutex);
+
+ if (sched_work)
+ amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);
}
/*
@@ -2050,7 +2132,7 @@ static int amdgpu_debugfs_gfx_sched_mask_set(void *data, u64 val)
if (!adev)
return -ENODEV;
- mask = (1 << adev->gfx.num_gfx_rings) - 1;
+ mask = (1ULL << adev->gfx.num_gfx_rings) - 1;
if ((val & mask) == 0)
return -EINVAL;
@@ -2078,7 +2160,7 @@ static int amdgpu_debugfs_gfx_sched_mask_get(void *data, u64 *val)
for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
ring = &adev->gfx.gfx_ring[i];
if (ring->sched.ready)
- mask |= 1 << i;
+ mask |= 1ULL << i;
}
*val = mask;
@@ -2120,7 +2202,7 @@ static int amdgpu_debugfs_compute_sched_mask_set(void *data, u64 val)
if (!adev)
return -ENODEV;
- mask = (1 << adev->gfx.num_compute_rings) - 1;
+ mask = (1ULL << adev->gfx.num_compute_rings) - 1;
if ((val & mask) == 0)
return -EINVAL;
@@ -2149,7 +2231,7 @@ static int amdgpu_debugfs_compute_sched_mask_get(void *data, u64 *val)
for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
ring = &adev->gfx.compute_ring[i];
if (ring->sched.ready)
- mask |= 1 << i;
+ mask |= 1ULL << i;
}
*val = mask;
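The kdoc added above documents two sysfs knobs: enforce_isolation (read/write the per-partition isolation setting) and run_cleaner_shader (write a partition index to trigger the cleaner shader). A userspace sketch of driving the latter; the exact sysfs path is an assumption based on these being device attributes on the GPU's PCI node, not something stated in the patch:

#include <stdio.h>

int main(void)
{
	/* assumed path; adjust card index/device node for the target GPU */
	FILE *f = fopen("/sys/class/drm/card0/device/run_cleaner_shader", "w");

	if (!f)
		return 1;
	fputs("0", f);	/* partition index; '0' on single-partition devices */
	fclose(f);
	return 0;
}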
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 8b512dc28df8..e0bc37557d2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -89,16 +89,14 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
/**
* amdgpu_ib_free - free an IB (Indirect Buffer)
*
- * @adev: amdgpu_device pointer
* @ib: IB object to free
* @f: the fence the SA bo needs to wait on for the IB allocation
*
* Free an IB (all asics).
*/
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
- struct dma_fence *f)
+void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f)
{
- amdgpu_sa_bo_free(adev, &ib->sa_bo, f);
+ amdgpu_sa_bo_free(&ib->sa_bo, f);
}
/**
@@ -193,8 +191,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
need_ctx_switch = ring->current_ctx != fence_ctx;
if (ring->funcs->emit_pipeline_sync && job &&
((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||
- (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
- amdgpu_vm_need_pipeline_sync(ring, job))) {
+ need_ctx_switch || amdgpu_vm_need_pipeline_sync(ring, job))) {
+
need_pipe_sync = true;
if (tmp)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index f3b0aaf3ebc6..901f8b12c672 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -298,3 +298,9 @@ uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
dw2 = le32_to_cpu(ih->ring[ring_index + 2]);
return dw1 | ((u64)(dw2 & 0xffff) << 32);
}
+
+const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
+{
+ return ih == &adev->irq.ih ? "ih" : ih == &adev->irq.ih_soft ? "sw ih" :
+ ih == &adev->irq.ih1 ? "ih1" : ih == &adev->irq.ih2 ? "ih2" : "unknown";
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index 508f02eb0cf8..7d4395a5d8ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -110,4 +110,5 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
signed int offset);
+const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c
index 263ce1811cc8..732744488b03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c
@@ -77,7 +77,8 @@ static int isp_load_fw_by_psp(struct amdgpu_device *adev)
sizeof(ucode_prefix));
/* read isp fw */
- r = amdgpu_ucode_request(adev, &adev->isp.fw, "amdgpu/%s.bin", ucode_prefix);
+ r = amdgpu_ucode_request(adev, &adev->isp.fw, AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s.bin", ucode_prefix);
if (r) {
amdgpu_ucode_release(&adev->isp.fw);
return r;
@@ -128,13 +129,13 @@ static bool isp_is_idle(void *handle)
return true;
}
-static int isp_set_clockgating_state(void *handle,
+static int isp_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int isp_set_powergating_state(void *handle,
+static int isp_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index b9d08bc96581..100f04475943 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -102,8 +102,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
return DRM_GPU_SCHED_STAT_ENODEV;
}
- adev->job_hang = true;
-
/*
* Do the coredump immediately after a job timeout to get a very
* close dump/snapshot/representation of GPU's current error status
@@ -181,7 +179,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
}
exit:
- adev->job_hang = false;
drm_dev_exit(idx);
return DRM_GPU_SCHED_STAT_NOMINAL;
}
@@ -197,11 +194,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (!*job)
return -ENOMEM;
- /*
- * Initialize the scheduler to at least some ring so that we always
- * have a pointer to adev.
- */
- (*job)->base.sched = &adev->rings[0]->sched;
(*job)->vm = vm;
amdgpu_sync_create(&(*job)->explicit_sync);
@@ -255,7 +247,6 @@ void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds,
void amdgpu_job_free_resources(struct amdgpu_job *job)
{
- struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
struct dma_fence *f;
unsigned i;
@@ -268,7 +259,7 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
f = NULL;
for (i = 0; i < job->num_ibs; ++i)
- amdgpu_ib_free(ring->adev, &job->ibs[i], f);
+ amdgpu_ib_free(&job->ibs[i], f);
}
static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
@@ -367,6 +358,13 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r);
goto error;
}
+ /*
+ * The VM structure might be released after the VMID is
+ * assigned; we had multiple problems with people trying to use
+ * the VM pointer, so better to set it to NULL.
+ */
+ if (!fence)
+ job->vm = NULL;
}
return fence;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
index 3eb4a4653fce..d9cb343a8708 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
@@ -27,7 +27,8 @@
#include "amdgpu_ras.h"
#define AMDGPU_MAX_JPEG_INSTANCES 4
-#define AMDGPU_MAX_JPEG_RINGS 8
+#define AMDGPU_MAX_JPEG_RINGS 10
+#define AMDGPU_MAX_JPEG_RINGS_4_0_3 8
#define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0)
#define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 59ec20b07a6a..32b27a1658e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -1610,10 +1610,12 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1");
}
- r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], "%s", fw_name);
+ r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], AMDGPU_UCODE_REQUIRED,
+ "%s", fw_name);
if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) {
dev_info(adev->dev, "try to fall back to %s_mes.bin\n", ucode_prefix);
r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe],
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mes.bin", ucode_prefix);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 6852d50caa89..96f4b8904e9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -41,6 +41,7 @@
#include "amdgpu_amdkfd.h"
#include "amdgpu_vram_mgr.h"
#include "amdgpu_vm.h"
+#include "amdgpu_dma_buf.h"
/**
* DOC: amdgpu_object
@@ -324,6 +325,9 @@ error_free:
*
* Allocates and pins a BO for kernel internal use.
*
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to create and access the kernel BO.
+ *
* Note: For bo_ptr new BO is only created if bo_ptr points to NULL.
*
* Returns:
@@ -347,6 +351,76 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
return 0;
}
+EXPORT_SYMBOL(amdgpu_bo_create_kernel);
+
+/**
+ * amdgpu_bo_create_isp_user - create user BO for isp
+ *
+ * @adev: amdgpu device object
+ * @dma_buf: DMABUF handle for isp buffer
+ * @domain: where to place it
+ * @bo: used to initialize BOs in structures
+ * @gpu_addr: GPU addr of the pinned BO
+ *
+ * Imports the isp DMABUF to allocate and pin a user BO for isp internal use. It also
+ * does a GART allocation to generate a gpu_addr for the BO, making it accessible
+ * through the GART aperture to the ISP HW.
+ *
+ * This function is exported to allow the V4L2 isp device external to drm device
+ * to create and access the isp user BO.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+int amdgpu_bo_create_isp_user(struct amdgpu_device *adev,
+ struct dma_buf *dma_buf, u32 domain, struct amdgpu_bo **bo,
+ u64 *gpu_addr)
+
+{
+ struct drm_gem_object *gem_obj;
+ int r;
+
+ gem_obj = amdgpu_gem_prime_import(&adev->ddev, dma_buf);
+ *bo = gem_to_amdgpu_bo(gem_obj);
+ if (!(*bo)) {
+ dev_err(adev->dev, "failed to get valid isp user bo\n");
+ return -EINVAL;
+ }
+
+ r = amdgpu_bo_reserve(*bo, false);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to reserve isp user bo\n", r);
+ return r;
+ }
+
+ r = amdgpu_bo_pin(*bo, domain);
+ if (r) {
+ dev_err(adev->dev, "(%d) isp user bo pin failed\n", r);
+ goto error_unreserve;
+ }
+
+ r = amdgpu_ttm_alloc_gart(&(*bo)->tbo);
+ if (r) {
+ dev_err(adev->dev, "%p bind failed\n", *bo);
+ goto error_unpin;
+ }
+
+ if (!WARN_ON(!gpu_addr))
+ *gpu_addr = amdgpu_bo_gpu_offset(*bo);
+
+ amdgpu_bo_unreserve(*bo);
+
+ return 0;
+
+error_unpin:
+ amdgpu_bo_unpin(*bo);
+error_unreserve:
+ amdgpu_bo_unreserve(*bo);
+ amdgpu_bo_unref(bo);
+
+ return r;
+}
+EXPORT_SYMBOL(amdgpu_bo_create_isp_user);
/**
* amdgpu_bo_create_kernel_at - create BO for kernel use at specific location
@@ -423,6 +497,9 @@ error:
* @cpu_addr: pointer to where the BO's CPU memory space address was stored
*
* unmaps and unpin a BO for kernel internal use.
+ *
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to free the kernel BO.
*/
void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
void **cpu_addr)
@@ -447,6 +524,30 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
if (cpu_addr)
*cpu_addr = NULL;
}
+EXPORT_SYMBOL(amdgpu_bo_free_kernel);
+
+/**
+ * amdgpu_bo_free_isp_user - free BO for isp use
+ *
+ * @bo: amdgpu isp user BO to free
+ *
+ * unpin and unref BO for isp internal use.
+ *
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to free the isp user BO.
+ */
+void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo)
+{
+ if (bo == NULL)
+ return;
+
+ if (amdgpu_bo_reserve(bo, true) == 0) {
+ amdgpu_bo_unpin(bo);
+ amdgpu_bo_unreserve(bo);
+ }
+ amdgpu_bo_unref(&bo);
+}
+EXPORT_SYMBOL(amdgpu_bo_free_isp_user);
/* Validate that the bo size is a bit bigger than the requested domain */
static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
@@ -1150,7 +1251,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
bool evict,
struct ttm_resource *new_mem)
{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct ttm_resource *old_mem = bo->resource;
struct amdgpu_bo *abo;
@@ -1158,7 +1258,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
return;
abo = ttm_to_amdgpu_bo(bo);
- amdgpu_vm_bo_invalidate(adev, abo, evict);
+ amdgpu_vm_bo_move(abo, new_mem, evict);
amdgpu_bo_kunmap(abo);
@@ -1171,75 +1271,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
old_mem ? old_mem->mem_type : -1);
}
-void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
- struct amdgpu_mem_stats *stats,
- unsigned int sz)
-{
- const unsigned int domain_to_pl[] = {
- [ilog2(AMDGPU_GEM_DOMAIN_CPU)] = TTM_PL_SYSTEM,
- [ilog2(AMDGPU_GEM_DOMAIN_GTT)] = TTM_PL_TT,
- [ilog2(AMDGPU_GEM_DOMAIN_VRAM)] = TTM_PL_VRAM,
- [ilog2(AMDGPU_GEM_DOMAIN_GDS)] = AMDGPU_PL_GDS,
- [ilog2(AMDGPU_GEM_DOMAIN_GWS)] = AMDGPU_PL_GWS,
- [ilog2(AMDGPU_GEM_DOMAIN_OA)] = AMDGPU_PL_OA,
- [ilog2(AMDGPU_GEM_DOMAIN_DOORBELL)] = AMDGPU_PL_DOORBELL,
- };
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- struct ttm_resource *res = bo->tbo.resource;
- struct drm_gem_object *obj = &bo->tbo.base;
- uint64_t size = amdgpu_bo_size(bo);
- unsigned int type;
-
- if (!res) {
- /*
- * If no backing store use one of the preferred domain for basic
- * stats. We take the MSB since that should give a reasonable
- * view.
- */
- BUILD_BUG_ON(TTM_PL_VRAM < TTM_PL_TT ||
- TTM_PL_VRAM < TTM_PL_SYSTEM);
- type = fls(bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK);
- if (!type)
- return;
- type--;
- if (drm_WARN_ON_ONCE(&adev->ddev,
- type >= ARRAY_SIZE(domain_to_pl)))
- return;
- type = domain_to_pl[type];
- } else {
- type = res->mem_type;
- }
-
- if (drm_WARN_ON_ONCE(&adev->ddev, type >= sz))
- return;
-
- /* DRM stats common fields: */
-
- if (drm_gem_object_is_shared_for_memory_stats(obj))
- stats[type].drm.shared += size;
- else
- stats[type].drm.private += size;
-
- if (res) {
- stats[type].drm.resident += size;
-
- if (!dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_BOOKKEEP))
- stats[type].drm.active += size;
- else if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
- stats[type].drm.purgeable += size;
- }
-
- /* amdgpu specific stats: */
-
- if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) {
- stats[TTM_PL_VRAM].requested += size;
- if (type != TTM_PL_VRAM)
- stats[TTM_PL_VRAM].evicted += size;
- } else if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT) {
- stats[TTM_PL_TT].requested += size;
- }
-}
-
/**
* amdgpu_bo_release_notify - notification about a BO being released
* @bo: pointer to a buffer object
@@ -1455,6 +1486,45 @@ u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo)
}
/**
+ * amdgpu_bo_mem_stats_placement - bo placement for memory accounting
+ * @bo: the buffer object we should look at
+ *
+ * A BO can have multiple preferred placements; to avoid double counting we want
+ * to file it under a single placement for memory stats.
+ * Luckily, if we take the highest set bit in preferred_domains the result is
+ * quite sensible.
+ *
+ * Returns:
+ * Which of the placements should the BO be accounted under.
+ */
+uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo)
+{
+ uint32_t domain = bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK;
+
+ if (!domain)
+ return TTM_PL_SYSTEM;
+
+ switch (rounddown_pow_of_two(domain)) {
+ case AMDGPU_GEM_DOMAIN_CPU:
+ return TTM_PL_SYSTEM;
+ case AMDGPU_GEM_DOMAIN_GTT:
+ return TTM_PL_TT;
+ case AMDGPU_GEM_DOMAIN_VRAM:
+ return TTM_PL_VRAM;
+ case AMDGPU_GEM_DOMAIN_GDS:
+ return AMDGPU_PL_GDS;
+ case AMDGPU_GEM_DOMAIN_GWS:
+ return AMDGPU_PL_GWS;
+ case AMDGPU_GEM_DOMAIN_OA:
+ return AMDGPU_PL_OA;
+ case AMDGPU_GEM_DOMAIN_DOORBELL:
+ return AMDGPU_PL_DOORBELL;
+ default:
+ return TTM_PL_SYSTEM;
+ }
+}
+
+/**
* amdgpu_bo_get_preferred_domain - get preferred domain
* @adev: amdgpu device object
* @domain: allowed :ref:`memory domains <amdgpu_memory_domains>`
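amdgpu_bo_create_isp_user()/amdgpu_bo_free_isp_user() above are exported precisely so a V4L2 ISP driver outside the DRM device can wrap an imported dma-buf. A sketch of that intended call sequence; the GTT domain choice and the surrounding isp driver names are assumptions, not taken from the patch:

#include <linux/dma-buf.h>
#include "amdgpu_object.h"

static int isp_map_capture_buffer(struct amdgpu_device *adev,
				  struct dma_buf *dbuf)
{
	struct amdgpu_bo *bo;
	u64 gpu_addr;
	int r;

	r = amdgpu_bo_create_isp_user(adev, dbuf, AMDGPU_GEM_DOMAIN_GTT,
				      &bo, &gpu_addr);
	if (r)
		return r;

	/* ... program the ISP HW with gpu_addr, run the capture ... */

	amdgpu_bo_free_isp_user(bo);
	return 0;
}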
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index be6769852ece..375448627f7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -260,6 +260,10 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
unsigned long size, int align,
u32 domain, struct amdgpu_bo **bo_ptr,
u64 *gpu_addr, void **cpu_addr);
+int amdgpu_bo_create_isp_user(struct amdgpu_device *adev,
+ struct dma_buf *dbuf, u32 domain,
+ struct amdgpu_bo **bo,
+ u64 *gpu_addr);
int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
uint64_t offset, uint64_t size,
struct amdgpu_bo **bo_ptr, void **cpu_addr);
@@ -271,6 +275,7 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev,
struct amdgpu_bo_vm **ubo_ptr);
void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
void **cpu_addr);
+void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo);
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
void *amdgpu_bo_kptr(struct amdgpu_bo *bo);
void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
@@ -300,9 +305,7 @@ int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, struct dma_resv *resv,
int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr);
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo);
-void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
- struct amdgpu_mem_stats *stats,
- unsigned int size);
+uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo);
uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
uint32_t domain);
@@ -337,8 +340,7 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
struct drm_suballoc **sa_bo,
unsigned int size);
-void amdgpu_sa_bo_free(struct amdgpu_device *adev,
- struct drm_suballoc **sa_bo,
+void amdgpu_sa_bo_free(struct drm_suballoc **sa_bo,
struct dma_fence *fence);
#if defined(CONFIG_DEBUG_FS)
void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 448f9e742983..babe94ade247 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -208,6 +208,7 @@ static int psp_early_init(struct amdgpu_ip_block *ip_block)
psp->boot_time_tmr = false;
fallthrough;
case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
psp_v13_0_set_psp_funcs(psp);
psp->autoload_supported = false;
@@ -359,6 +360,7 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev,
int i;
if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14))
return false;
@@ -870,6 +872,7 @@ static bool psp_skip_tmr(struct psp_context *psp)
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
return true;
default:
@@ -2264,7 +2267,8 @@ int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
return -EINVAL;
if (ta_cmd_id != TA_SECUREDISPLAY_COMMAND__QUERY_TA &&
- ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC)
+ ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC &&
+ ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2)
return -EINVAL;
ret = psp_ta_invoke(psp, ta_cmd_id, &psp->securedisplay_context.context);
@@ -2385,6 +2389,15 @@ static int psp_hw_start(struct psp_context *psp)
}
}
+ if ((is_psp_fw_valid(psp->spdm_drv)) &&
+ (psp->funcs->bootloader_load_spdm_drv != NULL)) {
+ ret = psp_bootloader_load_spdm_drv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load spdm_drv failed!\n");
+ return ret;
+ }
+ }
+
if ((is_psp_fw_valid(psp->sos)) &&
(psp->funcs->bootloader_load_sos != NULL)) {
ret = psp_bootloader_load_sos(psp);
@@ -3007,10 +3020,7 @@ static int psp_hw_init(struct amdgpu_ip_block *ip_block)
struct amdgpu_device *adev = ip_block->adev;
mutex_lock(&adev->firmware.mutex);
- /*
- * This sequence is just used on hw_init only once, no need on
- * resume.
- */
+
ret = amdgpu_ucode_init_bo(adev);
if (ret)
goto failed;
@@ -3135,6 +3145,10 @@ static int psp_resume(struct amdgpu_ip_block *ip_block)
mutex_lock(&adev->firmware.mutex);
+ ret = amdgpu_ucode_init_bo(adev);
+ if (ret)
+ goto failed;
+
ret = psp_hw_start(psp);
if (ret)
goto failed;
@@ -3289,7 +3303,8 @@ int psp_init_asd_microcode(struct psp_context *psp, const char *chip_name)
const struct psp_firmware_header_v1_0 *asd_hdr;
int err = 0;
- err = amdgpu_ucode_request(adev, &adev->psp.asd_fw, "amdgpu/%s_asd.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->psp.asd_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_asd.bin", chip_name);
if (err)
goto out;
@@ -3311,7 +3326,8 @@ int psp_init_toc_microcode(struct psp_context *psp, const char *chip_name)
const struct psp_firmware_header_v1_0 *toc_hdr;
int err = 0;
- err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, "amdgpu/%s_toc.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_toc.bin", chip_name);
if (err)
goto out;
@@ -3407,6 +3423,12 @@ static int parse_sos_bin_descriptor(struct psp_context *psp,
psp->ipkeymgr_drv.size_bytes = le32_to_cpu(desc->size_bytes);
psp->ipkeymgr_drv.start_addr = ucode_start_addr;
break;
+ case PSP_FW_TYPE_PSP_SPDM_DRV:
+ psp->spdm_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->spdm_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->spdm_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->spdm_drv.start_addr = ucode_start_addr;
+ break;
default:
dev_warn(psp->adev->dev, "Unsupported PSP FW type: %d\n", desc->fw_type);
break;
@@ -3474,7 +3496,8 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name)
uint8_t *ucode_array_start_addr;
int err = 0;
- err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, "amdgpu/%s_sos.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sos.bin", chip_name);
if (err)
goto out;
@@ -3750,7 +3773,8 @@ int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name)
struct amdgpu_device *adev = psp->adev;
int err;
- err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, "amdgpu/%s_ta.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ta.bin", chip_name);
if (err)
return err;
@@ -3785,7 +3809,8 @@ int psp_init_cap_microcode(struct psp_context *psp, const char *chip_name)
return -EINVAL;
}
- err = amdgpu_ucode_request(adev, &adev->psp.cap_fw, "amdgpu/%s_cap.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->psp.cap_fw, AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_cap.bin", chip_name);
if (err) {
if (err == -ENODEV) {
dev_warn(adev->dev, "cap microcode does not exist, skip\n");
@@ -3849,13 +3874,13 @@ int psp_config_sq_perfmon(struct psp_context *psp,
return ret;
}
-static int psp_set_clockgating_state(void *handle,
+static int psp_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int psp_set_powergating_state(void *handle,
+static int psp_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -3867,10 +3892,12 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct amdgpu_ip_block *ip_block;
uint32_t fw_ver;
int ret;
- if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
+ if (!ip_block || !ip_block->status.late_initialized) {
dev_info(adev->dev, "PSP block is not ready yet\n.");
return -EBUSY;
}
@@ -3899,8 +3926,10 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev,
struct amdgpu_bo *fw_buf_bo = NULL;
uint64_t fw_pri_mc_addr;
void *fw_pri_cpu_addr;
+ struct amdgpu_ip_block *ip_block;
- if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
+ if (!ip_block || !ip_block->status.late_initialized) {
dev_err(adev->dev, "PSP block is not ready yet.");
return -EBUSY;
}
@@ -3908,7 +3937,8 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev,
if (!drm_dev_enter(ddev, &idx))
return -ENODEV;
- ret = amdgpu_ucode_request(adev, &usbc_pd_fw, "amdgpu/%s", buf);
+ ret = amdgpu_ucode_request(adev, &usbc_pd_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s", buf);
if (ret)
goto fail;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 567cb1f924ca..8d5acc415d38 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -80,6 +80,7 @@ enum psp_bootloader_cmd {
PSP_BL__DRAM_LONG_TRAIN = 0x100000,
PSP_BL__DRAM_SHORT_TRAIN = 0x200000,
PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000,
+ PSP_BL__LOAD_SPDMDRV = 0x20000000,
};
enum psp_ring_type {
@@ -120,6 +121,7 @@ struct psp_funcs {
int (*bootloader_load_dbg_drv)(struct psp_context *psp);
int (*bootloader_load_ras_drv)(struct psp_context *psp);
int (*bootloader_load_ipkeymgr_drv)(struct psp_context *psp);
+ int (*bootloader_load_spdm_drv)(struct psp_context *psp);
int (*bootloader_load_sos)(struct psp_context *psp);
int (*ring_create)(struct psp_context *psp,
enum psp_ring_type ring_type);
@@ -343,6 +345,7 @@ struct psp_context {
struct psp_bin_desc dbg_drv;
struct psp_bin_desc ras_drv;
struct psp_bin_desc ipkeymgr_drv;
+ struct psp_bin_desc spdm_drv;
/* tmr buffer */
struct amdgpu_bo *tmr_bo;
@@ -434,6 +437,9 @@ struct amdgpu_psp_funcs {
#define psp_bootloader_load_ipkeymgr_drv(psp) \
((psp)->funcs->bootloader_load_ipkeymgr_drv ? \
(psp)->funcs->bootloader_load_ipkeymgr_drv((psp)) : 0)
+#define psp_bootloader_load_spdm_drv(psp) \
+ ((psp)->funcs->bootloader_load_spdm_drv ? \
+ (psp)->funcs->bootloader_load_spdm_drv((psp)) : 0)
#define psp_bootloader_load_sos(psp) \
((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0)
#define psp_smu_reload_quirk(psp) \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 4c9fa24dd972..f0924aa3f4e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -36,6 +36,7 @@
#include "amdgpu_xgmi.h"
#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
#include "nbio_v4_3.h"
+#include "nbif_v6_3_1.h"
#include "nbio_v7_9.h"
#include "atom.h"
#include "amdgpu_reset.h"
@@ -192,7 +193,7 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre
if (amdgpu_bad_page_threshold != 0) {
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
- err_data.err_addr_cnt);
+ err_data.err_addr_cnt, false);
amdgpu_ras_save_bad_pages(adev, NULL);
}
@@ -2015,6 +2016,7 @@ static bool amdgpu_ras_aca_is_supported(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
ret = true;
break;
@@ -2156,6 +2158,16 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
/* Fatal error events are handled on host side */
if (amdgpu_sriov_vf(adev))
return;
+ /**
+ * If the current interrupt is caused by a non-fatal RAS error, skip
+ * the fatal error check. For fatal errors, the FED status of all devices
+ * in the XGMI hive gets set when the first device receives the fatal
+ * error interrupt. The error is propagated to the other devices as well,
+ * so make sure to ack the interrupt regardless of FED status.
+ */
+ if (!amdgpu_ras_get_fed_status(adev) &&
+ amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY))
+ return;
if (adev->nbio.ras &&
adev->nbio.ras->handle_ras_controller_intr_no_bifring)
@@ -2185,6 +2197,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *
if (ret)
return;
+ amdgpu_ras_set_err_poison(adev, block_obj->ras_comm.block);
/* both query_poison_status and handle_poison_consumption are optional,
* but at least one of them should be implemented if we need poison
* consumption handler
@@ -2717,40 +2730,203 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
return 0;
}
+static int amdgpu_ras_mca2pa_by_idx(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps,
+ struct ras_err_data *err_data)
+{
+ struct ta_ras_query_address_input addr_in;
+ uint32_t socket = 0;
+ int ret = 0;
+
+ if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id)
+ socket = adev->smuio.funcs->get_socket_id(adev);
+
+ /* reinit err_data */
+ err_data->err_addr_cnt = 0;
+ err_data->err_addr_len = adev->umc.retire_unit;
+
+ memset(&addr_in, 0, sizeof(addr_in));
+ addr_in.ma.err_addr = bps->address;
+ addr_in.ma.socket_id = socket;
+ addr_in.ma.ch_inst = bps->mem_channel;
+ /* tell RAS TA the node instance is not used */
+ addr_in.ma.node_inst = TA_RAS_INV_NODE;
+
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
+ ret = adev->umc.ras->convert_ras_err_addr(adev, err_data,
+ &addr_in, NULL, false);
+
+ return ret;
+}
+
+static int amdgpu_ras_mca2pa(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps,
+ struct ras_err_data *err_data)
+{
+ struct ta_ras_query_address_input addr_in;
+ uint32_t die_id, socket = 0;
+
+ if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id)
+ socket = adev->smuio.funcs->get_socket_id(adev);
+
+	/* although the die id is obtained from the PA in nps1 mode, the id is
+	 * suitable for any nps mode
+	 */
+ if (adev->umc.ras && adev->umc.ras->get_die_id_from_pa)
+ die_id = adev->umc.ras->get_die_id_from_pa(adev, bps->address,
+ bps->retired_page << AMDGPU_GPU_PAGE_SHIFT);
+ else
+ return -EINVAL;
+
+ /* reinit err_data */
+ err_data->err_addr_cnt = 0;
+ err_data->err_addr_len = adev->umc.retire_unit;
+
+ memset(&addr_in, 0, sizeof(addr_in));
+ addr_in.ma.err_addr = bps->address;
+ addr_in.ma.ch_inst = bps->mem_channel;
+ addr_in.ma.umc_inst = bps->mcumc_id;
+ addr_in.ma.node_inst = die_id;
+ addr_in.ma.socket_id = socket;
+
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
+ return adev->umc.ras->convert_ras_err_addr(adev, err_data,
+ &addr_in, NULL, false);
+ else
+ return -EINVAL;
+}
+
/* it deal with vram only. */
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
- struct eeprom_table_record *bps, int pages)
+ struct eeprom_table_record *bps, int pages, bool from_rom)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data;
+ struct ras_err_data err_data;
+ struct eeprom_table_record *err_rec;
+ struct amdgpu_ras_eeprom_control *control =
+ &adev->psp.ras_context.ras->eeprom_control;
+ enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE;
int ret = 0;
- uint32_t i;
+ uint32_t i, j, loop_cnt = 1;
+ bool find_pages_per_pa = false;
if (!con || !con->eh_data || !bps || pages <= 0)
return 0;
+ if (from_rom) {
+ err_data.err_addr =
+ kcalloc(adev->umc.retire_unit,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+ if (!err_data.err_addr) {
+ dev_warn(adev->dev, "Failed to alloc UMC error address record in mca2pa conversion!\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ err_rec = err_data.err_addr;
+ loop_cnt = adev->umc.retire_unit;
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ }
+
mutex_lock(&con->recovery_lock);
data = con->eh_data;
- if (!data)
- goto out;
+ if (!data) {
+ /* Returning 0 as the absence of eh_data is acceptable */
+ goto free;
+ }
for (i = 0; i < pages; i++) {
- if (amdgpu_ras_check_bad_page_unlock(con,
- bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
- continue;
+ if (from_rom &&
+ control->rec_type == AMDGPU_RAS_EEPROM_REC_MCA) {
+ if (!find_pages_per_pa) {
+ if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i], &err_data)) {
+ if (!i && nps == AMDGPU_NPS1_PARTITION_MODE) {
+ /* may use old RAS TA, use PA to find pages in
+ * one row
+ */
+ if (amdgpu_umc_pages_in_a_row(adev, &err_data,
+ bps[i].retired_page <<
+ AMDGPU_GPU_PAGE_SHIFT)) {
+ ret = -EINVAL;
+ goto free;
+ } else {
+ find_pages_per_pa = true;
+ }
+ } else {
+ /* unsupported cases */
+ ret = -EOPNOTSUPP;
+ goto free;
+ }
+ }
+ } else {
+ if (amdgpu_umc_pages_in_a_row(adev, &err_data,
+ bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) {
+ ret = -EINVAL;
+ goto free;
+ }
+ }
+ } else {
+ if (from_rom && !find_pages_per_pa) {
+ if (bps[i].retired_page & UMC_CHANNEL_IDX_V2) {
+ /* bad page in any NPS mode in eeprom */
+ if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i], &err_data)) {
+ ret = -EINVAL;
+ goto free;
+ }
+ } else {
+ /* legacy bad page in eeprom, generated only in
+ * NPS1 mode
+ */
+ if (amdgpu_ras_mca2pa(adev, &bps[i], &err_data)) {
+					/* old RAS TA or ASICs which don't support
+					 * converting the address via mca address
+					 */
+ if (!i && nps == AMDGPU_NPS1_PARTITION_MODE) {
+ find_pages_per_pa = true;
+ err_rec = &bps[i];
+ loop_cnt = 1;
+ } else {
+ /* non-nps1 mode, old RAS TA
+ * can't support it
+ */
+ ret = -EOPNOTSUPP;
+ goto free;
+ }
+ }
+ }
- if (!data->space_left &&
- amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
- ret = -ENOMEM;
- goto out;
+ if (!find_pages_per_pa)
+ i += (adev->umc.retire_unit - 1);
+ } else {
+ err_rec = &bps[i];
+ }
}
- amdgpu_ras_reserve_page(adev, bps[i].retired_page);
+ for (j = 0; j < loop_cnt; j++) {
+ if (amdgpu_ras_check_bad_page_unlock(con,
+ err_rec[j].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+ continue;
+
+ if (!data->space_left &&
+ amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
+ ret = -ENOMEM;
+ goto free;
+ }
- memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps));
- data->count++;
- data->space_left--;
+ amdgpu_ras_reserve_page(adev, err_rec[j].retired_page);
+
+ memcpy(&data->bps[data->count], &(err_rec[j]),
+ sizeof(struct eeprom_table_record));
+ data->count++;
+ data->space_left--;
+ }
}
+
+free:
+ if (from_rom)
+ kfree(err_data.err_addr);
out:
mutex_unlock(&con->recovery_lock);
@@ -2768,7 +2944,7 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data;
struct amdgpu_ras_eeprom_control *control;
- int save_count;
+ int save_count, unit_num, bad_page_num, i;
if (!con || !con->eh_data) {
if (new_cnt)
@@ -2780,19 +2956,32 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
mutex_lock(&con->recovery_lock);
control = &con->eeprom_control;
data = con->eh_data;
- save_count = data->count - control->ras_num_recs;
+ bad_page_num = control->ras_num_bad_pages;
+ save_count = data->count - bad_page_num;
mutex_unlock(&con->recovery_lock);
+ unit_num = save_count / adev->umc.retire_unit;
if (new_cnt)
- *new_cnt = save_count / adev->umc.retire_unit;
+ *new_cnt = unit_num;
/* only new entries are saved */
if (save_count > 0) {
- if (amdgpu_ras_eeprom_append(control,
- &data->bps[control->ras_num_recs],
- save_count)) {
- dev_err(adev->dev, "Failed to save EEPROM table data!");
- return -EIO;
+ if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA) {
+ if (amdgpu_ras_eeprom_append(control,
+ &data->bps[control->ras_num_recs],
+ save_count)) {
+ dev_err(adev->dev, "Failed to save EEPROM table data!");
+ return -EIO;
+ }
+ } else {
+ for (i = 0; i < unit_num; i++) {
+ if (amdgpu_ras_eeprom_append(control,
+ &data->bps[bad_page_num + i * adev->umc.retire_unit],
+ 1)) {
+ dev_err(adev->dev, "Failed to save EEPROM table data!");
+ return -EIO;
+ }
+ }
}
dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count);
@@ -2821,11 +3010,32 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
return -ENOMEM;
ret = amdgpu_ras_eeprom_read(control, bps, control->ras_num_recs);
- if (ret)
+ if (ret) {
dev_err(adev->dev, "Failed to load EEPROM table records!");
- else
- ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs);
+ } else {
+ if (control->ras_num_recs > 1 &&
+ adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
+ if ((bps[0].address == bps[1].address) &&
+ (bps[0].mem_channel == bps[1].mem_channel))
+ control->rec_type = AMDGPU_RAS_EEPROM_REC_PA;
+ else
+ control->rec_type = AMDGPU_RAS_EEPROM_REC_MCA;
+ }
+
+ ret = amdgpu_ras_eeprom_check(control);
+ if (ret)
+ goto out;
+
+ /* HW not usable */
+ if (amdgpu_ras_is_rma(adev)) {
+ ret = -EHWPOISON;
+ goto out;
+ }
+ ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs, true);
+ }
+
+out:
kfree(bps);
return ret;
}
@@ -3205,31 +3415,36 @@ static int amdgpu_ras_page_retirement_thread(void *param)
int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *control;
int ret;
if (!con || amdgpu_sriov_vf(adev))
return 0;
- ret = amdgpu_ras_eeprom_init(&con->eeprom_control);
-
+ control = &con->eeprom_control;
+ ret = amdgpu_ras_eeprom_init(control);
if (ret)
return ret;
- /* HW not usable */
- if (amdgpu_ras_is_rma(adev))
- return -EHWPOISON;
+ if (!adev->umc.ras || !adev->umc.ras->convert_ras_err_addr)
+ control->rec_type = AMDGPU_RAS_EEPROM_REC_PA;
+
+ /* default status is MCA storage */
+ if (control->ras_num_recs <= 1 &&
+ adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
+ control->rec_type = AMDGPU_RAS_EEPROM_REC_MCA;
- if (con->eeprom_control.ras_num_recs) {
+ if (control->ras_num_recs) {
ret = amdgpu_ras_load_bad_pages(adev);
if (ret)
return ret;
amdgpu_dpm_send_hbm_bad_pages_num(
- adev, con->eeprom_control.ras_num_recs);
+ adev, control->ras_num_bad_pages);
if (con->update_channel_flag == true) {
amdgpu_dpm_send_hbm_bad_channel_flag(
- adev, con->eeprom_control.bad_channel_bitmap);
+ adev, control->bad_channel_bitmap);
con->update_channel_flag = false;
}
}
@@ -3366,6 +3581,7 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
return true;
default:
@@ -3378,7 +3594,9 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
+ case IP_VERSION(14, 0, 3):
return true;
default:
return false;
@@ -3629,6 +3847,7 @@ static void amdgpu_ras_init_reserved_vram_size(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE;
break;
@@ -3704,7 +3923,19 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
* check DF RAS */
adev->nbio.ras = &nbio_v4_3_ras;
break;
+ case IP_VERSION(6, 3, 1):
+ if (adev->ras_hw_enabled & (1 << AMDGPU_RAS_BLOCK__DF))
+			/* unlike other generations of nbio ras,
+			 * nbif v6_3_1 only supports a fatal error interrupt
+			 * to inform software that DF is frozen due to a
+			 * system fatal error event. The driver should not
+			 * enable nbio ras in such a case. Instead,
+			 * check DF RAS
+			 */
+ adev->nbio.ras = &nbif_v6_3_1_ras;
+ break;
case IP_VERSION(7, 9, 0):
+ case IP_VERSION(7, 9, 1):
if (!adev->gmc.is_app_apu)
adev->nbio.ras = &nbio_v7_9_ras;
break;
@@ -4083,7 +4314,7 @@ bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev)
if (!ras)
return false;
- return atomic_read(&ras->fed);
+ return test_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state);
}
void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status)
@@ -4091,8 +4322,48 @@ void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status)
struct amdgpu_ras *ras;
ras = amdgpu_ras_get_context(adev);
+ if (ras) {
+ if (status)
+ set_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state);
+ else
+ clear_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state);
+ }
+}
+
+void amdgpu_ras_clear_err_state(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (ras)
+ ras->ras_err_state = 0;
+}
+
+void amdgpu_ras_set_err_poison(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
if (ras)
- atomic_set(&ras->fed, !!status);
+ set_bit(block, &ras->ras_err_state);
+}
+
+bool amdgpu_ras_is_err_state(struct amdgpu_device *adev, int block)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (ras) {
+ if (block == AMDGPU_RAS_BLOCK__ANY)
+ return (ras->ras_err_state != 0);
+ else
+ return test_bit(block, &ras->ras_err_state) ||
+ test_bit(AMDGPU_RAS_BLOCK__LAST,
+ &ras->ras_err_state);
+ }
+
+ return false;
}
static struct ras_event_manager *__get_ras_event_mgr(struct amdgpu_device *adev)
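The single atomic fed flag is replaced here by a ras_err_state bitmap: bit AMDGPU_RAS_BLOCK__LAST carries the fatal-error (FED) status, the lower bits record per-block poison state, and AMDGPU_RAS_BLOCK__ANY (-1) asks whether any bit is set. A userspace-style sketch of the same bookkeeping, using plain bit operations instead of the kernel's set_bit()/test_bit() and hypothetical block names, follows.

#include <stdbool.h>
#include <stdio.h>

enum ras_block { BLOCK_UMC, BLOCK_GFX, BLOCK_SDMA, BLOCK_LAST, BLOCK_ANY = -1 };

static unsigned long ras_err_state;

/* Mark a block as having consumed poison. */
static void ras_set_err_poison(enum ras_block block)
{
	ras_err_state |= 1UL << block;
}

/* Fatal error (FED) is tracked as the BLOCK_LAST bit. */
static void ras_set_fed(bool status)
{
	if (status)
		ras_err_state |= 1UL << BLOCK_LAST;
	else
		ras_err_state &= ~(1UL << BLOCK_LAST);
}

/* A block is in error state if its own bit or the fatal bit is set. */
static bool ras_is_err_state(int block)
{
	if (block == BLOCK_ANY)
		return ras_err_state != 0;
	return (ras_err_state & (1UL << block)) ||
	       (ras_err_state & (1UL << BLOCK_LAST));
}

int main(void)
{
	ras_set_err_poison(BLOCK_UMC);
	printf("umc=%d gfx=%d any=%d\n",
	       ras_is_err_state(BLOCK_UMC), ras_is_err_state(BLOCK_GFX),
	       ras_is_err_state(BLOCK_ANY));
	ras_set_fed(true);
	printf("gfx after fed=%d\n", ras_is_err_state(BLOCK_GFX));
	return 0;
}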
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 6db772ecfee4..82db986c36a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -99,7 +99,8 @@ enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__IH,
AMDGPU_RAS_BLOCK__MPIO,
- AMDGPU_RAS_BLOCK__LAST
+ AMDGPU_RAS_BLOCK__LAST,
+ AMDGPU_RAS_BLOCK__ANY = -1
};
enum amdgpu_ras_mca_block {
@@ -482,6 +483,8 @@ struct ras_ecc_err {
uint64_t ipid;
uint64_t addr;
uint64_t pa_pfn;
+ /* save global channel index across all UMC instances */
+ uint32_t channel_idx;
struct ras_err_pages err_pages;
};
@@ -558,8 +561,8 @@ struct amdgpu_ras {
struct ras_ecc_log_info umc_ecc_log;
struct delayed_work page_retirement_dwork;
- /* Fatal error detected flag */
- atomic_t fed;
+ /* ras errors detected */
+ unsigned long ras_err_state;
/* RAS event manager */
struct ras_event_manager __event_mgr;
@@ -750,7 +753,7 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
/* error handling functions */
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
- struct eeprom_table_record *bps, int pages);
+ struct eeprom_table_record *bps, int pages, bool from_rom);
int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
unsigned long *new_cnt);
@@ -952,6 +955,10 @@ ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *a
void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status);
bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev);
+void amdgpu_ras_set_err_poison(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
+void amdgpu_ras_clear_err_state(struct amdgpu_device *adev);
+bool amdgpu_ras_is_err_state(struct amdgpu_device *adev, int block);
u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type);
int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_type type,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index f28f6b4ba765..52c16bfeccaa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -470,9 +470,10 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
res = __write_table_ras_info(control);
control->ras_num_recs = 0;
+ control->ras_num_bad_pages = 0;
control->ras_fri = 0;
- amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_recs);
+ amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_bad_pages);
control->bad_channel_bitmap = 0;
amdgpu_dpm_send_hbm_bad_channel_flag(adev, control->bad_channel_bitmap);
@@ -559,7 +560,7 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev)
if (con->eeprom_control.tbl_hdr.header == RAS_TABLE_HDR_BAD) {
if (amdgpu_bad_page_threshold == -1) {
dev_warn(adev->dev, "RAS records:%d exceed threshold:%d",
- con->eeprom_control.ras_num_recs, con->bad_page_cnt_threshold);
+ con->eeprom_control.ras_num_bad_pages, con->bad_page_cnt_threshold);
dev_warn(adev->dev,
"But GPU can be operated due to bad_page_threshold = -1.\n");
return false;
@@ -621,6 +622,7 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
const u32 num)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(to_amdgpu_device(control));
+ struct amdgpu_device *adev = to_amdgpu_device(control);
u32 a, b, i;
u8 *buf, *pp;
int res;
@@ -723,6 +725,12 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
control->ras_num_recs = 1 + (control->ras_max_record_count + b
- control->ras_fri)
% control->ras_max_record_count;
+
+ if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA)
+ control->ras_num_bad_pages = control->ras_num_recs;
+ else
+ control->ras_num_bad_pages =
+ control->ras_num_recs * adev->umc.retire_unit;
Out:
kfree(buf);
return res;
@@ -740,10 +748,10 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
/* Modify the header if it exceeds.
*/
if (amdgpu_bad_page_threshold != 0 &&
- control->ras_num_recs >= ras->bad_page_cnt_threshold) {
+ control->ras_num_bad_pages >= ras->bad_page_cnt_threshold) {
dev_warn(adev->dev,
"Saved bad pages %d reaches threshold value %d\n",
- control->ras_num_recs, ras->bad_page_cnt_threshold);
+ control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
control->tbl_hdr.header = RAS_TABLE_HDR_BAD;
if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) {
control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD;
@@ -798,9 +806,9 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
*/
if (amdgpu_bad_page_threshold != 0 &&
control->tbl_hdr.version == RAS_TABLE_VER_V2_1 &&
- control->ras_num_recs < ras->bad_page_cnt_threshold)
+ control->ras_num_bad_pages < ras->bad_page_cnt_threshold)
control->tbl_rai.health_percent = ((ras->bad_page_cnt_threshold -
- control->ras_num_recs) * 100) /
+ control->ras_num_bad_pages) * 100) /
ras->bad_page_cnt_threshold;
/* Recalc the checksum.
@@ -841,7 +849,7 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
const u32 num)
{
struct amdgpu_device *adev = to_amdgpu_device(control);
- int res;
+ int res, i;
if (!__is_ras_eeprom_supported(adev))
return 0;
@@ -855,6 +863,10 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
return -EINVAL;
}
+ /* set the new channel index flag */
+ for (i = 0; i < num; i++)
+ record[i].retired_page |= UMC_CHANNEL_IDX_V2;
+
mutex_lock(&control->ras_tbl_mutex);
res = amdgpu_ras_eeprom_append_table(control, record, num);
@@ -864,6 +876,11 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
amdgpu_ras_debugfs_set_ret_size(control);
mutex_unlock(&control->ras_tbl_mutex);
+
+ /* clear channel index flag, the flag is only saved on eeprom */
+ for (i = 0; i < num; i++)
+ record[i].retired_page &= ~UMC_CHANNEL_IDX_V2;
+
return res;
}
@@ -1373,9 +1390,35 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
}
control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset);
+ return 0;
+}
+
+int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int res;
+
+ if (!__is_ras_eeprom_supported(adev))
+ return 0;
+
+ /* Verify i2c adapter is initialized */
+ if (!adev->pm.ras_eeprom_i2c_bus || !adev->pm.ras_eeprom_i2c_bus->algo)
+ return -ENOENT;
+
+ if (!__get_eeprom_i2c_addr(adev, control))
+ return -EINVAL;
+
+ if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA)
+ control->ras_num_bad_pages = control->ras_num_recs;
+ else
+ control->ras_num_bad_pages =
+ control->ras_num_recs * adev->umc.retire_unit;
+
if (hdr->header == RAS_TABLE_HDR_VAL) {
DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
- control->ras_num_recs);
+ control->ras_num_bad_pages);
if (hdr->version == RAS_TABLE_VER_V2_1) {
res = __read_table_ras_info(control);
@@ -1390,9 +1433,9 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
/* Warn if we are at 90% of the threshold or above
*/
- if (10 * control->ras_num_recs >= 9 * ras->bad_page_cnt_threshold)
+ if (10 * control->ras_num_bad_pages >= 9 * ras->bad_page_cnt_threshold)
dev_warn(adev->dev, "RAS records:%u exceeds 90%% of threshold:%d",
- control->ras_num_recs,
+ control->ras_num_bad_pages,
ras->bad_page_cnt_threshold);
} else if (hdr->header == RAS_TABLE_HDR_BAD &&
amdgpu_bad_page_threshold != 0) {
@@ -1403,10 +1446,12 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
}
res = __verify_ras_table_checksum(control);
- if (res)
- DRM_ERROR("RAS Table incorrect checksum or error:%d\n",
+ if (res) {
+ dev_err(adev->dev, "RAS Table incorrect checksum or error:%d\n",
res);
- if (ras->bad_page_cnt_threshold > control->ras_num_recs) {
+ return -EINVAL;
+ }
+ if (ras->bad_page_cnt_threshold > control->ras_num_bad_pages) {
/* This means that, the threshold was increased since
* the last time the system was booted, and now,
* ras->bad_page_cnt_threshold - control->num_recs > 0,
@@ -1416,13 +1461,13 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
dev_info(adev->dev,
"records:%d threshold:%d, resetting "
"RAS table header signature",
- control->ras_num_recs,
+ control->ras_num_bad_pages,
ras->bad_page_cnt_threshold);
res = amdgpu_ras_eeprom_correct_header_tag(control,
RAS_TABLE_HDR_VAL);
} else {
dev_err(adev->dev, "RAS records:%d exceed threshold:%d",
- control->ras_num_recs, ras->bad_page_cnt_threshold);
+ control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
if (amdgpu_bad_page_threshold == -1) {
dev_warn(adev->dev, "GPU will be initialized due to bad_page_threshold = -1.");
res = 0;
@@ -1431,7 +1476,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
dev_err(adev->dev,
"RAS records:%d exceed threshold:%d, "
"GPU will not be initialized. Replace this GPU or increase the threshold",
- control->ras_num_recs, ras->bad_page_cnt_threshold);
+ control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
}
}
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
index b9ebda577797..81d55cb7b397 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
@@ -43,6 +43,19 @@ enum amdgpu_ras_eeprom_err_type {
AMDGPU_RAS_EEPROM_ERR_COUNT,
};
+/*
+ * one UMC MCA address could map to multiple physical addresses (PAs),
+ * such as 1:16; we use eeprom_table_record.address to store the MCA
+ * address and use eeprom_table_record.retired_page to save the PA.
+ *
+ * AMDGPU_RAS_EEPROM_REC_PA: one record stores one PA
+ * AMDGPU_RAS_EEPROM_REC_MCA: one record stores one MCA address
+ */
+enum amdgpu_ras_eeprom_rec_type {
+ AMDGPU_RAS_EEPROM_REC_PA,
+ AMDGPU_RAS_EEPROM_REC_MCA,
+};
+
struct amdgpu_ras_eeprom_table_header {
uint32_t header;
uint32_t version;
@@ -82,6 +95,11 @@ struct amdgpu_ras_eeprom_control {
*/
u32 ras_num_recs;
+ /* the bad page number is ras_num_recs or
+ * ras_num_recs * umc.retire_unit
+ */
+ u32 ras_num_bad_pages;
+
/* First record index to read, 0-based.
* Range is [0, num_recs-1]. This is
* an absolute index, starting right after
@@ -102,6 +120,7 @@ struct amdgpu_ras_eeprom_control {
/* Record channel info which occurred bad pages
*/
u32 bad_channel_bitmap;
+ enum amdgpu_ras_eeprom_rec_type rec_type;
};
/*
@@ -145,6 +164,8 @@ uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *co
void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control);
+int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control);
+
extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops;
extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops;
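With the new record types, the number of retired pages no longer equals the number of EEPROM records: a PA record maps 1:1 to a bad page, while an MCA record expands to umc.retire_unit pages. A small sketch of that accounting follows; the names are hypothetical and the retire_unit of 16 is chosen only for illustration.

#include <stdio.h>

enum rec_type { REC_PA, REC_MCA };

/* Translate EEPROM record count into retired page count. */
static unsigned int bad_pages_from_recs(enum rec_type type,
					unsigned int num_recs,
					unsigned int retire_unit)
{
	return type == REC_PA ? num_recs : num_recs * retire_unit;
}

int main(void)
{
	unsigned int retire_unit = 16; /* illustrative value only */

	printf("PA records:  %u recs -> %u pages\n", 8u,
	       bad_pages_from_recs(REC_PA, 8, retire_unit));
	printf("MCA records: %u recs -> %u pages\n", 8u,
	       bad_pages_from_recs(REC_MCA, 8, retire_unit));
	return 0;
}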
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index a0acb65f4b40..dabfbdf6f1ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -183,6 +183,7 @@ int amdgpu_reset_init(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
ret = aldebaran_reset_init(adev);
break;
@@ -206,6 +207,7 @@ int amdgpu_reset_fini(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
ret = aldebaran_reset_fini(adev);
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 36fc9578c53c..dee5a1b4e572 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -462,8 +462,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size,
enum amdgpu_ib_pool_type pool,
struct amdgpu_ib *ib);
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
- struct dma_fence *f);
+void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f);
int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
struct amdgpu_ib *ibs, struct amdgpu_job *job,
struct dma_fence **f);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index 10df731998b2..39070b2a4c04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -93,8 +93,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
return 0;
}
-void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct drm_suballoc **sa_bo,
- struct dma_fence *fence)
+void amdgpu_sa_bo_free(struct drm_suballoc **sa_bo, struct dma_fence *fence)
{
if (sa_bo == NULL || *sa_bo == NULL) {
return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 113f0d242618..174badca27e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -219,9 +219,11 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
amdgpu_ucode_ip_version_decode(adev, SDMA0_HWIP, ucode_prefix, sizeof(ucode_prefix));
if (instance == 0)
err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s.bin", ucode_prefix);
else
err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s%d.bin", ucode_prefix, instance);
if (err)
goto out;
@@ -261,6 +263,8 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
if ((amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
IP_VERSION(4, 4, 2) ||
amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 4, 4) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
IP_VERSION(4, 4, 5)) &&
adev->firmware.load_type ==
AMDGPU_FW_LOAD_PSP &&
@@ -358,13 +362,13 @@ static int amdgpu_debugfs_sdma_sched_mask_set(void *data, u64 val)
if (!adev)
return -ENODEV;
- mask = (1 << adev->sdma.num_instances) - 1;
+ mask = BIT_ULL(adev->sdma.num_instances) - 1;
if ((val & mask) == 0)
return -EINVAL;
for (i = 0; i < adev->sdma.num_instances; ++i) {
ring = &adev->sdma.instance[i].ring;
- if (val & (1 << i))
+ if (val & BIT_ULL(i))
ring->sched.ready = true;
else
ring->sched.ready = false;
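The sched-mask change above swaps open-coded 1 << i shifts for BIT_ULL() so the mask stays well-defined if instance counts ever reach 32 or more; shifting a 32-bit 1 by 32 or more is undefined behaviour in C. A standalone illustration of the difference, with BIT_ULL written out as a 1ULL shift, is shown here.

#include <stdint.h>
#include <stdio.h>

/* Equivalent of the kernel's BIT_ULL(): force a 64-bit shift. */
#define MY_BIT_ULL(n) (1ULL << (n))

int main(void)
{
	unsigned int num_instances = 33; /* hypothetical large instance count */

	/* Well-defined: the shift is done in 64-bit arithmetic. */
	uint64_t mask = MY_BIT_ULL(num_instances) - 1;

	/* "1 << 33" would shift a 32-bit int past its width: undefined. */
	printf("mask for %u instances: 0x%llx\n", num_instances,
	       (unsigned long long)mask);
	return 0;
}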
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 9f922ec50ea2..ff286940ab43 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -61,7 +61,7 @@
#include "amdgpu_res_cursor.h"
#include "bif/bif_4_1_d.h"
-MODULE_IMPORT_NS(DMA_BUF);
+MODULE_IMPORT_NS("DMA_BUF");
#define AMDGPU_TTM_VRAM_MAX_DW_READ ((size_t)128)
@@ -1762,7 +1762,8 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
if (!adev->bios &&
(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)))
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)))
reserve_size = max(reserve_size, (uint32_t)280 << 20);
else if (!reserve_size)
reserve_size = DISCOVERY_TMR_OFFSET;
@@ -2065,6 +2066,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL);
ttm_device_fini(&adev->mman.bdev);
adev->mman.initialized = false;
DRM_INFO("amdgpu: ttm finalized\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 2852a6064c9a..461fb8090ae0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -26,15 +26,15 @@
#include <linux/dma-direction.h>
#include <drm/gpu_scheduler.h>
+#include <drm/ttm/ttm_placement.h>
#include "amdgpu_vram_mgr.h"
-#include "amdgpu.h"
#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
#define AMDGPU_PL_GWS (TTM_PL_PRIV + 1)
#define AMDGPU_PL_OA (TTM_PL_PRIV + 2)
#define AMDGPU_PL_PREEMPT (TTM_PL_PRIV + 3)
#define AMDGPU_PL_DOORBELL (TTM_PL_PRIV + 4)
-#define __AMDGPU_PL_LAST (TTM_PL_PRIV + 4)
+#define __AMDGPU_PL_NUM (TTM_PL_PRIV + 5)
#define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 4c7b53648a50..cf700824b960 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -1434,6 +1434,7 @@ void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type,
*
* @adev: amdgpu device
* @fw: pointer to load firmware to
+ * @required: whether the firmware is required
* @fmt: firmware name format string
* @...: variable arguments
*
@@ -1442,7 +1443,7 @@ void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type,
* the error code to -ENODEV, so that early_init functions will fail to load.
*/
int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw,
- const char *fmt, ...)
+ enum amdgpu_ucode_required required, const char *fmt, ...)
{
char fname[AMDGPU_UCODE_NAME_MAX];
va_list ap;
@@ -1456,16 +1457,24 @@ int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw,
return -EOVERFLOW;
}
- r = request_firmware(fw, fname, adev->dev);
+ if (required == AMDGPU_UCODE_REQUIRED)
+ r = request_firmware(fw, fname, adev->dev);
+ else {
+ r = firmware_request_nowarn(fw, fname, adev->dev);
+ if (r)
+ drm_info(&adev->ddev, "Optional firmware \"%s\" was not found\n", fname);
+ }
if (r)
return -ENODEV;
r = amdgpu_ucode_validate(*fw);
- if (r) {
+ if (r)
+ /*
+		 * amdgpu_ucode_request() should be paired with amdgpu_ucode_release()
+		 * regardless of success/failure; amdgpu_ucode_release() takes care of
+		 * releasing the firmware, so avoid a redundant release here.
+ */
dev_dbg(adev->dev, "\"%s\" failed to validate\n", fname);
- release_firmware(*fw);
- *fw = NULL;
- }
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 4150ec0aa10d..4eedd92f000b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -126,6 +126,7 @@ enum psp_fw_type {
PSP_FW_TYPE_PSP_DBG_DRV,
PSP_FW_TYPE_PSP_RAS_DRV,
PSP_FW_TYPE_PSP_IPKEYMGR_DRV,
+ PSP_FW_TYPE_PSP_SPDM_DRV,
PSP_FW_TYPE_MAX_INDEX,
};
@@ -551,6 +552,11 @@ enum amdgpu_firmware_load_type {
AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO,
};
+enum amdgpu_ucode_required {
+ AMDGPU_UCODE_OPTIONAL,
+ AMDGPU_UCODE_REQUIRED,
+};
+
/* conform to smu_ucode_xfer_cz.h */
#define AMDGPU_SDMA0_UCODE_LOADED 0x00000001
#define AMDGPU_SDMA1_UCODE_LOADED 0x00000002
@@ -604,9 +610,9 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr);
-__printf(3, 4)
+__printf(4, 5)
int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw,
- const char *fmt, ...);
+ enum amdgpu_ucode_required required, const char *fmt, ...);
void amdgpu_ucode_release(const struct firmware **fw);
bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
uint16_t hdr_major, uint16_t hdr_minor);
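amdgpu_ucode_request() now takes an AMDGPU_UCODE_REQUIRED/AMDGPU_UCODE_OPTIONAL argument: required firmware keeps the warn-on-missing behaviour, while optional firmware is requested quietly and only reported informationally. A rough userspace sketch of that split, with stubbed loaders and hypothetical names, follows.

#include <stdio.h>

enum fw_required { FW_OPTIONAL, FW_REQUIRED };

/* Stub that pretends the firmware file was not found. */
static int fake_load_firmware(const char *name)
{
	(void)name;
	return -1;
}

/*
 * Required firmware failures are errors; optional firmware failures are
 * only reported as information and the caller can carry on without it.
 */
static int fake_ucode_request(const char *name, enum fw_required required)
{
	int r = fake_load_firmware(name);

	if (r) {
		if (required == FW_REQUIRED)
			fprintf(stderr, "error: firmware \"%s\" missing\n", name);
		else
			printf("info: optional firmware \"%s\" not found\n", name);
	}
	return r;
}

int main(void)
{
	fake_ucode_request("amdgpu/required.bin", FW_REQUIRED);
	fake_ucode_request("amdgpu/optional.bin", FW_OPTIONAL);
	return 0;
}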
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 896f3609b0ee..eafe20d8fe0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -78,7 +78,7 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
if (amdgpu_bad_page_threshold != 0) {
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
- err_data.err_addr_cnt);
+ err_data.err_addr_cnt, false);
amdgpu_ras_save_bad_pages(adev, NULL);
}
@@ -166,10 +166,11 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
if ((amdgpu_bad_page_threshold != 0) &&
err_data->err_addr_cnt) {
amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
- err_data->err_addr_cnt);
+ err_data->err_addr_cnt, false);
amdgpu_ras_save_bad_pages(adev, &err_count);
- amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs);
+ amdgpu_dpm_send_hbm_bad_pages_num(adev,
+ con->eeprom_control.ras_num_bad_pages);
if (con->update_channel_flag == true) {
amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap);
@@ -444,3 +445,77 @@ int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev,
return ret;
}
+
+int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t pa_addr)
+{
+ struct ta_ras_query_address_output addr_out;
+
+ /* reinit err_data */
+ err_data->err_addr_cnt = 0;
+ err_data->err_addr_len = adev->umc.retire_unit;
+
+ addr_out.pa.pa = pa_addr;
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
+ return adev->umc.ras->convert_ras_err_addr(adev, err_data, NULL,
+ &addr_out, false);
+ else
+ return -EINVAL;
+}
+
+int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
+ uint64_t pa_addr, uint64_t *pfns, int len)
+{
+ int i, ret;
+ struct ras_err_data err_data;
+
+ err_data.err_addr = kcalloc(adev->umc.retire_unit,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+ if (!err_data.err_addr) {
+ dev_warn(adev->dev, "Failed to alloc memory in bad page lookup!\n");
+ return 0;
+ }
+
+ ret = amdgpu_umc_pages_in_a_row(adev, &err_data, pa_addr);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < adev->umc.retire_unit; i++) {
+ if (i >= len)
+ goto out;
+
+ pfns[i] = err_data.err_addr[i].retired_page;
+ }
+ ret = i;
+
+out:
+ kfree(err_data.err_addr);
+ return ret;
+}
+
+int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
+ uint64_t err_addr, uint32_t ch, uint32_t umc,
+ uint32_t node, uint32_t socket,
+ struct ta_ras_query_address_output *addr_out, bool dump_addr)
+{
+ struct ta_ras_query_address_input addr_in;
+ int ret;
+
+ memset(&addr_in, 0, sizeof(addr_in));
+ addr_in.ma.err_addr = err_addr;
+ addr_in.ma.ch_inst = ch;
+ addr_in.ma.umc_inst = umc;
+ addr_in.ma.node_inst = node;
+ addr_in.ma.socket_id = socket;
+
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
+ ret = adev->umc.ras->convert_ras_err_addr(adev, NULL, &addr_in,
+ addr_out, dump_addr);
+ if (ret)
+ return ret;
+ } else {
+ return 0;
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index ce4179db2a6d..a4a7e61817aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -54,6 +54,22 @@
/* Page retirement tag */
#define UMC_ECC_NEW_DETECTED_TAG 0x1
+/*
+ * a flag to indicate v2 of channel index stored in eeprom
+ *
+ * v1 (legacy way): store channel index within a umc instance in eeprom
+ * range in UMC v12: 0 ~ 7
+ * v2: store global channel index in eeprom
+ * range in UMC v12: 0 ~ 127
+ *
+ * NOTE: it would be better to store it in eeprom_table_record.mem_channel,
+ * but there are only 8 bits in mem_channel and the channel number may
+ * increase in the future, so we decide to save it in
+ * eeprom_table_record.retired_page. retired_page is useless in v2;
+ * we depend on eeprom_table_record.address instead of retired_page in v2.
+ * Only 48 bits are saved on eeprom, so use bit 47 here.
+ */
+#define UMC_CHANNEL_IDX_V2 BIT_ULL(47)
typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst,
uint32_t umc_inst, uint32_t ch_inst, void *data);
@@ -70,6 +86,13 @@ struct amdgpu_umc_ras {
enum amdgpu_mca_error_type type, void *ras_error_status);
int (*update_ecc_status)(struct amdgpu_device *adev,
uint64_t status, uint64_t ipid, uint64_t addr);
+ int (*convert_ras_err_addr)(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
+ struct ta_ras_query_address_input *addr_in,
+ struct ta_ras_query_address_output *addr_out,
+ bool dump_addr);
+ uint32_t (*get_die_id_from_pa)(struct amdgpu_device *adev,
+ uint64_t mca_addr, uint64_t retired_page);
};
struct amdgpu_umc_funcs {
@@ -134,4 +157,12 @@ int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev,
void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
void *ras_error_status);
+int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t pa_addr);
+int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
+ uint64_t pa_addr, uint64_t *pfns, int len);
+int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
+ uint64_t err_addr, uint32_t ch, uint32_t umc,
+ uint32_t node, uint32_t socket,
+ struct ta_ras_query_address_output *addr_out, bool dump_addr);
#endif
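UMC_CHANNEL_IDX_V2 repurposes bit 47 of retired_page as an on-EEPROM marker that the record stores a global channel index; the flag is set just before the record is written out and stripped again afterwards, so in-memory users never see it. A compact sketch of setting, testing and clearing such a high bit, with a made-up record layout, is below.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Only 48 address bits are stored, so bit 47 is free to use as a flag. */
#define CHANNEL_IDX_V2 (1ULL << 47)

struct fake_record {
	uint64_t retired_page;
};

int main(void)
{
	struct fake_record rec = { .retired_page = 0x1234000ULL };

	/* Set the flag before writing the record out... */
	rec.retired_page |= CHANNEL_IDX_V2;
	bool is_v2 = rec.retired_page & CHANNEL_IDX_V2;

	/* ...and strip it again so callers see the plain page address. */
	rec.retired_page &= ~CHANNEL_IDX_V2;

	printf("v2 record: %d, page: 0x%llx\n", is_v2,
	       (unsigned long long)rec.retired_page);
	return 0;
}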
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
index bd2d3863c3ed..dde15c6a96e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
@@ -587,7 +587,8 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch)
break;
}
- r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, "%s", fw_name);
+ r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, AMDGPU_UCODE_REQUIRED,
+ "%s", fw_name);
if (r) {
release_firmware(adev->umsch_mm.fw);
adev->umsch_mm.fw = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 31fd30dcd593..74758b5ffc6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -260,7 +260,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
return -EINVAL;
}
- r = amdgpu_ucode_request(adev, &adev->uvd.fw, "%s", fw_name);
+ r = amdgpu_ucode_request(adev, &adev->uvd.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name);
if (r) {
dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n",
fw_name);
@@ -551,6 +551,8 @@ static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo)
for (i = 0; i < abo->placement.num_placement; ++i) {
abo->placements[i].fpfn = 0 >> PAGE_SHIFT;
abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
+ if (abo->placements[i].mem_type == TTM_PL_VRAM)
+ abo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 599d3ca4e0ef..b9060bcd4806 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -158,7 +158,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
return -EINVAL;
}
- r = amdgpu_ucode_request(adev, &adev->vce.fw, "%s", fw_name);
+ r = amdgpu_ucode_request(adev, &adev->vce.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name);
if (r) {
dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n",
fw_name);
@@ -503,7 +503,7 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
ib->ptr[i] = 0x0;
r = amdgpu_job_submit_direct(job, ring, &f);
- amdgpu_ib_free(ring->adev, &ib_msg, f);
+ amdgpu_ib_free(&ib_msg, f);
if (r)
goto err;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 3e94c3ba1ba2..83faf6e6788a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2016 Advanced Micro Devices, Inc.
+ * Copyright 2016-2024 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -62,6 +62,7 @@
#define FIRMWARE_VCN4_0_6 "amdgpu/vcn_4_0_6.bin"
#define FIRMWARE_VCN4_0_6_1 "amdgpu/vcn_4_0_6_1.bin"
#define FIRMWARE_VCN5_0_0 "amdgpu/vcn_5_0_0.bin"
+#define FIRMWARE_VCN5_0_1 "amdgpu/vcn_5_0_1.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
@@ -88,6 +89,7 @@ MODULE_FIRMWARE(FIRMWARE_VCN4_0_5);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_6);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_6_1);
MODULE_FIRMWARE(FIRMWARE_VCN5_0_0);
+MODULE_FIRMWARE(FIRMWARE_VCN5_0_1);
static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
@@ -99,11 +101,15 @@ int amdgpu_vcn_early_init(struct amdgpu_device *adev)
amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix));
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
if (i == 1 && amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 6))
- r = amdgpu_ucode_request(adev, &adev->vcn.fw[i], "amdgpu/%s_%d.bin", ucode_prefix, i);
+ r = amdgpu_ucode_request(adev, &adev->vcn.inst[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_%d.bin", ucode_prefix, i);
else
- r = amdgpu_ucode_request(adev, &adev->vcn.fw[i], "amdgpu/%s.bin", ucode_prefix);
+ r = amdgpu_ucode_request(adev, &adev->vcn.inst[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s.bin", ucode_prefix);
if (r) {
- amdgpu_ucode_release(&adev->vcn.fw[i]);
+ amdgpu_ucode_release(&adev->vcn.inst[i].fw);
return r;
}
}
@@ -151,7 +157,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
adev->vcn.using_unified_queue =
amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0);
- hdr = (const struct common_firmware_header *)adev->vcn.fw[0]->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[0].fw->data;
adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);
/* Bit 20-23, it is encode major and non-zero for new naming convention.
@@ -270,7 +276,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
for (i = 0; i < adev->vcn.num_enc_rings; ++i)
amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
- amdgpu_ucode_release(&adev->vcn.fw[j]);
+ amdgpu_ucode_release(&adev->vcn.inst[j].fw);
}
mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
@@ -282,7 +288,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance)
{
bool ret = false;
- int vcn_config = adev->vcn.vcn_config[vcn_instance];
+ int vcn_config = adev->vcn.inst[vcn_instance].vcn_config;
if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK))
ret = true;
@@ -362,12 +368,12 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
const struct common_firmware_header *hdr;
unsigned int offset;
- hdr = (const struct common_firmware_header *)adev->vcn.fw[i]->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
memcpy_toio(adev->vcn.inst[i].cpu_addr,
- adev->vcn.fw[i]->data + offset,
+ adev->vcn.inst[i].fw->data + offset,
le32_to_cpu(hdr->ucode_size_bytes));
drm_dev_exit(idx);
}
@@ -580,7 +586,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
if (r)
goto err_free;
- amdgpu_ib_free(adev, ib_msg, f);
+ amdgpu_ib_free(ib_msg, f);
if (fence)
*fence = dma_fence_get(f);
@@ -591,7 +597,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
err_free:
amdgpu_job_free(job);
err:
- amdgpu_ib_free(adev, ib_msg, f);
+ amdgpu_ib_free(ib_msg, f);
return r;
}
@@ -773,7 +779,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
if (r)
goto err_free;
- amdgpu_ib_free(adev, ib_msg, f);
+ amdgpu_ib_free(ib_msg, f);
if (fence)
*fence = dma_fence_get(f);
@@ -784,7 +790,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
err_free:
amdgpu_job_free(job);
err:
- amdgpu_ib_free(adev, ib_msg, f);
+ amdgpu_ib_free(ib_msg, f);
return r;
}
@@ -1014,7 +1020,7 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = 0;
error:
- amdgpu_ib_free(adev, &ib, fence);
+ amdgpu_ib_free(&ib, fence);
dma_fence_put(fence);
return r;
@@ -1025,7 +1031,8 @@ int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct amdgpu_device *adev = ring->adev;
long r;
- if (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(4, 0, 3)) {
+ if ((amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(4, 0, 3)) &&
+ (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(5, 0, 1))) {
r = amdgpu_vcn_enc_ring_test_ib(ring, timeout);
if (r)
goto error;
@@ -1063,7 +1070,7 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << i))
continue;
- hdr = (const struct common_firmware_header *)adev->vcn.fw[i]->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
/* currently only support 2 FW instances */
if (i >= 2) {
dev_info(adev->dev, "More then 2 VCN FW instances!\n");
@@ -1071,12 +1078,14 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev)
}
idx = AMDGPU_UCODE_ID_VCN + i;
adev->firmware.ucode[idx].ucode_id = idx;
- adev->firmware.ucode[idx].fw = adev->vcn.fw[i];
+ adev->firmware.ucode[idx].fw = adev->vcn.inst[i].fw;
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
if (amdgpu_ip_version(adev, UVD_HWIP, 0) ==
- IP_VERSION(4, 0, 3))
+ IP_VERSION(4, 0, 3) ||
+ amdgpu_ip_version(adev, UVD_HWIP, 0) ==
+ IP_VERSION(5, 0, 1))
break;
}
}
@@ -1320,3 +1329,71 @@ void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev)
device_remove_file(adev->dev, &dev_attr_vcn_reset_mask);
}
}
+
+/*
+ * debugfs to enable/disable vcn job submission to a specific core or
+ * instance. It is created only if the queue type is unified.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_vcn_sched_mask_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+
+ mask = (1ULL << adev->vcn.num_vcn_inst) - 1;
+ if ((val & mask) == 0)
+ return -EINVAL;
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ if (val & (1ULL << i))
+ ring->sched.ready = true;
+ else
+ ring->sched.ready = false;
+ }
+	/* make the sched.ready flag update visible immediately across smp */
+ smp_rmb();
+ return 0;
+}
+
+static int amdgpu_debugfs_vcn_sched_mask_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ if (ring->sched.ready)
+ mask |= 1ULL << i;
+ }
+ *val = mask;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_vcn_sched_mask_fops,
+ amdgpu_debugfs_vcn_sched_mask_get,
+ amdgpu_debugfs_vcn_sched_mask_set, "%llx\n");
+#endif
+
+void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ if (adev->vcn.num_vcn_inst <= 1 || !adev->vcn.using_unified_queue)
+ return;
+ sprintf(name, "amdgpu_vcn_sched_mask");
+ debugfs_create_file(name, 0600, root, adev,
+ &amdgpu_debugfs_vcn_sched_mask_fops);
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 1e32311c1dff..adaf4388ad28 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2016 Advanced Micro Devices, Inc.
+ * Copyright 2016-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -163,20 +163,30 @@
#define SOC24_DPG_MODE_OFFSET(ip, inst_idx, reg) \
({ \
uint32_t internal_reg_offset, addr; \
- bool video_range, aon_range; \
+ bool video_range, video1_range, aon_range, aon1_range; \
\
addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \
addr <<= 2; \
video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS)) && \
((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS + 0x2600))))); \
+ video1_range = ((((0xFFFFF & addr) >= (VCN1_VID_SOC_ADDRESS)) && \
+ ((0xFFFFF & addr) < ((VCN1_VID_SOC_ADDRESS + 0x2600))))); \
aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS)) && \
((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS + 0x600))))); \
+ aon1_range = ((((0xFFFFF & addr) >= (VCN1_AON_SOC_ADDRESS)) && \
+ ((0xFFFFF & addr) < ((VCN1_AON_SOC_ADDRESS + 0x600))))); \
if (video_range) \
internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS) + \
(VCN_VID_IP_ADDRESS)); \
else if (aon_range) \
internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS) + \
(VCN_AON_IP_ADDRESS)); \
+ else if (video1_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN1_VID_SOC_ADDRESS) + \
+ (VCN_VID_IP_ADDRESS)); \
+ else if (aon1_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN1_AON_SOC_ADDRESS) + \
+ (VCN_AON_IP_ADDRESS)); \
else \
internal_reg_offset = (0xFFFFF & addr); \
\
@@ -297,6 +307,9 @@ struct amdgpu_vcn_inst {
atomic_t dpg_enc_submission_cnt;
struct amdgpu_vcn_fw_shared fw_shared;
uint8_t aid_id;
+ const struct firmware *fw; /* VCN firmware */
+ uint8_t vcn_config;
+ uint32_t vcn_codec_disable_mask;
};
struct amdgpu_vcn_ras {
@@ -306,15 +319,12 @@ struct amdgpu_vcn_ras {
struct amdgpu_vcn {
unsigned fw_version;
struct delayed_work idle_work;
- const struct firmware *fw[AMDGPU_MAX_VCN_INSTANCES]; /* VCN firmware */
unsigned num_enc_rings;
enum amd_powergating_state cur_state;
bool indirect_sram;
uint8_t num_vcn_inst;
struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES];
- uint8_t vcn_config[AMDGPU_MAX_VCN_INSTANCES];
- uint32_t vcn_codec_disable_mask[AMDGPU_MAX_VCN_INSTANCES];
struct amdgpu_vcn_reg internal;
struct mutex vcn_pg_lock;
struct mutex vcn1_jpeg1_workaround;
@@ -523,5 +533,6 @@ int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev);
int amdgpu_vcn_sysfs_reset_mask_init(struct amdgpu_device *adev);
void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev);
#endif
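The extended SOC24_DPG_MODE_OFFSET() macro above now also recognises the VCN1 video and AON windows: an address that falls inside one of the known SoC windows is rebased onto the corresponding internal IP offset, otherwise the low 20 bits are used directly. A simplified, standalone version of that range check is shown here; the window bases and sizes are made up purely for illustration and do not come from the VCN headers.

#include <stdint.h>
#include <stdio.h>

/* Illustrative window layout only. */
#define VID_SOC_BASE   0x20000u
#define VID_SOC_SIZE    0x2600u
#define AON_SOC_BASE   0x30000u
#define AON_SOC_SIZE     0x600u
#define VID_IP_BASE      0x100u
#define AON_IP_BASE      0x200u

/* Rebase an address onto its IP-internal offset when it hits a window. */
static uint32_t internal_offset(uint32_t addr)
{
	uint32_t a = addr & 0xFFFFF;

	if (a >= VID_SOC_BASE && a < VID_SOC_BASE + VID_SOC_SIZE)
		return a - VID_SOC_BASE + VID_IP_BASE;
	if (a >= AON_SOC_BASE && a < AON_SOC_BASE + AON_SOC_SIZE)
		return a - AON_SOC_BASE + AON_IP_BASE;
	return a; /* outside any known window: use the low 20 bits as-is */
}

int main(void)
{
	printf("0x20010 -> 0x%x\n", internal_offset(0x20010));
	printf("0x30004 -> 0x%x\n", internal_offset(0x30004));
	printf("0x40000 -> 0x%x\n", internal_offset(0x40000));
	return 0;
}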
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index c704e9803e11..0af469ec6fcc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -1263,12 +1263,10 @@ static int amdgpu_virt_cache_host_error_counts(struct amdgpu_device *adev,
if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
return 0;
- tmp = kmalloc(used_size, GFP_KERNEL);
+ tmp = kmemdup(&host_telemetry->body.error_count, used_size, GFP_KERNEL);
if (!tmp)
return -ENOMEM;
- memcpy(tmp, &host_telemetry->body.error_count, used_size);
-
if (checksum != amd_sriov_msg_checksum(tmp, used_size, 0, 0))
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index 8bf28d336807..03308261f894 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -632,13 +632,13 @@ static bool amdgpu_vkms_is_idle(void *handle)
return true;
}
-static int amdgpu_vkms_set_clockgating_state(void *handle,
+static int amdgpu_vkms_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int amdgpu_vkms_set_powergating_state(void *handle,
+static int amdgpu_vkms_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 8d9bf7a0857f..5c07777d3239 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -36,6 +36,7 @@
#include <drm/ttm/ttm_tt.h>
#include <drm/drm_exec.h>
#include "amdgpu.h"
+#include "amdgpu_vm.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_gmc.h"
@@ -311,6 +312,111 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
}
/**
+ * amdgpu_vm_update_shared - helper to update shared memory stat
+ * @base: base structure for tracking BO usage in a VM
+ *
+ * Takes the vm status_lock and updates the shared memory stat. If the basic
+ * stat changed (e.g. the buffer was moved) amdgpu_vm_update_stats needs to be
+ * called as well.
+ */
+static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base)
+{
+ struct amdgpu_vm *vm = base->vm;
+ struct amdgpu_bo *bo = base->bo;
+ uint64_t size = amdgpu_bo_size(bo);
+ uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
+ bool shared;
+
+ spin_lock(&vm->status_lock);
+ shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
+ if (base->shared != shared) {
+ base->shared = shared;
+ if (shared) {
+ vm->stats[bo_memtype].drm.shared += size;
+ vm->stats[bo_memtype].drm.private -= size;
+ } else {
+ vm->stats[bo_memtype].drm.shared -= size;
+ vm->stats[bo_memtype].drm.private += size;
+ }
+ }
+ spin_unlock(&vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_bo_update_shared - callback when bo gets shared/unshared
+ * @bo: amdgpu buffer object
+ *
+ * Update the per-VM stats for every VM this BO belongs to when it changes
+ * from private to shared or vice versa.
+ */
+void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo)
+{
+ struct amdgpu_vm_bo_base *base;
+
+ for (base = bo->vm_bo; base; base = base->next)
+ amdgpu_vm_update_shared(base);
+}
+
+/**
+ * amdgpu_vm_update_stats_locked - helper to update normal memory stat
+ * @base: base structure for tracking BO usage in a VM
+ * @res: the ttm_resource to use for the purpose of accounting, may or may not
+ * be bo->tbo.resource
+ * @sign: if we should add (+1) or subtract (-1) from the stat
+ *
+ * Caller needs to hold the vm status_lock. Useful when multiple updates
+ * need to happen at the same time.
+ */
+static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base,
+ struct ttm_resource *res, int sign)
+{
+ struct amdgpu_vm *vm = base->vm;
+ struct amdgpu_bo *bo = base->bo;
+ int64_t size = sign * amdgpu_bo_size(bo);
+ uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
+
+	/* For drm-total- and drm-shared-, BOs are accounted by their preferred
+ * placement, see also amdgpu_bo_mem_stats_placement.
+ */
+ if (base->shared)
+ vm->stats[bo_memtype].drm.shared += size;
+ else
+ vm->stats[bo_memtype].drm.private += size;
+
+ if (res && res->mem_type < __AMDGPU_PL_NUM) {
+ uint32_t res_memtype = res->mem_type;
+
+ vm->stats[res_memtype].drm.resident += size;
+		/* A BO only counts as purgeable if it is resident,
+ * since otherwise there's nothing to purge.
+ */
+ if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
+ vm->stats[res_memtype].drm.purgeable += size;
+ if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(res_memtype)))
+ vm->stats[bo_memtype].evicted += size;
+ }
+}
+
+/**
+ * amdgpu_vm_update_stats - helper to update normal memory stat
+ * @base: base structure for tracking BO usage in a VM
+ * @res: the ttm_resource to use for the purpose of accounting, may or may not
+ * be bo->tbo.resource
+ * @sign: if we should add (+1) or subtract (-1) from the stat
+ *
+ * Updates the basic memory stat when bo is added/deleted/moved.
+ */
+void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
+ struct ttm_resource *res, int sign)
+{
+ struct amdgpu_vm *vm = base->vm;
+
+ spin_lock(&vm->status_lock);
+ amdgpu_vm_update_stats_locked(base, res, sign);
+ spin_unlock(&vm->status_lock);
+}
+
+/**
* amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
*
* @base: base structure for tracking BO usage in a VM
@@ -333,6 +439,11 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
base->next = bo->vm_bo;
bo->vm_bo = base;
+ spin_lock(&vm->status_lock);
+ base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
+ amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1);
+ spin_unlock(&vm->status_lock);
+
if (!amdgpu_vm_is_bo_always_valid(vm, bo))
return;
@@ -674,12 +785,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
ring->funcs->emit_wreg;
- if (adev->gfx.enable_cleaner_shader &&
- ring->funcs->emit_cleaner_shader &&
- job->enforce_isolation)
- ring->funcs->emit_cleaner_shader(ring);
-
- if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
+ if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
+ !(job->enforce_isolation && !job->vmid))
return 0;
amdgpu_ring_ib_begin(ring);
@@ -690,6 +797,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
if (need_pipe_sync)
amdgpu_ring_emit_pipeline_sync(ring);
+ if (adev->gfx.enable_cleaner_shader &&
+ ring->funcs->emit_cleaner_shader &&
+ job->enforce_isolation)
+ ring->funcs->emit_cleaner_shader(ring);
+
if (vm_flush_needed) {
trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
@@ -1082,53 +1194,11 @@ error_free:
return r;
}
-static void amdgpu_vm_bo_get_memory(struct amdgpu_bo_va *bo_va,
- struct amdgpu_mem_stats *stats,
- unsigned int size)
-{
- struct amdgpu_vm *vm = bo_va->base.vm;
- struct amdgpu_bo *bo = bo_va->base.bo;
-
- if (!bo)
- return;
-
- /*
- * For now ignore BOs which are currently locked and potentially
- * changing their location.
- */
- if (!amdgpu_vm_is_bo_always_valid(vm, bo) &&
- !dma_resv_trylock(bo->tbo.base.resv))
- return;
-
- amdgpu_bo_get_memory(bo, stats, size);
- if (!amdgpu_vm_is_bo_always_valid(vm, bo))
- dma_resv_unlock(bo->tbo.base.resv);
-}
-
void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
- struct amdgpu_mem_stats *stats,
- unsigned int size)
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM])
{
- struct amdgpu_bo_va *bo_va, *tmp;
-
spin_lock(&vm->status_lock);
- list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status)
- amdgpu_vm_bo_get_memory(bo_va, stats, size);
-
- list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status)
- amdgpu_vm_bo_get_memory(bo_va, stats, size);
-
- list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
- amdgpu_vm_bo_get_memory(bo_va, stats, size);
-
- list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status)
- amdgpu_vm_bo_get_memory(bo_va, stats, size);
-
- list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status)
- amdgpu_vm_bo_get_memory(bo_va, stats, size);
-
- list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status)
- amdgpu_vm_bo_get_memory(bo_va, stats, size);
+ memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM);
spin_unlock(&vm->status_lock);
}
@@ -1265,10 +1335,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
* next command submission.
*/
if (amdgpu_vm_is_bo_always_valid(vm, bo)) {
- uint32_t mem_type = bo->tbo.resource->mem_type;
-
- if (!(bo->preferred_domains &
- amdgpu_mem_type_to_domain(mem_type)))
+ if (bo->tbo.resource &&
+ !(bo->preferred_domains &
+ amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type)))
amdgpu_vm_bo_evicted(&bo_va->base);
else
amdgpu_vm_bo_idle(&bo_va->base);
@@ -2075,6 +2144,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
if (*base != &bo_va->base)
continue;
+ amdgpu_vm_update_stats(*base, bo->tbo.resource, -1);
*base = bo_va->base.next;
break;
}
@@ -2143,14 +2213,12 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
/**
* amdgpu_vm_bo_invalidate - mark the bo as invalid
*
- * @adev: amdgpu_device pointer
* @bo: amdgpu buffer object
* @evicted: is the BO evicted
*
* Mark @bo as invalid.
*/
-void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
- struct amdgpu_bo *bo, bool evicted)
+void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted)
{
struct amdgpu_vm_bo_base *bo_base;
@@ -2176,6 +2244,32 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
}
/**
+ * amdgpu_vm_bo_move - handle BO move
+ *
+ * @bo: amdgpu buffer object
+ * @new_mem: the new placement of the BO move
+ * @evicted: is the BO evicted
+ *
+ * Update the memory stats for the new placement and mark @bo as invalid.
+ */
+void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
+ bool evicted)
+{
+ struct amdgpu_vm_bo_base *bo_base;
+
+ for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
+ struct amdgpu_vm *vm = bo_base->vm;
+
+ spin_lock(&vm->status_lock);
+ amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1);
+ amdgpu_vm_update_stats_locked(bo_base, new_mem, +1);
+ spin_unlock(&vm->status_lock);
+ }
+
+ amdgpu_vm_bo_invalidate(bo, evicted);
+}
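For intuition, a hedged sketch of the bookkeeping amdgpu_vm_bo_move() performs when, say, a VRAM-preferred BO is evicted to GTT. The resource arguments are placeholders, and the sequence below simply unrolls the locked updates done above for clarity rather than taking status_lock once.

static void example_evict_accounting(struct amdgpu_vm_bo_base *base,
				     struct ttm_resource *old_vram_res,
				     struct ttm_resource *new_gtt_res)
{
	/* Drop the old residency: the VRAM bucket's drm.resident shrinks by the BO size. */
	amdgpu_vm_update_stats(base, old_vram_res, -1);

	/* Add the new residency: the GTT bucket's drm.resident grows, and because GTT
	 * is not in the BO's preferred_domains, the preferred bucket's evicted counter
	 * grows by the BO size as well.
	 */
	amdgpu_vm_update_stats(base, new_gtt_res, +1);
}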
+
+/**
* amdgpu_vm_get_block_size - calculate VM page table size as power of two
*
* @vm_size: VM size
@@ -2594,6 +2688,16 @@ void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->is_compute_context = false;
}
+static int amdgpu_vm_stats_is_zero(struct amdgpu_vm *vm)
+{
+ for (int i = 0; i < __AMDGPU_PL_NUM; ++i) {
+ if (!(drm_memory_stats_is_zero(&vm->stats[i].drm) &&
+ vm->stats[i].evicted == 0))
+ return false;
+ }
+ return true;
+}
+
/**
* amdgpu_vm_fini - tear down a vm instance
*
@@ -2617,7 +2721,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
root = amdgpu_bo_ref(vm->root.bo);
amdgpu_bo_reserve(root, true);
- amdgpu_vm_put_task_info(vm->task_info);
amdgpu_vm_set_pasid(adev, vm, 0);
dma_fence_wait(vm->last_unlocked, false);
dma_fence_put(vm->last_unlocked);
@@ -2666,6 +2769,16 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
}
ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
+
+ if (!amdgpu_vm_stats_is_zero(vm)) {
+ struct amdgpu_task_info *ti = vm->task_info;
+
+ dev_warn(adev->dev,
+ "VM memory stats for proc %s(%d) task %s(%d) is non-zero when fini\n",
+ ti->process_name, ti->pid, ti->task_name, ti->tgid);
+ }
+
+ amdgpu_vm_put_task_info(vm->task_info);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 5d119ac26c4f..a3e128e373bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -35,6 +35,7 @@
#include "amdgpu_sync.h"
#include "amdgpu_ring.h"
#include "amdgpu_ids.h"
+#include "amdgpu_ttm.h"
struct drm_exec;
@@ -202,9 +203,13 @@ struct amdgpu_vm_bo_base {
/* protected by bo being reserved */
struct amdgpu_vm_bo_base *next;
- /* protected by spinlock */
+ /* protected by vm status_lock */
struct list_head vm_status;
+ /* whether the bo is counted as shared in mem stats;
+ * protected by vm status_lock */
+ bool shared;
+
/* protected by the BO being reserved */
bool moved;
};
@@ -324,10 +329,7 @@ struct amdgpu_vm_fault_info {
struct amdgpu_mem_stats {
struct drm_memory_stats drm;
- /* buffers that requested this placement */
- uint64_t requested;
- /* buffers that requested this placement
- * but are currently evicted */
+ /* buffers that requested this placement but are currently evicted */
uint64_t evicted;
};
@@ -345,6 +347,9 @@ struct amdgpu_vm {
/* Lock to protect vm_bo add/del/move on all lists of vm */
spinlock_t status_lock;
+ /* Memory statistics for this vm, protected by status_lock */
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];
+
/* Per-VM and PT BOs who needs a validation */
struct list_head evicted;
@@ -524,8 +529,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
bool clear);
bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
-void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
- struct amdgpu_bo *bo, bool evicted);
+void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted);
+void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
+ struct ttm_resource *new_res, int sign);
+void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo);
+void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
+ bool evicted);
uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
struct amdgpu_bo *bo);
@@ -576,8 +585,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
- struct amdgpu_mem_stats *stats,
- unsigned int size);
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]);
int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo_vm *vmbo, bool immediate);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index f78a0434a48f..b0bf21682115 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -537,6 +537,7 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
if (!entry->bo)
return;
+ amdgpu_vm_update_stats(entry, entry->bo->tbo.resource, -1);
entry->bo->vm_bo = NULL;
ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
index 110b120d7375..121ee17b522b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
@@ -236,7 +236,8 @@ int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe)
int ret;
amdgpu_ucode_ip_version_decode(adev, VPE_HWIP, fw_prefix, sizeof(fw_prefix));
- ret = amdgpu_ucode_request(adev, &adev->vpe.fw, "amdgpu/%s.bin", fw_prefix);
+ ret = amdgpu_ucode_request(adev, &adev->vpe.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s.bin", fw_prefix);
if (ret)
goto out;
@@ -646,16 +647,16 @@ static int vpe_ring_preempt_ib(struct amdgpu_ring *ring)
return r;
}
-static int vpe_set_clockgating_state(void *handle,
+static int vpe_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int vpe_set_powergating_state(void *handle,
+static int vpe_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_vpe *vpe = &adev->vpe;
if (!adev->pm.dpm_enabled)
@@ -833,7 +834,7 @@ static int vpe_ring_test_ib(struct amdgpu_ring *ring, long timeout)
ret = (le32_to_cpu(adev->wb.wb[index]) == test_pattern) ? 0 : -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 7d26a962f811..ff5e52025266 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -567,7 +567,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
else
remaining_size -= size;
}
- mutex_unlock(&mgr->lock);
if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) {
struct drm_buddy_block *dcc_block;
@@ -584,6 +583,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
(u64)vres->base.size,
&vres->blocks);
}
+ mutex_unlock(&mgr->lock);
vres->base.start = 0;
size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index e2cb1f080e88..08d6787893b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -2161,13 +2161,13 @@ static int cik_common_soft_reset(struct amdgpu_ip_block *ip_block)
return 0;
}
-static int cik_common_set_clockgating_state(void *handle,
+static int cik_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int cik_common_set_powergating_state(void *handle,
+static int cik_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index 1da17755ad53..444563486769 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -402,13 +402,13 @@ static int cik_ih_soft_reset(struct amdgpu_ip_block *ip_block)
return 0;
}
-static int cik_ih_set_clockgating_state(void *handle,
+static int cik_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int cik_ih_set_powergating_state(void *handle,
+static int cik_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index ede1a028d48d..d9bd8f3f17e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -133,9 +133,11 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
if (i == 0)
err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_sdma.bin", chip_name);
else
err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_sdma1.bin", chip_name);
if (err)
goto out;
@@ -696,7 +698,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -1189,11 +1191,11 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int cik_sdma_set_clockgating_state(void *handle,
+static int cik_sdma_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE)
gate = true;
@@ -1204,7 +1206,7 @@ static int cik_sdma_set_clockgating_state(void *handle,
return 0;
}
-static int cik_sdma_set_powergating_state(void *handle,
+static int cik_sdma_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index d72973bd570d..82586b76aeda 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -398,14 +398,14 @@ static int cz_ih_soft_reset(struct amdgpu_ip_block *ip_block)
return 0;
}
-static int cz_ih_set_clockgating_state(void *handle,
+static int cz_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
// TODO
return 0;
}
-static int cz_ih_set_powergating_state(void *handle,
+static int cz_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
// TODO
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 5098c50d54c8..c5e3d2251b18 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -2687,6 +2687,32 @@ static const struct drm_crtc_helper_funcs dce_v10_0_crtc_helper_funcs = {
.get_scanout_position = amdgpu_crtc_get_scanout_position,
};
+static void dce_v10_0_panic_flush(struct drm_plane *plane)
+{
+ struct drm_framebuffer *fb;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_device *adev;
+ uint32_t fb_format;
+
+ if (!plane->fb)
+ return;
+
+ fb = plane->fb;
+ amdgpu_crtc = to_amdgpu_crtc(plane->crtc);
+ adev = drm_to_adev(fb->dev);
+
+ /* Disable DC tiling */
+ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK;
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+
+}
+
+static const struct drm_plane_helper_funcs dce_v10_0_drm_primary_plane_helper_funcs = {
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = dce_v10_0_panic_flush,
+};
+
static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index)
{
struct amdgpu_crtc *amdgpu_crtc;
@@ -2734,6 +2760,7 @@ static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index)
amdgpu_crtc->encoder = NULL;
amdgpu_crtc->connector = NULL;
drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v10_0_crtc_helper_funcs);
+ drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v10_0_drm_primary_plane_helper_funcs);
return 0;
}
@@ -3302,13 +3329,13 @@ static int dce_v10_0_hpd_irq(struct amdgpu_device *adev,
return 0;
}
-static int dce_v10_0_set_clockgating_state(void *handle,
+static int dce_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int dce_v10_0_set_powergating_state(void *handle,
+static int dce_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index c5680ff4ab9f..ea42a4472bf6 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -2800,6 +2800,32 @@ static const struct drm_crtc_helper_funcs dce_v11_0_crtc_helper_funcs = {
.get_scanout_position = amdgpu_crtc_get_scanout_position,
};
+static void dce_v11_0_panic_flush(struct drm_plane *plane)
+{
+ struct drm_framebuffer *fb;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_device *adev;
+ uint32_t fb_format;
+
+ if (!plane->fb)
+ return;
+
+ fb = plane->fb;
+ amdgpu_crtc = to_amdgpu_crtc(plane->crtc);
+ adev = drm_to_adev(fb->dev);
+
+ /* Disable DC tiling */
+ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK;
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+
+}
+
+static const struct drm_plane_helper_funcs dce_v11_0_drm_primary_plane_helper_funcs = {
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = dce_v11_0_panic_flush,
+};
+
static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index)
{
struct amdgpu_crtc *amdgpu_crtc;
@@ -2847,6 +2873,7 @@ static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index)
amdgpu_crtc->encoder = NULL;
amdgpu_crtc->connector = NULL;
drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v11_0_crtc_helper_funcs);
+ drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v11_0_drm_primary_plane_helper_funcs);
return 0;
}
@@ -3434,13 +3461,13 @@ static int dce_v11_0_hpd_irq(struct amdgpu_device *adev,
return 0;
}
-static int dce_v11_0_set_clockgating_state(void *handle,
+static int dce_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int dce_v11_0_set_powergating_state(void *handle,
+static int dce_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index eb7de9122d99..915804a6a1d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -2602,6 +2602,32 @@ static const struct drm_crtc_helper_funcs dce_v6_0_crtc_helper_funcs = {
.get_scanout_position = amdgpu_crtc_get_scanout_position,
};
+static void dce_v6_0_panic_flush(struct drm_plane *plane)
+{
+ struct drm_framebuffer *fb;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_device *adev;
+ uint32_t fb_format;
+
+ if (!plane->fb)
+ return;
+
+ fb = plane->fb;
+ amdgpu_crtc = to_amdgpu_crtc(plane->crtc);
+ adev = drm_to_adev(fb->dev);
+
+ /* Disable DC tiling */
+ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ fb_format &= ~GRPH_ARRAY_MODE(0x7);
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+
+}
+
+static const struct drm_plane_helper_funcs dce_v6_0_drm_primary_plane_helper_funcs = {
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = dce_v6_0_panic_flush,
+};
+
static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index)
{
struct amdgpu_crtc *amdgpu_crtc;
@@ -2629,6 +2655,7 @@ static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index)
amdgpu_crtc->encoder = NULL;
amdgpu_crtc->connector = NULL;
drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v6_0_crtc_helper_funcs);
+ drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v6_0_drm_primary_plane_helper_funcs);
return 0;
}
@@ -3124,13 +3151,13 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev,
}
-static int dce_v6_0_set_clockgating_state(void *handle,
+static int dce_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int dce_v6_0_set_powergating_state(void *handle,
+static int dce_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 04b79ff87f75..f2edc0fece5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -2613,6 +2613,31 @@ static const struct drm_crtc_helper_funcs dce_v8_0_crtc_helper_funcs = {
.get_scanout_position = amdgpu_crtc_get_scanout_position,
};
+static void dce_v8_0_panic_flush(struct drm_plane *plane)
+{
+ struct drm_framebuffer *fb;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_device *adev;
+ uint32_t fb_format;
+
+ if (!plane->fb)
+ return;
+
+ fb = plane->fb;
+ amdgpu_crtc = to_amdgpu_crtc(plane->crtc);
+ adev = drm_to_adev(fb->dev);
+
+ /* Disable DC tiling */
+ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK;
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+}
+
+static const struct drm_plane_helper_funcs dce_v8_0_drm_primary_plane_helper_funcs = {
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = dce_v8_0_panic_flush,
+};
+
static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index)
{
struct amdgpu_crtc *amdgpu_crtc;
@@ -2640,6 +2665,7 @@ static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index)
amdgpu_crtc->encoder = NULL;
amdgpu_crtc->connector = NULL;
drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v8_0_crtc_helper_funcs);
+ drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v8_0_drm_primary_plane_helper_funcs);
return 0;
}
@@ -3212,13 +3238,13 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev,
}
-static int dce_v8_0_set_clockgating_state(void *handle,
+static int dce_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int dce_v8_0_set_powergating_state(void *handle,
+static int dce_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 24dce803a829..5ba263fe5512 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -45,6 +45,7 @@
#include "clearstate_gfx10.h"
#include "v10_structs.h"
#include "gfx_v10_0.h"
+#include "gfx_v10_0_cleaner_shader.h"
#include "nbio_v2_3.h"
/*
@@ -3673,7 +3674,7 @@ static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
unsigned int vmid);
-static int gfx_v10_0_set_powergating_state(void *handle,
+static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
@@ -4036,7 +4037,7 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
else
r = -EINVAL;
err2:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
amdgpu_device_wb_free(adev, index);
@@ -4138,18 +4139,21 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_pfp%s.bin", ucode_prefix, wks);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_me%s.bin", ucode_prefix, wks);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_ce%s.bin", ucode_prefix, wks);
if (err)
goto out;
@@ -4173,6 +4177,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
}
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec%s.bin", ucode_prefix, wks);
if (err)
goto out;
@@ -4180,6 +4185,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec2%s.bin", ucode_prefix, wks);
if (!err) {
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
@@ -4733,6 +4739,23 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
break;
}
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 64 &&
+ adev->gfx.pfp_fw_version >= 100 &&
+ adev->gfx.mec_fw_version >= 122) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
default:
adev->gfx.enable_cleaner_shader = false;
break;
@@ -5952,7 +5975,7 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
else
WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
- if (adev->job_hang && !enable)
+ if (amdgpu_in_reset(adev) && !enable)
return 0;
for (i = 0; i < adev->usec_timeout; i++) {
@@ -6599,17 +6622,13 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp | 0x80);
break;
default:
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80);
break;
}
}
@@ -7457,7 +7476,7 @@ static int gfx_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
* otherwise the gfxoff disallowing will be failed to set.
*/
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 1))
- gfx_v10_0_set_powergating_state(ip_block->adev, AMD_PG_STATE_UNGATE);
+ gfx_v10_0_set_powergating_state(ip_block, AMD_PG_STATE_UNGATE);
if (!adev->no_hw_access) {
if (amdgpu_async_gfx_ring) {
@@ -8345,10 +8364,10 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = {
.is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range,
};
-static int gfx_v10_0_set_powergating_state(void *handle,
+static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
@@ -8383,10 +8402,10 @@ static int gfx_v10_0_set_powergating_state(void *handle,
return 0;
}
-static int gfx_v10_0_set_clockgating_state(void *handle,
+static int gfx_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
new file mode 100644
index 000000000000..663c2572d440
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Define the cleaner shader gfx_10_3_0 */
+static const u32 gfx_10_3_0_cleaner_shader_hex[] = {
+ 0xb0804004, 0xbf8a0000,
+ 0xbe8203b8, 0xbefc0380,
+ 0x7e008480, 0x7e028480,
+ 0x7e048480, 0x7e068480,
+ 0x7e088480, 0x7e0a8480,
+ 0x7e0c8480, 0x7e0e8480,
+ 0xbefc0302, 0x80828802,
+ 0xbf84fff5, 0xbe8203ff,
+ 0x80000000, 0x87020002,
+ 0xbf840012, 0xbefe03c1,
+ 0xbeff03c1, 0xd7650001,
+ 0x0001007f, 0xd7660001,
+ 0x0002027e, 0x16020288,
+ 0xbe8203bf, 0xbefc03c1,
+ 0xd9382000, 0x00020201,
+ 0xd9386040, 0x00040401,
+ 0xd70f6a01, 0x000202ff,
+ 0x00000400, 0x80828102,
+ 0xbf84fff7, 0xbefc03ff,
+ 0x00000068, 0xbe803080,
+ 0xbe813080, 0xbe823080,
+ 0xbe833080, 0x80fc847c,
+ 0xbf84fffa, 0xbeea0480,
+ 0xbeec0480, 0xbeee0480,
+ 0xbef00480, 0xbef20480,
+ 0xbef40480, 0xbef60480,
+ 0xbef80480, 0xbefa0480,
+ 0xbf810000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm
new file mode 100644
index 000000000000..0e1c246166c0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader is to clean LDS, SGPRs and VGPRs. It is the first 64 Dwords (256 bytes) of the 192 Dword cleaner shader.
+// To turn this shader program on for compilation, change this to main and the lower shader main to main_1
+
+// GFX10.3 : Clear SGPRs, VGPRs and LDS
+// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot
+// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD
+// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS)
+// It takes 2 workgroups to use all of LDS: one on each CU of the WGP
+// Each wave clears SGPRs 0 - 107
+// Each wave clears VGPRs 0 - 63
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+
+
+shader main
+ asic(GFX10)
+ type(CS)
+ wave_size(32)
+// Note: original source code from SQ team
+
+//
+// Create 32 waves in a threadgroup (CS waves)
+// Each allocates 64 VGPRs
+// The workgroup allocates all of LDS (64kbytes)
+//
+// Takes about 2500 clocks to run.
+// (theoretical fastest = 1024 clks vgpr + 640 clks lds = 1664 clks)
+//
+ S_BARRIER
+ s_mov_b32 s2, 0x00000038 // Loop 64/8=8 times (loop unrolled for performance)
+ s_mov_b32 m0, 0
+ //
+ // CLEAR VGPRs
+ //
+label_0005:
+ v_movreld_b32 v0, 0
+ v_movreld_b32 v1, 0
+ v_movreld_b32 v2, 0
+ v_movreld_b32 v3, 0
+ v_movreld_b32 v4, 0
+ v_movreld_b32 v5, 0
+ v_movreld_b32 v6, 0
+ v_movreld_b32 v7, 0
+ s_mov_b32 m0, s2
+ s_sub_u32 s2, s2, 8
+ s_cbranch_scc0 label_0005
+ //
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x0000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+
+ //
+ // CLEAR SGPRs
+ //
+label_0023:
+ s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance)
+label_sgpr_loop:
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ // clear flat scratch, vcc and ttmp registers
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 vcc, 0 //clear vcc
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+
+ s_endpgm
+
+end
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 2ae058a224f4..56c06b72a70a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -615,7 +615,7 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err2:
if (!ring->is_mes_queue)
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
if (!ring->is_mes_queue)
@@ -639,6 +639,7 @@ static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *
int err = 0;
err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_toc.bin", ucode_prefix);
if (err)
goto out;
@@ -688,6 +689,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_pfp.bin", ucode_prefix);
if (err)
goto out;
@@ -705,6 +707,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
}
err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_me.bin", ucode_prefix);
if (err)
goto out;
@@ -720,9 +723,11 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) &&
adev->pdev->revision == 0xCE)
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/gc_11_0_0_rlc_1.bin");
else
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_rlc.bin", ucode_prefix);
if (err)
goto out;
@@ -735,6 +740,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
}
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec.bin", ucode_prefix);
if (err)
goto out;
@@ -1885,6 +1891,7 @@ static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
{
+ u32 rb_bitmap_per_sa;
u32 rb_bitmap_width_per_sa;
u32 max_sa;
u32 active_sa_bitmap;
@@ -1902,9 +1909,11 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
adev->gfx.config.max_sh_per_se;
rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
adev->gfx.config.max_sh_per_se;
+ rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
+
for (i = 0; i < max_sa; i++) {
if (active_sa_bitmap & (1 << i))
- active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
+ active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa));
}
active_rb_bitmap &= global_active_rb_bitmap;
@@ -3918,9 +3927,7 @@ static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
}
static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
@@ -5458,10 +5465,10 @@ static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
-static int gfx_v11_0_set_powergating_state(void *handle,
+static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
@@ -5494,10 +5501,10 @@ static int gfx_v11_0_set_powergating_state(void *handle,
return 0;
}
-static int gfx_v11_0_set_clockgating_state(void *handle,
+static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -6646,30 +6653,14 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
{
struct amdgpu_device *adev = ring->adev;
- int i, r = 0;
+ int r = 0;
if (amdgpu_sriov_vf(adev))
return -EINVAL;
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
- mutex_lock(&adev->srbm_mutex);
- soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
- WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
-
- /* make sure dequeue is complete*/
- for (i = 0; i < adev->usec_timeout; i++) {
- if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
- break;
- udelay(1);
- }
- if (i >= adev->usec_timeout)
- r = -ETIMEDOUT;
- soc21_grbm_select(adev, 0, 0, 0, 0);
- mutex_unlock(&adev->srbm_mutex);
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
if (r) {
- dev_err(adev->dev, "fail to wait on hqd deactivate\n");
+ dev_err(adev->dev, "reset via MMIO failed %d\n", r);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index fe7c48f2fb2a..4b6e05750654 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -513,7 +513,7 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err2:
if (!ring->is_mes_queue)
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
if (!ring->is_mes_queue)
@@ -537,6 +537,7 @@ static int gfx_v12_0_init_toc_microcode(struct amdgpu_device *adev, const char *
int err = 0;
err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_toc.bin", ucode_prefix);
if (err)
goto out;
@@ -566,6 +567,7 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev)
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_pfp.bin", ucode_prefix);
if (err)
goto out;
@@ -573,6 +575,7 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev)
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_me.bin", ucode_prefix);
if (err)
goto out;
@@ -581,6 +584,7 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev)
if (!amdgpu_sriov_vf(adev)) {
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_rlc.bin", ucode_prefix);
if (err)
goto out;
@@ -593,6 +597,7 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev)
}
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec.bin", ucode_prefix);
if (err)
goto out;
@@ -1437,11 +1442,19 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
}
}
- /* TODO: Add queue reset mask when FW fully supports it */
adev->gfx.gfx_supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
adev->gfx.compute_supported_reset =
amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ if ((adev->gfx.me_fw_version >= 2660) &&
+ (adev->gfx.mec_fw_version >= 2920)) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
+ }
if (!adev->enable_mes_kiq) {
r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, 0);
@@ -1610,6 +1623,7 @@ static u32 gfx_v12_0_get_rb_active_bitmap(struct amdgpu_device *adev)
static void gfx_v12_0_setup_rb(struct amdgpu_device *adev)
{
+ u32 rb_bitmap_per_sa;
u32 rb_bitmap_width_per_sa;
u32 max_sa;
u32 active_sa_bitmap;
@@ -1627,12 +1641,14 @@ static void gfx_v12_0_setup_rb(struct amdgpu_device *adev)
adev->gfx.config.max_sh_per_se;
rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
adev->gfx.config.max_sh_per_se;
+ rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
+
for (i = 0; i < max_sa; i++) {
if (active_sa_bitmap & (1 << i))
- active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
+ active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa));
}
- active_rb_bitmap |= global_active_rb_bitmap;
+ active_rb_bitmap &= global_active_rb_bitmap;
adev->gfx.config.backend_enable_mask = active_rb_bitmap;
adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
}
@@ -2832,9 +2848,7 @@ static void gfx_v12_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
}
static void gfx_v12_0_cp_set_doorbell_range(struct amdgpu_device *adev)
@@ -3864,10 +3878,10 @@ static void gfx_v12_cntl_pg(struct amdgpu_device *adev, bool enable)
}
#endif
-static int gfx_v12_0_set_powergating_state(void *handle,
+static int gfx_v12_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
@@ -4115,15 +4129,15 @@ static int gfx_v12_0_update_gfx_clock_gating(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v12_0_set_clockgating_state(void *handle,
+static int gfx_v12_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
gfx_v12_0_update_gfx_clock_gating(adev,
@@ -5233,24 +5247,16 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
{
struct amdgpu_device *adev = ring->adev;
- int r, i;
+ int r;
if (amdgpu_sriov_vf(adev))
return -EINVAL;
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
- mutex_lock(&adev->srbm_mutex);
- soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
- WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
- for (i = 0; i < adev->usec_timeout; i++) {
- if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
- break;
- udelay(1);
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
+ if (r) {
+ dev_err(adev->dev, "reset via MMIO failed %d\n", r);
+ return r;
}
- soc24_grbm_select(adev, 0, 0, 0, 0);
- mutex_unlock(&adev->srbm_mutex);
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
r = amdgpu_bo_reserve(ring->mqd_obj, false);
if (unlikely(r != 0)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h
index bcc9c72ccbde..f7184b2dc4e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h
@@ -26,4 +26,6 @@
extern const struct amdgpu_ip_block_version gfx_v12_0_ip_block;
+int gfx_v12_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+ bool req);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 41f50bf380c4..f26e2cdec07a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -337,6 +337,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
}
err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_pfp.bin", chip_name);
if (err)
goto out;
@@ -345,6 +346,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_me.bin", chip_name);
if (err)
goto out;
@@ -353,6 +355,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_ce.bin", chip_name);
if (err)
goto out;
@@ -361,6 +364,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_rlc.bin", chip_name);
if (err)
goto out;
@@ -1906,7 +1910,7 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
error:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
return r;
}
@@ -3373,11 +3377,11 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v6_0_set_clockgating_state(void *handle,
+static int gfx_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE)
gate = true;
@@ -3395,11 +3399,11 @@ static int gfx_v6_0_set_clockgating_state(void *handle,
return 0;
}
-static int gfx_v6_0_set_powergating_state(void *handle,
+static int gfx_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE)
gate = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 824d5913103b..84745b2453ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -934,33 +934,39 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
}
err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_pfp.bin", chip_name);
if (err)
goto out;
err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_me.bin", chip_name);
if (err)
goto out;
err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_ce.bin", chip_name);
if (err)
goto out;
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec.bin", chip_name);
if (err)
goto out;
if (adev->asic_type == CHIP_KAVERI) {
err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec2.bin", chip_name);
if (err)
goto out;
}
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_rlc.bin", chip_name);
out:
if (err) {
@@ -2324,7 +2330,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
error:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
return r;
}
@@ -4846,11 +4852,11 @@ static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v7_0_set_clockgating_state(void *handle,
+static int gfx_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE)
gate = true;
@@ -4869,11 +4875,11 @@ static int gfx_v7_0_set_clockgating_state(void *handle,
return 0;
}
-static int gfx_v7_0_set_powergating_state(void *handle,
+static int gfx_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE)
gate = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index b7006c41e270..6a025438f9d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -914,7 +914,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err2:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
amdgpu_device_wb_free(adev, index);
@@ -982,13 +982,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_OPTIONAL,
"amdgpu/%s_pfp_2.bin", chip_name);
if (err == -ENODEV) {
err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_pfp.bin", chip_name);
}
} else {
err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_pfp.bin", chip_name);
}
if (err)
@@ -999,13 +1002,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_OPTIONAL,
"amdgpu/%s_me_2.bin", chip_name);
if (err == -ENODEV) {
err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_me.bin", chip_name);
}
} else {
err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_me.bin", chip_name);
}
if (err)
@@ -1017,13 +1023,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_OPTIONAL,
"amdgpu/%s_ce_2.bin", chip_name);
if (err == -ENODEV) {
err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_ce.bin", chip_name);
}
} else {
err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_ce.bin", chip_name);
}
if (err)
@@ -1044,6 +1053,7 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
adev->virt.chained_ib_support = false;
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_rlc.bin", chip_name);
if (err)
goto out;
@@ -1093,13 +1103,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_OPTIONAL,
"amdgpu/%s_mec_2.bin", chip_name);
if (err == -ENODEV) {
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec.bin", chip_name);
}
} else {
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec.bin", chip_name);
}
if (err)
@@ -1112,13 +1125,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
(adev->asic_type != CHIP_TOPAZ)) {
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_OPTIONAL,
"amdgpu/%s_mec2_2.bin", chip_name);
if (err == -ENODEV) {
err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec2.bin", chip_name);
}
} else {
err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec2.bin", chip_name);
}
if (!err) {
@@ -1640,7 +1656,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
RREG32(sec_ded_counter_registers[i]);
fail:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
return r;
@@ -4304,9 +4320,7 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32(mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32(mmRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32(mmRLC_CP_SCHEDULERS, tmp);
+ WREG32(mmRLC_CP_SCHEDULERS, tmp | 0x80);
}
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
@@ -5321,7 +5335,7 @@ static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *ade
(adev->asic_type == CHIP_POLARIS12) ||
(adev->asic_type == CHIP_VEGAM))
/* Send msg to SMU via Powerplay */
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable, 0);
WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
@@ -5367,10 +5381,10 @@ static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
}
}
-static int gfx_v8_0_set_powergating_state(void *handle,
+static int gfx_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
@@ -5625,8 +5639,6 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t temp, data;
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
-
/* It is disabled by HW by default */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
@@ -5720,8 +5732,6 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
gfx_v8_0_wait_for_rlc_serdes(adev);
}
-
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
@@ -5731,8 +5741,6 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
-
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
@@ -5813,12 +5821,12 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
}
gfx_v8_0_wait_for_rlc_serdes(adev);
-
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
bool enable)
{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
if (enable) {
/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
* === MGCG + MGLS + TS(CG/LS) ===
@@ -5832,6 +5840,8 @@ static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
}
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return 0;
}
@@ -5982,10 +5992,10 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v8_0_set_clockgating_state(void *handle,
+static int gfx_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
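Taken together, the gfx_v8_0 clock-gating hunks above hoist the RLC safe-mode bracketing out of the per-feature helpers (medium- and coarse-grain) and into their single caller, so safe mode is entered and left exactly once per update instead of once per sub-step. A condensed sketch of what the caller looks like after the change, pieced together from the hunks rather than copied verbatim from the kernel source:

	static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
						    bool enable)
	{
		/* enter RLC safe mode once for the whole sequence */
		amdgpu_gfx_rlc_enter_safe_mode(adev, 0);

		if (enable) {
			/* CGCG/CGLS must follow MGCG/MGLS when enabling */
			gfx_v8_0_update_medium_grain_clock_gating(adev, true);
			gfx_v8_0_update_coarse_grain_clock_gating(adev, true);
		} else {
			/* reverse the order when disabling */
			gfx_v8_0_update_coarse_grain_clock_gating(adev, false);
			gfx_v8_0_update_medium_grain_clock_gating(adev, false);
		}

		/* leave safe mode once, after all sub-steps have finished */
		amdgpu_gfx_rlc_exit_safe_mode(adev, 0);

		return 0;
	}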
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 0b6f09f2cc9b..fa572b40989e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1243,7 +1243,7 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err2:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
amdgpu_device_wb_free(adev, index);
@@ -1429,18 +1429,21 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
int err;
err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_pfp.bin", chip_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_me.bin", chip_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_ce.bin", chip_name);
if (err)
goto out;
@@ -1476,6 +1479,7 @@ static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_rlc_am4.bin", chip_name);
else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
(smu_version >= 0x41e2b))
@@ -1483,9 +1487,11 @@ static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
*SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
*/
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_kicker_rlc.bin", chip_name);
else
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_rlc.bin", chip_name);
if (err)
goto out;
@@ -1518,9 +1524,11 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
- "amdgpu/%s_sjt_mec.bin", chip_name);
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sjt_mec.bin", chip_name);
else
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec.bin", chip_name);
if (err)
goto out;
@@ -1531,9 +1539,11 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_sjt_mec2.bin", chip_name);
else
err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec2.bin", chip_name);
if (!err) {
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
@@ -3488,9 +3498,7 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80);
}
static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
@@ -4780,7 +4788,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
}
fail:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
return r;
@@ -4956,8 +4964,6 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t data, def;
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
-
/* It is disabled by HW by default */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
/* 1 - RLC_CGTT_MGCG_OVERRIDE */
@@ -5022,8 +5028,6 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
}
}
-
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
@@ -5034,8 +5038,6 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
if (!adev->gfx.num_gfx_rings)
return;
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
-
/* Enable 3D CGCG/CGLS */
if (enable) {
/* write cmd to clear cgcg/cgls ov */
@@ -5077,8 +5079,6 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
}
-
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
@@ -5086,8 +5086,6 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t def, data;
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
-
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
/* unset CGCG override */
@@ -5129,13 +5127,12 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
}
-
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
bool enable)
{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
if (enable) {
/* CGCG/CGLS should be enabled after MGCG/MGLS
* === MGCG + MGLS ===
@@ -5155,6 +5152,7 @@ static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
/* === MGCG + MGLS === */
gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
}
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return 0;
}
@@ -5232,10 +5230,10 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
};
-static int gfx_v9_0_set_powergating_state(void *handle,
+static int gfx_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
@@ -5277,10 +5275,10 @@ static int gfx_v9_0_set_powergating_state(void *handle,
return 0;
}
-static int gfx_v9_0_set_clockgating_state(void *handle,
+static int gfx_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
index 3f4fd2f08163..d81449f9d822 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -412,7 +412,7 @@ static int gfx_v9_4_2_run_shader(struct amdgpu_device *adev,
r = amdgpu_ib_schedule(ring, 1, ib, NULL, fence_ptr);
if (r) {
dev_err(adev->dev, "ib submit failed (%d).\n", r);
- amdgpu_ib_free(adev, ib, NULL);
+ amdgpu_ib_free(ib, NULL);
}
return r;
}
@@ -611,16 +611,16 @@ static int gfx_v9_4_2_do_sgprs_init(struct amdgpu_device *adev)
}
disp2_failed:
- amdgpu_ib_free(adev, &disp_ibs[2], NULL);
+ amdgpu_ib_free(&disp_ibs[2], NULL);
dma_fence_put(fences[2]);
disp1_failed:
- amdgpu_ib_free(adev, &disp_ibs[1], NULL);
+ amdgpu_ib_free(&disp_ibs[1], NULL);
dma_fence_put(fences[1]);
disp0_failed:
- amdgpu_ib_free(adev, &disp_ibs[0], NULL);
+ amdgpu_ib_free(&disp_ibs[0], NULL);
dma_fence_put(fences[0]);
pro_end:
- amdgpu_ib_free(adev, &wb_ib, NULL);
+ amdgpu_ib_free(&wb_ib, NULL);
if (r)
dev_info(adev->dev, "Init SGPRS Failed\n");
@@ -687,10 +687,10 @@ static int gfx_v9_4_2_do_vgprs_init(struct amdgpu_device *adev)
}
disp_failed:
- amdgpu_ib_free(adev, &disp_ib, NULL);
+ amdgpu_ib_free(&disp_ib, NULL);
dma_fence_put(fence);
pro_end:
- amdgpu_ib_free(adev, &wb_ib, NULL);
+ amdgpu_ib_free(&wb_ib, NULL);
if (r)
dev_info(adev->dev, "Init VGPRS Failed\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index e2b3dda57030..2ba185875baa 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -43,8 +43,12 @@
MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_4_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_5_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_5_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_3_sjt_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_4_sjt_mec.bin");
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
@@ -52,10 +56,6 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin");
#define GOLDEN_GB_ADDR_CONFIG 0x2a114042
#define CP_HQD_PERSISTENT_STATE_DEFAULT 0xbe05301
-#define mmSMNAID_XCD0_MCA_SMU 0x36430400 /* SMN AID XCD0 */
-#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */
-#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */
-
#define XCC_REG_RANGE_0_LOW 0x2000 /* XCC gfxdec0 lower Bound */
#define XCC_REG_RANGE_0_HIGH 0x3400 /* XCC gfxdec0 upper Bound */
#define XCC_REG_RANGE_1_LOW 0xA000 /* XCC gfxdec1 lower Bound */
@@ -349,13 +349,17 @@ static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev)
WREG32_SOC15(GC, dev_inst, regGB_ADDR_CONFIG,
GOLDEN_GB_ADDR_CONFIG);
- /* Golden settings applied by driver for ASIC with rev_id 0 */
- if (adev->rev_id == 0) {
- WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL1,
- REDUCE_FIFO_DEPTH_BY_2, 2);
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) {
+ WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2, SPARE, 0x1);
} else {
- WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2,
- SPARE, 0x1);
+ /* Golden settings applied by driver for ASIC with rev_id 0 */
+ if (adev->rev_id == 0) {
+ WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL1,
+ REDUCE_FIFO_DEPTH_BY_2, 2);
+ } else {
+ WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2,
+ SPARE, 0x1);
+ }
}
}
}
@@ -499,7 +503,7 @@ static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err2:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
amdgpu_device_wb_free(adev, index);
@@ -543,6 +547,7 @@ static int gfx_v9_4_3_init_rlc_microcode(struct amdgpu_device *adev,
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_rlc.bin", chip_name);
if (err)
goto out;
@@ -574,8 +579,19 @@ static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev,
{
int err;
- err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
- "amdgpu/%s_mec.bin", chip_name);
+ if (amdgpu_sriov_vf(adev)) {
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sjt_mec.bin", chip_name);
+
+ if (err)
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", chip_name);
+ } else
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", chip_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
@@ -929,6 +945,7 @@ static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -1779,9 +1796,7 @@ static void gfx_v9_4_3_xcc_kiq_setting(struct amdgpu_ring *ring, int xcc_id)
tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp | 0x80);
}
static void gfx_v9_4_3_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
@@ -2764,16 +2779,16 @@ static const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = {
.is_rlcg_access_range = gfx_v9_4_3_is_rlcg_access_range,
};
-static int gfx_v9_4_3_set_powergating_state(void *handle,
+static int gfx_v9_4_3_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static int gfx_v9_4_3_set_clockgating_state(void *handle,
+static int gfx_v9_4_3_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, num_xcc;
if (amdgpu_sriov_vf(adev))
@@ -4653,7 +4668,6 @@ static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block)
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
- amdgpu_gfx_off_ctrl(adev, false);
for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
xcc_offset = xcc_id * reg_count;
for (i = 0; i < reg_count; i++)
@@ -4661,7 +4675,6 @@ static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block)
RREG32(SOC15_REG_ENTRY_OFFSET_INST(gc_reg_list_9_4_3[i],
GET_INST(GC, xcc_id)));
}
- amdgpu_gfx_off_ctrl(adev, true);
/* dump compute queue registers for all instances */
if (!adev->gfx.ip_dump_compute_queues)
@@ -4670,7 +4683,6 @@ static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block)
num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe;
reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3);
- amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->srbm_mutex);
for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
xcc_offset = xcc_id * reg_count * num_inst;
@@ -4697,7 +4709,6 @@ static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block)
}
soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- amdgpu_gfx_off_ctrl(adev, true);
}
static void gfx_v9_4_3_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
@@ -4860,6 +4871,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
/* 9.4.3 removed all the GDS internal memory,
* only support GWS opcode in kernel, like barrier
* semaphore.etc */
@@ -4873,6 +4885,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
/* deprecated for 9.4.3, no usage at all */
adev->gds.gds_compute_max_wave_id = 0;
break;
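The microcode hunks in the gfx files above all add an explicit required/optional argument to amdgpu_ucode_request(). Two patterns recur: an optional probe followed by a required fallback (the Polaris MEC2 case in gfx_v8_0), and an SR-IOV path that prefers the sjt image but accepts the regular one (gfx_v9_4_3). A minimal sketch of both, assuming the call signature exactly as it appears in the hunks; the wrapper name is made up for illustration only:

	static int example_request_mec_fw(struct amdgpu_device *adev,
					  const char *chip_name)
	{
		int err;

		/* optional probe first; fall back only if the file is absent */
		err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
					   AMDGPU_UCODE_OPTIONAL,
					   "amdgpu/%s_mec2_2.bin", chip_name);
		if (err == -ENODEV)
			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_mec2.bin", chip_name);

		/* SR-IOV prefers the sjt MEC image, with a plain fallback */
		if (amdgpu_sriov_vf(adev)) {
			err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_sjt_mec.bin", chip_name);
			if (err)
				err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
							   AMDGPU_UCODE_REQUIRED,
							   "amdgpu/%s_mec.bin", chip_name);
		}

		return err;
	}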
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
index ed8e130c7d19..5470cef7e9bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -368,7 +368,9 @@ static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev,
amdgpu_ip_version(adev, GC_HWIP, 0) ==
IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) ==
- IP_VERSION(9, 4, 4));
+ IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(9, 5, 0));
WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 697599c46240..9bedca9a79c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -1088,11 +1088,11 @@ static int gmc_v10_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
return 0;
}
-static int gmc_v10_0_set_clockgating_state(void *handle,
+static int gmc_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* The issue mmhub can't disconnect from DF with MMHUB clock gating being disabled
@@ -1131,7 +1131,7 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u64 *flags)
athub_v2_0_get_clockgating(adev, flags);
}
-static int gmc_v10_0_set_powergating_state(void *handle,
+static int gmc_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index f893ab4c14df..72751ab4c766 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -996,11 +996,11 @@ static int gmc_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
return 0;
}
-static int gmc_v11_0_set_clockgating_state(void *handle,
+static int gmc_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = adev->mmhub.funcs->set_clockgating(adev, state);
if (r)
@@ -1018,7 +1018,7 @@ static void gmc_v11_0_get_clockgating_state(void *handle, u64 *flags)
athub_v3_0_get_clockgating(adev, flags);
}
-static int gmc_v11_0_set_powergating_state(void *handle,
+static int gmc_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index d22b027fd0bb..b749f1c3f6a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -40,7 +40,7 @@
#include "gfxhub_v12_0.h"
#include "mmhub_v4_1_0.h"
#include "athub_v4_1_0.h"
-
+#include "umc_v8_14.h"
static int gmc_v12_0_ecc_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
@@ -581,6 +581,18 @@ static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev)
static void gmc_v12_0_set_umc_funcs(struct amdgpu_device *adev)
{
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
+ case IP_VERSION(8, 14, 0):
+ adev->umc.channel_inst_num = UMC_V8_14_CHANNEL_INSTANCE_NUM;
+ adev->umc.umc_inst_num = UMC_V8_14_UMC_INSTANCE_NUM(adev);
+ adev->umc.node_inst_num = 0;
+ adev->umc.max_ras_err_cnt_per_query = UMC_V8_14_TOTAL_CHANNEL_NUM(adev);
+ adev->umc.channel_offs = UMC_V8_14_PER_CHANNEL_OFFSET;
+ adev->umc.ras = &umc_v8_14_ras;
+ break;
+ default:
+ break;
+ }
}
@@ -829,6 +841,10 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
amdgpu_vm_manager_init(adev);
+ r = amdgpu_gmc_ras_sw_init(adev);
+ if (r)
+ return r;
+
return 0;
}
@@ -980,11 +996,11 @@ static int gmc_v12_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
return 0;
}
-static int gmc_v12_0_set_clockgating_state(void *handle,
+static int gmc_v12_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = adev->mmhub.funcs->set_clockgating(adev, state);
if (r)
@@ -1002,7 +1018,7 @@ static void gmc_v12_0_get_clockgating_state(void *handle, u64 *flags)
athub_v4_1_0_get_clockgating(adev, flags);
}
-static int gmc_v12_0_set_powergating_state(void *handle,
+static int gmc_v12_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index ca000b3d1afc..2245dda92021 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -131,7 +131,8 @@ static int gmc_v6_0_init_microcode(struct amdgpu_device *adev)
if (((RREG32(mmMC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
chip_name = "si58";
- err = amdgpu_ucode_request(adev, &adev->gmc.fw, "amdgpu/%s_mc.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mc.bin", chip_name);
if (err) {
dev_err(adev->dev,
"si_mc: Failed to load firmware \"%s_mc.bin\"\n",
@@ -1094,13 +1095,13 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int gmc_v6_0_set_clockgating_state(void *handle,
+static int gmc_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int gmc_v6_0_set_powergating_state(void *handle,
+static int gmc_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index b6016f11956e..9aac4b1101e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -157,7 +157,8 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev)
return -EINVAL;
}
- err = amdgpu_ucode_request(adev, &adev->gmc.fw, "amdgpu/%s_mc.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mc.bin", chip_name);
if (err) {
pr_err("cik_mc: Failed to load firmware \"%s_mc.bin\"\n", chip_name);
amdgpu_ucode_release(&adev->gmc.fw);
@@ -1317,11 +1318,11 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int gmc_v7_0_set_clockgating_state(void *handle,
+static int gmc_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE)
gate = true;
@@ -1337,7 +1338,7 @@ static int gmc_v7_0_set_clockgating_state(void *handle,
return 0;
}
-static int gmc_v7_0_set_powergating_state(void *handle,
+static int gmc_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 12d5967ecd45..d06585207c33 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -259,7 +259,8 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
return -EINVAL;
}
- err = amdgpu_ucode_request(adev, &adev->gmc.fw, "amdgpu/%s_mc.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mc.bin", chip_name);
if (err) {
pr_err("mc: Failed to load firmware \"%s_mc.bin\"\n", chip_name);
amdgpu_ucode_release(&adev->gmc.fw);
@@ -1658,10 +1659,10 @@ static void fiji_update_mc_light_sleep(struct amdgpu_device *adev,
}
}
-static int gmc_v8_0_set_clockgating_state(void *handle,
+static int gmc_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1679,7 +1680,7 @@ static int gmc_v8_0_set_clockgating_state(void *handle,
return 0;
}
-static int gmc_v8_0_set_powergating_state(void *handle,
+static int gmc_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 50c5da3020cb..291549765c38 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -623,6 +623,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
}
}
+ if (kgd2kfd_vmfault_fast_path(adev, entry, retry_fault))
+ return 1;
+
if (!printk_ratelimit())
return 0;
@@ -645,7 +648,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
soc15_ih_clientid_name[entry->client_id]);
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
dev_err(adev->dev, " cookie node_id %d fault from die %s%d%s\n",
node_id, node_id % 4 == 3 ? "RSV" : "AID", node_id / 4,
node_id % 4 == 1 ? ".XCD0" : node_id % 4 == 2 ? ".XCD1" : "");
@@ -795,7 +799,8 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
{
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
return false;
return ((vmhub == AMDGPU_MMHUB0(0) ||
@@ -1138,12 +1143,13 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
struct amdgpu_vm *vm = mapping->bo_va->base.vm;
unsigned int mtype_local, mtype;
+ uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0);
bool snoop = false;
bool is_local;
dma_resv_assert_held(bo->tbo.base.resv);
- switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ switch (gc_ip_version) {
case IP_VERSION(9, 4, 1):
case IP_VERSION(9, 4, 2):
if (is_vram) {
@@ -1157,10 +1163,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
/* FIXME: is this still needed? Or does
* amdgpu_ttm_tt_pde_flags already handle this?
*/
- if ((amdgpu_ip_version(adev, GC_HWIP, 0) ==
- IP_VERSION(9, 4, 2) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) ==
- IP_VERSION(9, 4, 3)) &&
+ if (gc_ip_version == IP_VERSION(9, 4, 2) &&
adev->gmc.xgmi.connected_to_cpu)
snoop = true;
} else {
@@ -1184,6 +1187,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
break;
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
/* Only local VRAM BOs or system memory on non-NUMA APUs
* can be assumed to be local in their entirety. Choose
* MTYPE_NC as safe fallback for all system memory BOs on
@@ -1208,7 +1212,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
if (uncached) {
mtype = MTYPE_UC;
} else if (ext_coherent) {
- if (adev->rev_id)
+ if (gc_ip_version == IP_VERSION(9, 5, 0) || adev->rev_id)
mtype = is_local ? MTYPE_CC : MTYPE_UC;
else
mtype = MTYPE_UC;
@@ -1218,10 +1222,10 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
/* dGPU */
if (is_local)
mtype = mtype_local;
- else if (is_vram)
- mtype = MTYPE_NC;
- else
+ else if (gc_ip_version < IP_VERSION(9, 5, 0) && !is_vram)
mtype = MTYPE_UC;
+ else
+ mtype = MTYPE_NC;
}
break;
@@ -1275,7 +1279,8 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
* memory can use more efficient MTYPEs.
*/
if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3) &&
- amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 4))
+ amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 4) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 5, 0))
return;
/* Only direct-mapped memory allows us to determine the NUMA node from
@@ -1540,6 +1545,7 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
adev->mmhub.ras = &mmhub_v1_7_ras;
break;
case IP_VERSION(1, 8, 0):
+ case IP_VERSION(1, 8, 1):
adev->mmhub.ras = &mmhub_v1_8_ras;
break;
default:
@@ -1551,7 +1557,8 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
{
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
adev->gfxhub.funcs = &gfxhub_v1_2_funcs;
else
adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
@@ -1619,7 +1626,8 @@ static int gmc_v9_0_early_init(struct amdgpu_ip_block *ip_block)
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
adev->gmc.xgmi.supported = true;
if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(6, 1, 0)) {
@@ -1792,6 +1800,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
case IP_VERSION(9, 4, 2):
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
default:
adev->gmc.gart_size = 512ULL << 20;
break;
@@ -2070,7 +2079,8 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
spin_lock_init(&adev->gmc.invalidate_lock);
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) {
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) {
gmc_v9_4_3_init_vram_info(adev);
} else if (!adev->bios) {
if (adev->flags & AMD_IS_APU) {
@@ -2154,6 +2164,7 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
break;
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0),
NUM_XCC(adev->gfx.xcc_mask));
@@ -2220,7 +2231,8 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
amdgpu_gmc_get_vbios_allocations(adev);
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) {
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) {
r = gmc_v9_0_init_mem_ranges(adev);
if (r)
return r;
@@ -2250,7 +2262,8 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) ?
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) ?
3 :
8;
@@ -2263,7 +2276,8 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
return r;
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
amdgpu_gmc_sysfs_init(adev);
return 0;
@@ -2274,7 +2288,8 @@ static int gmc_v9_0_sw_fini(struct amdgpu_ip_block *ip_block)
struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
amdgpu_gmc_sysfs_fini(adev);
amdgpu_gmc_ras_fini(adev);
@@ -2544,10 +2559,10 @@ static int gmc_v9_0_soft_reset(struct amdgpu_ip_block *ip_block)
return 0;
}
-static int gmc_v9_0_set_clockgating_state(void *handle,
+static int gmc_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->mmhub.funcs->set_clockgating(adev, state);
@@ -2565,7 +2580,7 @@ static void gmc_v9_0_get_clockgating_state(void *handle, u64 *flags)
athub_v1_0_get_clockgating(adev, flags);
}
-static int gmc_v9_0_set_powergating_state(void *handle,
+static int gmc_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
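The gmc_v9_0_get_coherence_flags() hunks above cache the GC IP version in gc_ip_version and rework the dGPU fallback: on GC 9.5.0, extended-coherent mappings get MTYPE_CC locally and MTYPE_UC remotely regardless of rev_id, and any remote BO (including system memory) now falls back to MTYPE_NC instead of MTYPE_UC. A condensed sketch of just the dGPU leg after the change (the uncached, ext_coherent and APU branches of the real function are elided here):

	/* dGPU */
	if (is_local)
		mtype = mtype_local;
	else if (gc_ip_version < IP_VERSION(9, 5, 0) && !is_vram)
		mtype = MTYPE_UC;	/* remote system memory, pre-9.5.0 only */
	else
		mtype = MTYPE_NC;	/* remote VRAM, or any remote BO on 9.5.0 */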
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index e019249883fb..194026e9be33 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -40,10 +40,12 @@
static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
- if (!ring || !ring->funcs->emit_wreg)
+ if (!ring || !ring->funcs->emit_wreg) {
WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
- else
+ RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
+ } else {
amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+ }
}
static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev,
@@ -54,11 +56,13 @@ static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev,
amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 5))
return;
- if (!ring || !ring->funcs->emit_wreg)
+ if (!ring || !ring->funcs->emit_wreg) {
WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
- else
+ RREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE);
+ } else {
amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
+ }
}
static void hdp_v4_0_query_ras_error_count(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
index ed7facacf2fe..d3962d469088 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
@@ -31,10 +31,12 @@
static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
- if (!ring || !ring->funcs->emit_wreg)
+ if (!ring || !ring->funcs->emit_wreg) {
WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
- else
+ RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
+ } else {
amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+ }
}
static void hdp_v5_0_invalidate_hdp(struct amdgpu_device *adev,
@@ -42,6 +44,7 @@ static void hdp_v5_0_invalidate_hdp(struct amdgpu_device *adev,
{
if (!ring || !ring->funcs->emit_wreg) {
WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
+ RREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE);
} else {
amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
index 29c3484ae1f1..f52552c5fa27 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
@@ -31,13 +31,15 @@
static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
- if (!ring || !ring->funcs->emit_wreg)
+ if (!ring || !ring->funcs->emit_wreg) {
WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2,
0);
- else
+ RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
+ } else {
amdgpu_ring_emit_wreg(ring,
(adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2,
0);
+ }
}
static void hdp_v5_2_update_mem_power_gating(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
index 33736d361dd0..6948fe9956ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
@@ -34,10 +34,12 @@
static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
- if (!ring || !ring->funcs->emit_wreg)
+ if (!ring || !ring->funcs->emit_wreg) {
WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
- else
+ RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
+ } else {
amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+ }
}
static void hdp_v6_0_update_clock_gating(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c
index 1c99bb09e2a1..63820329f67e 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c
@@ -31,10 +31,12 @@
static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
- if (!ring || !ring->funcs->emit_wreg)
+ if (!ring || !ring->funcs->emit_wreg) {
WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
- else
+ RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
+ } else {
amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+ }
}
static void hdp_v7_0_update_clock_gating(struct amdgpu_device *adev,
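Every hdp_v*_flush_hdp() and invalidate_hdp() hunk above follows the same pattern: when the operation is done by a direct MMIO write (no ring, or no emit_wreg callback), a read of the same register is added immediately after the write so the posted write is actually flushed before the caller continues; the ring-based path is untouched. A minimal sketch of the MMIO path, using the same remapped flush register as the hunks (the function name here is only illustrative):

	static void example_flush_hdp_mmio(struct amdgpu_device *adev,
					   struct amdgpu_ring *ring)
	{
		u32 reg = (adev->rmmio_remap.reg_offset +
			   KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2;

		if (!ring || !ring->funcs->emit_wreg) {
			WREG32(reg, 0);
			/* read back so the posted write reaches HDP before we return */
			RREG32(reg);
		} else {
			amdgpu_ring_emit_wreg(ring, reg, 0);
		}
	}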
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index 7f45e93c0397..8ac3d3282268 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -392,13 +392,13 @@ static int iceland_ih_soft_reset(struct amdgpu_ip_block *ip_block)
return 0;
}
-static int iceland_ih_set_clockgating_state(void *handle,
+static int iceland_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int iceland_ih_set_powergating_state(void *handle,
+static int iceland_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
index 38f953fd65d9..f8a485164437 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
@@ -693,10 +693,10 @@ static void ih_v6_0_update_clockgating_state(struct amdgpu_device *adev,
}
}
-static int ih_v6_0_set_clockgating_state(void *handle,
+static int ih_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
ih_v6_0_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
@@ -756,10 +756,10 @@ static void ih_v6_0_update_ih_mem_power_gating(struct amdgpu_device *adev,
WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
}
-static int ih_v6_0_set_powergating_state(void *handle,
+static int ih_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG)
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
index 61381e0c3795..dd0042efceec 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
@@ -674,10 +674,10 @@ static void ih_v6_1_update_clockgating_state(struct amdgpu_device *adev,
return;
}
-static int ih_v6_1_set_clockgating_state(void *handle,
+static int ih_v6_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
ih_v6_1_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
@@ -737,10 +737,10 @@ static void ih_v6_1_update_ih_mem_power_gating(struct amdgpu_device *adev,
WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
}
-static int ih_v6_1_set_powergating_state(void *handle,
+static int ih_v6_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG)
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
index d2428cf5d385..8f9b15c171f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
@@ -664,10 +664,10 @@ static void ih_v7_0_update_clockgating_state(struct amdgpu_device *adev,
return;
}
-static int ih_v7_0_set_clockgating_state(void *handle,
+static int ih_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
ih_v7_0_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
@@ -727,10 +727,10 @@ static void ih_v7_0_update_ih_mem_power_gating(struct amdgpu_device *adev,
WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
}
-static int ih_v7_0_set_powergating_state(void *handle,
+static int ih_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG)
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
index d4f72e47ae9e..aeca5c08ea2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
@@ -50,7 +50,8 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev)
DRM_DEBUG("\n");
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
- err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, "amdgpu/%s_imu.bin", ucode_prefix);
+ err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_imu.bin", ucode_prefix);
if (err)
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c
index 1341f0292031..df898dbb746e 100644
--- a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c
@@ -47,7 +47,8 @@ static int imu_v12_0_init_microcode(struct amdgpu_device *adev)
DRM_DEBUG("\n");
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
- err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, "amdgpu/%s_imu.bin", ucode_prefix);
+ err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_imu.bin", ucode_prefix);
if (err)
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
index 7319299f25ae..03b8b7cd5229 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
@@ -604,7 +604,7 @@ static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev)
static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- bool set_clocks = !cancel_delayed_work_sync(&adev->jpeg.idle_work);
+ bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
int cnt = 0;
mutex_lock(&adev->vcn.vcn1_jpeg1_workaround);
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index 6e29b69894a5..7c9251c03815 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -35,7 +35,7 @@
static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v2_0_set_powergating_state(void *handle,
+static int jpeg_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
/**
@@ -154,7 +154,7 @@ static int jpeg_v2_0_hw_fini(struct amdgpu_ip_block *ip_block)
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
- jpeg_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ jpeg_v2_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
return 0;
}
@@ -675,14 +675,14 @@ static int jpeg_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
return ret;
}
-static int jpeg_v2_0_set_clockgating_state(void *handle,
+static int jpeg_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
- if (!jpeg_v2_0_is_idle(handle))
+ if (!jpeg_v2_0_is_idle(adev))
return -EBUSY;
jpeg_v2_0_enable_clock_gating(adev);
} else {
@@ -692,10 +692,10 @@ static int jpeg_v2_0_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v2_0_set_powergating_state(void *handle,
+static int jpeg_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if (state == adev->jpeg.cur_state)
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index 9ac421486f05..11f6af2646e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -38,7 +38,7 @@
static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v2_5_set_powergating_state(void *handle,
+static int jpeg_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev);
@@ -219,7 +219,7 @@ static int jpeg_v2_5_hw_fini(struct amdgpu_ip_block *ip_block)
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS))
- jpeg_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ jpeg_v2_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG))
amdgpu_irq_put(adev, &adev->jpeg.inst[i].ras_poison_irq, 0);
@@ -518,10 +518,10 @@ static int jpeg_v2_5_wait_for_idle(struct amdgpu_ip_block *ip_block)
return 0;
}
-static int jpeg_v2_5_set_clockgating_state(void *handle,
+static int jpeg_v2_5_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
int i;
@@ -530,7 +530,7 @@ static int jpeg_v2_5_set_clockgating_state(void *handle,
continue;
if (enable) {
- if (!jpeg_v2_5_is_idle(handle))
+ if (!jpeg_v2_5_is_idle(adev))
return -EBUSY;
jpeg_v2_5_enable_clock_gating(adev, i);
} else {
@@ -541,10 +541,10 @@ static int jpeg_v2_5_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v2_5_set_powergating_state(void *handle,
+static int jpeg_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if (state == adev->jpeg.cur_state)
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
index e0df6800502c..4eca65ea9053 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
@@ -36,7 +36,7 @@
static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v3_0_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v3_0_set_powergating_state(void *handle,
+static int jpeg_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
/**
@@ -168,7 +168,7 @@ static int jpeg_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
- jpeg_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ jpeg_v3_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
return 0;
}
@@ -466,14 +466,14 @@ static int jpeg_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
}
-static int jpeg_v3_0_set_clockgating_state(void *handle,
+static int jpeg_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = state == AMD_CG_STATE_GATE;
if (enable) {
- if (!jpeg_v3_0_is_idle(handle))
+ if (!jpeg_v3_0_is_idle(adev))
return -EBUSY;
jpeg_v3_0_enable_clock_gating(adev);
} else {
@@ -483,10 +483,10 @@ static int jpeg_v3_0_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v3_0_set_powergating_state(void *handle,
+static int jpeg_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if(state == adev->jpeg.cur_state)
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
index eca1963c33b6..0aef1f64afd0 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
@@ -39,7 +39,7 @@
static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev);
static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v4_0_set_powergating_state(void *handle,
+static int jpeg_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev);
@@ -206,7 +206,7 @@ static int jpeg_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
if (!amdgpu_sriov_vf(adev)) {
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))
- jpeg_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ jpeg_v4_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
}
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG))
amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0);
@@ -635,14 +635,14 @@ static int jpeg_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
}
-static int jpeg_v4_0_set_clockgating_state(void *handle,
+static int jpeg_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = state == AMD_CG_STATE_GATE;
if (enable) {
- if (!jpeg_v4_0_is_idle(handle))
+ if (!jpeg_v4_0_is_idle(adev))
return -EBUSY;
jpeg_v4_0_enable_clock_gating(adev);
} else {
@@ -652,10 +652,10 @@ static int jpeg_v4_0_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v4_0_set_powergating_state(void *handle,
+static int jpeg_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if (amdgpu_sriov_vf(adev)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 67b51bcbacd1..88f9771c1686 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -43,7 +43,7 @@ enum jpeg_engin_status {
static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v4_0_3_set_powergating_state(void *handle,
+static int jpeg_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring);
@@ -76,7 +76,7 @@ static int jpeg_v4_0_3_early_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
- adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS;
+ adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS_4_0_3;
jpeg_v4_0_3_set_dec_ring_funcs(adev);
jpeg_v4_0_3_set_irq_funcs(adev);
@@ -321,7 +321,7 @@ static int jpeg_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block)
if (r)
return r;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
ring = &adev->jpeg.inst[i].ring_dec[j];
ring->wptr = 0;
@@ -379,7 +379,7 @@ static int jpeg_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block)
if (!amdgpu_sriov_vf(adev)) {
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
- ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ ret = jpeg_v4_0_3_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
}
return ret;
@@ -949,16 +949,16 @@ static int jpeg_v4_0_3_wait_for_idle(struct amdgpu_ip_block *ip_block)
return ret;
}
-static int jpeg_v4_0_3_set_clockgating_state(void *handle,
+static int jpeg_v4_0_3_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = state == AMD_CG_STATE_GATE;
int i;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (enable) {
- if (!jpeg_v4_0_3_is_idle(handle))
+ if (!jpeg_v4_0_3_is_idle(adev))
return -EBUSY;
jpeg_v4_0_3_enable_clock_gating(adev, i);
} else {
@@ -968,10 +968,10 @@ static int jpeg_v4_0_3_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v4_0_3_set_powergating_state(void *handle,
+static int jpeg_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if (amdgpu_sriov_vf(adev)) {
@@ -1231,9 +1231,95 @@ static const struct amdgpu_ras_block_hw_ops jpeg_v4_0_3_ras_hw_ops = {
.reset_ras_error_count = jpeg_v4_0_3_reset_ras_error_count,
};
+static int jpeg_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct aca_bank_info info;
+ u64 misc0;
+ int ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
+ 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE,
+ ACA_REG__MISC0__ERRCNT(misc0));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+/* reference to smu driver if header file */
+static int jpeg_v4_0_3_err_codes[] = {
+ 16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-7][S|D] */
+ 24, 25, 26, 27, 28, 29, 30, 31
+};
+
+static bool jpeg_v4_0_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ u32 instlo;
+
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1);
+
+ if (instlo != mmSMNAID_AID0_MCA_SMU)
+ return false;
+
+ if (aca_bank_check_error_codes(handle->adev, bank,
+ jpeg_v4_0_3_err_codes,
+ ARRAY_SIZE(jpeg_v4_0_3_err_codes)))
+ return false;
+
+ return true;
+}
+
+static const struct aca_bank_ops jpeg_v4_0_3_aca_bank_ops = {
+ .aca_bank_parser = jpeg_v4_0_3_aca_bank_parser,
+ .aca_bank_is_valid = jpeg_v4_0_3_aca_bank_is_valid,
+};
+
+static const struct aca_info jpeg_v4_0_3_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK,
+ .bank_ops = &jpeg_v4_0_3_aca_bank_ops,
+};
+
+static int jpeg_v4_0_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__JPEG,
+ &jpeg_v4_0_3_aca_info, NULL);
+ if (r)
+ goto late_fini;
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+
+ return r;
+}
+
static struct amdgpu_jpeg_ras jpeg_v4_0_3_ras = {
.ras_block = {
.hw_ops = &jpeg_v4_0_3_ras_hw_ops,
+ .ras_late_init = jpeg_v4_0_3_ras_late_init,
},
};
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
index 1d9e3b101c3a..6b3656984957 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
@@ -48,7 +48,7 @@
static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_5_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v4_0_5_set_powergating_state(void *handle,
+static int jpeg_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
static void jpeg_v4_0_5_dec_ring_set_wptr(struct amdgpu_ring *ring);
@@ -236,7 +236,7 @@ static int jpeg_v4_0_5_hw_fini(struct amdgpu_ip_block *ip_block)
if (!amdgpu_sriov_vf(adev)) {
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, i, regUVD_JRBC_STATUS))
- jpeg_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ jpeg_v4_0_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
}
}
return 0;
@@ -660,10 +660,10 @@ static int jpeg_v4_0_5_wait_for_idle(struct amdgpu_ip_block *ip_block)
return 0;
}
-static int jpeg_v4_0_5_set_clockgating_state(void *handle,
+static int jpeg_v4_0_5_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
int i;
@@ -672,7 +672,7 @@ static int jpeg_v4_0_5_set_clockgating_state(void *handle,
continue;
if (enable) {
- if (!jpeg_v4_0_5_is_idle(handle))
+ if (!jpeg_v4_0_5_is_idle(adev))
return -EBUSY;
jpeg_v4_0_5_enable_clock_gating(adev, i);
@@ -684,10 +684,10 @@ static int jpeg_v4_0_5_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v4_0_5_set_powergating_state(void *handle,
+static int jpeg_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if (amdgpu_sriov_vf(adev)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
index 58fb1e5fa89c..d5cf0f2799d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
@@ -31,12 +31,12 @@
#include "vcn/vcn_5_0_0_offset.h"
#include "vcn/vcn_5_0_0_sh_mask.h"
-#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
#include "jpeg_v5_0_0.h"
static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v5_0_0_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v5_0_0_set_powergating_state(void *handle,
+static int jpeg_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
/**
@@ -74,7 +74,7 @@ static int jpeg_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block)
/* JPEG TRAP */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
- VCN_4_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq);
+ VCN_5_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq);
if (r)
return r;
@@ -172,7 +172,7 @@ static int jpeg_v5_0_0_hw_fini(struct amdgpu_ip_block *ip_block)
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))
- jpeg_v5_0_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ jpeg_v5_0_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
return 0;
}
@@ -560,14 +560,14 @@ static int jpeg_v5_0_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
}
-static int jpeg_v5_0_0_set_clockgating_state(void *handle,
+static int jpeg_v5_0_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
if (enable) {
- if (!jpeg_v5_0_0_is_idle(handle))
+ if (!jpeg_v5_0_0_is_idle(adev))
return -EBUSY;
jpeg_v5_0_0_enable_clock_gating(adev);
} else {
@@ -577,10 +577,10 @@ static int jpeg_v5_0_0_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v5_0_0_set_powergating_state(void *handle,
+static int jpeg_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if (state == adev->jpeg.cur_state)
@@ -612,7 +612,7 @@ static int jpeg_v5_0_0_process_interrupt(struct amdgpu_device *adev,
DRM_DEBUG("IH: JPEG TRAP\n");
switch (entry->src_id) {
- case VCN_4_0__SRCID__JPEG_DECODE:
+ case VCN_5_0__SRCID__JPEG_DECODE:
amdgpu_fence_process(adev->jpeg.inst->ring_dec);
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c
new file mode 100644
index 000000000000..40d4c32a8c2a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c
@@ -0,0 +1,708 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v4_0_3.h"
+#include "jpeg_v5_0_1.h"
+
+#include "vcn/vcn_5_0_0_offset.h"
+#include "vcn/vcn_5_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
+
+static void jpeg_v5_0_1_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v5_0_1_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+static void jpeg_v5_0_1_dec_ring_set_wptr(struct amdgpu_ring *ring);
+
+static int amdgpu_ih_srcid_jpeg[] = {
+ VCN_5_0__SRCID__JPEG_DECODE,
+ VCN_5_0__SRCID__JPEG1_DECODE,
+ VCN_5_0__SRCID__JPEG2_DECODE,
+ VCN_5_0__SRCID__JPEG3_DECODE,
+ VCN_5_0__SRCID__JPEG4_DECODE,
+ VCN_5_0__SRCID__JPEG5_DECODE,
+ VCN_5_0__SRCID__JPEG6_DECODE,
+ VCN_5_0__SRCID__JPEG7_DECODE,
+ VCN_5_0__SRCID__JPEG8_DECODE,
+ VCN_5_0__SRCID__JPEG9_DECODE,
+};
+
+static int jpeg_v5_0_1_core_reg_offset(u32 pipe)
+{
+ if (pipe <= AMDGPU_MAX_JPEG_RINGS_4_0_3)
+ return ((0x40 * pipe) - 0xc80);
+ else
+ return ((0x40 * pipe) - 0x440);
+}
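As an aside, the per-pipe offset above is just a 0x40 stride applied from one of two bases. A standalone sketch (illustrative only; it assumes AMDGPU_MAX_JPEG_RINGS_4_0_3 is 8, which this patch does not state) that prints the offset each pipe would resolve to:

#include <stdio.h>

#define MAX_JPEG_RINGS_4_0_3 8	/* assumed value of AMDGPU_MAX_JPEG_RINGS_4_0_3 */

/* Mirrors jpeg_v5_0_1_core_reg_offset() above. */
static int core_reg_offset(unsigned int pipe)
{
	if (pipe <= MAX_JPEG_RINGS_4_0_3)
		return (0x40 * pipe) - 0xc80;
	return (0x40 * pipe) - 0x440;
}

int main(void)
{
	unsigned int pipe;

	for (pipe = 0; pipe < 10; pipe++)
		printf("pipe %u -> register offset %d\n", pipe, core_reg_offset(pipe));
	return 0;
}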
+
+/**
+ * jpeg_v5_0_1_early_init - set function pointers
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v5_0_1_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (!adev->jpeg.num_jpeg_inst || adev->jpeg.num_jpeg_inst > AMDGPU_MAX_JPEG_INSTANCES)
+ return -ENOENT;
+
+ adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS;
+ jpeg_v5_0_1_set_dec_ring_funcs(adev);
+ jpeg_v5_0_1_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_1_sw_init - sw init for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, j, r, jpeg_inst;
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ ring->use_doorbell = false;
+ ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
+ if (!amdgpu_sriov_vf(adev)) {
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 1 + j + 11 * jpeg_inst;
+ } else {
+ if (j < 4)
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 4 + j + 32 * jpeg_inst;
+ else
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 8 + j + 32 * jpeg_inst;
+ }
+ sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch[j] =
+ regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET;
+ adev->jpeg.inst[i].external.jpeg_pitch[j] =
+ SOC15_REG_OFFSET1(JPEG, jpeg_inst, regUVD_JRBC_SCRATCH0,
+ (j ? jpeg_v5_0_1_core_reg_offset(j) : 0));
+ }
+ }
+
+ return 0;
+}
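The doorbell assignment in sw_init() above packs the rings of each instance into fixed-stride blocks. A purely illustrative restatement of that arithmetic as one helper (the base value and the 1/4/8 and 11/32 strides are taken from the code above; nothing else is assumed):

/* Illustrative sketch of the doorbell index layout used in sw_init() above. */
static unsigned int jpeg_doorbell_index(unsigned int base,	/* vcn_ring0_1 */
					unsigned int jpeg_inst,
					unsigned int ring, int sriov)
{
	if (!sriov)
		return (base << 1) + 1 + ring + 11 * jpeg_inst;
	if (ring < 4)
		return (base << 1) + 4 + ring + 32 * jpeg_inst;
	return (base << 1) + 8 + ring + 32 * jpeg_inst;
}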
+
+/**
+ * jpeg_v5_0_1_sw_fini - sw fini for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_jpeg_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v5_0_1_hw_init - start and test JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ */
+static int jpeg_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, j, r, jpeg_inst;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* jpeg_v5_0_1_start_sriov(adev); */
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ ring->wptr = 0;
+ ring->wptr_old = 0;
+ jpeg_v5_0_1_dec_ring_set_wptr(ring);
+ ring->sched.ready = true;
+ }
+ }
+ return 0;
+ }
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+ ring = adev->jpeg.inst[i].ring_dec;
+ if (ring->use_doorbell)
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 11 * jpeg_inst,
+ adev->jpeg.inst[i].aid_id);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ if (ring->use_doorbell)
+ WREG32_SOC15_OFFSET(VCN, GET_INST(VCN, i), regVCN_JPEG_DB_CTRL,
+ (ring->pipe ? (ring->pipe - 0x15) : 0),
+ ring->doorbell_index <<
+ VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_1_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret = 0;
+
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+ if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
+ ret = jpeg_v5_0_1_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
+
+ return ret;
+}
+
+/**
+ * jpeg_v5_0_1_suspend - suspend JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v5_0_1_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = jpeg_v5_0_1_hw_fini(ip_block);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_suspend(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v5_0_1_resume - resume JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v5_0_1_resume(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ r = jpeg_v5_0_1_hw_init(ip_block);
+
+ return r;
+}
+
+static int jpeg_v5_0_1_disable_antihang(struct amdgpu_device *adev, int inst_idx)
+{
+ int jpeg_inst;
+
+ jpeg_inst = GET_INST(JPEG, inst_idx);
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* keep the JPEG in static PG mode */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK);
+
+ return 0;
+}
+
+static int jpeg_v5_0_1_enable_antihang(struct amdgpu_device *adev, int inst_idx)
+{
+ int jpeg_inst;
+
+ jpeg_inst = GET_INST(JPEG, inst_idx);
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_1_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v5_0_1_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int i, j, jpeg_inst, r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ /* disable antihang */
+ r = jpeg_v5_0_1_disable_antihang(adev, i);
+ if (r)
+ return r;
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ int reg_offset = (j ? jpeg_v5_0_1_core_reg_offset(j) : 0);
+ u32 reg, data, mask;
+
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+
+ /* enable System Interrupt for JRBC */
+ reg = SOC15_REG_OFFSET(JPEG, jpeg_inst, regJPEG_SYS_INT_EN);
+ if (j < AMDGPU_MAX_JPEG_RINGS_4_0_3) {
+ data = JPEG_SYS_INT_EN__DJRBC0_MASK << j;
+ mask = ~(JPEG_SYS_INT_EN__DJRBC0_MASK << j);
+ WREG32_P(reg, data, mask);
+ } else {
+ data = JPEG_SYS_INT_EN__DJRBC0_MASK << (j+12);
+ mask = ~(JPEG_SYS_INT_EN__DJRBC0_MASK << (j+12));
+ WREG32_P(reg, data, mask);
+ }
+
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_LMI_JRBC_RB_VMID,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC_RB_CNTL,
+ reg_offset,
+ (0x00000001L | 0x00000002L));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ reg_offset, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ reg_offset, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC_RB_RPTR,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC_RB_WPTR,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC_RB_CNTL,
+ reg_offset, 0x00000002L);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC_RB_SIZE,
+ reg_offset, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC_RB_WPTR,
+ reg_offset);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_1_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v5_0_1_stop(struct amdgpu_device *adev)
+{
+ int i, jpeg_inst, r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable antihang */
+ r = jpeg_v5_0_1_enable_antihang(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_1_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v5_0_1_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC_RB_RPTR,
+ ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0);
+}
+
+/**
+ * jpeg_v5_0_1_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v5_0_1_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+
+ return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC_RB_WPTR,
+ ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0);
+}
+
+/**
+ * jpeg_v5_0_1_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v5_0_1_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me),
+ regUVD_JRBC_RB_WPTR,
+ (ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0),
+ lower_32_bits(ring->wptr));
+ }
+}
+
+static bool jpeg_v5_0_1_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool ret = true;
+ int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ int reg_offset = (j ? jpeg_v5_0_1_core_reg_offset(j) : 0);
+
+ ret &= ((RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, i),
+ regUVD_JRBC_STATUS, reg_offset) &
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+ }
+ }
+
+ return ret;
+}
+
+static int jpeg_v5_0_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret = 0;
+ int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ int reg_offset = (j ? jpeg_v5_0_1_core_reg_offset(j) : 0);
+
+ ret = SOC15_WAIT_ON_RREG_OFFSET(JPEG, GET_INST(JPEG, i),
+ regUVD_JRBC_STATUS, reg_offset,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+ if (ret)
+ return ret;
+ }
+ }
+ return ret;
+}
+
+static int jpeg_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ int i;
+
+ if (!enable)
+ return 0;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (!jpeg_v5_0_1_is_idle(adev))
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static int jpeg_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret;
+
+ if (state == adev->jpeg.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = jpeg_v5_0_1_stop(adev);
+ else
+ ret = jpeg_v5_0_1_start(adev);
+
+ if (!ret)
+ adev->jpeg.cur_state = state;
+
+ return ret;
+}
+
+static int jpeg_v5_0_1_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v5_0_1_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u32 i, inst;
+
+ i = node_id_to_phys_map[entry->node_id];
+ DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n");
+
+ for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst)
+ if (adev->jpeg.inst[inst].aid_id == i)
+ break;
+
+ if (inst >= adev->jpeg.num_jpeg_inst) {
+ dev_WARN_ONCE(adev->dev, 1,
+ "Interrupt received for unknown JPEG instance %d",
+ entry->node_id);
+ return 0;
+ }
+
+ switch (entry->src_id) {
+ case VCN_5_0__SRCID__JPEG_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[0]);
+ break;
+ case VCN_5_0__SRCID__JPEG1_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[1]);
+ break;
+ case VCN_5_0__SRCID__JPEG2_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[2]);
+ break;
+ case VCN_5_0__SRCID__JPEG3_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[3]);
+ break;
+ case VCN_5_0__SRCID__JPEG4_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[4]);
+ break;
+ case VCN_5_0__SRCID__JPEG5_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[5]);
+ break;
+ case VCN_5_0__SRCID__JPEG6_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[6]);
+ break;
+ case VCN_5_0__SRCID__JPEG7_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[7]);
+ break;
+ case VCN_5_0__SRCID__JPEG8_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[8]);
+ break;
+ case VCN_5_0__SRCID__JPEG9_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[9]);
+ break;
+ default:
+ DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amd_ip_funcs jpeg_v5_0_1_ip_funcs = {
+ .name = "jpeg_v5_0_1",
+ .early_init = jpeg_v5_0_1_early_init,
+ .late_init = NULL,
+ .sw_init = jpeg_v5_0_1_sw_init,
+ .sw_fini = jpeg_v5_0_1_sw_fini,
+ .hw_init = jpeg_v5_0_1_hw_init,
+ .hw_fini = jpeg_v5_0_1_hw_fini,
+ .suspend = jpeg_v5_0_1_suspend,
+ .resume = jpeg_v5_0_1_resume,
+ .is_idle = jpeg_v5_0_1_is_idle,
+ .wait_for_idle = jpeg_v5_0_1_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = jpeg_v5_0_1_set_clockgating_state,
+ .set_powergating_state = jpeg_v5_0_1_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v5_0_1_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v5_0_1_dec_ring_get_rptr,
+ .get_wptr = jpeg_v5_0_1_dec_ring_get_wptr,
+ .set_wptr = jpeg_v5_0_1_dec_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v5_0_1_dec_ring_emit_vm_flush */
+ 22 + 22 + /* jpeg_v5_0_1_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v5_0_1_dec_ring_emit_ib */
+ .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib,
+ .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v4_0_3_dec_ring_nop,
+ .insert_start = jpeg_v4_0_3_dec_ring_insert_start,
+ .insert_end = jpeg_v4_0_3_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void jpeg_v5_0_1_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, j, jpeg_inst;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v5_0_1_dec_ring_vm_funcs;
+ adev->jpeg.inst[i].ring_dec[j].me = i;
+ adev->jpeg.inst[i].ring_dec[j].pipe = j;
+ }
+ jpeg_inst = GET_INST(JPEG, i);
+ adev->jpeg.inst[i].aid_id =
+ jpeg_inst / adev->jpeg.num_inst_per_aid;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v5_0_1_irq_funcs = {
+ .set = jpeg_v5_0_1_set_interrupt_state,
+ .process = jpeg_v5_0_1_process_interrupt,
+};
+
+static void jpeg_v5_0_1_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i)
+ adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings;
+
+ adev->jpeg.inst->irq.funcs = &jpeg_v5_0_1_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version jpeg_v5_0_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 5,
+ .minor = 0,
+ .rev = 1,
+ .funcs = &jpeg_v5_0_1_ip_funcs,
+};
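For context, an amdgpu_ip_block_version like this only takes effect once the IP discovery code registers it for the matching JPEG hardware version. A hedged sketch of that hookup (the exact switch case and its location in amdgpu_discovery.c are assumptions, not part of this patch):

	/* Illustrative only: assumed registration site in amdgpu_discovery.c */
	case IP_VERSION(5, 0, 1):
		amdgpu_device_ip_block_add(adev, &jpeg_v5_0_1_ip_block);
		break;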
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h
new file mode 100644
index 000000000000..8ce146c00bb6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V5_0_1_H__
+#define __JPEG_V5_0_1_H__
+
+extern const struct amdgpu_ip_block_version jpeg_v5_0_1_ip_block;
+
+#endif /* __JPEG_V5_0_1_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 9c905b9e9376..65f389eb65e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -1505,9 +1505,7 @@ static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
}
static void mes_v11_0_kiq_clear(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index 9ecc5d61e49b..5b537806b4da 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -24,6 +24,7 @@
#include <linux/firmware.h>
#include <linux/module.h>
#include "amdgpu.h"
+#include "gfx_v12_0.h"
#include "soc15_common.h"
#include "soc21.h"
#include "gc/gc_12_0_0_offset.h"
@@ -350,6 +351,132 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes,
offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}
+int gfx_v12_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+ bool req)
+{
+ u32 i, tmp, val;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* Request with MeId=2, PipeId=0 */
+ tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
+ WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
+
+ val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
+ if (req) {
+ if (val == tmp)
+ break;
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
+ REQUEST, 1);
+
+ /* unlocked or locked by firmware */
+ if (val != tmp)
+ break;
+ }
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ return -EINVAL;
+
+ return 0;
+}
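The helper above is a polled acquire/release of the CP_GFX_INDEX_MUTEX hardware lock; callers are expected to bracket GRBM_GFX_INDEX programming with it, as the MMIO reset path added below does. A minimal usage sketch (illustrative only; note the patch itself does not check the return value):

	mutex_lock(&adev->gfx.reset_sem_mutex);
	if (!gfx_v12_0_request_gfx_index_mutex(adev, true)) {
		/* ... program GRBM_GFX_INDEX / CP_VMID_RESET here ... */
		gfx_v12_0_request_gfx_index_mutex(adev, false);
	}
	mutex_unlock(&adev->gfx.reset_sem_mutex);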
+
+static int mes_v12_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_type,
+ uint32_t me_id, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t vmid)
+{
+ struct amdgpu_device *adev = mes->adev;
+ uint32_t value, reg;
+ int i, r = 0;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ if (queue_type == AMDGPU_RING_TYPE_GFX) {
+ dev_info(adev->dev, "reset gfx queue (%d:%d:%d: vmid:%d)\n",
+ me_id, pipe_id, queue_id, vmid);
+
+ mutex_lock(&adev->gfx.reset_sem_mutex);
+ gfx_v12_0_request_gfx_index_mutex(adev, true);
+ /* all se allow writes */
+ WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX,
+ (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
+ value = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+ if (pipe_id == 0)
+ value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id);
+ else
+ value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id);
+ WREG32_SOC15(GC, 0, regCP_VMID_RESET, value);
+ gfx_v12_0_request_gfx_index_mutex(adev, false);
+ mutex_unlock(&adev->gfx.reset_sem_mutex);
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n");
+ r = -ETIMEDOUT;
+ }
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ dev_info(adev->dev, "reset compute queue (%d:%d:%d)\n",
+ me_id, pipe_id, queue_id);
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
+
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on hqd deactivate\n");
+ r = -ETIMEDOUT;
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else if (queue_type == AMDGPU_RING_TYPE_SDMA) {
+ dev_info(adev->dev, "reset sdma queue (%d:%d:%d)\n",
+ me_id, pipe_id, queue_id);
+ switch (me_id) {
+ case 1:
+ reg = SOC15_REG_OFFSET(GC, 0, regSDMA1_QUEUE_RESET_REQ);
+ break;
+ case 0:
+ default:
+ reg = SOC15_REG_OFFSET(GC, 0, regSDMA0_QUEUE_RESET_REQ);
+ break;
+ }
+
+ value = 1 << queue_id;
+ WREG32(reg, value);
+ /* wait for queue reset done */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32(reg) & value))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on sdma queue reset done\n");
+ r = -ETIMEDOUT;
+ }
+ }
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ return r;
+}
+
static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
struct mes_reset_queue_input *input)
{
@@ -721,6 +848,11 @@ static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes,
union MESAPI__RESET mes_reset_queue_pkt;
int pipe;
+ if (input->use_mmio)
+ return mes_v12_0_reset_queue_mmio(mes, input->queue_type,
+ input->me_id, input->pipe_id,
+ input->queue_id, input->vmid);
+
memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
@@ -1455,9 +1587,7 @@ static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
}
static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index e9a6f33ca710..243eabda0607 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -356,7 +356,7 @@ static void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
if (adev->pg_flags & AMD_PG_SUPPORT_MMHUB)
amdgpu_dpm_set_powergating_by_smu(adev,
AMD_IP_BLOCK_TYPE_GMC,
- enable);
+ enable, 0);
}
static int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
index b01bb759d0f4..e646e5cef0a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
@@ -33,7 +33,6 @@
#define regVM_L2_CNTL3_DEFAULT 0x80100007
#define regVM_L2_CNTL4_DEFAULT 0x000000c1
-#define mmSMNAID_AID0_MCA_SMU 0x03b30400
static u64 mmhub_v1_8_get_fb_location(struct amdgpu_device *adev)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
index 0fbc3be81f14..f2ab5001b492 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
@@ -108,7 +108,7 @@ mmhub_v4_1_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS_LO32:0x%08X\n",
status);
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(4, 1, 0):
mmhub_cid = mmhub_client_ids_v4_1_0[cid][rw];
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index 0820ed62e2e8..62cdfe10e6f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -434,9 +434,8 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
* this should allow us to catch up.
*/
tmp = (wptr + 32) & ih->ptr_mask;
- dev_warn(adev->dev, "IH ring buffer overflow "
- "(0x%08X, 0x%08X, 0x%08X)\n",
- wptr, ih->rptr, tmp);
+ dev_warn(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+ amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp);
ih->rptr = tmp;
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
@@ -667,17 +666,17 @@ static void navi10_ih_update_clockgating_state(struct amdgpu_device *adev,
}
}
-static int navi10_ih_set_clockgating_state(void *handle,
+static int navi10_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
navi10_ih_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
return 0;
}
-static int navi10_ih_set_powergating_state(void *handle,
+static int navi10_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c
index 39919e0892c1..c92875ceb31f 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c
@@ -28,6 +28,7 @@
#include "nbif/nbif_6_3_1_sh_mask.h"
#include "pcie/pcie_6_1_0_offset.h"
#include "pcie/pcie_6_1_0_sh_mask.h"
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
#include <uapi/linux/kfd_ioctl.h>
static void nbif_v6_3_1_remap_hdp_registers(struct amdgpu_device *adev)
@@ -518,3 +519,83 @@ const struct amdgpu_nbio_funcs nbif_v6_3_1_sriov_funcs = {
.get_rom_offset = nbif_v6_3_1_get_rom_offset,
.set_reg_remap = nbif_v6_3_1_set_reg_remap,
};
+
+static int nbif_v6_3_1_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ /* The ras_controller_irq enablement should be done in the PSP bootloader when it
+ * tries to enable the RAS feature. The driver only needs to set the correct
+ * interrupt vector for the bare-metal and SR-IOV use cases respectively.
+ */
+ uint32_t bif_doorbell_int_cntl;
+
+ bif_doorbell_int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+ bif_doorbell_int_cntl = REG_SET_FIELD(bif_doorbell_int_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_DISABLE,
+ (state == AMDGPU_IRQ_STATE_ENABLE) ? 0 : 1);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_int_cntl);
+
+ return 0;
+}
+
+static int nbif_v6_3_1_process_err_event_athub_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ /* By design, the IH cookie for err_event_athub_irq should be written
+ * to the BIF ring. Since the BIF ring is not enabled, leave the process
+ * callback as a dummy one.
+ */
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs nbif_v6_3_1_ras_err_event_athub_irq_funcs = {
+ .set = nbif_v6_3_1_set_ras_err_event_athub_irq_state,
+ .process = nbif_v6_3_1_process_err_event_athub_irq,
+};
+
+static void nbif_v6_3_1_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev)
+{
+ uint32_t bif_doorbell_int_cntl;
+
+ bif_doorbell_int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+ if (REG_GET_FIELD(bif_doorbell_int_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) {
+ /* driver has to clear the interrupt status when bif ring is disabled */
+ bif_doorbell_int_cntl = REG_SET_FIELD(bif_doorbell_int_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_int_cntl);
+ amdgpu_ras_global_ras_isr(adev);
+ }
+}
+
+static int nbif_v6_3_1_init_ras_err_event_athub_interrupt(struct amdgpu_device *adev)
+{
+ int r;
+
+ /* init the irq funcs */
+ adev->nbio.ras_err_event_athub_irq.funcs =
+ &nbif_v6_3_1_ras_err_event_athub_irq_funcs;
+ adev->nbio.ras_err_event_athub_irq.num_types = 1;
+
+ /* register ras err event athub interrupt
+ * nbif v6_3_1 uses the same irq source as nbio v7_4
+ */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_BIF,
+ NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT,
+ &adev->nbio.ras_err_event_athub_irq);
+
+ return r;
+}
+
+struct amdgpu_nbio_ras nbif_v6_3_1_ras = {
+ .handle_ras_err_event_athub_intr_no_bifring =
+ nbif_v6_3_1_handle_ras_err_event_athub_intr_no_bifring,
+ .init_ras_err_event_athub_interrupt =
+ nbif_v6_3_1_init_ras_err_event_athub_interrupt,
+};
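A callback struct like nbif_v6_3_1_ras only becomes active once it is attached to the device's NBIO RAS pointer; a hedged sketch of the expected hookup (where exactly this assignment lives is an assumption and is not shown in this hunk):

	/* Illustrative only: attach the NBIF 6.3.1 RAS callbacks during IP setup. */
	if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(6, 3, 1))
		adev->nbio.ras = &nbif_v6_3_1_ras;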
diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h
index b7f2e0d88905..9ac4831d39e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h
@@ -29,5 +29,6 @@
extern const struct nbio_hdp_flush_reg nbif_v6_3_1_hdp_flush_reg;
extern const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs;
extern const struct amdgpu_nbio_funcs nbif_v6_3_1_sriov_funcs;
+extern struct amdgpu_nbio_ras nbif_v6_3_1_ras;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index b1b57dcc5a73..d1032e9992b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -271,8 +271,19 @@ const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = {
.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK,
};
+#define regRCC_DEV0_EPF6_STRAP4 0xd304
+#define regRCC_DEV0_EPF6_STRAP4_BASE_IDX 5
+
static void nbio_v7_0_init_registers(struct amdgpu_device *adev)
{
+ uint32_t data;
+
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(2, 5, 0):
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF6_STRAP4) & ~BIT(23);
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF6_STRAP4, data);
+ break;
+ }
}
#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
index 814ab59fdd4a..41421da63a08 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
@@ -275,7 +275,7 @@ static void nbio_v7_11_init_registers(struct amdgpu_device *adev)
if (def != data)
WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data);
- switch (adev->ip_versions[NBIO_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(7, 11, 0):
case IP_VERSION(7, 11, 1):
case IP_VERSION(7, 11, 2):
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
index 1ac730328516..3fb6d2aa7e3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
@@ -247,7 +247,7 @@ static void nbio_v7_7_init_registers(struct amdgpu_device *adev)
if (def != data)
WREG32_SOC15(NBIO, 0, regBIF0_PCIE_MST_CTRL_3, data);
- switch (adev->ip_versions[NBIO_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(7, 7, 0):
data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4) & ~BIT(23);
WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4, data);
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 3bad565ded73..47db483c3516 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -1039,10 +1039,10 @@ static bool nv_common_is_idle(void *handle)
return true;
}
-static int nv_common_set_clockgating_state(void *handle,
+static int nv_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1070,7 +1070,7 @@ static int nv_common_set_clockgating_state(void *handle,
return 0;
}
-static int nv_common_set_powergating_state(void *handle,
+static int nv_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* TODO */
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index c4b775aaee9f..cc621064610f 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -51,6 +51,8 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_6_sos.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_6_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_12_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_12_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_14_sos.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_14_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_14_0_0_toc.bin");
@@ -122,6 +124,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp)
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 7):
case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
err = psp_init_sos_microcode(psp, ucode_prefix);
if (err)
@@ -177,6 +180,7 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
retry_cnt =
((amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14))) ?
PSP_VMBX_POLLING_LIMIT :
10;
@@ -203,6 +207,7 @@ static int psp_v13_0_wait_for_bootloader_steady_state(struct psp_context *psp)
int ret;
if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) {
ret = psp_v13_0_wait_for_vmbx_ready(psp);
if (ret)
@@ -288,6 +293,11 @@ static int psp_v13_0_bootloader_load_ras_drv(struct psp_context *psp)
return psp_v13_0_bootloader_load_component(psp, &psp->ras_drv, PSP_BL__LOAD_RASDRV);
}
+static int psp_v13_0_bootloader_load_spdm_drv(struct psp_context *psp)
+{
+ return psp_v13_0_bootloader_load_component(psp, &psp->spdm_drv, PSP_BL__LOAD_SPDMDRV);
+}
+
static inline void psp_v13_0_init_sos_version(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
@@ -798,6 +808,7 @@ static bool psp_v13_0_get_ras_capability(struct psp_context *psp)
return false;
if ((amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) &&
(!(adev->flags & AMD_IS_APU))) {
reg_data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_127);
@@ -857,6 +868,7 @@ static const struct psp_funcs psp_v13_0_funcs = {
.bootloader_load_intf_drv = psp_v13_0_bootloader_load_intf_drv,
.bootloader_load_dbg_drv = psp_v13_0_bootloader_load_dbg_drv,
.bootloader_load_ras_drv = psp_v13_0_bootloader_load_ras_drv,
+ .bootloader_load_spdm_drv = psp_v13_0_bootloader_load_spdm_drv,
.bootloader_load_sos = psp_v13_0_bootloader_load_sos,
.ring_create = psp_v13_0_ring_create,
.ring_stop = psp_v13_0_ring_stop,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 7948d74f8722..135c5099bfb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -145,9 +145,11 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
if (i == 0)
err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_sdma.bin", chip_name);
else
err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_sdma1.bin", chip_name);
if (err)
goto out;
@@ -631,7 +633,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -1080,14 +1082,14 @@ static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int sdma_v2_4_set_clockgating_state(void *handle,
+static int sdma_v2_4_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
/* XXX handled via the smc on VI */
return 0;
}
-static int sdma_v2_4_set_powergating_state(void *handle,
+static int sdma_v2_4_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 9a3d729545a7..c611328671ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -305,9 +305,11 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
if (i == 0)
err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_sdma.bin", chip_name);
else
err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_sdma1.bin", chip_name);
if (err)
goto out;
@@ -904,7 +906,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
else
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -1483,10 +1485,10 @@ static void sdma_v3_0_update_sdma_medium_grain_light_sleep(
}
}
-static int sdma_v3_0_set_clockgating_state(void *handle,
+static int sdma_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1506,7 +1508,7 @@ static int sdma_v3_0_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v3_0_set_powergating_state(void *handle,
+static int sdma_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index c1f98f6cf20d..b48d9c0b2e1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1565,7 +1565,7 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -1956,7 +1956,7 @@ static int sdma_v4_0_hw_init(struct amdgpu_ip_block *ip_block)
struct amdgpu_device *adev = ip_block->adev;
if (adev->flags & AMD_IS_APU)
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false, 0);
if (!amdgpu_sriov_vf(adev))
sdma_v4_0_init_golden_registers(adev);
@@ -1983,7 +1983,7 @@ static int sdma_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
sdma_v4_0_enable(adev, false);
if (adev->flags & AMD_IS_APU)
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true, 0);
return 0;
}
@@ -2297,10 +2297,10 @@ static void sdma_v4_0_update_medium_grain_light_sleep(
}
}
-static int sdma_v4_0_set_clockgating_state(void *handle,
+static int sdma_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -2312,10 +2312,10 @@ static int sdma_v4_0_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v4_0_set_powergating_state(void *handle,
+static int sdma_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(4, 1, 0):
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index a38553f38fdc..48537eba225d 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -189,6 +189,7 @@ static int sdma_v4_4_2_init_microcode(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 4) ||
amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) {
ret = amdgpu_sdma_init_microcode(adev, 0, true);
break;
@@ -667,11 +668,12 @@ static uint32_t sdma_v4_4_2_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
*
* @adev: amdgpu_device pointer
* @i: instance to resume
+ * @restore: restore the saved ring state (including wptr) when restarting after a reset
*
* Set up the gfx DMA ring buffers and enable them.
* Returns 0 for success, error for failure.
*/
-static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
+static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, bool restore)
{
struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
@@ -698,16 +700,24 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
WREG32_SDMA(i, regSDMA_GFX_RB_BASE, ring->gpu_addr >> 8);
WREG32_SDMA(i, regSDMA_GFX_RB_BASE_HI, ring->gpu_addr >> 40);
- ring->wptr = 0;
+ if (!restore)
+ ring->wptr = 0;
/* before programing wptr to a less value, need set minor_ptr_update first */
WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 1);
/* Initialize the ring buffer's read and write pointers */
- WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0);
- WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0);
- WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0);
- WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0);
+ if (restore) {
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, lower_32_bits(ring->wptr << 2));
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, upper_32_bits(ring->wptr << 2));
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, lower_32_bits(ring->wptr << 2));
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, upper_32_bits(ring->wptr << 2));
+ } else {
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0);
+ }
doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL);
doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET);
@@ -755,11 +765,12 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
*
* @adev: amdgpu_device pointer
* @i: instance to resume
+ * @restore: whether to restore the saved ring state when restarting after a reset
*
* Set up the page DMA ring buffers and enable them.
* Returns 0 for success, error for failure.
*/
-static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i)
+static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i, bool restore)
{
struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
@@ -775,10 +786,17 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i)
WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);
/* Initialize the ring buffer's read and write pointers */
- WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0);
- WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0);
- WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0);
- WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0);
+ if (restore) {
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, lower_32_bits(ring->wptr << 2));
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, upper_32_bits(ring->wptr << 2));
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, lower_32_bits(ring->wptr << 2));
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, upper_32_bits(ring->wptr << 2));
+ } else {
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0);
+ }
/* set the wb address whether it's enabled or not */
WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_ADDR_HI,
@@ -792,7 +810,8 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i)
WREG32_SDMA(i, regSDMA_PAGE_RB_BASE, ring->gpu_addr >> 8);
WREG32_SDMA(i, regSDMA_PAGE_RB_BASE_HI, ring->gpu_addr >> 40);
- ring->wptr = 0;
+ if (!restore)
+ ring->wptr = 0;
/* before programing wptr to a less value, need set minor_ptr_update first */
WREG32_SDMA(i, regSDMA_PAGE_MINOR_PTR_UPDATE, 1);
@@ -911,12 +930,13 @@ static int sdma_v4_4_2_inst_load_microcode(struct amdgpu_device *adev,
*
* @adev: amdgpu_device pointer
* @inst_mask: mask of dma engine instances to be enabled
+ * @restore: whether to restore the saved ring state when restarting after a reset
*
* Set up the DMA engines and enable them.
* Returns 0 for success, error for failure.
*/
static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
- uint32_t inst_mask)
+ uint32_t inst_mask, bool restore)
{
struct amdgpu_ring *ring;
uint32_t tmp_mask;
@@ -927,7 +947,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
sdma_v4_4_2_inst_enable(adev, false, inst_mask);
} else {
/* bypass sdma microcode loading on Gopher */
- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP &&
+ if (!restore && adev->firmware.load_type != AMDGPU_FW_LOAD_PSP &&
adev->sdma.instance[0].fw) {
r = sdma_v4_4_2_inst_load_microcode(adev, inst_mask);
if (r)
@@ -946,17 +966,19 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
uint32_t temp;
WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
- sdma_v4_4_2_gfx_resume(adev, i);
+ sdma_v4_4_2_gfx_resume(adev, i, restore);
if (adev->sdma.has_page_queue)
- sdma_v4_4_2_page_resume(adev, i);
+ sdma_v4_4_2_page_resume(adev, i, restore);
/* set utc l1 enable flag always to 1 */
temp = RREG32_SDMA(i, regSDMA_CNTL);
temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1);
- /* enable context empty interrupt during initialization */
- temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1);
- WREG32_SDMA(i, regSDMA_CNTL, temp);
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < IP_VERSION(4, 4, 5)) {
+ /* enable context empty interrupt during initialization */
+ temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1);
+ WREG32_SDMA(i, regSDMA_CNTL, temp);
+ }
if (!amdgpu_sriov_vf(adev)) {
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
/* unhalt engine */
@@ -1110,7 +1132,7 @@ static int sdma_v4_4_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -1466,6 +1488,7 @@ static int sdma_v4_4_2_sw_fini(struct amdgpu_ip_block *ip_block)
amdgpu_sdma_sysfs_reset_mask_fini(adev);
if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 4) ||
amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5))
amdgpu_sdma_destroy_inst_ctx(adev, true);
else
@@ -1486,7 +1509,7 @@ static int sdma_v4_4_2_hw_init(struct amdgpu_ip_block *ip_block)
if (!amdgpu_sriov_vf(adev))
sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask);
- r = sdma_v4_4_2_inst_start(adev, inst_mask);
+ r = sdma_v4_4_2_inst_start(adev, inst_mask, false);
return r;
}
@@ -1514,7 +1537,7 @@ static int sdma_v4_4_2_hw_fini(struct amdgpu_ip_block *ip_block)
return 0;
}
-static int sdma_v4_4_2_set_clockgating_state(void *handle,
+static int sdma_v4_4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
static int sdma_v4_4_2_suspend(struct amdgpu_ip_block *ip_block)
@@ -1522,7 +1545,7 @@ static int sdma_v4_4_2_suspend(struct amdgpu_ip_block *ip_block)
struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_in_reset(adev))
- sdma_v4_4_2_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+ sdma_v4_4_2_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE);
return sdma_v4_4_2_hw_fini(ip_block);
}
@@ -1573,6 +1596,42 @@ static int sdma_v4_4_2_soft_reset(struct amdgpu_ip_block *ip_block)
return 0;
}
+static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int i, r;
+ u32 inst_mask;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
+ /* stop queue */
+ inst_mask = 1 << ring->me;
+ sdma_v4_4_2_inst_gfx_stop(adev, inst_mask);
+ if (adev->sdma.has_page_queue)
+ sdma_v4_4_2_inst_page_stop(adev, inst_mask);
+
+ r = amdgpu_dpm_reset_sdma(adev, 1 << GET_INST(SDMA0, ring->me));
+ if (r)
+ return r;
+
+ udelay(50);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(RREG32_SDMA(ring->me, regSDMA_F32_CNTL), SDMA_F32_CNTL, HALT))
+ break;
+ udelay(1);
+ }
+
+ if (i == adev->usec_timeout) {
+ dev_err(adev->dev, "timed out waiting for SDMA%d unhalt after reset\n",
+ ring->me);
+ return -ETIMEDOUT;
+ }
+
+ return sdma_v4_4_2_inst_start(adev, inst_mask, true);
+}
+
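A minimal sketch of how this new per-queue reset can be reached through the ring funcs table it is hooked into further below; the wrapper function here is hypothetical, only the .reset hook itself is part of this change.

static int example_sdma_queue_reset(struct amdgpu_ring *ring, unsigned int vmid)
{
	/* guard against rings that do not implement a per-queue reset */
	if (!ring->funcs->reset)
		return -EOPNOTSUPP;

	/* for SDMA v4.4.2 this resolves to sdma_v4_4_2_reset_queue() above */
	return ring->funcs->reset(ring, vmid);
}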
static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
@@ -1821,10 +1880,10 @@ static void sdma_v4_4_2_inst_update_medium_grain_clock_gating(
}
}
-static int sdma_v4_4_2_set_clockgating_state(void *handle,
+static int sdma_v4_4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t inst_mask;
if (amdgpu_sriov_vf(adev))
@@ -1839,7 +1898,7 @@ static int sdma_v4_4_2_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v4_4_2_set_powergating_state(void *handle,
+static int sdma_v4_4_2_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
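The conversion from a void *handle to a struct amdgpu_ip_block * parameter recurs across the gating callbacks in this series; a minimal sketch of the resulting callback shape, with a placeholder function name:

static int example_set_clockgating_state(struct amdgpu_ip_block *ip_block,
					 enum amd_clockgating_state state)
{
	/* the device is taken from the IP block instead of casting a handle */
	struct amdgpu_device *adev = ip_block->adev;

	if (amdgpu_sriov_vf(adev))
		return 0;

	/* per-IP clock gating programming would follow here */
	return 0;
}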
@@ -1895,7 +1954,6 @@ static void sdma_v4_4_2_dump_ip_state(struct amdgpu_ip_block *ip_block)
if (!adev->sdma.ip_dump)
return;
- amdgpu_gfx_off_ctrl(adev, false);
for (i = 0; i < adev->sdma.num_instances; i++) {
instance_offset = i * reg_count;
for (j = 0; j < reg_count; j++)
@@ -1903,7 +1961,6 @@ static void sdma_v4_4_2_dump_ip_state(struct amdgpu_ip_block *ip_block)
RREG32(sdma_v4_4_2_get_reg_offset(adev, i,
sdma_reg_list_4_4_2[j].reg_offset));
}
- amdgpu_gfx_off_ctrl(adev, true);
}
const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = {
@@ -1955,6 +2012,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = {
.emit_wreg = sdma_v4_4_2_ring_emit_wreg,
.emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = sdma_v4_4_2_reset_queue,
};
static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
@@ -2167,7 +2225,7 @@ static int sdma_v4_4_2_xcp_resume(void *handle, uint32_t inst_mask)
if (!amdgpu_sriov_vf(adev))
sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask);
- r = sdma_v4_4_2_inst_start(adev, inst_mask);
+ r = sdma_v4_4_2_inst_start(adev, inst_mask, false);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index fa9b40934957..b764550834a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1194,7 +1194,7 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
if (!ring->is_mes_queue)
@@ -1853,10 +1853,10 @@ static void sdma_v5_0_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
}
-static int sdma_v5_0_set_clockgating_state(void *handle,
+static int sdma_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1877,7 +1877,7 @@ static int sdma_v5_0_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v5_0_set_powergating_state(void *handle,
+static int sdma_v5_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index ba5160399ab2..b1818e87889a 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -1050,7 +1050,7 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
if (!ring->is_mes_queue)
@@ -1812,10 +1812,10 @@ static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
}
-static int sdma_v5_2_set_clockgating_state(void *handle,
+static int sdma_v5_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1841,7 +1841,7 @@ static int sdma_v5_2_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v5_2_set_powergating_state(void *handle,
+static int sdma_v5_2_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index d46128b0ec92..1a023b45f0be 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -1063,7 +1063,7 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
if (!ring->is_mes_queue)
@@ -1601,13 +1601,13 @@ static int sdma_v6_0_process_illegal_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int sdma_v6_0_set_clockgating_state(void *handle,
+static int sdma_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int sdma_v6_0_set_powergating_state(void *handle,
+static int sdma_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index d2ce6b6a7ff6..9c17df2cf37b 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -490,162 +490,185 @@ static void sdma_v7_0_enable(struct amdgpu_device *adev, bool enable)
}
/**
- * sdma_v7_0_gfx_resume - setup and start the async dma engines
+ * sdma_v7_0_gfx_resume_instance - start/restart a certain sdma engine
*
* @adev: amdgpu_device pointer
+ * @i: instance
+ * @restore: whether to restore the saved wptr when restarting the engine
*
- * Set up the gfx DMA ring buffers and enable them.
- * Returns 0 for success, error for failure.
+ * Set up the gfx DMA ring buffers and enable them. On restart, the saved wptr and rptr are restored.
+ * Returns 0 for success, error for failure.
*/
-static int sdma_v7_0_gfx_resume(struct amdgpu_device *adev)
+static int sdma_v7_0_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
{
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl;
u32 rb_bufsz;
u32 doorbell;
u32 doorbell_offset;
- u32 tmp;
+ u32 temp;
u64 wptr_gpu_addr;
- int i, r;
-
- for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
+ int r;
- //if (!amdgpu_sriov_vf(adev))
- // WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+ ring = &adev->sdma.instance[i].ring;
- /* Set ring buffer size in dwords */
- rb_bufsz = order_base_2(ring->ring_size / 4);
- rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL,
- RPTR_WRITEBACK_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1);
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
-
- /* Initialize the ring buffer's read and write pointers */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ } else {
WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), 0);
WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), 0);
WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), 0);
WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), 0);
+ }
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI),
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO),
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+ if (amdgpu_sriov_vf(adev))
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1);
+ else
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);
- /* setup the wptr shadow polling */
- wptr_gpu_addr = ring->wptr_gpu_addr;
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO),
- lower_32_bits(wptr_gpu_addr));
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI),
- upper_32_bits(wptr_gpu_addr));
-
- /* set the wb address whether it's enabled or not */
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI),
- upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO),
- lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
-
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
- if (amdgpu_sriov_vf(adev))
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1);
- else
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1);
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
+ if (!restore)
ring->wptr = 0;
- /* before programing wptr to a less value, need set minor_ptr_update first */
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1);
+ /* before programming wptr to a smaller value, minor_ptr_update must be set first */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1);
- if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
- }
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal uses register writes for wptr */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+ }
- doorbell = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL));
- doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET));
+ doorbell = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL));
+ doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET));
- if (ring->use_doorbell) {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
- doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET,
- OFFSET, ring->doorbell_index);
- } else {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0);
- }
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell);
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
-
- if (i == 0)
- adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
- ring->doorbell_index,
- adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);
-
- if (amdgpu_sriov_vf(adev))
- sdma_v7_0_ring_set_wptr(ring);
-
- /* set minor_ptr_update to 0 after wptr programed */
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0);
-
- /* Set up sdma hang watchdog */
- tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL));
- /* 100ms per unit */
- tmp = REG_SET_FIELD(tmp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT,
- max(adev->usec_timeout/100000, 1));
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), tmp);
-
- /* Set up RESP_MODE to non-copy addresses */
- tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL));
- tmp = REG_SET_FIELD(tmp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
- tmp = REG_SET_FIELD(tmp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), tmp);
-
- /* program default cache read and write policy */
- tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE));
- /* clean read policy and write policy bits */
- tmp &= 0xFF0FFF;
- tmp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
- (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), tmp);
-
- if (!amdgpu_sriov_vf(adev)) {
- /* unhalt engine */
- tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL));
- tmp = REG_SET_FIELD(tmp, SDMA0_MCU_CNTL, HALT, 0);
- tmp = REG_SET_FIELD(tmp, SDMA0_MCU_CNTL, RESET, 0);
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), tmp);
- }
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0);
+ }
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
- /* enable DMA RB */
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1);
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+ if (i == 0)
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index,
+ adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);
- ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1);
+ if (amdgpu_sriov_vf(adev))
+ sdma_v7_0_ring_set_wptr(ring);
+
+ /* set minor_ptr_update to 0 after wptr is programmed */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0);
+
+ /* Set up sdma hang watchdog */
+ temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL));
+ /* 100ms per unit */
+ temp = REG_SET_FIELD(temp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT,
+ max(adev->usec_timeout/100000, 1));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), temp);
+
+ /* Set up RESP_MODE to non-copy addresses */
+ temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), temp);
+
+ /* program default cache read and write policy */
+ temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE));
+ /* clean read policy and write policy bits */
+ temp &= 0xFF0FFF;
+ temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
+ (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), temp);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* unhalt engine */
+ temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_MCU_CNTL, HALT, 0);
+ temp = REG_SET_FIELD(temp, SDMA0_MCU_CNTL, RESET, 0);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), temp);
+ }
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
- /* enable DMA IBs */
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
+ /* enable DMA IBs */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
+ ring->sched.ready = true;
- ring->sched.ready = true;
+ if (amdgpu_sriov_vf(adev)) { /* the bare-metal sequence doesn't need the two lines below */
+ sdma_v7_0_ctx_switch_enable(adev, true);
+ sdma_v7_0_enable(adev, true);
+ }
- if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
- sdma_v7_0_ctx_switch_enable(adev, true);
- sdma_v7_0_enable(adev, true);
- }
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ ring->sched.ready = false;
- r = amdgpu_ring_test_helper(ring);
- if (r) {
- ring->sched.ready = false;
- return r;
- }
+ return r;
+}
+
+/**
+ * sdma_v7_0_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v7_0_gfx_resume(struct amdgpu_device *adev)
+{
+ int i, r;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = sdma_v7_0_gfx_resume_instance(adev, i, false);
+ if (r)
+ return r;
}
return 0;
+
}
/**
@@ -806,6 +829,31 @@ static bool sdma_v7_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
return false;
}
+static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int i, r;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (ring == &adev->sdma.instance[i].ring)
+ break;
+ }
+
+ if (i == adev->sdma.num_instances) {
+ DRM_ERROR("sdma instance not found\n");
+ return -EINVAL;
+ }
+
+ r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true);
+ if (r)
+ return r;
+
+ return sdma_v7_0_gfx_resume_instance(adev, i, true);
+}
+
/**
* sdma_v7_0_start - setup and start the async dma engines
*
@@ -1060,7 +1108,7 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
if (!ring->is_mes_queue)
@@ -1316,6 +1364,13 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
return r;
}
+ adev->sdma.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+ r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
/* Allocate memory for SDMA IP Dump buffer */
ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
if (ptr)
@@ -1334,6 +1389,7 @@ static int sdma_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+ amdgpu_sdma_sysfs_reset_mask_fini(adev);
amdgpu_sdma_destroy_inst_ctx(adev, true);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
@@ -1524,13 +1580,13 @@ static int sdma_v7_0_process_illegal_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int sdma_v7_0_set_clockgating_state(void *handle,
+static int sdma_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int sdma_v7_0_set_powergating_state(void *handle,
+static int sdma_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1636,6 +1692,7 @@ static const struct amdgpu_ring_funcs sdma_v7_0_ring_funcs = {
.emit_reg_write_reg_wait = sdma_v7_0_ring_emit_reg_write_reg_wait,
.init_cond_exec = sdma_v7_0_ring_init_cond_exec,
.preempt_ib = sdma_v7_0_ring_preempt_ib,
+ .reset = sdma_v7_0_reset_queue,
};
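sw_init above advertises per-queue reset in adev->sdma.supported_reset and this funcs table exposes sdma_v7_0_reset_queue; building on the earlier callback sketch, a hedged illustration of gating a queue reset on that mask, with a helper that is not part of the patch:

static int example_gated_queue_reset(struct amdgpu_ring *ring, unsigned int vmid)
{
	struct amdgpu_device *adev = ring->adev;

	/* only attempt a single-queue reset if the SDMA block advertises it */
	if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
		return -EOPNOTSUPP;

	return ring->funcs->reset(ring, vmid);
}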
static void sdma_v7_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 00f63d3fbea7..77ef7da2e4fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -2649,13 +2649,13 @@ static bool si_common_is_idle(void *handle)
return true;
}
-static int si_common_set_clockgating_state(void *handle,
+static int si_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int si_common_set_powergating_state(void *handle,
+static int si_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 47647a6083e8..dbd78d5345a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -286,7 +286,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -629,13 +629,13 @@ static int si_dma_process_trap_irq(struct amdgpu_device *adev,
return 0;
}
-static int si_dma_set_clockgating_state(void *handle,
+static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
u32 orig, data, offset;
int i;
bool enable;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
enable = (state == AMD_CG_STATE_GATE);
@@ -672,12 +672,12 @@ static int si_dma_set_clockgating_state(void *handle,
return 0;
}
-static int si_dma_set_powergating_state(void *handle,
+static int si_dma_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
WREG32(DMA_PGFSM_WRITE, 0x00002000);
WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index 2ec1ebe4db11..a32b6243c1f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -263,13 +263,13 @@ static int si_ih_soft_reset(struct amdgpu_ip_block *ip_block)
return 0;
}
-static int si_ih_set_clockgating_state(void *handle,
+static int si_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int si_ih_set_powergating_state(void *handle,
+static int si_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index ede072758dab..a59b4c36cad7 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -171,6 +171,24 @@ static const struct amdgpu_video_codecs vcn_4_0_3_video_codecs_encode = {
.codec_array = NULL,
};
+static const struct amdgpu_video_codecs vcn_5_0_1_video_codecs_encode_vcn0 = {
+ .codec_count = 0,
+ .codec_array = NULL,
+};
+
+static const struct amdgpu_video_codec_info vcn_5_0_1_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs vcn_5_0_1_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(vcn_5_0_1_video_codecs_decode_array_vcn0),
+ .codec_array = vcn_5_0_1_video_codecs_decode_array_vcn0,
+};
+
static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode,
const struct amdgpu_video_codecs **codecs)
{
@@ -209,6 +227,12 @@ static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode,
else
*codecs = &vcn_4_0_3_video_codecs_decode;
return 0;
+ case IP_VERSION(5, 0, 1):
+ if (encode)
+ *codecs = &vcn_5_0_1_video_codecs_encode_vcn0;
+ else
+ *codecs = &vcn_5_0_1_video_codecs_decode_vcn0;
+ return 0;
default:
return -EINVAL;
}
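The new VCN 5.0.1 tables are served through the same query path; a hedged sketch of a caller walking the decode table, where the surrounding caller context is an assumption and only the structures and this switch case come from the patch:

const struct amdgpu_video_codecs *codecs = NULL;
int i;

if (!soc15_query_video_codecs(adev, false /* decode */, &codecs)) {
	for (i = 0; i < codecs->codec_count; i++)
		dev_info(adev->dev, "decode codec %u: up to %ux%u\n",
			 codecs->codec_array[i].codec_type,
			 codecs->codec_array[i].max_width,
			 codecs->codec_array[i].max_height);
}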
@@ -327,6 +351,7 @@ static u32 soc15_get_xclk(struct amdgpu_device *adev)
if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(12, 0, 0) ||
amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(12, 0, 1) ||
amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) ||
amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 14))
return 10000;
if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(10, 0, 0) ||
@@ -556,6 +581,7 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
break;
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 14):
+ case IP_VERSION(13, 0, 12):
/* Use gpu_recovery param to target a reset method.
* Enable triggering of GPU reset only if specified
* by module parameter.
@@ -1177,6 +1203,7 @@ static int soc15_common_early_init(struct amdgpu_ip_block *ip_block)
break;
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
adev->asic_funcs = &aqua_vanjaram_asic_funcs;
adev->cg_flags =
AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG |
@@ -1385,10 +1412,10 @@ static void soc15_update_drm_light_sleep(struct amdgpu_device *adev, bool enable
WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_LIGHT_SLEEP_CTRL), data);
}
-static int soc15_common_set_clockgating_state(void *handle,
+static int soc15_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1453,6 +1480,7 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags)
if ((amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) &&
(amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) &&
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 12)) &&
(amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 14))) {
/* AMD_CG_SUPPORT_DRM_MGCG */
data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0));
@@ -1473,7 +1501,7 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags)
adev->df.funcs->get_clockgating_state(adev, flags);
}
-static int soc15_common_set_powergating_state(void *handle,
+static int soc15_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* todo */
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index d6999835918f..62ad67d0b598 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -928,10 +928,10 @@ static bool soc21_common_is_idle(void *handle)
return true;
}
-static int soc21_common_set_clockgating_state(void *handle,
+static int soc21_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(4, 3, 0):
@@ -954,10 +954,10 @@ static int soc21_common_set_clockgating_state(void *handle,
return 0;
}
-static int soc21_common_set_powergating_state(void *handle,
+static int soc21_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) {
case IP_VERSION(6, 0, 0):
diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c
index be96de92b2f5..6b8e078ee7c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc24.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc24.c
@@ -444,8 +444,18 @@ static int soc24_common_late_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev)) {
xgpu_nv_mailbox_get_irq(adev);
+ } else {
+ if (adev->nbio.ras &&
+ adev->nbio.ras_err_event_athub_irq.funcs)
+ /* No need to fail GPU late init if enabling the
+ * athub_err_event interrupt fails; NBIF v6_3_1 only
+ * supports fatal error handling, so just enable the
+ * interrupt directly.
+ */
+ amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+ }
/* Enable selfring doorbell aperture late because doorbell BAR
* aperture will change if resize BAR successfully in gmc sw_init.
@@ -501,8 +511,13 @@ static int soc24_common_hw_fini(struct amdgpu_ip_block *ip_block)
adev->nbio.funcs->enable_doorbell_aperture(adev, false);
adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false);
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev)) {
xgpu_nv_mailbox_put_irq(adev);
+ } else {
+ if (adev->nbio.ras &&
+ adev->nbio.ras_err_event_athub_irq.funcs)
+ amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+ }
return 0;
}
@@ -522,10 +537,10 @@ static bool soc24_common_is_idle(void *handle)
return true;
}
-static int soc24_common_set_clockgating_state(void *handle,
+static int soc24_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(6, 3, 1):
@@ -542,10 +557,10 @@ static int soc24_common_set_clockgating_state(void *handle,
return 0;
}
-static int soc24_common_set_powergating_state(void *handle,
+static int soc24_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) {
case IP_VERSION(7, 0, 0):
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
index 21b71a427b1f..64891f099366 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
@@ -30,6 +30,9 @@
#define RSP_ID_MASK (1U << 31)
#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK)
+/* invalid node instance value */
+#define TA_RAS_INV_NODE 0xffff
+
/* RAS related enumerations */
/**********************************************************/
enum ras_command {
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h b/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h
index 00d8bdb8254f..9ec2e03d41c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h
@@ -31,10 +31,12 @@
* Secure Display Command ID
*/
enum ta_securedisplay_command {
- /* Query whether TA is responding used only for validation purpose */
+ /* Query whether TA is responding. It is used only for validation purposes */
TA_SECUREDISPLAY_COMMAND__QUERY_TA = 1,
/* Send region of Interest and CRC value to I2C */
TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC = 2,
+ /* V2: send multiple regions of interest and CRC values to I2C */
+ TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2 = 3,
/* Maximum Command ID */
TA_SECUREDISPLAY_COMMAND__MAX_ID = 0x7FFFFFFF,
};
@@ -83,6 +85,8 @@ enum ta_securedisplay_ta_query_cmd_ret {
enum ta_securedisplay_buffer_size {
/* 15 bytes = 8 byte (ROI) + 6 byte(CRC) + 1 byte(phy_id) */
TA_SECUREDISPLAY_I2C_BUFFER_SIZE = 15,
+ /* 16 bytes = 8 byte (ROI) + 6 byte(CRC) + 1 byte(phy_id) + 1 byte(roi_idx) */
+ TA_SECUREDISPLAY_V2_I2C_BUFFER_SIZE = 16,
};
/** Input/output structures for Secure Display commands */
@@ -95,7 +99,15 @@ enum ta_securedisplay_buffer_size {
* Physical ID to determine which DIO scratch register should be used to get ROI
*/
struct ta_securedisplay_send_roi_crc_input {
- uint32_t phy_id; /* Physical ID */
+ /* Physical ID */
+ uint32_t phy_id;
+};
+
+struct ta_securedisplay_send_roi_crc_v2_input {
+ /* Physical ID */
+ uint32_t phy_id;
+ /* Region of interest index */
+ uint8_t roi_idx;
};
/** @union ta_securedisplay_cmd_input
@@ -104,6 +116,8 @@ struct ta_securedisplay_send_roi_crc_input {
union ta_securedisplay_cmd_input {
/* send ROI and CRC input buffer format */
struct ta_securedisplay_send_roi_crc_input send_roi_crc;
+ /* send ROI and CRC input buffer format, v2 adds a ROI index */
+ struct ta_securedisplay_send_roi_crc_v2_input send_roi_crc_v2;
uint32_t reserved[4];
};
@@ -128,6 +142,10 @@ struct ta_securedisplay_send_roi_crc_output {
uint8_t reserved;
};
+struct ta_securedisplay_send_roi_crc_v2_output {
+ uint8_t i2c_buf[TA_SECUREDISPLAY_V2_I2C_BUFFER_SIZE]; /* I2C buffer */
+};
+
/** @union ta_securedisplay_cmd_output
* Output buffer
*/
@@ -136,6 +154,8 @@ union ta_securedisplay_cmd_output {
struct ta_securedisplay_query_ta_output query_ta;
/* Send ROI CRC output buffer format used only for validation purpose */
struct ta_securedisplay_send_roi_crc_output send_roi_crc;
+ /* Send ROI CRC V2 output buffer format used only for validation purposes */
+ struct ta_securedisplay_send_roi_crc_v2_output send_roi_crc_v2;
uint32_t reserved[4];
};
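A hedged sketch of filling the new V2 input defined above; the local variable and the example values are placeholders, not driver code from this patch:

union ta_securedisplay_cmd_input roi_in = { 0 };

roi_in.send_roi_crc_v2.phy_id = 0;	/* physical ID, same meaning as in V1 */
roi_in.send_roi_crc_v2.roi_idx = 1;	/* new in V2: which region of interest */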
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index 5a04a6770138..0968e551f7b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -448,13 +448,13 @@ static int tonga_ih_soft_reset(struct amdgpu_ip_block *ip_block)
return 0;
}
-static int tonga_ih_set_clockgating_state(void *handle,
+static int tonga_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int tonga_ih_set_powergating_state(void *handle,
+static int tonga_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index 1a8ea834efa6..a7b9c358a2d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -173,156 +173,96 @@ static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev,
umc_v12_0_reset_error_count(adev);
}
-static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
+static int umc_v12_0_convert_error_address(struct amdgpu_device *adev,
struct ras_err_data *err_data,
- struct ta_ras_query_address_input *addr_in)
+ struct ta_ras_query_address_input *addr_in,
+ struct ta_ras_query_address_output *addr_out,
+ bool dump_addr)
{
- uint32_t col, row, row_xor, bank, channel_index;
- uint64_t soc_pa, retired_page, column, err_addr;
- struct ta_ras_query_address_output addr_out;
+ uint32_t col, col_lower, row, row_lower, bank;
+ uint32_t channel_index = 0, umc_inst = 0;
+ uint32_t i, loop_bits[UMC_V12_0_RETIRE_LOOP_BITS];
+ uint64_t soc_pa, column, err_addr;
+ struct ta_ras_query_address_output addr_out_tmp;
+ struct ta_ras_query_address_output *paddr_out;
+ enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE;
+ int ret = 0;
+
+ if (!addr_out)
+ paddr_out = &addr_out_tmp;
+ else
+ paddr_out = addr_out;
- err_addr = addr_in->ma.err_addr;
- addr_in->addr_type = TA_RAS_MCA_TO_PA;
- if (psp_ras_query_address(&adev->psp, addr_in, &addr_out)) {
- dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx",
- err_addr);
+ err_addr = bank = 0;
+ if (addr_in) {
+ err_addr = addr_in->ma.err_addr;
+ addr_in->addr_type = TA_RAS_MCA_TO_PA;
+ ret = psp_ras_query_address(&adev->psp, addr_in, paddr_out);
+ if (ret) {
+ dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx",
+ err_addr);
- return;
- }
+ goto out;
+ }
- soc_pa = addr_out.pa.pa;
- bank = addr_out.pa.bank;
- channel_index = addr_out.pa.channel_idx;
-
- col = (err_addr >> 1) & 0x1fULL;
- row = (err_addr >> 10) & 0x3fffULL;
- row_xor = row ^ (0x1ULL << 13);
- /* clear [C3 C2] in soc physical address */
- soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT);
- /* clear [C4] in soc physical address */
- soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT);
-
- /* loop for all possibilities of [C4 C3 C2] */
- for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) {
- retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT);
- retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT);
- /* include column bit 0 and 1 */
- col &= 0x3;
- col |= (column << 2);
- dev_info(adev->dev,
- "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
- retired_page, row, col, bank, channel_index);
- amdgpu_umc_fill_error_record(err_data, err_addr,
- retired_page, channel_index, addr_in->ma.umc_inst);
-
- /* shift R13 bit */
- retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT);
- dev_info(adev->dev,
- "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
- retired_page, row_xor, col, bank, channel_index);
- amdgpu_umc_fill_error_record(err_data, err_addr,
- retired_page, channel_index, addr_in->ma.umc_inst);
+ bank = paddr_out->pa.bank;
+ /* no need to care about umc inst if addr_in is NULL */
+ umc_inst = addr_in->ma.umc_inst;
}
-}
-static void umc_v12_0_dump_addr_info(struct amdgpu_device *adev,
- struct ta_ras_query_address_output *addr_out,
- uint64_t err_addr)
-{
- uint32_t col, row, row_xor, bank, channel_index;
- uint64_t soc_pa, retired_page, column;
-
- soc_pa = addr_out->pa.pa;
- bank = addr_out->pa.bank;
- channel_index = addr_out->pa.channel_idx;
-
- col = (err_addr >> 1) & 0x1fULL;
- row = (err_addr >> 10) & 0x3fffULL;
- row_xor = row ^ (0x1ULL << 13);
- /* clear [C3 C2] in soc physical address */
- soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT);
- /* clear [C4] in soc physical address */
- soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT);
-
- /* loop for all possibilities of [C4 C3 C2] */
- for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) {
- retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT);
- retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT);
- /* include column bit 0 and 1 */
- col &= 0x3;
- col |= (column << 2);
- dev_info(adev->dev,
- "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
- retired_page, row, col, bank, channel_index);
-
- /* shift R13 bit */
- retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT);
- dev_info(adev->dev,
- "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
- retired_page, row_xor, col, bank, channel_index);
- }
-}
+ loop_bits[0] = UMC_V12_0_PA_C2_BIT;
+ loop_bits[1] = UMC_V12_0_PA_C3_BIT;
+ loop_bits[2] = UMC_V12_0_PA_C4_BIT;
+ loop_bits[3] = UMC_V12_0_PA_R13_BIT;
-static int umc_v12_0_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
- uint64_t pa_addr, uint64_t *pfns, int len)
-{
- uint64_t soc_pa, retired_page, column;
- uint32_t pos = 0;
-
- soc_pa = pa_addr;
- /* clear [C3 C2] in soc physical address */
- soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT);
- /* clear [C4] in soc physical address */
- soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT);
-
- /* loop for all possibilities of [C4 C3 C2] */
- for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) {
- retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT);
- retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT);
-
- if (pos >= len)
- return 0;
- pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
-
- /* shift R13 bit */
- retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT);
-
- if (pos >= len)
- return 0;
- pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ /* other nps modes are taken as nps1 */
+ if (nps == AMDGPU_NPS4_PARTITION_MODE) {
+ loop_bits[0] = UMC_V12_0_PA_CH4_BIT;
+ loop_bits[1] = UMC_V12_0_PA_CH5_BIT;
+ loop_bits[2] = UMC_V12_0_PA_B0_BIT;
+ loop_bits[3] = UMC_V12_0_PA_R11_BIT;
}
- return pos;
-}
-
-static int umc_v12_0_convert_mca_to_addr(struct amdgpu_device *adev,
- uint64_t err_addr, uint32_t ch, uint32_t umc,
- uint32_t node, uint32_t socket,
- uint64_t *addr, bool dump_addr)
-{
- struct ta_ras_query_address_input addr_in;
- struct ta_ras_query_address_output addr_out;
-
- memset(&addr_in, 0, sizeof(addr_in));
- addr_in.ma.err_addr = err_addr;
- addr_in.ma.ch_inst = ch;
- addr_in.ma.umc_inst = umc;
- addr_in.ma.node_inst = node;
- addr_in.ma.socket_id = socket;
- addr_in.addr_type = TA_RAS_MCA_TO_PA;
- if (psp_ras_query_address(&adev->psp, &addr_in, &addr_out)) {
- dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx",
- err_addr);
- return -EINVAL;
+ soc_pa = paddr_out->pa.pa;
+ channel_index = paddr_out->pa.channel_idx;
+ /* clear loop bits in soc physical address */
+ for (i = 0; i < UMC_V12_0_RETIRE_LOOP_BITS; i++)
+ soc_pa &= ~BIT_ULL(loop_bits[i]);
+
+ paddr_out->pa.pa = soc_pa;
+ /* get column bit 0 and 1 in mca address */
+ col_lower = (err_addr >> 1) & 0x3ULL;
+ /* MA_R13_BIT will be handled later */
+ row_lower = (err_addr >> UMC_V12_0_MA_R0_BIT) & 0x1fffULL;
+
+ if (!err_data && !dump_addr)
+ goto out;
+
+ /* loop for all possibilities of retired bits */
+ for (column = 0; column < UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; column++) {
+ soc_pa = paddr_out->pa.pa;
+ for (i = 0; i < UMC_V12_0_RETIRE_LOOP_BITS; i++)
+ soc_pa |= (((column >> i) & 0x1ULL) << loop_bits[i]);
+
+ col = ((column & 0x7) << 2) | col_lower;
+ /* add row bit 13 */
+ row = ((column >> 3) << 13) | row_lower;
+
+ if (dump_addr)
+ dev_info(adev->dev,
+ "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
+ soc_pa, row, col, bank, channel_index);
+
+ if (err_data)
+ amdgpu_umc_fill_error_record(err_data, err_addr,
+ soc_pa, channel_index, umc_inst);
}
- if (dump_addr)
- umc_v12_0_dump_addr_info(adev, &addr_out, err_addr);
-
- *addr = addr_out.pa.pa;
-
- return 0;
+out:
+ return ret;
}
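A self-contained sketch of the retirement loop above: the four loop bits are cleared in the base physical address and all 2^4 combinations are re-applied, giving the 16 candidate pages per channel. The bit positions are the NPS1 values from umc_v12_0.h; the stand-alone program is illustrative only.

#include <stdint.h>
#include <stdio.h>

#define RETIRE_LOOP_BITS 4

int main(void)
{
	const uint32_t loop_bits[RETIRE_LOOP_BITS] = { 15, 16, 21, 35 }; /* C2, C3, C4, R13 */
	uint64_t base_pa = 0x123456789ULL;
	uint64_t column, pa;
	int i;

	/* clear the loop bits first, as the driver does */
	for (i = 0; i < RETIRE_LOOP_BITS; i++)
		base_pa &= ~(1ULL << loop_bits[i]);

	/* then enumerate every combination of those bits */
	for (column = 0; column < (1ULL << RETIRE_LOOP_BITS); column++) {
		pa = base_pa;
		for (i = 0; i < RETIRE_LOOP_BITS; i++)
			pa |= ((column >> i) & 0x1ULL) << loop_bits[i];
		printf("candidate retired page PA: 0x%llx\n", (unsigned long long)pa);
	}

	return 0;
}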
static int umc_v12_0_query_error_address(struct amdgpu_device *adev,
@@ -374,7 +314,7 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev,
addr_in.ma.umc_inst = umc_inst;
addr_in.ma.node_inst = node_inst;
- umc_v12_0_convert_error_address(adev, err_data, &addr_in);
+ umc_v12_0_convert_error_address(adev, err_data, &addr_in, NULL, true);
}
/* clear umc status */
@@ -526,6 +466,9 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL];
uint64_t err_addr, pa_addr = 0;
struct ras_ecc_err *ecc_err;
+ struct ta_ras_query_address_output addr_out;
+ enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE;
+ uint32_t shift_bit = UMC_V12_0_PA_C4_BIT;
int count, ret, i;
hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID);
@@ -552,10 +495,10 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
MCA_IPID_2_UMC_CH(ipid),
err_addr);
- ret = umc_v12_0_convert_mca_to_addr(adev,
+ ret = amdgpu_umc_mca_to_addr(adev,
err_addr, MCA_IPID_2_UMC_CH(ipid),
MCA_IPID_2_UMC_INST(ipid), MCA_IPID_2_DIE_ID(ipid),
- MCA_IPID_2_SOCKET_ID(ipid), &pa_addr, true);
+ MCA_IPID_2_SOCKET_ID(ipid), &addr_out, true);
if (ret)
return ret;
@@ -563,14 +506,21 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
if (!ecc_err)
return -ENOMEM;
+ pa_addr = addr_out.pa.pa;
ecc_err->status = status;
ecc_err->ipid = ipid;
ecc_err->addr = addr;
- ecc_err->pa_pfn = UMC_V12_ADDR_MASK_BAD_COLS(pa_addr) >> AMDGPU_GPU_PAGE_SHIFT;
+ ecc_err->pa_pfn = pa_addr >> AMDGPU_GPU_PAGE_SHIFT;
+ ecc_err->channel_idx = addr_out.pa.channel_idx;
+
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ if (nps == AMDGPU_NPS4_PARTITION_MODE)
+ shift_bit = UMC_V12_0_PA_B0_BIT;
/* If converted pa_pfn is 0, use pa C4 pfn. */
if (!ecc_err->pa_pfn)
- ecc_err->pa_pfn = BIT_ULL(UMC_V12_0_PA_C4_BIT) >> AMDGPU_GPU_PAGE_SHIFT;
+ ecc_err->pa_pfn = BIT_ULL(shift_bit) >> AMDGPU_GPU_PAGE_SHIFT;
ret = amdgpu_umc_logs_ecc_err(adev, &con->umc_ecc_log.de_page_tree, ecc_err);
if (ret) {
@@ -586,7 +536,7 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
con->umc_ecc_log.de_queried_count++;
memset(page_pfn, 0, sizeof(page_pfn));
- count = umc_v12_0_lookup_bad_pages_in_a_row(adev,
+ count = amdgpu_umc_lookup_bad_pages_in_a_row(adev,
pa_addr,
page_pfn, ARRAY_SIZE(page_pfn));
if (count <= 0) {
@@ -629,7 +579,7 @@ static int umc_v12_0_fill_error_record(struct amdgpu_device *adev,
return -EINVAL;
memset(page_pfn, 0, sizeof(page_pfn));
- count = umc_v12_0_lookup_bad_pages_in_a_row(adev,
+ count = amdgpu_umc_lookup_bad_pages_in_a_row(adev,
ecc_err->pa_pfn << AMDGPU_GPU_PAGE_SHIFT,
page_pfn, ARRAY_SIZE(page_pfn));
@@ -637,7 +587,7 @@ static int umc_v12_0_fill_error_record(struct amdgpu_device *adev,
ret = amdgpu_umc_fill_error_record(err_data,
ecc_err->addr,
page_pfn[i] << AMDGPU_GPU_PAGE_SHIFT,
- MCA_IPID_2_UMC_CH(ecc_err->ipid),
+ ecc_err->channel_idx,
MCA_IPID_2_UMC_INST(ecc_err->ipid));
if (ret)
break;
@@ -676,6 +626,31 @@ static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev,
mutex_unlock(&con->umc_ecc_log.lock);
}
+static uint32_t umc_v12_0_get_die_id(struct amdgpu_device *adev,
+ uint64_t mca_addr, uint64_t retired_page)
+{
+ uint32_t die = 0;
+
+ /* we only calculate die id for nps1 mode right now */
+ die += ((((retired_page >> 12) & 0x1ULL)^
+ ((retired_page >> 20) & 0x1ULL) ^
+ ((retired_page >> 27) & 0x1ULL) ^
+ ((retired_page >> 34) & 0x1ULL) ^
+ ((retired_page >> 41) & 0x1ULL)) << 0);
+
+ /* the original PA_C4 and PA_R13 may be cleared in retired_page, so
+ * get them from mca_addr.
+ */
+ die += ((((retired_page >> 13) & 0x1ULL) ^
+ ((mca_addr >> 5) & 0x1ULL) ^
+ ((retired_page >> 28) & 0x1ULL) ^
+ ((mca_addr >> 23) & 0x1ULL) ^
+ ((retired_page >> 42) & 0x1ULL)) << 1);
+ die &= 3;
+
+ return die;
+}
+
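A small worked example of the parity computation above, with numbers chosen purely for illustration:

/* Illustrative values only:
 *   retired_page = 0x1000 (only PA bit 12 set)
 *   mca_addr     = 0x20   (only MCA bit 5 set)
 * die bit 0 = bit12 ^ bit20 ^ bit27 ^ bit34 ^ bit41 = 1
 * die bit 1 = bit13 ^ ma_bit5 ^ bit28 ^ ma_bit23 ^ bit42 = 1
 * die id    = 0b11 = 3
 */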
struct amdgpu_umc_ras umc_v12_0_ras = {
.ras_block = {
.hw_ops = &umc_v12_0_ras_hw_ops,
@@ -686,5 +661,7 @@ struct amdgpu_umc_ras umc_v12_0_ras = {
.ecc_info_query_ras_error_address = umc_v12_0_query_ras_ecc_err_addr,
.check_ecc_err_status = umc_v12_0_check_ecc_err_status,
.update_ecc_status = umc_v12_0_update_ecc_status,
+ .convert_ras_err_addr = umc_v12_0_convert_error_address,
+ .get_die_id_from_pa = umc_v12_0_get_die_id,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
index be5598d76c1d..9298018d938f 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
@@ -55,12 +55,24 @@
#define UMC_V12_0_NA_MAP_PA_NUM 8
/* R13 bit shift should be considered, double the number */
#define UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL (UMC_V12_0_NA_MAP_PA_NUM * 2)
+/* C2, C3, C4 and R13: four bits in the SOC physical address are looped over during page retirement */
+#define UMC_V12_0_RETIRE_LOOP_BITS 4
/* column bits in SOC physical address */
#define UMC_V12_0_PA_C2_BIT 15
+#define UMC_V12_0_PA_C3_BIT 16
#define UMC_V12_0_PA_C4_BIT 21
/* row bits in SOC physical address */
+#define UMC_V12_0_PA_R0_BIT 22
+#define UMC_V12_0_PA_R11_BIT 33
#define UMC_V12_0_PA_R13_BIT 35
+/* channel bit in SOC physical address */
+#define UMC_V12_0_PA_CH4_BIT 12
+#define UMC_V12_0_PA_CH5_BIT 13
+/* bank bit in SOC physical address */
+#define UMC_V12_0_PA_B0_BIT 19
+/* row bits in MCA address */
+#define UMC_V12_0_MA_R0_BIT 10
#define MCA_UMC_HWID_V12_0 0x96
#define MCA_UMC_MCATYPE_V12_0 0x0
@@ -81,11 +93,6 @@
(((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo) & 0x1) << 2) | \
(REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) & 0x03))
-#define UMC_V12_ADDR_MASK_BAD_COLS(addr) \
- ((addr) & ~((0x3ULL << UMC_V12_0_PA_C2_BIT) | \
- (0x1ULL << UMC_V12_0_PA_C4_BIT) | \
- (0x1ULL << UMC_V12_0_PA_R13_BIT)))
-
bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c
new file mode 100644
index 000000000000..eaca10a3c4a9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v8_14.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
+#include "amdgpu.h"
+#include "umc/umc_8_14_0_offset.h"
+#include "umc/umc_8_14_0_sh_mask.h"
+
+static inline uint32_t get_umc_v8_14_reg_offset(struct amdgpu_device *adev,
+ uint32_t umc_inst,
+ uint32_t ch_inst)
+{
+ return adev->umc.channel_offs * ch_inst + UMC_V8_14_INST_DIST * umc_inst;
+}
+
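A worked example of the offset arithmetic above, assuming adev->umc.channel_offs is programmed with UMC_V8_14_PER_CHANNEL_OFFSET from the header added below:

/* Illustrative arithmetic only:
 *   channel_offs = UMC_V8_14_PER_CHANNEL_OFFSET = 0x400
 *   umc_inst = 1, ch_inst = 1
 *   offset   = 0x400 * 1 + UMC_V8_14_INST_DIST (0x40000) * 1 = 0x40400
 * The register accessors below then scale this dword offset by 4 for the
 * PCIE byte address, e.g. RREG32_PCIE((reg + 0x40400) * 4).
 */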
+static int umc_v8_14_clear_error_count_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint32_t ecc_err_cnt_addr;
+ uint32_t umc_reg_offset =
+ get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst);
+
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt);
+
+ /* clear error count */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
+ UMC_V8_14_CE_CNT_INIT);
+
+ return 0;
+}
+
+static void umc_v8_14_clear_error_count(struct amdgpu_device *adev)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_14_clear_error_count_per_channel, NULL);
+}
+
+static void umc_v8_14_query_correctable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint32_t ecc_err_cnt, ecc_err_cnt_addr;
+
+ /* UMC 8_14 registers */
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt);
+
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
+ *error_count +=
+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_GeccErrCnt, GeccErrCnt) -
+ UMC_V8_14_CE_CNT_INIT);
+}
+
+static void umc_v8_14_query_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint32_t ecc_err_cnt, ecc_err_cnt_addr;
+ /* UMC 8_14 registers */
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt);
+
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
+ *error_count +=
+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_GeccErrCnt, GeccUnCorrErrCnt) -
+ UMC_V8_14_CE_CNT_INIT);
+}
+
+static int umc_v8_14_query_error_count_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+ uint32_t umc_reg_offset =
+ get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst);
+
+ umc_v8_14_query_correctable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ce_count));
+ umc_v8_14_query_uncorrectable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ue_count));
+
+ return 0;
+}
+
+static void umc_v8_14_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_14_query_error_count_per_channel, ras_error_status);
+
+ umc_v8_14_clear_error_count(adev);
+}
+
+static int umc_v8_14_err_cnt_init_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+ uint32_t ecc_err_cnt_addr;
+ uint32_t umc_reg_offset =
+ get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst);
+
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt);
+
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
+
+ /* set ce error interrupt type to APIC based interrupt */
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_GeccErrCntSel,
+ GeccErrInt, 0x1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ /* set error count to initial value */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_14_CE_CNT_INIT);
+
+ return 0;
+}
+
+static void umc_v8_14_err_cnt_init(struct amdgpu_device *adev)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_14_err_cnt_init_per_channel, NULL);
+}
+
+const struct amdgpu_ras_block_hw_ops umc_v8_14_ras_hw_ops = {
+ .query_ras_error_count = umc_v8_14_query_ras_error_count,
+};
+
+struct amdgpu_umc_ras umc_v8_14_ras = {
+ .ras_block = {
+ .hw_ops = &umc_v8_14_ras_hw_ops,
+ },
+ .err_cnt_init = umc_v8_14_err_cnt_init,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h
new file mode 100644
index 000000000000..20a258f0017a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V8_14_H__
+#define __UMC_V8_14_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+/* number of umc channel instances with memory map register access */
+#define UMC_V8_14_CHANNEL_INSTANCE_NUM 2
+/* number of umc instances with memory map register access */
+#define UMC_V8_14_UMC_INSTANCE_NUM(adev) ((adev)->umc.node_inst_num)
+
+/* Total channel instances for all available umc nodes */
+#define UMC_V8_14_TOTAL_CHANNEL_NUM(adev) \
+ (UMC_V8_14_CHANNEL_INSTANCE_NUM * (adev)->gmc.num_umc)
+
+/* UMC register per channel offset */
+#define UMC_V8_14_PER_CHANNEL_OFFSET 0x400
+
+#define UMC_V8_14_INST_DIST 0x40000
+
+/* EccErrCnt max value */
+#define UMC_V8_14_CE_CNT_MAX 0xffff
+/* umc ce interrupt threshold */
+#define UMC_V8_14_CE_INT_THRESHOLD 0xffff
+/* umc ce count initial value */
+#define UMC_V8_14_CE_CNT_INIT (UMC_V8_14_CE_CNT_MAX - UMC_V8_14_CE_INT_THRESHOLD)
+
+extern struct amdgpu_umc_ras umc_v8_14_ras;
+#endif
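/*
 * Editor's note (illustrative, not part of the patch): the counter registers
 * are seeded with UMC_V8_14_CE_CNT_INIT by err_cnt_init so that the hardware
 * raises the CE interrupt after UMC_V8_14_CE_INT_THRESHOLD further
 * correctable errors, and the query path recovers the real count by
 * subtracting the seed again:
 *
 *   UMC_V8_14_CE_CNT_INIT = CE_CNT_MAX - CE_INT_THRESHOLD = 0xffff - 0xffff = 0
 *   reported_ce_count    += GeccErrCnt - UMC_V8_14_CE_CNT_INIT
 *
 * With both macros at 0xffff the seed happens to be zero, but keeping the
 * subtraction means the code still reads correctly if the interrupt
 * threshold is ever lowered.
 */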
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
index bdbca25d80c4..5830e799c0a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
@@ -790,13 +790,13 @@ static int uvd_v3_1_soft_reset(struct amdgpu_ip_block *ip_block)
return uvd_v3_1_start(adev);
}
-static int uvd_v3_1_set_clockgating_state(void *handle,
+static int uvd_v3_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int uvd_v3_1_set_powergating_state(void *handle,
+static int uvd_v3_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
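/*
 * Editor's sketch (illustrative only): the change repeated across the
 * UVD/VCE/VCN files below swaps the opaque void *handle for the IP block
 * pointer in the clock- and powergating callbacks, so the device is reached
 * through ip_block->adev instead of a cast, and callbacks no longer need the
 * amdgpu_device_ip_get_ip_block() lookup to find their own block.  The
 * function name here is hypothetical.
 */
static int example_set_clockgating_state(struct amdgpu_ip_block *ip_block,
					 enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = ip_block->adev;	/* was (struct amdgpu_device *)handle */

	/* gate or ungate clocks for this IP block depending on 'state' */
	(void)adev;
	return 0;
}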
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index a836dc9cfcad..f93079e09215 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -44,7 +44,7 @@ static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev);
static void uvd_v4_2_set_irq_funcs(struct amdgpu_device *adev);
static int uvd_v4_2_start(struct amdgpu_device *adev);
static void uvd_v4_2_stop(struct amdgpu_device *adev);
-static int uvd_v4_2_set_clockgating_state(void *handle,
+static int uvd_v4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
static void uvd_v4_2_set_dcm(struct amdgpu_device *adev,
bool sw_mode);
@@ -708,13 +708,13 @@ static int uvd_v4_2_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int uvd_v4_2_set_clockgating_state(void *handle,
+static int uvd_v4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int uvd_v4_2_set_powergating_state(void *handle,
+static int uvd_v4_2_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the UVD block.
@@ -724,7 +724,7 @@ static int uvd_v4_2_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE) {
uvd_v4_2_stop(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index ab55fae3569e..050a0f309390 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -42,7 +42,7 @@ static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev);
static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev);
static int uvd_v5_0_start(struct amdgpu_device *adev);
static void uvd_v5_0_stop(struct amdgpu_device *adev);
-static int uvd_v5_0_set_clockgating_state(void *handle,
+static int uvd_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev,
bool enable);
@@ -155,7 +155,7 @@ static int uvd_v5_0_hw_init(struct amdgpu_ip_block *ip_block)
int r;
amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
- uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+ uvd_v5_0_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE);
uvd_v5_0_enable_mgcg(adev, true);
r = amdgpu_ring_test_helper(ring);
@@ -790,16 +790,11 @@ static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev,
}
}
-static int uvd_v5_0_set_clockgating_state(void *handle,
+static int uvd_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
- struct amdgpu_ip_block *ip_block;
-
- ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD);
- if (!ip_block)
- return -EINVAL;
if (enable) {
/* wait for STATUS to clear */
@@ -817,7 +812,7 @@ static int uvd_v5_0_set_clockgating_state(void *handle,
return 0;
}
-static int uvd_v5_0_set_powergating_state(void *handle,
+static int uvd_v5_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the UVD block.
@@ -827,7 +822,7 @@ static int uvd_v5_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 0;
if (state == AMD_PG_STATE_GATE) {
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 39f8c3d3a135..d9d036ee51fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -48,7 +48,7 @@ static void uvd_v6_0_set_irq_funcs(struct amdgpu_device *adev);
static int uvd_v6_0_start(struct amdgpu_device *adev);
static void uvd_v6_0_stop(struct amdgpu_device *adev);
static void uvd_v6_0_set_sw_clock_gating(struct amdgpu_device *adev);
-static int uvd_v6_0_set_clockgating_state(void *handle,
+static int uvd_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev,
bool enable);
@@ -467,7 +467,7 @@ static int uvd_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
int i, r;
amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
- uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+ uvd_v6_0_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE);
uvd_v6_0_enable_mgcg(adev, true);
r = amdgpu_ring_test_helper(ring);
@@ -1450,17 +1450,12 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev,
}
}
-static int uvd_v6_0_set_clockgating_state(void *handle,
+static int uvd_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_ip_block *ip_block;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
- ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD);
- if (!ip_block)
- return -EINVAL;
-
if (enable) {
/* wait for STATUS to clear */
if (uvd_v6_0_wait_for_idle(ip_block))
@@ -1476,7 +1471,7 @@ static int uvd_v6_0_set_clockgating_state(void *handle,
return 0;
}
-static int uvd_v6_0_set_powergating_state(void *handle,
+static int uvd_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the UVD block.
@@ -1486,7 +1481,7 @@ static int uvd_v6_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 0;
WREG32(mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 079131aeb2f7..9d237b5937fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -1288,7 +1288,7 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
struct amdgpu_job *job,
struct amdgpu_ib *ib)
{
- struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
+ struct amdgpu_ring *ring = amdgpu_job_ring(job);
unsigned i;
/* No patching necessary for the first instance */
@@ -1511,7 +1511,7 @@ static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int uvd_v7_0_set_clockgating_state(void *handle,
+static int uvd_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
/* needed for driver unload*/
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index c1ed91b39415..c633b7ff2943 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -578,13 +578,13 @@ static int vce_v2_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int vce_v2_0_set_clockgating_state(void *handle,
+static int vce_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
bool sw_cg = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE) {
gate = true;
@@ -596,7 +596,7 @@ static int vce_v2_0_set_clockgating_state(void *handle,
return 0;
}
-static int vce_v2_0_set_powergating_state(void *handle,
+static int vce_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCE block.
@@ -606,7 +606,7 @@ static int vce_v2_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE)
return vce_v2_0_stop(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 6bb318a06f19..f8bddcd19b68 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -65,7 +65,7 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
static int vce_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block);
-static int vce_v3_0_set_clockgating_state(void *handle,
+static int vce_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
/**
* vce_v3_0_ring_get_rptr - get read pointer
@@ -497,7 +497,7 @@ static int vce_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
return r;
vce_v3_0_stop(adev);
- return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
+ return vce_v3_0_set_clockgating_state(ip_block, AMD_CG_STATE_GATE);
}
static int vce_v3_0_suspend(struct amdgpu_ip_block *ip_block)
@@ -760,10 +760,10 @@ static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int vce_v3_0_set_clockgating_state(void *handle,
+static int vce_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
int i;
@@ -801,7 +801,7 @@ static int vce_v3_0_set_clockgating_state(void *handle,
return 0;
}
-static int vce_v3_0_set_powergating_state(void *handle,
+static int vce_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCE block.
@@ -811,7 +811,7 @@ static int vce_v3_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 0;
if (state == AMD_PG_STATE_GATE) {
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 79ee555768a5..335bda64ff5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -684,14 +684,14 @@ static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}
-static int vce_v4_0_set_clockgating_state(void *handle,
+static int vce_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
/* needed for driver unload*/
return 0;
}
-static int vce_v4_0_set_powergating_state(void *handle,
+static int vce_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCE block.
@@ -701,7 +701,7 @@ static int vce_v4_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE)
return vce_v4_0_stop(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 10e99c926fb8..5ea96c983517 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -85,7 +85,8 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev);
static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state);
+static int vcn_v1_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
int inst_idx, struct dpg_pause_state *new_state);
@@ -281,7 +282,7 @@ static int vcn_v1_0_hw_fini(struct amdgpu_ip_block *ip_block)
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
(adev->vcn.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(VCN, 0, mmUVD_STATUS))) {
- vcn_v1_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ vcn_v1_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
}
return 0;
@@ -303,7 +304,7 @@ static int vcn_v1_0_suspend(struct amdgpu_ip_block *ip_block)
idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.idle_work);
if (idle_work_unexecuted) {
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, 0);
}
r = vcn_v1_0_hw_fini(ip_block);
@@ -344,7 +345,7 @@ static int vcn_v1_0_resume(struct amdgpu_ip_block *ip_block)
*/
static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4);
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -411,7 +412,7 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev)
static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4);
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -1394,15 +1395,15 @@ static int vcn_v1_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
return ret;
}
-static int vcn_v1_0_set_clockgating_state(void *handle,
+static int vcn_v1_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
/* wait for STATUS to clear */
- if (!vcn_v1_0_is_idle(handle))
+ if (!vcn_v1_0_is_idle(adev))
return -EBUSY;
vcn_v1_0_enable_clock_gating(adev);
} else {
@@ -1799,7 +1800,7 @@ static void vcn_v1_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t coun
}
}
-static int vcn_v1_0_set_powergating_state(void *handle,
+static int vcn_v1_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCN block.
@@ -1810,7 +1811,7 @@ static int vcn_v1_0_set_powergating_state(void *handle,
* the smc and the hw blocks
*/
int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == adev->vcn.cur_state)
return 0;
@@ -1856,7 +1857,7 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work)
if (fences == 0) {
amdgpu_gfx_off_ctrl(adev, true);
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, 0);
else
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_GATE);
@@ -1886,7 +1887,7 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks)
if (set_clocks) {
amdgpu_gfx_off_ctrl(adev, false);
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ amdgpu_dpm_enable_vcn(adev, true, 0);
else
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_UNGATE);
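/*
 * Editor's sketch (illustrative only): amdgpu_dpm_enable_uvd() is replaced
 * by amdgpu_dpm_enable_vcn(), which takes an instance index, so the
 * multi-instance VCN parts below gate/ungate each instance in a loop.  The
 * helper name here is hypothetical; the real callers open-code the loop.
 */
static void example_vcn_dpm_set(struct amdgpu_device *adev, bool enable)
{
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->pm.dpm_enabled)
			amdgpu_dpm_enable_vcn(adev, enable, i);
	}
}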
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index e0322cbca3ec..e42cfc731ad8 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -92,7 +92,7 @@ static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_0[] = {
static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v2_0_set_powergating_state(void *handle,
+static int vcn_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
int inst_idx, struct dpg_pause_state *new_state);
@@ -318,7 +318,7 @@ static int vcn_v2_0_hw_fini(struct amdgpu_ip_block *ip_block)
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
(adev->vcn.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(VCN, 0, mmUVD_STATUS)))
- vcn_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ vcn_v2_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
return 0;
}
@@ -372,7 +372,7 @@ static int vcn_v2_0_resume(struct amdgpu_ip_block *ip_block)
*/
static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4);
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
uint32_t offset;
if (amdgpu_sriov_vf(adev))
@@ -428,7 +428,7 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirect)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4);
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -978,7 +978,7 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
int i, j, r;
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ amdgpu_dpm_enable_vcn(adev, true, 0);
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
return vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram);
@@ -1235,7 +1235,7 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev)
power_off:
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, 0);
return 0;
}
@@ -1335,10 +1335,10 @@ static int vcn_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
return ret;
}
-static int vcn_v2_0_set_clockgating_state(void *handle,
+static int vcn_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
@@ -1346,7 +1346,7 @@ static int vcn_v2_0_set_clockgating_state(void *handle,
if (enable) {
/* wait for STATUS to clear */
- if (!vcn_v2_0_is_idle(handle))
+ if (!vcn_v2_0_is_idle(adev))
return -EBUSY;
vcn_v2_0_enable_clock_gating(adev);
} else {
@@ -1796,7 +1796,7 @@ int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring)
}
-static int vcn_v2_0_set_powergating_state(void *handle,
+static int vcn_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCN block.
@@ -1807,7 +1807,7 @@ static int vcn_v2_0_set_powergating_state(void *handle,
* the smc and the hw blocks
*/
int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev)) {
adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
@@ -1920,7 +1920,7 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev)
init_table += header->vcn_table_offset;
- size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4);
+ size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
MMSCH_V2_0_INSERT_DIRECT_RD_MOD_WT(
SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
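/*
 * Editor's note (illustrative only): throughout these VCN files the firmware
 * pointer moves from the per-device array adev->vcn.fw[i] into the
 * per-instance state adev->vcn.inst[i].fw, presumably so that each VCN
 * instance carries its own firmware handle.  The size calculations are
 * otherwise unchanged, e.g.:
 *
 *   size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
 */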
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 6aa08281d094..b518202955ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -95,7 +95,7 @@ static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_5[] = {
static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v2_5_set_powergating_state(void *handle,
+static int vcn_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
int inst_idx, struct dpg_pause_state *new_state);
@@ -399,7 +399,7 @@ static int vcn_v2_5_hw_fini(struct amdgpu_ip_block *ip_block)
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
(adev->vcn.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(VCN, i, mmUVD_STATUS)))
- vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ vcn_v2_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
amdgpu_irq_put(adev, &adev->vcn.inst[i].ras_poison_irq, 0);
@@ -465,7 +465,7 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << i))
continue;
- size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4);
+ size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
/* cache window 0: fw */
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
@@ -514,7 +514,7 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst_idx]->size + 4);
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst_idx].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -1012,8 +1012,10 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
uint32_t rb_bufsz, tmp;
int i, j, k, r;
- if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, true, i);
+ }
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
@@ -1285,7 +1287,7 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS),
~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
- size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4);
+ size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
/* mc resume*/
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
MMSCH_V1_0_INSERT_DIRECT_WT(
@@ -1485,8 +1487,10 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev)
~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
}
- if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, false, i);
+ }
return 0;
}
@@ -1778,6 +1782,7 @@ static bool vcn_v2_5_is_idle(void *handle)
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
+
ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE);
}
@@ -1801,17 +1806,17 @@ static int vcn_v2_5_wait_for_idle(struct amdgpu_ip_block *ip_block)
return ret;
}
-static int vcn_v2_5_set_clockgating_state(void *handle,
+static int vcn_v2_5_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
return 0;
if (enable) {
- if (!vcn_v2_5_is_idle(handle))
+ if (!vcn_v2_5_is_idle(adev))
return -EBUSY;
vcn_v2_5_enable_clock_gating(adev);
} else {
@@ -1821,10 +1826,10 @@ static int vcn_v2_5_set_clockgating_state(void *handle,
return 0;
}
-static int vcn_v2_5_set_powergating_state(void *handle,
+static int vcn_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if (amdgpu_sriov_vf(adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 6732ad7f16f5..63ddd4cca910 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -105,7 +105,7 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev);
static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v3_0_set_powergating_state(void *handle,
+static int vcn_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
int inst_idx, struct dpg_pause_state *new_state);
@@ -430,9 +430,9 @@ static int vcn_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
if (!amdgpu_sriov_vf(adev)) {
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
- (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
- RREG32_SOC15(VCN, i, mmUVD_STATUS))) {
- vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, i, mmUVD_STATUS))) {
+ vcn_v3_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
}
}
}
@@ -490,7 +490,7 @@ static int vcn_v3_0_resume(struct amdgpu_ip_block *ip_block)
*/
static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst]->size + 4);
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -540,7 +540,7 @@ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst)
static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst_idx]->size + 4);
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst_idx].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -1141,8 +1141,10 @@ static int vcn_v3_0_start(struct amdgpu_device *adev)
uint32_t rb_bufsz, tmp;
int i, j, k, r;
- if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, true, i);
+ }
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
@@ -1373,7 +1375,7 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
mmUVD_STATUS),
~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
- cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4);
+ cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
@@ -1632,8 +1634,10 @@ static int vcn_v3_0_stop(struct amdgpu_device *adev)
vcn_v3_0_enable_static_power_gating(adev, i);
}
- if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, false, i);
+ }
return 0;
}
@@ -2132,10 +2136,10 @@ static int vcn_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
return ret;
}
-static int vcn_v3_0_set_clockgating_state(void *handle,
+static int vcn_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = state == AMD_CG_STATE_GATE;
int i;
@@ -2155,10 +2159,10 @@ static int vcn_v3_0_set_clockgating_state(void *handle,
return 0;
}
-static int vcn_v3_0_set_powergating_state(void *handle,
+static int vcn_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
/* for SRIOV, guest should not control VCN Power-gating
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index fcc8511e91ee..00551d6f0370 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -96,7 +96,7 @@ static int amdgpu_ih_clientid_vcns[] = {
static int vcn_v4_0_start_sriov(struct amdgpu_device *adev);
static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v4_0_set_powergating_state(void *handle,
+static int vcn_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev,
int inst_idx, struct dpg_pause_state *new_state);
@@ -366,9 +366,9 @@ static int vcn_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
continue;
if (!amdgpu_sriov_vf(adev)) {
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
- (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
- RREG32_SOC15(VCN, i, regUVD_STATUS))) {
- vcn_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, i, regUVD_STATUS))) {
+ vcn_v4_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
}
}
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
@@ -431,7 +431,7 @@ static void vcn_v4_0_mc_resume(struct amdgpu_device *adev, int inst)
uint32_t offset, size;
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data;
size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
/* cache window 0: fw */
@@ -491,7 +491,7 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
{
uint32_t offset, size;
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
/* cache window 0: fw */
@@ -1097,8 +1097,10 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
uint32_t tmp;
int i, j, k, r;
- if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, true, i);
+ }
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
@@ -1341,7 +1343,7 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
regUVD_STATUS),
~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
- cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4);
+ cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
@@ -1623,8 +1625,10 @@ static int vcn_v4_0_stop(struct amdgpu_device *adev)
vcn_v4_0_enable_static_power_gating(adev, i);
}
- if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, false, i);
+ }
return 0;
}
@@ -2007,14 +2011,15 @@ static int vcn_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
/**
* vcn_v4_0_set_clockgating_state - set VCN block clockgating state
*
- * @handle: amdgpu_device pointer
+ * @ip_block: amdgpu_ip_block pointer
* @state: clock gating state
*
* Set VCN block clockgating state
*/
-static int vcn_v4_0_set_clockgating_state(void *handle, enum amd_clockgating_state state)
+static int vcn_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = state == AMD_CG_STATE_GATE;
int i;
@@ -2037,14 +2042,15 @@ static int vcn_v4_0_set_clockgating_state(void *handle, enum amd_clockgating_sta
/**
* vcn_v4_0_set_powergating_state - set VCN block powergating state
*
- * @handle: amdgpu_device pointer
+ * @ip_block: amdgpu_ip_block pointer
* @state: power gating state
*
* Set VCN block powergating state
*/
-static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state)
+static int vcn_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
/* for SRIOV, guest should not control VCN Power-gating
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 3f69b9b2bcd0..ecdc027f8220 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -87,7 +87,7 @@ static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = {
static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v4_0_3_set_powergating_state(void *handle,
+static int vcn_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev,
int inst_idx, struct dpg_pause_state *new_state);
@@ -349,7 +349,7 @@ static int vcn_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block)
cancel_delayed_work_sync(&adev->vcn.idle_work);
if (adev->vcn.cur_state != AMD_PG_STATE_GATE)
- vcn_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ vcn_v4_0_3_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
return 0;
}
@@ -407,7 +407,7 @@ static void vcn_v4_0_3_mc_resume(struct amdgpu_device *adev, int inst_idx)
uint32_t offset, size, vcn_inst;
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
vcn_inst = GET_INST(VCN, inst_idx);
@@ -482,7 +482,7 @@ static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i
uint32_t offset, size;
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
/* cache window 0: fw */
@@ -957,6 +957,8 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
vcn_inst = GET_INST(VCN, i);
+ vcn_v4_0_3_fw_shared_init(adev, vcn_inst);
+
memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
header.version = MMSCH_VERSION;
header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
@@ -969,7 +971,7 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
- cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4);
+ cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
@@ -1121,8 +1123,10 @@ static int vcn_v4_0_3_start(struct amdgpu_device *adev)
int i, j, k, r, vcn_inst;
uint32_t tmp;
- if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, true, i);
+ }
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
@@ -1395,8 +1399,10 @@ static int vcn_v4_0_3_stop(struct amdgpu_device *adev)
vcn_v4_0_3_enable_clock_gating(adev, i);
}
Done:
- if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, false, i);
+ }
return 0;
}
@@ -1616,15 +1622,15 @@ static int vcn_v4_0_3_wait_for_idle(struct amdgpu_ip_block *ip_block)
/* vcn_v4_0_3_set_clockgating_state - set VCN block clockgating state
*
- * @handle: amdgpu_device pointer
+ * @ip_block: amdgpu_ip_block pointer
* @state: clock gating state
*
* Set VCN block clockgating state
*/
-static int vcn_v4_0_3_set_clockgating_state(void *handle,
+static int vcn_v4_0_3_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = state == AMD_CG_STATE_GATE;
int i;
@@ -1644,15 +1650,15 @@ static int vcn_v4_0_3_set_clockgating_state(void *handle,
/**
* vcn_v4_0_3_set_powergating_state - set VCN block powergating state
*
- * @handle: amdgpu_device pointer
+ * @ip_block: amdgpu_ip_block pointer
* @state: power gating state
*
* Set VCN block powergating state
*/
-static int vcn_v4_0_3_set_powergating_state(void *handle,
+static int vcn_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
/* for SRIOV, guest should not control VCN Power-gating
@@ -1911,9 +1917,94 @@ static const struct amdgpu_ras_block_hw_ops vcn_v4_0_3_ras_hw_ops = {
.reset_ras_error_count = vcn_v4_0_3_reset_ras_error_count,
};
+static int vcn_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct aca_bank_info info;
+ u64 misc0;
+ int ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
+ 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE,
+ ACA_REG__MISC0__ERRCNT(misc0));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+/* refer to the SMU driver interface header file for these error codes */
+static int vcn_v4_0_3_err_codes[] = {
+ 14, 15, /* VCN */
+};
+
+static bool vcn_v4_0_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ u32 instlo;
+
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1);
+
+ if (instlo != mmSMNAID_AID0_MCA_SMU)
+ return false;
+
+ if (aca_bank_check_error_codes(handle->adev, bank,
+ vcn_v4_0_3_err_codes,
+ ARRAY_SIZE(vcn_v4_0_3_err_codes)))
+ return false;
+
+ return true;
+}
+
+static const struct aca_bank_ops vcn_v4_0_3_aca_bank_ops = {
+ .aca_bank_parser = vcn_v4_0_3_aca_bank_parser,
+ .aca_bank_is_valid = vcn_v4_0_3_aca_bank_is_valid,
+};
+
+static const struct aca_info vcn_v4_0_3_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK,
+ .bank_ops = &vcn_v4_0_3_aca_bank_ops,
+};
+
+static int vcn_v4_0_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__VCN,
+ &vcn_v4_0_3_aca_info, NULL);
+ if (r)
+ goto late_fini;
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+
+ return r;
+}
+
static struct amdgpu_vcn_ras vcn_v4_0_3_ras = {
.ras_block = {
.hw_ops = &vcn_v4_0_3_ras_hw_ops,
+ .ras_late_init = vcn_v4_0_3_ras_late_init,
},
};
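/*
 * Editor's note (illustrative only): vcn_v4_0_3_aca_bank_is_valid() accepts
 * a bank only when its IPID instance matches the AID0 SMU MCA instance and
 * its error code is one of the listed VCN codes (14, 15).  This assumes
 * aca_bank_check_error_codes() returns 0 on a match, so a non-zero return
 * rejects the bank; roughly:
 *
 *   valid = (instlo == mmSMNAID_AID0_MCA_SMU) &&
 *           (aca_bank_check_error_codes(adev, bank, codes, n) == 0);
 */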
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
index 71961fb3f7ff..23d3c16c9d9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
@@ -95,7 +95,7 @@ static int amdgpu_ih_clientid_vcns[] = {
static void vcn_v4_0_5_set_unified_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v4_0_5_set_powergating_state(void *handle,
+static int vcn_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
static int vcn_v4_0_5_pause_dpg_mode(struct amdgpu_device *adev,
int inst_idx, struct dpg_pause_state *new_state);
@@ -309,7 +309,7 @@ static int vcn_v4_0_5_hw_fini(struct amdgpu_ip_block *ip_block)
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
(adev->vcn.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(VCN, i, regUVD_STATUS))) {
- vcn_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ vcn_v4_0_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
}
}
}
@@ -370,7 +370,7 @@ static void vcn_v4_0_5_mc_resume(struct amdgpu_device *adev, int inst)
uint32_t offset, size;
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data;
size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
/* cache window 0: fw */
@@ -431,7 +431,7 @@ static void vcn_v4_0_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i
uint32_t offset, size;
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
/* cache window 0: fw */
@@ -1000,8 +1000,10 @@ static int vcn_v4_0_5_start(struct amdgpu_device *adev)
uint32_t tmp;
int i, j, k, r;
- if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, true, i);
+ }
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
@@ -1277,8 +1279,10 @@ static int vcn_v4_0_5_stop(struct amdgpu_device *adev)
vcn_v4_0_5_enable_static_power_gating(adev, i);
}
- if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, false, i);
+ }
return 0;
}
@@ -1492,14 +1496,15 @@ static int vcn_v4_0_5_wait_for_idle(struct amdgpu_ip_block *ip_block)
/**
* vcn_v4_0_5_set_clockgating_state - set VCN block clockgating state
*
- * @handle: amdgpu_device pointer
+ * @ip_block: amdgpu_ip_block pointer
* @state: clock gating state
*
* Set VCN block clockgating state
*/
-static int vcn_v4_0_5_set_clockgating_state(void *handle, enum amd_clockgating_state state)
+static int vcn_v4_0_5_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
int i;
@@ -1522,14 +1527,15 @@ static int vcn_v4_0_5_set_clockgating_state(void *handle, enum amd_clockgating_s
/**
* vcn_v4_0_5_set_powergating_state - set VCN block powergating state
*
- * @handle: amdgpu_device pointer
+ * @ip_block: amdgpu_ip_block pointer
* @state: power gating state
*
* Set VCN block powergating state
*/
-static int vcn_v4_0_5_set_powergating_state(void *handle, enum amd_powergating_state state)
+static int vcn_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if (state == adev->vcn.cur_state)
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
index bd3d2bbdc16b..b6d78381ebfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
@@ -32,7 +32,7 @@
#include "vcn/vcn_5_0_0_offset.h"
#include "vcn/vcn_5_0_0_sh_mask.h"
-#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
#include "vcn_v5_0_0.h"
#include <drm/drm_drv.h>
@@ -78,7 +78,7 @@ static int amdgpu_ih_clientid_vcns[] = {
static void vcn_v5_0_0_set_unified_ring_funcs(struct amdgpu_device *adev);
static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v5_0_0_set_powergating_state(void *handle,
+static int vcn_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
static int vcn_v5_0_0_pause_dpg_mode(struct amdgpu_device *adev,
int inst_idx, struct dpg_pause_state *new_state);
@@ -105,6 +105,21 @@ static int vcn_v5_0_0_early_init(struct amdgpu_ip_block *ip_block)
return amdgpu_vcn_early_init(adev);
}
+void vcn_v5_0_0_alloc_ip_dump(struct amdgpu_device *adev)
+{
+ uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0);
+ uint32_t *ptr;
+
+ /* Allocate memory for VCN IP Dump buffer */
+ ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for VCN IP Dump\n");
+ adev->vcn.ip_dump = NULL;
+ } else {
+ adev->vcn.ip_dump = ptr;
+ }
+}
+
/**
* vcn_v5_0_0_sw_init - sw init for VCN block
*
@@ -117,8 +132,6 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block)
struct amdgpu_ring *ring;
struct amdgpu_device *adev = ip_block->adev;
int i, r;
- uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0);
- uint32_t *ptr;
r = amdgpu_vcn_sw_init(adev);
if (r)
@@ -140,13 +153,13 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block)
/* VCN UNIFIED TRAP */
r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
- VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
+ VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
if (r)
return r;
/* VCN POISON TRAP */
r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
- VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq);
+ VCN_5_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq);
if (r)
return r;
@@ -177,14 +190,7 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block)
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
adev->vcn.pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode;
- /* Allocate memory for VCN IP Dump buffer */
- ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL);
- if (!ptr) {
- DRM_ERROR("Failed to allocate memory for VCN IP Dump\n");
- adev->vcn.ip_dump = NULL;
- } else {
- adev->vcn.ip_dump = ptr;
- }
+ vcn_v5_0_0_alloc_ip_dump(adev);
r = amdgpu_vcn_sysfs_reset_mask_init(adev);
if (r)
@@ -283,7 +289,7 @@ static int vcn_v5_0_0_hw_fini(struct amdgpu_ip_block *ip_block)
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
(adev->vcn.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(VCN, i, regUVD_STATUS))) {
- vcn_v5_0_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ vcn_v5_0_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
}
}
}
@@ -344,7 +350,7 @@ static void vcn_v5_0_0_mc_resume(struct amdgpu_device *adev, int inst)
uint32_t offset, size;
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data;
size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
/* cache window 0: fw */
@@ -405,7 +411,7 @@ static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i
uint32_t offset, size;
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
/* cache window 0: fw */
@@ -771,8 +777,10 @@ static int vcn_v5_0_0_start(struct amdgpu_device *adev)
uint32_t tmp;
int i, j, k, r;
- if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, true, i);
+ }
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
@@ -1018,8 +1026,10 @@ static int vcn_v5_0_0_stop(struct amdgpu_device *adev)
vcn_v5_0_0_enable_static_power_gating(adev, i);
}
- if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_vcn(adev, false, i);
+ }
return 0;
}
@@ -1229,14 +1239,15 @@ static int vcn_v5_0_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
/**
* vcn_v5_0_0_set_clockgating_state - set VCN block clockgating state
*
- * @handle: amdgpu_device pointer
+ * @ip_block: amdgpu_ip_block pointer
* @state: clock gating state
*
* Set VCN block clockgating state
*/
-static int vcn_v5_0_0_set_clockgating_state(void *handle, enum amd_clockgating_state state)
+static int vcn_v5_0_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
int i;
@@ -1259,14 +1270,15 @@ static int vcn_v5_0_0_set_clockgating_state(void *handle, enum amd_clockgating_s
/**
* vcn_v5_0_0_set_powergating_state - set VCN block powergating state
*
- * @handle: amdgpu_device pointer
+ * @ip_block: amdgpu_ip_block pointer
* @state: power gating state
*
* Set VCN block powergating state
*/
-static int vcn_v5_0_0_set_powergating_state(void *handle, enum amd_powergating_state state)
+static int vcn_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if (state == adev->vcn.cur_state)
@@ -1312,10 +1324,10 @@ static int vcn_v5_0_0_process_interrupt(struct amdgpu_device *adev, struct amdgp
DRM_DEBUG("IH: VCN TRAP\n");
switch (entry->src_id) {
- case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+ case VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
break;
- case VCN_4_0__SRCID_UVD_POISON:
+ case VCN_5_0__SRCID_UVD_POISON:
amdgpu_vcn_process_poison_irq(adev, source, entry);
break;
default:
@@ -1351,7 +1363,8 @@ static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev)
}
}
-static void vcn_v5_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+void vcn_v5_0_0_print_ip_state(struct amdgpu_ip_block *ip_block,
+ struct drm_printer *p)
{
struct amdgpu_device *adev = ip_block->adev;
int i, j;
@@ -1383,7 +1396,7 @@ static void vcn_v5_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm
}
}
-static void vcn_v5_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
+void vcn_v5_0_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
int i, j;
@@ -1424,8 +1437,8 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = {
.wait_for_idle = vcn_v5_0_0_wait_for_idle,
.set_clockgating_state = vcn_v5_0_0_set_clockgating_state,
.set_powergating_state = vcn_v5_0_0_set_powergating_state,
- .dump_ip_state = vcn_v5_0_dump_ip_state,
- .print_ip_state = vcn_v5_0_print_ip_state,
+ .dump_ip_state = vcn_v5_0_0_dump_ip_state,
+ .print_ip_state = vcn_v5_0_0_print_ip_state,
};
const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h
index 51bbccd4360f..b8927652bc50 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h
@@ -32,6 +32,11 @@
#define VCN_VID_IP_ADDRESS 0x0
#define VCN_AON_IP_ADDRESS 0x30000
+void vcn_v5_0_0_alloc_ip_dump(struct amdgpu_device *adev);
+void vcn_v5_0_0_print_ip_state(struct amdgpu_ip_block *ip_block,
+ struct drm_printer *p);
+void vcn_v5_0_0_dump_ip_state(struct amdgpu_ip_block *ip_block);
+
extern const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block;
#endif /* __VCN_V5_0_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
new file mode 100644
index 000000000000..8b463c977d08
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
@@ -0,0 +1,1118 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_hw_ip.h"
+#include "vcn_v2_0.h"
+
+#include "vcn/vcn_5_0_0_offset.h"
+#include "vcn/vcn_5_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
+#include "vcn_v5_0_0.h"
+#include "vcn_v5_0_1.h"
+
+#include <drm/drm_drv.h>
+
+static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring);
+
+/**
+ * vcn_v5_0_1_early_init - set function pointers and load microcode
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Set ring and irq function pointers
+ * Load microcode from filesystem
+ */
+static int vcn_v5_0_1_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ /* re-use enc ring as unified ring */
+ adev->vcn.num_enc_rings = 1;
+
+ vcn_v5_0_1_set_unified_ring_funcs(adev);
+ vcn_v5_0_1_set_irq_funcs(adev);
+
+ return amdgpu_vcn_early_init(adev);
+}
+
+/**
+ * vcn_v5_0_1_sw_init - sw init for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Load firmware and sw initialization
+ */
+static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, r, vcn_inst;
+
+ r = amdgpu_vcn_sw_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev);
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ /* VCN UNIFIED TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ volatile struct amdgpu_vcn5_fw_shared *fw_shared;
+
+ vcn_inst = GET_INST(VCN, i);
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->use_doorbell = true;
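+		/* each VCN instance gets its own doorbell, spaced 9 slots apart within the VCN doorbell range */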
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * vcn_inst;
+
+ ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
+ sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
+
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, &adev->vcn.inst[i].sched_score);
+ if (r)
+ return r;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+ fw_shared->sq.is_enabled = true;
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+ }
+
+ /* TODO: Add queue reset mask when FW fully supports it */
+ adev->vcn.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+
+ vcn_v5_0_0_alloc_ip_dump(adev);
+
+ return amdgpu_vcn_sysfs_reset_mask_init(adev);
+}
+
+/**
+ * vcn_v5_0_1_sw_fini - sw fini for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Suspend VCN and free up sw allocation
+ */
+static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r, idx;
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = 0;
+ fw_shared->sq.is_enabled = 0;
+ }
+
+ drm_dev_exit(idx);
+ }
+
+ r = amdgpu_vcn_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sw_fini(adev);
+
+ amdgpu_vcn_sysfs_reset_mask_fini(adev);
+
+ kfree(adev->vcn.ip_dump);
+
+ return r;
+}
+
+/**
+ * vcn_v5_0_1_hw_init - start and test VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, r, vcn_inst;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ vcn_inst = GET_INST(VCN, i);
+ ring = &adev->vcn.inst[i].ring_enc[0];
+
+ if (ring->use_doorbell)
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 9 * vcn_inst),
+ adev->vcn.inst[i].aid_id);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Stop the VCN block, mark ring as not ready any more
+ */
+static int vcn_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_suspend - suspend VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * HW fini and suspend VCN block
+ */
+static int vcn_v5_0_1_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = vcn_v5_0_1_hw_fini(ip_block);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_suspend(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v5_0_1_resume - resume VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Resume firmware and hw init VCN block
+ */
+static int vcn_v5_0_1_resume(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ r = vcn_v5_0_1_hw_init(ip_block);
+
+ return r;
+}
+
+/**
+ * vcn_v5_0_1_mc_resume - memory controller programming
+ *
+ * @adev: amdgpu_device pointer
+ * @inst: instance number
+ *
+ * Let the VCN memory controller know its offsets
+ */
+static void vcn_v5_0_1_mc_resume(struct amdgpu_device *adev, int inst)
+{
+ uint32_t offset, size, vcn_inst;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ vcn_inst = GET_INST(VCN, inst);
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr));
+ offset = size;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* non-cache window */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+}
+
+/**
+ * vcn_v5_0_1_mc_resume_dpg_mode - memory controller programming for dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ * @indirect: indirectly write sram
+ *
+ * Let the VCN memory controller know its offsets with dpg mode
+ */
+static void vcn_v5_0_1_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ uint32_t offset, size;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
+ inst_idx].tmr_mc_addr_lo), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
+ inst_idx].tmr_mc_addr_hi), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ offset = size;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ if (!indirect)
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+}
+
+/**
+ * vcn_v5_0_1_disable_clock_gating - disable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ * @inst: instance number
+ *
+ * Disable clock gating for VCN block
+ */
+static void vcn_v5_0_1_disable_clock_gating(struct amdgpu_device *adev, int inst)
+{
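+	/* currently a no-op */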
+}
+
+/**
+ * vcn_v5_0_1_enable_clock_gating - enable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ * @inst: instance number
+ *
+ * Enable clock gating for VCN block
+ */
+static void vcn_v5_0_1_enable_clock_gating(struct amdgpu_device *adev, int inst)
+{
+}
+
+/**
+ * vcn_v5_0_1_start_dpg_mode - VCN start with dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ * @indirect: indirectly write sram
+ *
+ * Start VCN block with dpg mode
+ */
+static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared =
+ adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_ring *ring;
+ int vcn_inst;
+ uint32_t tmp;
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 1,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp);
+
+ if (indirect) {
+ adev->vcn.inst[inst_idx].dpg_sram_curr_addr =
+ (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+ /* Use dummy register 0xDEADBEEF passing AID selection to PSP FW */
+ WREG32_SOC24_DPG_MODE(inst_idx, 0xDEADBEEF,
+ adev->vcn.inst[inst_idx].aid_id, 0, true);
+ }
+
+ /* enable VCPU clock */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interrupt */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_CTRL), tmp, 0, indirect);
+
+ vcn_v5_0_1_mc_resume_dpg_mode(adev, inst_idx, indirect);
+
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_CTRL2), tmp, 0, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+ if (indirect)
+ amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM);
+
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+
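+	/* program the unified encode ring and reset its read/write pointers */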
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / sizeof(uint32_t));
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+ /* Read DB_CTRL to flush the write DB_CTRL command. */
+ RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_start - VCN start
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Start VCN block
+ */
+static int vcn_v5_0_1_start(struct amdgpu_device *adev)
+{
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ struct amdgpu_ring *ring;
+ uint32_t tmp;
+ int i, j, k, r, vcn_inst;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, true);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v5_0_1_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
+ continue;
+ }
+
+ vcn_inst = GET_INST(VCN, i);
+
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp);
+
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ vcn_v5_0_1_mc_resume(adev, i);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
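+		/* wait for the VCPU to report boot complete (UVD_STATUS bit 1), retrying with a block reset if needed */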
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(100);
+ if (amdgpu_emu_mode == 1)
+ msleep(20);
+ }
+
+ if (amdgpu_emu_mode == 1) {
+ r = -1;
+ if (status & 2) {
+ r = 0;
+ break;
+ }
+ } else {
+ r = 0;
+ if (status & 2)
+ break;
+
+ dev_err(adev->dev,
+ "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ mdelay(10);
+ r = -1;
+ }
+ }
+
+ if (r) {
+ dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
+ return r;
+ }
+
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ /* Read DB_CTRL to flush the write DB_CTRL command. */
+ RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL);
+
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / 4);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_stop_dpg_mode - VCN stop with dpg mode
+ *
+ * @adev: amdgpu_device pointer
+ * @inst_idx: instance number index
+ *
+ * Stop VCN block with dpg mode
+ */
+static void vcn_v5_0_1_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+{
+ uint32_t tmp;
+ int vcn_inst;
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+
+ /* Wait for power status to be 1 */
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* wait for read ptr to be equal to write ptr */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ /* disable dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+}
+
+/**
+ * vcn_v5_0_1_stop - VCN stop
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop VCN block
+ */
+static int vcn_v5_0_1_stop(struct amdgpu_device *adev)
+{
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ uint32_t tmp;
+ int i, r = 0, vcn_inst;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ vcn_inst = GET_INST(VCN, i);
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ vcn_v5_0_1_stop_dpg_mode(adev, i);
+ continue;
+ }
+
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ return r;
+
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* disable LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* apply soft reset */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ /* clear status */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0);
+ }
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, false);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_unified_ring_get_rptr - get unified read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware unified read pointer
+ */
+static uint64_t vcn_v5_0_1_unified_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_RPTR);
+}
+
+/**
+ * vcn_v5_0_1_unified_ring_get_wptr - get unified write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware unified write pointer
+ */
+static uint64_t vcn_v5_0_1_unified_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR);
+}
+
+/**
+ * vcn_v5_0_1_unified_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the enc write pointer to the hardware
+ */
+static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ }
+}
+
+static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_ENC,
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .get_rptr = vcn_v5_0_1_unified_ring_get_rptr,
+ .get_wptr = vcn_v5_0_1_unified_ring_get_wptr,
+ .set_wptr = vcn_v5_0_1_unified_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
+ 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v2_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v2_0_enc_ring_emit_ib,
+ .emit_fence = vcn_v2_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring,
+ .test_ib = amdgpu_vcn_unified_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v2_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+/**
+ * vcn_v5_0_1_set_unified_ring_funcs - set unified ring functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set unified ring functions
+ */
+static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, vcn_inst;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v5_0_1_unified_ring_vm_funcs;
+ adev->vcn.inst[i].ring_enc[0].me = i;
+ vcn_inst = GET_INST(VCN, i);
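+		/* derive the AID this instance belongs to from its physical instance index */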
+ adev->vcn.inst[i].aid_id = vcn_inst / adev->vcn.num_inst_per_aid;
+ }
+}
+
+/**
+ * vcn_v5_0_1_is_idle - check whether VCN block is idle
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Check whether VCN block is idle
+ */
+static bool vcn_v5_0_1_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, ret = 1;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ ret &= (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) == UVD_STATUS__IDLE);
+
+ return ret;
+}
+
+/**
+ * vcn_v5_0_1_wait_for_idle - wait for VCN block idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Wait for VCN block idle
+ */
+static int vcn_v5_0_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, ret = 0;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ret = SOC15_WAIT_ON_RREG(VCN, GET_INST(VCN, i), regUVD_STATUS, UVD_STATUS__IDLE,
+ UVD_STATUS__IDLE);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+/**
+ * vcn_v5_0_1_set_clockgating_state - set VCN block clockgating state
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ * @state: clock gating state
+ *
+ * Set VCN block clockgating state
+ */
+static int vcn_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = state == AMD_CG_STATE_GATE;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (enable) {
+ if (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) != UVD_STATUS__IDLE)
+ return -EBUSY;
+ vcn_v5_0_1_enable_clock_gating(adev, i);
+ } else {
+ vcn_v5_0_1_disable_clock_gating(adev, i);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_set_powergating_state - set VCN block powergating state
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ * @state: power gating state
+ *
+ * Set VCN block powergating state
+ */
+static int vcn_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret;
+
+ if (state == adev->vcn.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = vcn_v5_0_1_stop(adev);
+ else
+ ret = vcn_v5_0_1_start(adev);
+
+ if (!ret)
+ adev->vcn.cur_state = state;
+
+ return ret;
+}
+
+/**
+ * vcn_v5_0_1_process_interrupt - process VCN block interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @entry: interrupt entry from clients and sources
+ *
+ * Process VCN block interrupt
+ */
+static int vcn_v5_0_1_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t i, inst;
+
+ i = node_id_to_phys_map[entry->node_id];
+
+ DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n");
+
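+	/* find the VCN instance that lives on the AID which raised this interrupt */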
+ for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst)
+ if (adev->vcn.inst[inst].aid_id == i)
+ break;
+ if (inst >= adev->vcn.num_vcn_inst) {
+ dev_WARN_ONCE(adev->dev, 1,
+ "Interrupt received for unknown VCN instance %d",
+ entry->node_id);
+ return 0;
+ }
+
+ switch (entry->src_id) {
+ case VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+ amdgpu_fence_process(&adev->vcn.inst[inst].ring_enc[0]);
+ break;
+ default:
+ DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v5_0_1_irq_funcs = {
+ .process = vcn_v5_0_1_process_interrupt,
+};
+
+/**
+ * vcn_v5_0_1_set_irq_funcs - set VCN block interrupt irq functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set VCN block interrupt irq functions
+ */
+static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ adev->vcn.inst->irq.num_types++;
+ adev->vcn.inst->irq.funcs = &vcn_v5_0_1_irq_funcs;
+}
+
+static const struct amd_ip_funcs vcn_v5_0_1_ip_funcs = {
+ .name = "vcn_v5_0_1",
+ .early_init = vcn_v5_0_1_early_init,
+ .late_init = NULL,
+ .sw_init = vcn_v5_0_1_sw_init,
+ .sw_fini = vcn_v5_0_1_sw_fini,
+ .hw_init = vcn_v5_0_1_hw_init,
+ .hw_fini = vcn_v5_0_1_hw_fini,
+ .suspend = vcn_v5_0_1_suspend,
+ .resume = vcn_v5_0_1_resume,
+ .is_idle = vcn_v5_0_1_is_idle,
+ .wait_for_idle = vcn_v5_0_1_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = vcn_v5_0_1_set_clockgating_state,
+ .set_powergating_state = vcn_v5_0_1_set_powergating_state,
+ .dump_ip_state = vcn_v5_0_0_dump_ip_state,
+ .print_ip_state = vcn_v5_0_0_print_ip_state,
+};
+
+const struct amdgpu_ip_block_version vcn_v5_0_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 5,
+ .minor = 0,
+ .rev = 1,
+ .funcs = &vcn_v5_0_1_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h
new file mode 100644
index 000000000000..82ac709f44bf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_v5_0_1_H__
+#define __VCN_v5_0_1_H__
+
+extern const struct amdgpu_ip_block_version vcn_v5_0_1_ip_block;
+
+#endif /* __VCN_v5_0_1_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 0fedadd0a6a4..98fc6941159e 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -364,9 +364,8 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
* this should allow us to catchup.
*/
tmp = (wptr + 32) & ih->ptr_mask;
- dev_warn(adev->dev, "IH ring buffer overflow "
- "(0x%08X, 0x%08X, 0x%08X)\n",
- wptr, ih->rptr, tmp);
+ dev_warn_ratelimited(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+ amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp);
ih->rptr = tmp;
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
@@ -605,10 +604,10 @@ static void vega10_ih_update_clockgating_state(struct amdgpu_device *adev,
}
}
-static int vega10_ih_set_clockgating_state(void *handle,
+static int vega10_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
vega10_ih_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
@@ -616,7 +615,7 @@ static int vega10_ih_set_clockgating_state(void *handle,
}
-static int vega10_ih_set_powergating_state(void *handle,
+static int vega10_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index 1c9aff742e43..e9e3b2ed4b7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -366,6 +366,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)
/* Enable IH Retry CAM */
if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 0) ||
amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 4) ||
amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 5))
WREG32_FIELD15(OSSSYS, 0, IH_RETRY_INT_CAM_CNTL_ALDEBARAN,
ENABLE, 1);
@@ -443,9 +444,8 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
* this should allow us to catchup.
*/
tmp = (wptr + 32) & ih->ptr_mask;
- dev_warn(adev->dev, "IH ring buffer overflow "
- "(0x%08X, 0x%08X, 0x%08X)\n",
- wptr, ih->rptr, tmp);
+ dev_warn_ratelimited(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+ amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp);
ih->rptr = tmp;
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
@@ -697,10 +697,10 @@ static void vega20_ih_update_clockgating_state(struct amdgpu_device *adev,
}
}
-static int vega20_ih_set_clockgating_state(void *handle,
+static int vega20_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
vega20_ih_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
@@ -708,7 +708,7 @@ static int vega20_ih_set_clockgating_state(void *handle,
}
-static int vega20_ih_set_powergating_state(void *handle,
+static int vega20_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index a83505815d39..06615f160331 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -1945,10 +1945,10 @@ static int vi_common_set_clockgating_state_by_smu(void *handle,
return 0;
}
-static int vi_common_set_clockgating_state(void *handle,
+static int vi_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1988,7 +1988,7 @@ static int vi_common_set_clockgating_state(void *handle,
return 0;
}
-static int vi_common_set_powergating_state(void *handle,
+static int vi_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 02f7ba8c93cd..388b44ed5928 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -4122,3 +4122,494 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0x00000000,
};
+
+static const uint32_t cwsr_trap_gfx9_5_0_hex[] = {
+ 0xbf820001, 0xbf8202d8,
+ 0xb8f8f802, 0x8978ff78,
+ 0x00020006, 0xb8fbf803,
+ 0x866eff78, 0x00002000,
+ 0xbf840008, 0xbf0d986d,
+ 0xbf85001f, 0x866eff7b,
+ 0x00000400, 0xbf850061,
+ 0xbf8e0010, 0xb8fbf803,
+ 0xbf82fffa, 0x866eff7b,
+ 0x03800900, 0xbf850015,
+ 0x866eff7b, 0x000071ff,
+ 0xbf840008, 0x866fff7b,
+ 0x00007080, 0xbf840001,
+ 0xbeee1a87, 0xb8eff801,
+ 0x8e6e8c6e, 0x866e6f6e,
+ 0xbf85000a, 0xbf0d986d,
+ 0xbf850003, 0x866eff6d,
+ 0x00ff0000, 0xbf850005,
+ 0xbf0d986d, 0xbf850004,
+ 0x866eff7b, 0x00000400,
+ 0xbf850046, 0xbeed1a9d,
+ 0xb8faf807, 0x867aff7a,
+ 0x001f8000, 0x8e7a8b7a,
+ 0x8979ff79, 0xfc000000,
+ 0x87797a79, 0xba7ff807,
+ 0x00000000, 0xb8faf812,
+ 0xb8fbf813, 0x8efa887a,
+ 0xbf0d8f7b, 0xbf840002,
+ 0x877bff7b, 0xffff0000,
+ 0xc0031cfd, 0x00000010,
+ 0xc0071bbd, 0x00000000,
+ 0xc0071ebd, 0x00000008,
+ 0xbf8cc07f, 0x8e739773,
+ 0x8979ff79, 0x01800000,
+ 0x87797379, 0xbf0d986d,
+ 0xbf840009, 0xbf0d9879,
+ 0xbf850007, 0x896dff6d,
+ 0x01ff0000, 0xba7f0583,
+ 0x00000000, 0xbf0d9d6d,
+ 0xbeed189d, 0xbf840012,
+ 0xbef91898, 0xbeed189d,
+ 0x86ee6e6e, 0xbf840001,
+ 0xbe801d6e, 0x866eff6d,
+ 0x01ff0000, 0xbf850005,
+ 0x8778ff78, 0x00002000,
+ 0x80ec886c, 0x82ed806d,
+ 0xbf820005, 0x866eff6d,
+ 0x01000000, 0xbf850002,
+ 0x806c846c, 0x826d806d,
+ 0x866dff6d, 0x0000ffff,
+ 0x8f7a8b79, 0x867aff7a,
+ 0x001f8000, 0xb97af807,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0x8f6e8378, 0xb96ee0c2,
+ 0xbf800002, 0xb9780002,
+ 0xbe801f6c, 0x866dff6d,
+ 0x0000ffff, 0xbefa0080,
+ 0xb97a0283, 0xb8faf807,
+ 0x867aff7a, 0x001f8000,
+ 0x8e7a8b7a, 0x8979ff79,
+ 0xfc000000, 0x87797a79,
+ 0xba7ff807, 0x00000000,
+ 0xbeee007e, 0xbeef007f,
+ 0xbefe0180, 0xbf900004,
+ 0x877a8478, 0xb97af802,
+ 0xbf8e0002, 0xbf88fffe,
+ 0xb8fa2985, 0x807a817a,
+ 0x8e7a8a7a, 0x8e7a817a,
+ 0xb8fb1605, 0x807b817b,
+ 0x8e7b867b, 0x807a7b7a,
+ 0x807a7e7a, 0x827b807f,
+ 0x867bff7b, 0x0000ffff,
+ 0xc04b1c3d, 0x00000050,
+ 0xbf8cc07f, 0xc04b1d3d,
+ 0x00000060, 0xbf8cc07f,
+ 0xc0431e7d, 0x00000074,
+ 0xbf8cc07f, 0xbef4007e,
+ 0x8675ff7f, 0x0000ffff,
+ 0x8775ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x00807fac, 0xbef1007c,
+ 0xbef00080, 0xb8f02985,
+ 0x80708170, 0x8e708a70,
+ 0x8e708170, 0xb8fa1605,
+ 0x807a817a, 0x8e7a867a,
+ 0x80707a70, 0xbef60084,
+ 0xbef600ff, 0x01000000,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611c7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc0070, 0xc0611b3a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611b7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc0070, 0xc0611bba,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611bfa, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc0070, 0xc0611e3a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xb8fbf803, 0xbefe007c,
+ 0xbefc0070, 0xc0611efa,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611a3a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc0070, 0xc0611a7a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0xb8f1f801, 0xbefe007c,
+ 0xbefc0070, 0xc0611c7a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x80708470, 0xbefc007e,
+ 0x867aff7f, 0x04000000,
+ 0xbeef0080, 0x876f6f7a,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fb1605, 0x807b817b,
+ 0x8e7b847b, 0x8e76827b,
+ 0xbef600ff, 0x01000000,
+ 0xbef20174, 0x80747074,
+ 0x82758075, 0xbefc0080,
+ 0xbf800000, 0xbe802b00,
+ 0xbe822b02, 0xbe842b04,
+ 0xbe862b06, 0xbe882b08,
+ 0xbe8a2b0a, 0xbe8c2b0c,
+ 0xbe8e2b0e, 0xc06b003a,
+ 0x00000000, 0xbf8cc07f,
+ 0xc06b013a, 0x00000010,
+ 0xbf8cc07f, 0xc06b023a,
+ 0x00000020, 0xbf8cc07f,
+ 0xc06b033a, 0x00000030,
+ 0xbf8cc07f, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0a7b7c, 0xbf85ffe7,
+ 0xbef40172, 0xbef00080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbee80080, 0xbee90080,
+ 0xbef600ff, 0x01000000,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf85004d, 0xbe840080,
+ 0xd2890000, 0x00000900,
+ 0x80048104, 0xd2890001,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
+ 0x80048104, 0xd2890003,
+ 0x00000900, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
+ 0x80048104, 0xd2890002,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000902,
+ 0x80048104, 0xd2890001,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
+ 0x80048104, 0xd2890003,
+ 0x00000902, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbf820008,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8fb5306, 0x867bc17b,
+ 0xbf840052, 0xbf8a0000,
+ 0x867aff6f, 0x04000000,
+ 0xbf84004e, 0x8e7b867b,
+ 0x8e7b827b, 0xbef6007b,
+ 0xb8f02985, 0x80708170,
+ 0x8e708a70, 0x8e708170,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0x8070ff70, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf85001d,
+ 0x24040682, 0xd86c0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xbe840080, 0xd2890000,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
+ 0x80048104, 0xd2890002,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0x680404ff,
+ 0x00000100, 0xd0c9006a,
+ 0x0000f702, 0xbf87ffe5,
+ 0xbf820016, 0xd1060002,
+ 0x00011103, 0x7e0602ff,
+ 0x00000200, 0xbefc00ff,
+ 0x00010000, 0xbe800077,
+ 0x8677ff77, 0xff7fffff,
+ 0x8777ff77, 0x00058000,
+ 0xd8ec0000, 0x00000002,
+ 0xbf8cc07f, 0xe0765000,
+ 0x701d0002, 0x68040702,
+ 0xd0c9006a, 0x0000f702,
+ 0xbefe016a, 0xbf87fff6,
+ 0xbef70000, 0xbef000ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb2b05,
+ 0x807b817b, 0x8e7b827b,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0084, 0xbf0a7b7c,
+ 0xbf84006d, 0xbf11017c,
+ 0x807bff7b, 0x00001000,
+ 0x867aff78, 0x00400000,
+ 0xbf850003, 0xb8faf803,
+ 0x897a7aff, 0x10000000,
+ 0xbf850051, 0xbe840080,
+ 0xd2890000, 0x00000900,
+ 0x80048104, 0xd2890001,
+ 0x00000900, 0x80048104,
+ 0xd2890002, 0x00000900,
+ 0x80048104, 0xd2890003,
+ 0x00000900, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000901, 0x80048104,
+ 0xd2890001, 0x00000901,
+ 0x80048104, 0xd2890002,
+ 0x00000901, 0x80048104,
+ 0xd2890003, 0x00000901,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000902,
+ 0x80048104, 0xd2890001,
+ 0x00000902, 0x80048104,
+ 0xd2890002, 0x00000902,
+ 0x80048104, 0xd2890003,
+ 0x00000902, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000903, 0x80048104,
+ 0xd2890001, 0x00000903,
+ 0x80048104, 0xd2890002,
+ 0x00000903, 0x80048104,
+ 0xd2890003, 0x00000903,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0x807c847c,
+ 0xbf0a7b7c, 0xbf85ffb1,
+ 0xbf9c0000, 0xbf820012,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0x807c847c, 0x8070ff70,
+ 0x00000400, 0xbf0a7b7c,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xb8fb2985, 0x807b817b,
+ 0x8e7b837b, 0xb8fa2b05,
+ 0x807a817a, 0x8e7a827a,
+ 0x80fb7a7b, 0x867b7b7b,
+ 0xbf84007a, 0x807bff7b,
+ 0x00001000, 0xbefc0080,
+ 0xbf11017c, 0x867aff78,
+ 0x00400000, 0xbf850003,
+ 0xb8faf803, 0x897a7aff,
+ 0x10000000, 0xbf850059,
+ 0xd3d84000, 0x18000100,
+ 0xd3d84001, 0x18000101,
+ 0xd3d84002, 0x18000102,
+ 0xd3d84003, 0x18000103,
+ 0xbe840080, 0xd2890000,
+ 0x00000900, 0x80048104,
+ 0xd2890001, 0x00000900,
+ 0x80048104, 0xd2890002,
+ 0x00000900, 0x80048104,
+ 0xd2890003, 0x00000900,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000901,
+ 0x80048104, 0xd2890001,
+ 0x00000901, 0x80048104,
+ 0xd2890002, 0x00000901,
+ 0x80048104, 0xd2890003,
+ 0x00000901, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0xbe840080, 0xd2890000,
+ 0x00000902, 0x80048104,
+ 0xd2890001, 0x00000902,
+ 0x80048104, 0xd2890002,
+ 0x00000902, 0x80048104,
+ 0xd2890003, 0x00000902,
+ 0x80048104, 0xc069003a,
+ 0x00000070, 0xbf8cc07f,
+ 0x80709070, 0xbf06c004,
+ 0xbf84ffee, 0xbe840080,
+ 0xd2890000, 0x00000903,
+ 0x80048104, 0xd2890001,
+ 0x00000903, 0x80048104,
+ 0xd2890002, 0x00000903,
+ 0x80048104, 0xd2890003,
+ 0x00000903, 0x80048104,
+ 0xc069003a, 0x00000070,
+ 0xbf8cc07f, 0x80709070,
+ 0xbf06c004, 0xbf84ffee,
+ 0x807c847c, 0xbf0a7b7c,
+ 0xbf85ffa9, 0xbf9c0000,
+ 0xbf820016, 0xd3d84000,
+ 0x18000100, 0xd3d84001,
+ 0x18000101, 0xd3d84002,
+ 0x18000102, 0xd3d84003,
+ 0x18000103, 0xe0724000,
+ 0x701d0000, 0xe0724100,
+ 0x701d0100, 0xe0724200,
+ 0x701d0200, 0xe0724300,
+ 0x701d0300, 0x807c847c,
+ 0x8070ff70, 0x00000400,
+ 0xbf0a7b7c, 0xbf85ffeb,
+ 0xbf9c0000, 0xbf8200f4,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0x866eff7f, 0x04000000,
+ 0xbf840025, 0xbefe00c1,
+ 0xbeff00c1, 0xb8ef5306,
+ 0x866fc16f, 0xbf840020,
+ 0x8e6f866f, 0x8e6f826f,
+ 0xbef6006f, 0xb8f82985,
+ 0x80788178, 0x8e788a78,
+ 0x8e788178, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0x8078ff78,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xe0510000, 0x781d0000,
+ 0xe0510100, 0x781d0000,
+ 0xe0510200, 0x781d0000,
+ 0xe0510300, 0x781d0000,
+ 0xe0510400, 0x781d0000,
+ 0x807cff7c, 0x00000500,
+ 0x8078ff78, 0x00000500,
+ 0xbf0a6f7c, 0xbf85fff0,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbef600ff, 0x01000000,
+ 0xb8ef2b05, 0x806f816f,
+ 0x8e6f826f, 0x806fff6f,
+ 0x00008000, 0xbef80080,
+ 0xbeee0078, 0x8078ff78,
+ 0x00000400, 0xbefc0084,
+ 0xbf11087c, 0xe0524000,
+ 0x781d0000, 0xe0524100,
+ 0x781d0100, 0xe0524200,
+ 0x781d0200, 0xe0524300,
+ 0x781d0300, 0xbf8c0f70,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0x807c847c, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7c,
+ 0xbf85ffee, 0xb8ef2985,
+ 0x806f816f, 0x8e6f836f,
+ 0xb8f92b05, 0x80798179,
+ 0x8e798279, 0x80ef796f,
+ 0x866f6f6f, 0xbf84001a,
+ 0x806fff6f, 0x00008000,
+ 0xbefc0080, 0xbf11087c,
+ 0xe0524000, 0x781d0000,
+ 0xe0524100, 0x781d0100,
+ 0xe0524200, 0x781d0200,
+ 0xe0524300, 0x781d0300,
+ 0xbf8c0f70, 0xd3d94000,
+ 0x18000100, 0xd3d94001,
+ 0x18000101, 0xd3d94002,
+ 0x18000102, 0xd3d94003,
+ 0x18000103, 0x807c847c,
+ 0x8078ff78, 0x00000400,
+ 0xbf0a6f7c, 0xbf85ffea,
+ 0xbf9c0000, 0xe0524000,
+ 0x6e1d0000, 0xe0524100,
+ 0x6e1d0100, 0xe0524200,
+ 0x6e1d0200, 0xe0524300,
+ 0x6e1d0300, 0xbf8c0f70,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x80f8c078, 0xb8ef1605,
+ 0x806f816f, 0x8e6f846f,
+ 0x8e76826f, 0xbef600ff,
+ 0x01000000, 0xbefc006f,
+ 0xc031003a, 0x00000078,
+ 0x80f8c078, 0xbf8cc07f,
+ 0x80fc907c, 0xbf800000,
+ 0xbe802d00, 0xbe822d02,
+ 0xbe842d04, 0xbe862d06,
+ 0xbe882d08, 0xbe8a2d0a,
+ 0xbe8c2d0c, 0xbe8e2d0e,
+ 0xbf06807c, 0xbf84fff0,
+ 0xb8f82985, 0x80788178,
+ 0x8e788a78, 0x8e788178,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xc0211bfa,
+ 0x00000078, 0x80788478,
+ 0xc0211b3a, 0x00000078,
+ 0x80788478, 0xc0211b7a,
+ 0x00000078, 0x80788478,
+ 0xc0211c3a, 0x00000078,
+ 0x80788478, 0xc0211c7a,
+ 0x00000078, 0x80788478,
+ 0xc0211eba, 0x00000078,
+ 0x80788478, 0xc0211efa,
+ 0x00000078, 0x80788478,
+ 0xc0211a3a, 0x00000078,
+ 0x80788478, 0xc0211a7a,
+ 0x00000078, 0x80788478,
+ 0xc0211cfa, 0x00000078,
+ 0x80788478, 0xbf8cc07f,
+ 0xbefc006f, 0xbefe0070,
+ 0xbeff0071, 0x866f7bff,
+ 0x000003ff, 0xb96f4803,
+ 0x866f7bff, 0xfffff800,
+ 0x8f6f8b6f, 0xb96fa2c3,
+ 0xb973f801, 0xb8ee2985,
+ 0x806e816e, 0x8e6e8a6e,
+ 0x8e6e816e, 0xb8ef1605,
+ 0x806f816f, 0x8e6f866f,
+ 0x806e6f6e, 0x806e746e,
+ 0x826f8075, 0x866fff6f,
+ 0x0000ffff, 0xc00b1c37,
+ 0x00000050, 0xc00b1d37,
+ 0x00000060, 0xc0031e77,
+ 0x00000074, 0xbf8cc07f,
+ 0x8f6e8b79, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0x8f6e837a, 0xb96ee0c2,
+ 0xbf800002, 0xb97a0002,
+ 0xbf8a0000, 0xbe801f6c,
+ 0xbf9b0000, 0x00000000,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index 44772eec9ef4..96fbb16ceb21 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -34,41 +34,24 @@
* cpp -DASIC_FAMILY=CHIP_PLUM_BONITO cwsr_trap_handler_gfx10.asm -P -o gfx11.sp3
* sp3 gfx11.sp3 -hex gfx11.hex
*
- * gfx12:
- * cpp -DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx10.asm -P -o gfx12.sp3
- * sp3 gfx12.sp3 -hex gfx12.hex
*/
#define CHIP_NAVI10 26
#define CHIP_SIENNA_CICHLID 30
#define CHIP_PLUM_BONITO 36
-#define CHIP_GFX12 37
#define NO_SQC_STORE (ASIC_FAMILY >= CHIP_SIENNA_CICHLID)
#define HAVE_XNACK (ASIC_FAMILY < CHIP_SIENNA_CICHLID)
#define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO)
#define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO)
-#define SW_SA_TRAP (ASIC_FAMILY >= CHIP_PLUM_BONITO && ASIC_FAMILY < CHIP_GFX12)
+#define SW_SA_TRAP (ASIC_FAMILY == CHIP_PLUM_BONITO)
#define SAVE_AFTER_XNACK_ERROR (HAVE_XNACK && !NO_SQC_STORE) // workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger
#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
-#if ASIC_FAMILY < CHIP_GFX12
#define S_COHERENCE glc:1
#define V_COHERENCE slc:1 glc:1
#define S_WAITCNT_0 s_waitcnt 0
-#else
-#define S_COHERENCE scope:SCOPE_SYS
-#define V_COHERENCE scope:SCOPE_SYS
-#define S_WAITCNT_0 s_wait_idle
-
-#define HW_REG_SHADER_FLAT_SCRATCH_LO HW_REG_WAVE_SCRATCH_BASE_LO
-#define HW_REG_SHADER_FLAT_SCRATCH_HI HW_REG_WAVE_SCRATCH_BASE_HI
-#define HW_REG_GPR_ALLOC HW_REG_WAVE_GPR_ALLOC
-#define HW_REG_LDS_ALLOC HW_REG_WAVE_LDS_ALLOC
-#define HW_REG_MODE HW_REG_WAVE_MODE
-#endif
-#if ASIC_FAMILY < CHIP_GFX12
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
var SQ_WAVE_STATUS_HALT_MASK = 0x2000
var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000
@@ -81,21 +64,6 @@ var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_E
var S_STATUS_HALT_MASK = SQ_WAVE_STATUS_HALT_MASK
var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000
var S_SAVE_PC_HI_HT_MASK = 0x01000000
-#else
-var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4
-var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9
-var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00
-var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000
-var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK = 0x8000
-var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT = 15
-var SQ_WAVE_STATUS_WAVE64_SHIFT = 29
-var SQ_WAVE_STATUS_WAVE64_SIZE = 1
-var SQ_WAVE_LDS_ALLOC_GRANULARITY = 9
-var S_STATUS_HWREG = HW_REG_WAVE_STATE_PRIV
-var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_ERR_MASK
-var S_STATUS_HALT_MASK = SQ_WAVE_STATE_PRIV_HALT_MASK
-var S_SAVE_PC_HI_TRAP_ID_MASK = 0xF0000000
-#endif
var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
@@ -110,7 +78,6 @@ var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12
#endif
-#if ASIC_FAMILY < CHIP_GFX12
var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400
var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF
var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10
@@ -161,39 +128,6 @@ var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT
var S_TRAPSTS_HWREG = HW_REG_TRAPSTS
var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_TRAPSTS_SAVECTX_MASK
var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_TRAPSTS_SAVECTX_SHIFT
-#else
-var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK = 0xF
-var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK = 0x10
-var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT = 5
-var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20
-var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40
-var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6
-var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80
-var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7
-var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100
-var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8
-var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200
-var SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK = 0x800
-var SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK = 0x80
-var SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK = 0x200
-
-var S_TRAPSTS_HWREG = HW_REG_WAVE_EXCP_FLAG_PRIV
-var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK
-var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT
-var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\
- SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK |\
- SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK |\
- SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\
- SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\
- SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK
-var S_TRAPSTS_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT
-var S_TRAPSTS_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
-var S_TRAPSTS_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
-var S_TRAPSTS_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT
-var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT
-var BARRIER_STATE_SIGNAL_OFFSET = 16
-var BARRIER_STATE_VALID_OFFSET = 0
-#endif
// bits [31:24] unused by SPI debug data
var TTMP11_SAVE_REPLAY_W64H_SHIFT = 31
@@ -305,11 +239,7 @@ L_TRAP_NO_BARRIER:
L_HALTED:
// Host trap may occur while wave is halted.
-#if ASIC_FAMILY < CHIP_GFX12
s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
-#else
- s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
-#endif
s_cbranch_scc1 L_FETCH_2ND_TRAP
L_CHECK_SAVE:
@@ -336,7 +266,6 @@ L_NOT_HALTED:
// Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
// Maskable exceptions only cause the wave to enter the trap handler if
// their respective bit in mode.excp_en is set.
-#if ASIC_FAMILY < CHIP_GFX12
s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
s_cbranch_scc0 L_CHECK_TRAP_ID
@@ -349,17 +278,6 @@ L_NOT_ADDR_WATCH:
s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT
s_and_b32 ttmp2, ttmp2, ttmp3
s_cbranch_scc1 L_FETCH_2ND_TRAP
-#else
- s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
- s_and_b32 ttmp3, s_save_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK
- s_cbranch_scc0 L_NOT_ADDR_WATCH
- s_or_b32 ttmp2, ttmp2, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK
-
-L_NOT_ADDR_WATCH:
- s_getreg_b32 ttmp3, hwreg(HW_REG_WAVE_TRAP_CTRL)
- s_and_b32 ttmp2, ttmp3, ttmp2
- s_cbranch_scc1 L_FETCH_2ND_TRAP
-#endif
L_CHECK_TRAP_ID:
// Check trap_id != 0
@@ -369,13 +287,8 @@ L_CHECK_TRAP_ID:
#if SINGLE_STEP_MISSED_WORKAROUND
// Prioritize single step exception over context save.
// Second-level trap will halt wave and RFE, re-entering for SAVECTX.
-#if ASIC_FAMILY < CHIP_GFX12
s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
-#else
- // WAVE_TRAP_CTRL is already in ttmp3.
- s_and_b32 ttmp3, ttmp3, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK
-#endif
s_cbranch_scc1 L_FETCH_2ND_TRAP
#endif
@@ -425,12 +338,7 @@ L_NO_NEXT_TRAP:
s_cbranch_scc1 L_TRAP_CASE
// Host trap will not cause trap re-entry.
-#if ASIC_FAMILY < CHIP_GFX12
s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK
-#else
- s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
- s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
-#endif
s_cbranch_scc1 L_EXIT_TRAP
s_or_b32 s_save_status, s_save_status, S_STATUS_HALT_MASK
@@ -457,16 +365,7 @@ L_EXIT_TRAP:
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
-#if ASIC_FAMILY < CHIP_GFX12
s_setreg_b32 hwreg(S_STATUS_HWREG), s_save_status
-#else
- // STATE_PRIV.BARRIER_COMPLETE may have changed since we read it.
- // Only restore fields which the trap handler changes.
- s_lshr_b32 s_save_status, s_save_status, SQ_WAVE_STATE_PRIV_SCC_SHIFT
- s_setreg_b32 hwreg(S_STATUS_HWREG, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \
- SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_status
-#endif
-
s_rfe_b64 [ttmp0, ttmp1]
L_SAVE:
@@ -478,14 +377,6 @@ L_SAVE:
s_endpgm
L_HAVE_VGPRS:
#endif
-#if ASIC_FAMILY >= CHIP_GFX12
- s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
- s_bitcmp1_b32 s_save_tmp, SQ_WAVE_STATUS_NO_VGPRS_SHIFT
- s_cbranch_scc0 L_HAVE_VGPRS
- s_endpgm
-L_HAVE_VGPRS:
-#endif
-
s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
s_mov_b32 s_save_tmp, 0
s_setreg_b32 hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit
@@ -671,19 +562,6 @@ L_SAVE_HWREG:
s_mov_b32 m0, 0x0 //Next lane of v2 to write to
#endif
-#if ASIC_FAMILY >= CHIP_GFX12
- // Ensure no further changes to barrier or LDS state.
- // STATE_PRIV.BARRIER_COMPLETE may change up to this point.
- s_barrier_signal -2
- s_barrier_wait -2
-
- // Re-read final state of BARRIER_COMPLETE field for save.
- s_getreg_b32 s_save_tmp, hwreg(S_STATUS_HWREG)
- s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
- s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
- s_or_b32 s_save_status, s_save_status, s_save_tmp
-#endif
-
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset)
s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
@@ -707,21 +585,6 @@ L_SAVE_HWREG:
s_getreg_b32 s_save_m0, hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI)
write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
-#if ASIC_FAMILY >= CHIP_GFX12
- s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
- write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
-
- s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_TRAP_CTRL)
- write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
-
- s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
- write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
-
- s_get_barrier_state s_save_tmp, -1
- s_wait_kmcnt (0)
- write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
-#endif
-
#if NO_SQC_STORE
// Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
s_mov_b32 exec_lo, 0xFFFF
@@ -814,9 +677,7 @@ L_SAVE_LDS_NORMAL:
s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero?
	s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_LDS_DONE
-#if ASIC_FAMILY < CHIP_GFX12
s_barrier //LDS is used? wait for other waves in the same TG
-#endif
s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
s_cbranch_scc0 L_SAVE_LDS_DONE
@@ -1081,11 +942,6 @@ L_RESTORE:
s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
-#if ASIC_FAMILY >= CHIP_GFX12
- // Save s_restore_spi_init_hi for later use.
- s_mov_b32 s_restore_spi_init_hi_save, s_restore_spi_init_hi
-#endif
-
//determine it is wave32 or wave64
get_wave_size2(s_restore_size)
@@ -1320,9 +1176,7 @@ L_RESTORE_SGPR:
// s_barrier with MODE.DEBUG_EN=1, STATUS.PRIV=1 incorrectly asserts debug exception.
// Clear DEBUG_EN before and restore MODE after the barrier.
s_setreg_imm32_b32 hwreg(HW_REG_MODE), 0
-#if ASIC_FAMILY < CHIP_GFX12
	s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG
-#endif
/* restore HW registers */
L_RESTORE_HWREG:
@@ -1334,11 +1188,6 @@ L_RESTORE_HWREG:
s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
-#if ASIC_FAMILY >= CHIP_GFX12
- // Restore s_restore_spi_init_hi before the saved value gets clobbered.
- s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save
-#endif
-
read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset)
read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
@@ -1358,44 +1207,6 @@ L_RESTORE_HWREG:
s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI), s_restore_flat_scratch
-#if ASIC_FAMILY >= CHIP_GFX12
- read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
- S_WAITCNT_0
- s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER), s_restore_tmp
-
- read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
- S_WAITCNT_0
- s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp
-
- // Only the first wave needs to restore the workgroup barrier.
- s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
- s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
-
- // Skip over WAVE_STATUS, since there is no state to restore from it
- s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4
-
- read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
- S_WAITCNT_0
-
- s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET
- s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
-
- // extract the saved signal count from s_restore_tmp
- s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET
-
- // We need to call s_barrier_signal repeatedly to restore the signal
- // count of the work group barrier. The member count is already
- // initialized with the number of waves in the work group.
-L_BARRIER_RESTORE_LOOP:
- s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp
- s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
- s_barrier_signal -1
- s_add_i32 s_restore_tmp, s_restore_tmp, -1
- s_branch L_BARRIER_RESTORE_LOOP
-
-L_SKIP_BARRIER_RESTORE:
-#endif
-
s_mov_b32 m0, s_restore_m0
s_mov_b32 exec_lo, s_restore_exec_lo
s_mov_b32 exec_hi, s_restore_exec_hi
@@ -1453,13 +1264,6 @@ L_RETURN_WITHOUT_PRIV:
s_setreg_b32 hwreg(S_STATUS_HWREG), s_restore_status // SCC is included, which is changed by previous salu
-#if ASIC_FAMILY >= CHIP_GFX12
- // Make barrier and LDS state visible to all waves in the group.
- // STATE_PRIV.BARRIER_COMPLETE may change after this point.
- s_barrier_signal -2
- s_barrier_wait -2
-#endif
-
s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
L_END_PGM:
@@ -1598,11 +1402,7 @@ function get_hwreg_size_bytes
end
function get_wave_size2(s_reg)
-#if ASIC_FAMILY < CHIP_GFX12
s_getreg_b32 s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
-#else
- s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE)
-#endif
s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE
end
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
new file mode 100644
index 000000000000..1740e98c6719
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
@@ -0,0 +1,1126 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* To compile this assembly code:
+ *
+ * gfx12:
+ * cpp -DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx12.asm -P -o gfx12.sp3
+ * sp3 gfx12.sp3 -hex gfx12.hex
+ */
+
+#define CHIP_GFX12 37
+
+#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost TRAP_AFTER_INST exception when SAVECTX raised
+
+var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4
+var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9
+var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00
+var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000
+var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK = 0x8000
+var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT = 15
+var SQ_WAVE_STATUS_WAVE64_SHIFT = 29
+var SQ_WAVE_STATUS_WAVE64_SIZE = 1
+var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24
+var SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK = SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_ERR_MASK
+var S_SAVE_PC_HI_TRAP_ID_MASK = 0xF0000000
+
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 8
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12
+var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24
+var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4
+var SQ_WAVE_LDS_ALLOC_GRANULARITY = 9
+
+var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK = 0xF
+var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK = 0x10
+var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT = 5
+var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20
+var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40
+var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6
+var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80
+var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7
+var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100
+var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8
+var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200
+var SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK = 0x800
+var SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK = 0x80
+var SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK = 0x200
+
+var SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK= SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\
+ SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT
+var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE = 32 - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT
+var BARRIER_STATE_SIGNAL_OFFSET = 16
+var BARRIER_STATE_VALID_OFFSET = 0
+
+var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23
+var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000
+
+// SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14]
+// when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
+var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000
+var S_SAVE_BUF_RSRC_WORD3_MISC = 0x10807FAC
+var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000
+var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
+
+var S_SAVE_PC_HI_FIRST_WAVE_MASK = 0x80000000
+var S_SAVE_PC_HI_FIRST_WAVE_SHIFT = 31
+
+var s_sgpr_save_num = 108
+
+var s_save_spi_init_lo = exec_lo
+var s_save_spi_init_hi = exec_hi
+var s_save_pc_lo = ttmp0
+var s_save_pc_hi = ttmp1
+var s_save_exec_lo = ttmp2
+var s_save_exec_hi = ttmp3
+var s_save_state_priv = ttmp12
+var s_save_excp_flag_priv = ttmp15
+var s_save_xnack_mask = s_save_excp_flag_priv
+var s_wave_size = ttmp7
+var s_save_buf_rsrc0 = ttmp8
+var s_save_buf_rsrc1 = ttmp9
+var s_save_buf_rsrc2 = ttmp10
+var s_save_buf_rsrc3 = ttmp11
+var s_save_mem_offset = ttmp4
+var s_save_alloc_size = s_save_excp_flag_priv
+var s_save_tmp = ttmp14
+var s_save_m0 = ttmp5
+var s_save_ttmps_lo = s_save_tmp
+var s_save_ttmps_hi = s_save_excp_flag_priv
+
+var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
+var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
+
+var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000
+var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26
+var S_WAVE_SIZE = 25
+
+var s_restore_spi_init_lo = exec_lo
+var s_restore_spi_init_hi = exec_hi
+var s_restore_mem_offset = ttmp12
+var s_restore_alloc_size = ttmp3
+var s_restore_tmp = ttmp2
+var s_restore_mem_offset_save = s_restore_tmp
+var s_restore_m0 = s_restore_alloc_size
+var s_restore_mode = ttmp7
+var s_restore_flat_scratch = s_restore_tmp
+var s_restore_pc_lo = ttmp0
+var s_restore_pc_hi = ttmp1
+var s_restore_exec_lo = ttmp4
+var s_restore_exec_hi = ttmp5
+var s_restore_state_priv = ttmp14
+var s_restore_excp_flag_priv = ttmp15
+var s_restore_xnack_mask = ttmp13
+var s_restore_buf_rsrc0 = ttmp8
+var s_restore_buf_rsrc1 = ttmp9
+var s_restore_buf_rsrc2 = ttmp10
+var s_restore_buf_rsrc3 = ttmp11
+var s_restore_size = ttmp6
+var s_restore_ttmps_lo = s_restore_tmp
+var s_restore_ttmps_hi = s_restore_alloc_size
+var s_restore_spi_init_hi_save = s_restore_exec_hi
+
+shader main
+ asic(DEFAULT)
+ type(CS)
+ wave_size(32)
+
+ s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save
+
+L_JUMP_TO_RESTORE:
+ s_branch L_RESTORE
+
+L_SKIP_RESTORE:
+ s_getreg_b32 s_save_state_priv, hwreg(HW_REG_WAVE_STATE_PRIV) //save STATUS since we will change SCC
+
+ // Clear SPI_PRIO: do not save with elevated priority.
+	// Clear POISON_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
+ s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK
+
+ s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+
+ s_and_b32 ttmp2, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK
+ s_cbranch_scc0 L_NOT_HALTED
+
+L_HALTED:
+ // Host trap may occur while wave is halted.
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+L_CHECK_SAVE:
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK
+ s_cbranch_scc1 L_SAVE
+
+ // Wave is halted but neither host trap nor SAVECTX is raised.
+ // Caused by instruction fetch memory violation.
+ // Spin wait until context saved to prevent interrupt storm.
+ s_sleep 0x10
+ s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+ s_branch L_CHECK_SAVE
+
+L_NOT_HALTED:
+ // Let second-level handle non-SAVECTX exception or trap.
+ // Any concurrent SAVECTX will be handled upon re-entry once halted.
+
+ // Check non-maskable exceptions. memory_violation, illegal_instruction
+ // and xnack_error exceptions always cause the wave to enter the trap
+ // handler.
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+ // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
+ // Maskable exceptions only cause the wave to enter the trap handler if
+ // their respective bit in mode.excp_en is set.
+ s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
+ s_and_b32 ttmp3, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK
+ s_cbranch_scc0 L_NOT_ADDR_WATCH
+ s_or_b32 ttmp2, ttmp2, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK
+
+L_NOT_ADDR_WATCH:
+ s_getreg_b32 ttmp3, hwreg(HW_REG_WAVE_TRAP_CTRL)
+ s_and_b32 ttmp2, ttmp3, ttmp2
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+L_CHECK_TRAP_ID:
+ // Check trap_id != 0
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+#if SINGLE_STEP_MISSED_WORKAROUND
+ // Prioritize single step exception over context save.
+ // Second-level trap will halt wave and RFE, re-entering for SAVECTX.
+ // WAVE_TRAP_CTRL is already in ttmp3.
+ s_and_b32 ttmp3, ttmp3, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+#endif
+
+ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK
+ s_cbranch_scc1 L_SAVE
+
+L_FETCH_2ND_TRAP:
+ // Read second-level TBA/TMA from first-level TMA and jump if available.
+ // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
+	// ttmp12 holds SQ_WAVE_STATE_PRIV
+ s_sendmsg_rtn_b64 [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA)
+ s_wait_idle
+ s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
+
+ s_bitcmp1_b32 ttmp15, 0xF
+ s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA
+ s_or_b32 ttmp15, ttmp15, 0xFFFF0000
+L_NO_SIGN_EXTEND_TMA:
+
+ s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 scope:SCOPE_SYS // debug trap enabled flag
+ s_wait_idle
+ s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
+ s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK
+ s_or_b32 ttmp11, ttmp11, ttmp2
+
+ s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 scope:SCOPE_SYS // second-level TBA
+ s_wait_idle
+ s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 scope:SCOPE_SYS // second-level TMA
+ s_wait_idle
+
+ s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
+	s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler has not been set
+ s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler
+
+L_NO_NEXT_TRAP:
+ // If not caused by trap then halt wave to prevent re-entry.
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+ s_cbranch_scc1 L_TRAP_CASE
+
+ // Host trap will not cause trap re-entry.
+ s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+ s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
+ s_cbranch_scc1 L_EXIT_TRAP
+ s_or_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK
+
+ // If the PC points to S_ENDPGM then context save will fail if STATE_PRIV.HALT is set.
+ // Rewind the PC to prevent this from occurring.
+ s_sub_u32 ttmp0, ttmp0, 0x8
+ s_subb_u32 ttmp1, ttmp1, 0x0
+
+ s_branch L_EXIT_TRAP
+
+L_TRAP_CASE:
+ // Advance past trap instruction to prevent re-entry.
+ s_add_u32 ttmp0, ttmp0, 0x4
+ s_addc_u32 ttmp1, ttmp1, 0x0
+
+L_EXIT_TRAP:
+ s_and_b32 ttmp1, ttmp1, 0xFFFF
+
+ // Restore SQ_WAVE_STATUS.
+ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+
+ // STATE_PRIV.BARRIER_COMPLETE may have changed since we read it.
+ // Only restore fields which the trap handler changes.
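+	// The masked s_setreg below writes only bits [15:9] of STATE_PRIV
+	// (SCC up through POISON_ERR, which also covers SYS_PRIO and HALT),
+	// so BARRIER_COMPLETE and the lower fields keep whatever value the
+	// hardware holds at this point.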
+ s_lshr_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_SCC_SHIFT
+ s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \
+ SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_state_priv
+
+ s_rfe_b64 [ttmp0, ttmp1]
+
+L_SAVE:
+ // If VGPRs have been deallocated then terminate the wavefront.
+ // It has no remaining program to run and cannot save without VGPRs.
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
+ s_bitcmp1_b32 s_save_tmp, SQ_WAVE_STATUS_NO_VGPRS_SHIFT
+ s_cbranch_scc0 L_HAVE_VGPRS
+ s_endpgm
+L_HAVE_VGPRS:
+
+ s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
+ s_mov_b32 s_save_tmp, 0
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit
+
+ /* inform SPI the readiness and wait for SPI's go signal */
+ s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI
+ s_mov_b32 s_save_exec_hi, exec_hi
+ s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive
+
+ s_sendmsg_rtn_b64 [exec_lo, exec_hi], sendmsg(MSG_RTN_SAVE_WAVE)
+ s_wait_idle
+
+ // Save first_wave flag so we can clear high bits of save address.
+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
+ s_lshl_b32 s_save_tmp, s_save_tmp, (S_SAVE_PC_HI_FIRST_WAVE_SHIFT - S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT)
+ s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
+
+ // Trap temporaries must be saved via VGPR but all VGPRs are in use.
+ // There is no ttmp space to hold the resource constant for VGPR save.
+ // Save v0 by itself since it requires only two SGPRs.
+ s_mov_b32 s_save_ttmps_lo, exec_lo
+ s_and_b32 s_save_ttmps_hi, exec_hi, 0xFFFF
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] scope:SCOPE_SYS
+ v_mov_b32 v0, 0x0
+ s_mov_b32 exec_lo, s_save_ttmps_lo
+ s_mov_b32 exec_hi, s_save_ttmps_hi
+
+ // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
+ // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
+ get_wave_size2(s_save_ttmps_hi)
+ get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi)
+ get_svgpr_size_bytes(s_save_ttmps_hi)
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi
+ s_and_b32 s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes()
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
+ s_addc_u32 s_save_ttmps_hi, s_save_ttmps_hi, 0x0
+
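+	// ttmp4-ttmp11 and ttmp13 are packed into lanes 4-0xD of v0 below
+	// (ttmp12 is saved separately with the HWREGs as s_save_state_priv).
+	// exec_lo/exec_hi are stashed in lanes 0xE-0xF of v0, outside the
+	// 14-lane store mask (0x3FFF), only so they can be read back after
+	// the store clobbers exec.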
+ v_writelane_b32 v0, ttmp4, 0x4
+ v_writelane_b32 v0, ttmp5, 0x5
+ v_writelane_b32 v0, ttmp6, 0x6
+ v_writelane_b32 v0, ttmp7, 0x7
+ v_writelane_b32 v0, ttmp8, 0x8
+ v_writelane_b32 v0, ttmp9, 0x9
+ v_writelane_b32 v0, ttmp10, 0xA
+ v_writelane_b32 v0, ttmp11, 0xB
+ v_writelane_b32 v0, ttmp13, 0xD
+ v_writelane_b32 v0, exec_lo, 0xE
+ v_writelane_b32 v0, exec_hi, 0xF
+
+ s_mov_b32 exec_lo, 0x3FFF
+ s_mov_b32 exec_hi, 0x0
+ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] offset:0x40 scope:SCOPE_SYS
+ v_readlane_b32 ttmp14, v0, 0xE
+ v_readlane_b32 ttmp15, v0, 0xF
+ s_mov_b32 exec_lo, ttmp14
+ s_mov_b32 exec_hi, ttmp15
+
+	/* setup Resource Constants */
+ s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo
+ s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
+	s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not necessarily initialized
+ s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
+
+ s_mov_b32 s_save_m0, m0
+
+ /* global mem offset */
+ s_mov_b32 s_save_mem_offset, 0x0
+ get_wave_size2(s_wave_size)
+
+ /* save first 4 VGPRs, needed for SGPR save */
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_SAVE_4VGPR_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_SAVE_4VGPR_WAVE32
+L_ENABLE_SAVE_4VGPR_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+ s_branch L_SAVE_4VGPR_WAVE64
+L_SAVE_4VGPR_WAVE32:
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR Allocated in 4-GPR granularity
+
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3
+ s_branch L_SAVE_HWREG
+
+L_SAVE_4VGPR_WAVE64:
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR Allocated in 4-GPR granularity
+
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3
+
+ /* save HW registers */
+
+L_SAVE_HWREG:
+ // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
+ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+ get_svgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes()
+
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ v_mov_b32 v0, 0x0 //Offset[31:0] from buffer resource
+ v_mov_b32 v1, 0x0 //Offset[63:32] from buffer resource
+ v_mov_b32 v2, 0x0 //Set of SGPRs for TCP store
+ s_mov_b32 m0, 0x0 //Next lane of v2 to write to
+
+ // Ensure no further changes to barrier or LDS state.
+ // STATE_PRIV.BARRIER_COMPLETE may change up to this point.
+ s_barrier_signal -2
+ s_barrier_wait -2
+
+ // Re-read final state of BARRIER_COMPLETE field for save.
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATE_PRIV)
+ s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
+ s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
+ s_or_b32 s_save_state_priv, s_save_state_priv, s_save_tmp
+
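+	// The HWREG write order below must match the read order in
+	// L_RESTORE_HWREG: m0, pc_lo, pc_hi, exec_lo, exec_hi, STATE_PRIV,
+	// EXCP_FLAG_PRIV, xnack_mask, MODE, SCRATCH_BASE_LO/HI,
+	// EXCP_FLAG_USER, TRAP_CTRL, STATUS (skipped on restore) and the
+	// barrier state.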
+ write_hwreg_to_v2(s_save_m0)
+ write_hwreg_to_v2(s_save_pc_lo)
+ s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
+ write_hwreg_to_v2(s_save_tmp)
+ write_hwreg_to_v2(s_save_exec_lo)
+ write_hwreg_to_v2(s_save_exec_hi)
+ write_hwreg_to_v2(s_save_state_priv)
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+ write_hwreg_to_v2(s_save_tmp)
+
+ write_hwreg_to_v2(s_save_xnack_mask)
+
+ s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_MODE)
+ write_hwreg_to_v2(s_save_m0)
+
+ s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_SCRATCH_BASE_LO)
+ write_hwreg_to_v2(s_save_m0)
+
+ s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_SCRATCH_BASE_HI)
+ write_hwreg_to_v2(s_save_m0)
+
+ s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
+ write_hwreg_to_v2(s_save_m0)
+
+ s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_TRAP_CTRL)
+ write_hwreg_to_v2(s_save_m0)
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
+ write_hwreg_to_v2(s_save_tmp)
+
+ s_get_barrier_state s_save_tmp, -1
+ s_wait_kmcnt (0)
+ write_hwreg_to_v2(s_save_tmp)
+
+ // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
+ s_mov_b32 exec_lo, 0xFFFF
+ s_mov_b32 exec_hi, 0x0
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+ // Write SGPRs with 32 VGPR lanes. This works in wave32 and wave64 mode.
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+
+ /* save SGPRs */
+	// Save SGPRs before LDS save, so that s0 to s4 can be used during LDS save...
+
+ // SGPR SR memory offset : size(VGPR)+size(SVGPR)
+ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+ get_svgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ s_mov_b32 ttmp13, 0x0 //next VGPR lane to copy SGPR into
+
+ s_mov_b32 m0, 0x0 //SGPR initial index value =0
+ s_nop 0x0 //Manually inserted wait states
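+	// Each iteration stages 16 SGPRs into lanes of v2 (ttmp13 tracks the
+	// next free lane); once 32 lanes (128 bytes) are filled, v2 is
+	// flushed to memory with a single buffer_store_dword and the staging
+	// state is reset.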
+L_SAVE_SGPR_LOOP:
+ // SGPR is allocated in 16 SGPR granularity
+ s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
+ s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
+ s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
+ s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
+ s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
+ s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
+ s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0]
+ s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0]
+
+ write_16sgpr_to_v2(s0)
+
+ s_cmp_eq_u32 ttmp13, 0x20 //have 32 VGPR lanes filled?
+ s_cbranch_scc0 L_SAVE_SGPR_SKIP_TCP_STORE
+
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 0x80
+ s_mov_b32 ttmp13, 0x0
+ v_mov_b32 v2, 0x0
+L_SAVE_SGPR_SKIP_TCP_STORE:
+
+ s_add_u32 m0, m0, 16 //next sgpr index
+ s_cmp_lt_u32 m0, 96 //scc = (m0 < first 96 SGPR) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_SGPR_LOOP //first 96 SGPR save is complete?
+
+	//save the remaining 12 SGPRs
+ s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
+ s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
+ s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
+ s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
+ s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
+ s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
+ write_12sgpr_to_v2(s0)
+
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+ /* save LDS */
+
+L_SAVE_LDS:
+ // Change EXEC to all threads...
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_SAVE_LDS_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_SAVE_LDS_NORMAL
+L_ENABLE_SAVE_LDS_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+L_SAVE_LDS_NORMAL:
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
+ s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero?
+	s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_LDS_DONE
+
+ s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
+ s_cbranch_scc0 L_SAVE_LDS_DONE
+
+ // first wave do LDS save;
+
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY
+ s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes
+
+ // LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
+ //
+ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+ get_svgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes()
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes()
+
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ //load 0~63*4(byte address) to vgpr v0
+ v_mbcnt_lo_u32_b32 v0, -1, 0
+ v_mbcnt_hi_u32_b32 v0, -1, v0
+ v_mul_u32_u24 v0, 4, v0
+
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_mov_b32 m0, 0x0
+ s_cbranch_scc1 L_SAVE_LDS_W64
+
+L_SAVE_LDS_W32:
+ s_mov_b32 s3, 128
+ s_nop 0
+ s_nop 0
+ s_nop 0
+L_SAVE_LDS_LOOP_W32:
+ ds_read_b32 v1, v0
+ s_wait_idle
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+	s_add_u32 m0, m0, s3 //every buffer_store_dword writes 128 bytes (32 lanes x 4 bytes)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s3
+ v_add_nc_u32 v0, v0, 128 //mem offset increased by 128 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_LDS_LOOP_W32 //LDS save is complete?
+
+ s_branch L_SAVE_LDS_DONE
+
+L_SAVE_LDS_W64:
+ s_mov_b32 s3, 256
+ s_nop 0
+ s_nop 0
+ s_nop 0
+L_SAVE_LDS_LOOP_W64:
+ ds_read_b32 v1, v0
+ s_wait_idle
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+
+	s_add_u32 m0, m0, s3 //every buffer_store_dword writes 256 bytes (64 lanes x 4 bytes)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s3
+ v_add_nc_u32 v0, v0, 256 //mem offset increased by 256 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_LDS_LOOP_W64 //LDS save is complete?
+
+L_SAVE_LDS_DONE:
+	/* save VGPRs - save the remaining VGPRs */
+L_SAVE_VGPR:
+ // VGPR SR memory offset: 0
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_SAVE_VGPR_EXEC_HI
+	s_mov_b32 s_save_mem_offset, (0+128*4) // for the remaining VGPRs
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_SAVE_VGPR_NORMAL
+L_ENABLE_SAVE_VGPR_EXEC_HI:
+	s_mov_b32 s_save_mem_offset, (0+256*4) // for the remaining VGPRs
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+L_SAVE_VGPR_NORMAL:
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
+ //determine it is wave32 or wave64
+ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_SAVE_VGPR_WAVE64
+
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR Allocated in 4-GPR granularity
+
+ // VGPR store using dw burst
+ s_mov_b32 m0, 0x4 //VGPR initial index value =4
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc0 L_SAVE_VGPR_END
+
+L_SAVE_VGPR_W32_LOOP:
+ v_movrels_b32 v0, v0 //v0 = v[0+m0]
+ v_movrels_b32 v1, v1 //v1 = v[1+m0]
+ v_movrels_b32 v2, v2 //v2 = v[2+m0]
+ v_movrels_b32 v3, v3 //v3 = v[3+m0]
+
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3
+
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 128*4 //every buffer_store_dword does 128 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_VGPR_W32_LOOP //VGPR save is complete?
+
+ s_branch L_SAVE_VGPR_END
+
+L_SAVE_VGPR_WAVE64:
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR store using dw burst
+ s_mov_b32 m0, 0x4 //VGPR initial index value =4
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc0 L_SAVE_SHARED_VGPR
+
+L_SAVE_VGPR_W64_LOOP:
+ v_movrels_b32 v0, v0 //v0 = v[0+m0]
+ v_movrels_b32 v1, v1 //v1 = v[1+m0]
+ v_movrels_b32 v2, v2 //v2 = v[2+m0]
+ v_movrels_b32 v3, v3 //v3 = v[3+m0]
+
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3
+
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP //VGPR save is complete?
+
+L_SAVE_SHARED_VGPR:
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
+ s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero?
+	s_cbranch_scc0 L_SAVE_VGPR_END //no shared_vgpr used? jump to L_SAVE_VGPR_END
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value)
+ //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count.
+ //save shared_vgpr will start from the index of m0
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, m0
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+ s_mov_b32 exec_hi, 0x00000000
+
+L_SAVE_SHARED_VGPR_WAVE64_LOOP:
+ v_movrels_b32 v0, v0 //v0 = v[0+m0]
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
+ s_add_u32 m0, m0, 1 //next vgpr index
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 128
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP //SHARED_VGPR save is complete?
+
+L_SAVE_VGPR_END:
+ s_branch L_END_PGM
+
+L_RESTORE:
+	/* Setup Resource Constants */
+ s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo
+ s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi
+ s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
+ s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
+ s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
+
+ // Save s_restore_spi_init_hi for later use.
+ s_mov_b32 s_restore_spi_init_hi_save, s_restore_spi_init_hi
+
+ //determine it is wave32 or wave64
+ get_wave_size2(s_restore_size)
+
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+ s_cbranch_scc0 L_RESTORE_VGPR
+
+ /* restore LDS */
+L_RESTORE_LDS:
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_RESTORE_LDS_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_RESTORE_LDS_NORMAL
+L_ENABLE_RESTORE_LDS_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+L_RESTORE_LDS_NORMAL:
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
+ s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero?
+ s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY
+ s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes
+
+ // LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
+ //
+ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+ get_svgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes()
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_mov_b32 m0, 0x0
+ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64
+
+L_RESTORE_LDS_LOOP_W32:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
+ s_wait_idle
+ ds_store_addtid_b32 v0
+	s_add_u32 m0, m0, 128 // 128 bytes (32 DW)
+	s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //mem offset increased by 128 bytes
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W32 //LDS restore is complete?
+ s_branch L_RESTORE_VGPR
+
+L_RESTORE_LDS_LOOP_W64:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
+ s_wait_idle
+ ds_store_addtid_b32 v0
+	s_add_u32 m0, m0, 256 // 256 bytes (64 DW)
+	s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256 bytes
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64 //LDS restore is complete?
+
+ /* restore VGPRs */
+L_RESTORE_VGPR:
+ // VGPR SR memory offset : 0
+ s_mov_b32 s_restore_mem_offset, 0x0
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_ENABLE_RESTORE_VGPR_EXEC_HI
+ s_mov_b32 exec_hi, 0x00000000
+ s_branch L_RESTORE_VGPR_NORMAL
+L_ENABLE_RESTORE_VGPR_EXEC_HI:
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+L_RESTORE_VGPR_NORMAL:
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
+ //determine it is wave32 or wave64
+ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
+ s_and_b32 m0, m0, 1
+ s_cmp_eq_u32 m0, 1
+ s_cbranch_scc1 L_RESTORE_VGPR_WAVE64
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR load using dw burst
+ s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4
+ s_mov_b32 m0, 4 //VGPR initial index value = 4
+ s_cmp_lt_u32 m0, s_restore_alloc_size
+ s_cbranch_scc0 L_RESTORE_SGPR
+
+L_RESTORE_VGPR_WAVE32_LOOP:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*3
+ s_wait_idle
+ v_movreld_b32 v0, v0 //v[0+m0] = v0
+ v_movreld_b32 v1, v1
+ v_movreld_b32 v2, v2
+ v_movreld_b32 v3, v3
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4 //every buffer_load_dword does 128 bytes
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_VGPR_WAVE32_LOOP //VGPR restore (except v0) is complete?
+
+ /* VGPR restore on v0 */
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*3
+ s_wait_idle
+
+ s_branch L_RESTORE_SGPR
+
+L_RESTORE_VGPR_WAVE64:
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // VGPR load using dw burst
+ s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v4, v0 will be the last
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
+ s_mov_b32 m0, 4 //VGPR initial index value = 4
+ s_cmp_lt_u32 m0, s_restore_alloc_size
+ s_cbranch_scc0 L_RESTORE_SHARED_VGPR
+
+L_RESTORE_VGPR_WAVE64_LOOP:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256*3
+ s_wait_idle
+ v_movreld_b32 v0, v0 //v[0+m0] = v0
+ v_movreld_b32 v1, v1
+ v_movreld_b32 v2, v2
+ v_movreld_b32 v3, v3
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete?
+
+L_RESTORE_SHARED_VGPR:
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size
+ s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero?
+ s_cbranch_scc0 L_RESTORE_V0 //no shared_vgpr used?
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value)
+ //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count.
+ //restore shared_vgpr will start from the index of m0
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, m0
+ s_mov_b32 exec_lo, 0xFFFFFFFF
+ s_mov_b32 exec_hi, 0x00000000
+L_RESTORE_SHARED_VGPR_WAVE64_LOOP:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS
+ s_wait_idle
+ v_movreld_b32 v0, v0 //v[0+m0] = v0
+ s_add_u32 m0, m0, 1 //next vgpr index
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_SHARED_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete?
+
+	s_mov_b32 exec_hi, 0xFFFFFFFF //restore exec_hi before restoring v0
+
+ /* VGPR restore on v0 */
+L_RESTORE_V0:
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*3
+ s_wait_idle
+
+ /* restore SGPRs */
+	//restored as 4+8+16*6 = 108 SGPRs
+ // SGPR SR memory offset : size(VGPR)+size(SVGPR)
+L_RESTORE_SGPR:
+ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+ get_svgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+ s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 20*4 //s108~s127 is not saved
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ s_mov_b32 m0, s_sgpr_save_num
+
+ read_4sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+	s_sub_u32 m0, m0, 4 // m0 = 104: restore s[104..107]
+ s_nop 0 // hazard SALU M0=> S_MOVREL
+
+ s_movreld_b64 s0, s0 //s[0+m0] = s0
+ s_movreld_b64 s2, s2
+
+ read_8sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+	s_sub_u32 m0, m0, 8 // m0 = 96: restore s[96..103]
+ s_nop 0 // hazard SALU M0=> S_MOVREL
+
+ s_movreld_b64 s0, s0 //s[0+m0] = s0
+ s_movreld_b64 s2, s2
+ s_movreld_b64 s4, s4
+ s_movreld_b64 s6, s6
+
+ L_RESTORE_SGPR_LOOP:
+ read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+	s_sub_u32 m0, m0, 16 // restore 16 SGPRs per iteration, working down to s[0]
+ s_nop 0 // hazard SALU M0=> S_MOVREL
+
+ s_movreld_b64 s0, s0 //s[0+m0] = s0
+ s_movreld_b64 s2, s2
+ s_movreld_b64 s4, s4
+ s_movreld_b64 s6, s6
+ s_movreld_b64 s8, s8
+ s_movreld_b64 s10, s10
+ s_movreld_b64 s12, s12
+ s_movreld_b64 s14, s14
+
+	s_cmp_eq_u32 m0, 0 //scc = (m0 == 0) ? 1 : 0
+ s_cbranch_scc0 L_RESTORE_SGPR_LOOP
+
+ // s_barrier with STATE_PRIV.TRAP_AFTER_INST=1, STATUS.PRIV=1 incorrectly asserts debug exception.
+ // Clear DEBUG_EN before and restore MODE after the barrier.
+ s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE), 0
+
+ /* restore HW registers */
+L_RESTORE_HWREG:
+ // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
+ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+ get_svgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+ // Restore s_restore_spi_init_hi before the saved value gets clobbered.
+ s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save
+
+ read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_state_priv, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_excp_flag_priv, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_LO), s_restore_flat_scratch
+
+ read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_HI), s_restore_flat_scratch
+
+ read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER), s_restore_tmp
+
+ read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+ s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp
+
+ // Only the first wave needs to restore the workgroup barrier.
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+ s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
+
+ // Skip over WAVE_STATUS, since there is no state to restore from it
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4
+
+ read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
+ s_wait_idle
+
+ s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET
+ s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
+
+ // extract the saved signal count from s_restore_tmp
+ s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET
+
+ // We need to call s_barrier_signal repeatedly to restore the signal
+ // count of the work group barrier. The member count is already
+ // initialized with the number of waves in the work group.
+L_BARRIER_RESTORE_LOOP:
+ s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp
+ s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
+ s_barrier_signal -1
+ s_add_i32 s_restore_tmp, s_restore_tmp, -1
+ s_branch L_BARRIER_RESTORE_LOOP
+
+L_SKIP_BARRIER_RESTORE:
+
+ s_mov_b32 m0, s_restore_m0
+ s_mov_b32 exec_lo, s_restore_exec_lo
+ s_mov_b32 exec_hi, s_restore_exec_hi
+
+ // EXCP_FLAG_PRIV.SAVE_CONTEXT and HOST_TRAP may have changed.
+ // Only restore the other fields to avoid clobbering them.
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, 0, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE), s_restore_excp_flag_priv
+ s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE), s_restore_excp_flag_priv
+ s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT
+ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE), s_restore_excp_flag_priv
+
+ s_setreg_b32 hwreg(HW_REG_WAVE_MODE), s_restore_mode
+
+ // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
+ // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
+ get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size)
+ get_svgpr_size_bytes(s_restore_ttmps_hi)
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes()
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
+ s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
+ s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
+ s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 scope:SCOPE_SYS
+ s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 scope:SCOPE_SYS
+ s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 scope:SCOPE_SYS
+ s_wait_idle
+
+ s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
+ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+
+ s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV), s_restore_state_priv // SCC is included, which is changed by previous salu
+
+ // Make barrier and LDS state visible to all waves in the group.
+ // STATE_PRIV.BARRIER_COMPLETE may change after this point.
+ s_barrier_signal -2
+ s_barrier_wait -2
+
+ s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
+
+L_END_PGM:
+ s_endpgm_saved
+end
+
+function write_hwreg_to_v2(s)
+ // Copy into VGPR for later TCP store.
+ v_writelane_b32 v2, s, m0
+ s_add_u32 m0, m0, 0x1
+end
+
+
+function write_16sgpr_to_v2(s)
+ // Copy into VGPR for later TCP store.
+ for var sgpr_idx = 0; sgpr_idx < 16; sgpr_idx ++
+ v_writelane_b32 v2, s[sgpr_idx], ttmp13
+ s_add_u32 ttmp13, ttmp13, 0x1
+ end
+end
+
+function write_12sgpr_to_v2(s)
+ // Copy into VGPR for later TCP store.
+ for var sgpr_idx = 0; sgpr_idx < 12; sgpr_idx ++
+ v_writelane_b32 v2, s[sgpr_idx], ttmp13
+ s_add_u32 ttmp13, ttmp13, 0x1
+ end
+end
+
+function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
+ s_buffer_load_dword s, s_rsrc, s_mem_offset scope:SCOPE_SYS
+ s_add_u32 s_mem_offset, s_mem_offset, 4
+end
+
+function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
+ s_sub_u32 s_mem_offset, s_mem_offset, 4*16
+ s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset scope:SCOPE_SYS
+end
+
+function read_8sgpr_from_mem(s, s_rsrc, s_mem_offset)
+ s_sub_u32 s_mem_offset, s_mem_offset, 4*8
+ s_buffer_load_dwordx8 s, s_rsrc, s_mem_offset scope:SCOPE_SYS
+end
+
+function read_4sgpr_from_mem(s, s_rsrc, s_mem_offset)
+ s_sub_u32 s_mem_offset, s_mem_offset, 4*4
+ s_buffer_load_dwordx4 s, s_rsrc, s_mem_offset scope:SCOPE_SYS
+end
+
+function get_vgpr_size_bytes(s_vgpr_size_byte, s_size)
+ s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+ s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1
+ s_bitcmp1_b32 s_size, S_WAVE_SIZE
+ s_cbranch_scc1 L_ENABLE_SHIFT_W64
+ s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+7) //Number of VGPRs = (vgpr_size + 1) * 4 * 32 * 4 (non-zero value)
+ s_branch L_SHIFT_DONE
+L_ENABLE_SHIFT_W64:
+ s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value)
+L_SHIFT_DONE:
+end
+
+function get_svgpr_size_bytes(s_svgpr_size_byte)
+ s_getreg_b32 s_svgpr_size_byte, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
+ s_lshl_b32 s_svgpr_size_byte, s_svgpr_size_byte, (3+7)
+end
+
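+// The save area reserves 512 bytes (128 dword slots) for SGPRs even
+// though only s_sgpr_save_num (108) SGPRs are written; L_RESTORE_SGPR
+// accounts for the unused s108-s127 slots. The 128-byte HWREG block
+// holds the HWREG dwords in its first half and the ttmps stored at
+// offset 0x40 in its second half.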
+function get_sgpr_size_bytes
+ return 512
+end
+
+function get_hwreg_size_bytes
+ return 128
+end
+
+function get_wave_size2(s_reg)
+ s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE)
+ s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE
+end
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index bb26338204f4..0eabb7a8cab9 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -37,17 +37,28 @@
* gc_9_4_3:
* cpp -DASIC_FAMILY=GC_9_4_3 cwsr_trap_handler_gfx9.asm -P -o gc_9_4_3.sp3
* sp3 gc_9_4_3.sp3 -hex gc_9_4_3.hex
+ *
+ * gc_9_5_0:
+ * cpp -DASIC_FAMILY=GC_9_5_0 cwsr_trap_handler_gfx9.asm -P -o gc_9_5_0.sp3
+ * sp3 gc_9_5_0.sp3 -hex gc_9_5_0.hex
*/
#define CHIP_VEGAM 18
#define CHIP_ARCTURUS 23
#define CHIP_ALDEBARAN 25
#define CHIP_GC_9_4_3 26
+#define CHIP_GC_9_5_0 27
var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency
var SAVE_AFTER_XNACK_ERROR = 1 //workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger
var SINGLE_STEP_MISSED_WORKAROUND = (ASIC_FAMILY <= CHIP_ALDEBARAN) //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
+#if ASIC_FAMILY < CHIP_GC_9_4_3
+#define VMEM_MODIFIERS slc:1 glc:1
+#else
+#define VMEM_MODIFIERS sc0:1 nt:1
+#endif
+
/**************************************************************************/
/* variables */
/**************************************************************************/
@@ -62,7 +73,13 @@ var SQ_WAVE_STATUS_ALLOW_REPLAY_MASK = 0x400000
var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
+#if ASIC_FAMILY >= CHIP_GC_9_5_0
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 11
+var LDS_RESTORE_GRANULARITY_BYTES = 1280
+#else
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
+var LDS_RESTORE_GRANULARITY_BYTES = 512
+#endif
var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6
var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits
var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24
@@ -557,12 +574,21 @@ if SAVE_AFTER_XNACK_ERROR
v_lshlrev_b32 v2, 2, v3
L_SAVE_LDS_LOOP_SQC:
+#if ASIC_FAMILY < CHIP_GC_9_5_0
ds_read2_b32 v[0:1], v2 offset0:0 offset1:0x40
s_waitcnt lgkmcnt(0)
-
write_vgprs_to_mem_with_sqc(v0, 2, s_save_buf_rsrc0, s_save_mem_offset)
v_add_u32 v2, 0x200, v2
+#else
+ // gfx950 needs to save in multiples of 256 bytes.
+ ds_read_b32 v0, v2
+ s_waitcnt lgkmcnt(0)
+ write_vgprs_to_mem_with_sqc(v0, 1, s_save_buf_rsrc0, s_save_mem_offset)
+
+ v_add_u32 v2, 0x100, v2
+#endif
+
v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
s_cbranch_vccnz L_SAVE_LDS_LOOP_SQC
@@ -581,11 +607,14 @@ end
L_SAVE_LDS_LOOP_VECTOR:
ds_read_b64 v[0:1], v2 //x =LDS[a], byte address
s_waitcnt lgkmcnt(0)
- buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1 glc:1 slc:1
+ buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset VMEM_MODIFIERS offen:1
// s_waitcnt vmcnt(0)
// v_add_u32 v2, vcc[0:1], v2, v3
v_add_u32 v2, v2, v3
v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
+#if ASIC_FAMILY >= CHIP_GC_9_5_0
+ s_mov_b64 exec, vcc
+#endif
s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR
// restore rsrc3
@@ -748,8 +777,13 @@ L_RESTORE:
L_RESTORE_LDS_LOOP:
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW
- s_add_u32 m0, m0, 256*2 // 128 DW
- s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW
+#if ASIC_FAMILY >= CHIP_GC_9_5_0
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:512 // third 64DW
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:768 // fourth 64DW
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:1024 // fifth 64DW
+#endif
+ s_add_u32 m0, m0, LDS_RESTORE_GRANULARITY_BYTES // 128/320 DW
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, LDS_RESTORE_GRANULARITY_BYTES //mem offset increased by 128/320 DW
s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
s_cbranch_scc1 L_RESTORE_LDS_LOOP //LDS restore is complete?
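
The restore loop above now advances by LDS_RESTORE_GRANULARITY_BYTES per pass: 512 bytes (two 64-DW buffer loads) before GC 9.5.0 and 1280 bytes (five 64-DW loads) from GC 9.5.0 on. A small sketch of what that means for the loop count, assuming an example LDS allocation (the helper and numbers are illustrative only):

#include <stdio.h>

/* Passes needed to restore lds_bytes of LDS at the trap handler's
 * per-iteration granularity (rounded up).
 */
static unsigned int lds_restore_iterations(unsigned int lds_bytes,
					   unsigned int granularity)
{
	return (lds_bytes + granularity - 1) / granularity;
}

int main(void)
{
	unsigned int lds_bytes = 64 * 1024;	/* example: 64 KiB of LDS */

	printf("pre-GC 9.5.0 (512 B/pass):  %u passes\n",
	       lds_restore_iterations(lds_bytes, 512));
	printf("GC 9.5.0    (1280 B/pass): %u passes\n",
	       lds_restore_iterations(lds_bytes, 1280));
	return 0;
}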
@@ -979,17 +1013,17 @@ L_TCP_STORE_CHECK_DONE:
end
function write_4vgprs_to_mem(s_rsrc, s_mem_offset)
- buffer_store_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
- buffer_store_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256
- buffer_store_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*2
- buffer_store_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*3
+ buffer_store_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
+ buffer_store_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256
+ buffer_store_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*2
+ buffer_store_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*3
end
function read_4vgprs_from_mem(s_rsrc, s_mem_offset)
- buffer_load_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
- buffer_load_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256
- buffer_load_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*2
- buffer_load_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*3
+ buffer_load_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS
+ buffer_load_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256
+ buffer_load_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*2
+ buffer_load_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*3
s_waitcnt vmcnt(0)
end
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 723f1220e1cc..693469c18c60 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1423,6 +1423,7 @@ err:
static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
+ bool cache_line_size_missing,
struct kfd_gpu_cache_info *pcache_info)
{
struct amdgpu_device *adev = kdev->adev;
@@ -1437,6 +1438,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size;
+ if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+ pcache_info[i].cache_line_size = 128;
i++;
}
/* Scalar L1 Instruction Cache per SQC */
@@ -1449,6 +1452,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size;
+ if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+ pcache_info[i].cache_line_size = 128;
i++;
}
/* Scalar L1 Data Cache per SQC */
@@ -1460,6 +1465,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size;
+ if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+ pcache_info[i].cache_line_size = 64;
i++;
}
/* GL1 Data Cache per SA */
@@ -1472,7 +1479,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
- pcache_info[i].cache_line_size = 0;
+ if (cache_line_size_missing)
+ pcache_info[i].cache_line_size = 128;
i++;
}
/* L2 Data Cache per GPU (Total Tex Cache) */
@@ -1484,6 +1492,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size;
+ if (cache_line_size_missing && !pcache_info[i].cache_line_size)
+ pcache_info[i].cache_line_size = 128;
i++;
}
/* L3 Data Cache per GPU */
@@ -1494,7 +1504,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
- pcache_info[i].cache_line_size = 0;
+ pcache_info[i].cache_line_size = 64;
i++;
}
return i;
@@ -1510,6 +1520,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
if (adev->gfx.config.gc_tcp_size_per_cu) {
pcache_info[i].cache_size = adev->gfx.config.gc_tcp_size_per_cu;
pcache_info[i].cache_level = 1;
+ /* Cacheline size not available in IP discovery for gc943,gc944 */
+ pcache_info[i].cache_line_size = 128;
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
@@ -1521,6 +1533,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
pcache_info[i].cache_size =
adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
pcache_info[i].cache_level = 1;
+ pcache_info[i].cache_line_size = 64;
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
@@ -1531,6 +1544,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
pcache_info[i].cache_level = 1;
+ pcache_info[i].cache_line_size = 64;
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
@@ -1541,6 +1555,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
if (adev->gfx.config.gc_tcc_size) {
pcache_info[i].cache_size = adev->gfx.config.gc_tcc_size;
pcache_info[i].cache_level = 2;
+ pcache_info[i].cache_line_size = 128;
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
@@ -1551,6 +1566,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
if (adev->gmc.mall_size) {
pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
pcache_info[i].cache_level = 3;
+ pcache_info[i].cache_line_size = 64;
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
@@ -1563,6 +1579,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
{
int num_of_cache_types = 0;
+ bool cache_line_size_missing = false;
switch (kdev->adev->asic_type) {
case CHIP_KAVERI:
@@ -1622,6 +1639,7 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc
break;
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
num_of_cache_types =
kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd,
*pcache_info);
@@ -1686,10 +1704,17 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
+ /* Cacheline size is not available in IP discovery for gc11, so let
+ * kfd_fill_gpu_cache_info_from_gfx_config() hard-code it.
+ */
+ cache_line_size_missing = true;
+ fallthrough;
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
num_of_cache_types =
- kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info);
+ kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd,
+ cache_line_size_missing,
+ *pcache_info);
break;
default:
*pcache_info = dummy_cache_info;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 312dfa84f29f..a8abc3091801 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -350,10 +350,27 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
{
uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
uint32_t flags = pdd->process->dbg_flags;
+ struct amdgpu_device *adev = pdd->dev->adev;
+ int r;
if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
return 0;
+ if (!pdd->proc_ctx_cpu_ptr) {
+ r = amdgpu_amdkfd_alloc_gtt_mem(adev,
+ AMDGPU_MES_PROC_CTX_SIZE,
+ &pdd->proc_ctx_bo,
+ &pdd->proc_ctx_gpu_addr,
+ &pdd->proc_ctx_cpu_ptr,
+ false);
+ if (r) {
+ dev_err(adev->dev,
+ "failed to allocate process context bo\n");
+ return r;
+ }
+ memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+ }
+
return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd->proc_ctx_gpu_addr, spi_dbg_cntl,
pdd->watch_points, flags, sq_trap_en);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
index 924d0fd85dfb..27aa1a5b120f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -79,6 +79,7 @@ static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
return (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0) ||
KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0));
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 956198da7859..a29374c86405 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -85,6 +85,7 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
case IP_VERSION(4, 4, 0):/* ALDEBARAN */
case IP_VERSION(4, 4, 2):
case IP_VERSION(4, 4, 5):
+ case IP_VERSION(4, 4, 4):
case IP_VERSION(5, 0, 0):/* NAVI10 */
case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
case IP_VERSION(5, 0, 2):/* NAVI14 */
@@ -152,6 +153,7 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
break;
case IP_VERSION(9, 4, 3): /* GC 9.4.3 */
case IP_VERSION(9, 4, 4): /* GC 9.4.4 */
+ case IP_VERSION(9, 5, 0): /* GC 9.5.0 */
kfd->device_info.event_interrupt_class =
&event_interrupt_class_v9_4_3;
break;
@@ -235,6 +237,9 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
*/
kfd->device_info.needs_pci_atomics = true;
kfd->device_info.no_atomic_fw_version = kfd->adev->gfx.rs64_enable ? 509 : 0;
+ } else if (gc_version < IP_VERSION(13, 0, 0)) {
+ kfd->device_info.needs_pci_atomics = true;
+ kfd->device_info.no_atomic_fw_version = 2090;
} else {
kfd->device_info.needs_pci_atomics = true;
}
@@ -353,6 +358,10 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
gfx_target_version = 90402;
f2g = &gc_9_4_3_kfd2kgd;
break;
+ case IP_VERSION(9, 5, 0):
+ gfx_target_version = 90500;
+ f2g = &gc_9_4_3_kfd2kgd;
+ break;
/* Navi10 */
case IP_VERSION(10, 1, 10):
gfx_target_version = 100100;
@@ -512,6 +521,10 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
> KFD_CWSR_TMA_OFFSET);
kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex);
+ } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 5, 0)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_5_0_hex) > PAGE_SIZE);
+ kfd->cwsr_isa = cwsr_trap_gfx9_5_0_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_5_0_hex);
} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex)
> KFD_CWSR_TMA_OFFSET);
@@ -564,6 +577,7 @@ static int kfd_gws_init(struct kfd_node *node)
&& kfd->mec2_fw_version >= 0x28) ||
(KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3) ||
KFD_GC_VERSION(node) == IP_VERSION(9, 4, 4)) ||
+ (KFD_GC_VERSION(node) == IP_VERSION(9, 5, 0)) ||
(KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0)
&& KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0)
&& kfd->mec2_fw_version >= 0x6b) ||
@@ -635,6 +649,14 @@ static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes)
struct kfd_node *knode;
unsigned int i;
+ /*
+ * flush_workqueue ensures that there are no outstanding
+ * work-queue items that will access the ih_fifo. New work items
+ * can't be created because we stopped interrupt handling above.
+ */
+ flush_workqueue(kfd->ih_wq);
+ destroy_workqueue(kfd->ih_wq);
+
for (i = 0; i < num_nodes; i++) {
knode = kfd->nodes[i];
device_queue_manager_uninit(knode->dqm);
@@ -730,14 +752,14 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1;
- /* For GFX9.4.3, we need special handling for VMIDs depending on
- * partition mode.
+ /* For multi-partition capable GPUs, we need special handling for VMIDs
+ * depending on partition mode.
* In CPX mode, the VMID range needs to be shared between XCDs.
* Additionally, there are 13 VMIDs (3-15) available for KFD. To
* divide them equally, we change starting VMID to 4 and not use
* VMID 3.
- * If the VMID range changes for GFX9.4.3, then this code MUST be
- * revisited.
+ * If the VMID range changes for multi-partition capable GPUs, then
+ * this code MUST be revisited.
*/
if (kfd->adev->xcp_mgr) {
partition_mode = amdgpu_xcp_query_partition_mode(kfd->adev->xcp_mgr,
@@ -802,14 +824,12 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;
/*
- * For GFX9.4.3, the KFD abstracts all partitions within a socket as
- * xGMI connected in the topology so assign a unique hive id per
- * device based on the pci device location if device is in PCIe mode.
+ * For multi-partition capable GPUs, the KFD abstracts all partitions
+ * within a socket as xGMI connected in the topology so assign a unique
+ * hive id per device based on the pci device location if device is in
+ * PCIe mode.
*/
- if (!kfd->hive_id &&
- (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) &&
- kfd->num_nodes > 1)
+ if (!kfd->hive_id && kfd->num_nodes > 1)
kfd->hive_id = pci_dev_id(kfd->adev->pdev);
kfd->noretry = kfd->adev->gmc.noretry;
@@ -847,12 +867,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
KFD_XCP_MEMORY_SIZE(node->adev, node->node_id) >> 20);
}
- if ((KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) &&
- partition_mode == AMDGPU_CPX_PARTITION_MODE &&
+ if (partition_mode == AMDGPU_CPX_PARTITION_MODE &&
kfd->num_nodes != 1) {
- /* For GFX9.4.3 and CPX mode, first XCD gets VMID range
- * 4-9 and second XCD gets VMID range 10-15.
+ /* For multi-partition capable GPUs and CPX mode, first
+ * XCD gets VMID range 4-9 and second XCD gets VMID
+ * range 10-15.
*/
node->vm_info.first_vmid_kfd = (i%2 == 0) ?
@@ -876,8 +895,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
amdgpu_amdkfd_get_local_mem_info(kfd->adev,
&node->local_mem_info, node->xcp);
- if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4))
+ if (kfd->adev->xcp_mgr)
kfd_setup_interrupt_bitmap(node, i);
/* Initialize the KFD node */
@@ -1056,21 +1074,6 @@ static int kfd_resume(struct kfd_node *node)
return err;
}
-static inline void kfd_queue_work(struct workqueue_struct *wq,
- struct work_struct *work)
-{
- int cpu, new_cpu;
-
- cpu = new_cpu = smp_processor_id();
- do {
- new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
- if (cpu_to_node(new_cpu) == numa_node_id())
- break;
- } while (cpu != new_cpu);
-
- queue_work_on(new_cpu, wq, work);
-}
-
/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
@@ -1096,7 +1099,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
patched_ihre, &is_patched)
&& enqueue_ih_ring_entry(node,
is_patched ? patched_ihre : ih_ring_entry)) {
- kfd_queue_work(node->ih_wq, &node->interrupt_work);
+ queue_work(node->kfd->ih_wq, &node->interrupt_work);
spin_unlock_irqrestore(&node->interrupt_lock, flags);
return;
}
@@ -1511,6 +1514,73 @@ bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id)
return kfd_compute_active(node);
}
+/**
+ * kgd2kfd_vmfault_fast_path() - KFD vm page fault interrupt handling fast path for gmc v9
+ * @adev: amdgpu device
+ * @entry: vm fault interrupt vector
+ * @retry_fault: whether this is a retry fault
+ *
+ * retry fault -
+ * with CAM enabled, adev primary ring
+ * | gmc_v9_0_process_interrupt()
+ * adev soft_ring
+ * | gmc_v9_0_process_interrupt() worker failed to recover page fault
+ * KFD node ih_fifo
+ * | KFD interrupt_wq worker
+ * kfd_signal_vm_fault_event
+ *
+ * without CAM, adev primary ring1
+ *   | gmc_v9_0_process_interrupt() worker failed to recover page fault
+ * KFD node ih_fifo
+ * | KFD interrupt_wq worker
+ * kfd_signal_vm_fault_event
+ *
+ * no-retry fault -
+ * adev primary ring
+ * | gmc_v9_0_process_interrupt()
+ * KFD node ih_fifo
+ * | KFD interrupt_wq worker
+ * kfd_signal_vm_fault_event
+ *
+ * fast path - after kfd_signal_vm_fault_event(), gmc_v9_0_process_interrupt() drops further
+ *             page faults from the same process and does not copy the interrupt to the KFD
+ *             node ih_fifo.
+ *             With the gdb debugger enabled, retry faults must be converted to no-retry
+ *             faults for the debugger, so the fast path cannot be used.
+ *
+ * Return:
+ * true - use the fast path to handle this fault
+ * false - use normal path to handle it
+ */
+bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
+ bool retry_fault)
+{
+ struct kfd_process *p;
+ u32 cam_index;
+
+ if (entry->ih == &adev->irq.ih_soft || entry->ih == &adev->irq.ih1) {
+ p = kfd_lookup_process_by_pasid(entry->pasid);
+ if (!p)
+ return true;
+
+ if (p->gpu_page_fault && !p->debug_trap_enabled) {
+ if (retry_fault && adev->irq.retry_cam_enabled) {
+ cam_index = entry->src_data[2] & 0x3ff;
+ WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
+ }
+
+ kfd_unref_process(p);
+ return true;
+ }
+
+ /*
+ * This is the first page fault, set flag and then signal user space
+ */
+ p->gpu_page_fault = true;
+ kfd_unref_process(p);
+ }
+ return false;
+}
+
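
The effect of the per-process gpu_page_fault flag above is that only the first unrecovered fault of a process is forwarded through the KFD ih_fifo; later faults take the fast path and are dropped (and, when the retry CAM is enabled, acknowledged via the doorbell). A simplified user-space simulation of just that decision, with a stand-in struct instead of the real kfd_process (illustrative only):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the two kfd_process fields the fast-path check reads. */
struct fake_process {
	bool gpu_page_fault;
	bool debug_trap_enabled;
};

/* Mirrors the core of kgd2kfd_vmfault_fast_path(): the first fault of a
 * process takes the slow path (returns false) and sets the flag; later
 * faults are dropped unless a debugger needs the no-retry conversion.
 */
static bool vmfault_fast_path(struct fake_process *p)
{
	if (p->gpu_page_fault && !p->debug_trap_enabled)
		return true;		/* drop: user space was already signalled */

	p->gpu_page_fault = true;
	return false;			/* forward to the KFD worker once */
}

int main(void)
{
	struct fake_process p = { 0 };
	int i;

	for (i = 0; i < 3; i++)
		printf("fault %d: %s path\n", i,
		       vmfault_fast_path(&p) ? "fast" : "slow");
	return 0;
}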
#if defined(CONFIG_DEBUG_FS)
/* This function will send a package to HIQ to hang the HWS
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index c79fe9069e22..1405e8affd48 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -207,6 +207,21 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
if (!down_read_trylock(&adev->reset_domain->sem))
return -EIO;
+ if (!pdd->proc_ctx_cpu_ptr) {
+ r = amdgpu_amdkfd_alloc_gtt_mem(adev,
+ AMDGPU_MES_PROC_CTX_SIZE,
+ &pdd->proc_ctx_bo,
+ &pdd->proc_ctx_gpu_addr,
+ &pdd->proc_ctx_cpu_ptr,
+ false);
+ if (r) {
+ dev_err(adev->dev,
+ "failed to allocate process context bo\n");
+ return r;
+ }
+ memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+ }
+
memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
queue_input.process_id = qpd->pqm->process->pasid;
queue_input.page_table_base_addr = qpd->page_table_base;
@@ -2373,6 +2388,9 @@ static int wait_on_destroy_queue(struct device_queue_manager *dqm,
q->process);
int ret = 0;
+ if (WARN_ON(!pdd))
+ return ret;
+
if (pdd->qpd.is_debug)
return ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index 210bcc048f4c..67137e674f1d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -64,7 +64,8 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0))
qpd->sh_mem_config |=
(1 << SH_MEM_CONFIG__F8_MODE__SHIFT);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index ea3792249209..d075f24e5f9f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -748,6 +748,16 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
uint64_t *slots = page_slots(p->signal_page);
uint32_t id;
+ /*
+ * If the id is valid but the slot is not signaled, the GPU may have signaled the same
+ * event twice before the driver had a chance to process the first interrupt; the signal
+ * slot is auto-reset after set_event() wakes up user space, so just drop the second
+ * event since the application only needs one wakeup.
+ */
+ if ((valid_id_bits > 31 || (1U << valid_id_bits) >= KFD_SIGNAL_EVENT_LIMIT) &&
+ partial_id < KFD_SIGNAL_EVENT_LIMIT && slots[partial_id] == UNSIGNALED_EVENT_SLOT)
+ goto out_unlock;
+
if (valid_id_bits)
pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n",
partial_id, valid_id_bits);
@@ -776,6 +786,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
}
}
+out_unlock:
rcu_read_unlock();
kfd_unref_process(p);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index d46a13156ee9..0cb5c582ce7d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -184,6 +184,7 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
} else {
reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
}
+ amdgpu_ras_set_err_poison(dev->adev, AMDGPU_RAS_BLOCK__GFX);
break;
case SOC15_IH_CLIENTID_VMC:
case SOC15_IH_CLIENTID_VMC1:
@@ -213,6 +214,7 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
} else {
reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
}
+ amdgpu_ras_set_err_poison(dev->adev, AMDGPU_RAS_BLOCK__SDMA);
break;
default:
dev_warn(dev->adev->dev,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index 9b6b6e882593..783c2f5a04e4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -46,7 +46,7 @@
#include <linux/kfifo.h>
#include "kfd_priv.h"
-#define KFD_IH_NUM_ENTRIES 8192
+#define KFD_IH_NUM_ENTRIES 16384
static void interrupt_wq(struct work_struct *);
@@ -62,11 +62,14 @@ int kfd_interrupt_init(struct kfd_node *node)
return r;
}
- node->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1);
- if (unlikely(!node->ih_wq)) {
- kfifo_free(&node->ih_fifo);
- dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n");
- return -ENOMEM;
+ if (!node->kfd->ih_wq) {
+ node->kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI | WQ_UNBOUND,
+ node->kfd->num_nodes);
+ if (unlikely(!node->kfd->ih_wq)) {
+ kfifo_free(&node->ih_fifo);
+ dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n");
+ return -ENOMEM;
+ }
}
spin_lock_init(&node->interrupt_lock);
@@ -96,16 +99,6 @@ void kfd_interrupt_exit(struct kfd_node *node)
spin_lock_irqsave(&node->interrupt_lock, flags);
node->interrupts_active = false;
spin_unlock_irqrestore(&node->interrupt_lock, flags);
-
- /*
- * flush_work ensures that there are no outstanding
- * work-queue items that will access interrupt_ring. New work items
- * can't be created because we stopped interrupt handling above.
- */
- flush_workqueue(node->ih_wq);
-
- destroy_workqueue(node->ih_wq);
-
kfifo_free(&node->ih_fifo);
}
@@ -114,55 +107,48 @@ void kfd_interrupt_exit(struct kfd_node *node)
*/
bool enqueue_ih_ring_entry(struct kfd_node *node, const void *ih_ring_entry)
{
- int count;
-
- count = kfifo_in(&node->ih_fifo, ih_ring_entry,
- node->kfd->device_info.ih_ring_entry_size);
- if (count != node->kfd->device_info.ih_ring_entry_size) {
- dev_dbg_ratelimited(node->adev->dev,
- "Interrupt ring overflow, dropping interrupt %d\n",
- count);
+ if (kfifo_is_full(&node->ih_fifo)) {
+ dev_warn_ratelimited(node->adev->dev, "KFD node %d ih_fifo overflow\n",
+ node->node_id);
return false;
}
+ kfifo_in(&node->ih_fifo, ih_ring_entry, node->kfd->device_info.ih_ring_entry_size);
return true;
}
/*
* Assumption: single reader/writer. This function is not re-entrant
*/
-static bool dequeue_ih_ring_entry(struct kfd_node *node, void *ih_ring_entry)
+static bool dequeue_ih_ring_entry(struct kfd_node *node, u32 **ih_ring_entry)
{
int count;
- count = kfifo_out(&node->ih_fifo, ih_ring_entry,
- node->kfd->device_info.ih_ring_entry_size);
-
- WARN_ON(count && count != node->kfd->device_info.ih_ring_entry_size);
+ if (kfifo_is_empty(&node->ih_fifo))
+ return false;
+ count = kfifo_out_linear_ptr(&node->ih_fifo, ih_ring_entry,
+ node->kfd->device_info.ih_ring_entry_size);
+ WARN_ON(count != node->kfd->device_info.ih_ring_entry_size);
return count == node->kfd->device_info.ih_ring_entry_size;
}
static void interrupt_wq(struct work_struct *work)
{
- struct kfd_node *dev = container_of(work, struct kfd_node,
- interrupt_work);
- uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
+ struct kfd_node *dev = container_of(work, struct kfd_node, interrupt_work);
+ uint32_t *ih_ring_entry;
unsigned long start_jiffies = jiffies;
- if (dev->kfd->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) {
- dev_err_once(dev->adev->dev, "Ring entry too small\n");
- return;
- }
-
- while (dequeue_ih_ring_entry(dev, ih_ring_entry)) {
+ while (dequeue_ih_ring_entry(dev, &ih_ring_entry)) {
dev->kfd->device_info.event_interrupt_class->interrupt_wq(dev,
ih_ring_entry);
+ kfifo_skip_count(&dev->ih_fifo, dev->kfd->device_info.ih_ring_entry_size);
+
if (time_is_before_jiffies(start_jiffies + HZ)) {
/* If we spent more than a second processing signals,
* reschedule the worker to avoid soft-lockup warnings
*/
- queue_work(dev->ih_wq, &dev->interrupt_work);
+ queue_work(dev->kfd->ih_wq, &dev->interrupt_work);
break;
}
}
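
The reworked dequeue path above avoids the per-entry copy: kfifo_out_linear_ptr() hands interrupt_wq() a pointer into the fifo's own buffer, the entry is processed in place, and kfifo_skip_count() releases it afterwards. A tiny user-space analogue of that peek/process/skip pattern on a fixed-size ring (the ring helpers here are illustrative, not the kernel kfifo API):

#include <stdio.h>
#include <string.h>

#define ENTRY_SIZE  8
#define NUM_ENTRIES 4

static unsigned char ring[NUM_ENTRIES][ENTRY_SIZE];
static unsigned int head, tail;			/* head: next write, tail: next read */

/* Peek the oldest entry without copying it out of the ring. */
static int ring_peek(unsigned char **entry)
{
	if (head == tail)
		return 0;			/* empty */
	*entry = ring[tail % NUM_ENTRIES];
	return ENTRY_SIZE;
}

/* Release the slot only after the entry has been fully processed. */
static void ring_skip(void)
{
	tail++;
}

int main(void)
{
	unsigned char *entry;
	unsigned int i;

	for (i = 0; i < 3; i++) {		/* producer side */
		memset(ring[head % NUM_ENTRIES], i, ENTRY_SIZE);
		head++;
	}

	while (ring_peek(&entry)) {		/* consumer side */
		printf("processing entry starting with byte %u\n", entry[0]);
		ring_skip();
	}
	return 0;
}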
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index eacfeb32f35d..4b275937d05e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -306,7 +306,7 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
spage = migrate_pfn_to_page(migrate->src[i]);
if (spage && !is_zone_device_page(spage)) {
src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
- DMA_TO_DEVICE);
+ DMA_BIDIRECTIONAL);
r = dma_mapping_error(dev, src[i]);
if (r) {
dev_err(dev, "%s: fail %d dma_map_page\n",
@@ -629,7 +629,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
goto out_oom;
}
- dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
r = dma_mapping_error(dev, dst[i]);
if (r) {
dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 84e8ea3a8a0c..ff417d5361c4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -78,7 +78,8 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
m->compute_static_thread_mgmt_se2 = se_mask[2];
m->compute_static_thread_mgmt_se3 = se_mask[3];
if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
- KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4)) {
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0)) {
m->compute_static_thread_mgmt_se4 = se_mask[4];
m->compute_static_thread_mgmt_se5 = se_mask[5];
m->compute_static_thread_mgmt_se6 = se_mask[6];
@@ -301,7 +302,8 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m->cp_hqd_ctx_save_control = 0;
if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
- KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0))
update_cu_mask(mm, mqd, minfo, 0);
set_priority(m, q);
@@ -885,7 +887,8 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) {
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
mqd->init_mqd = init_mqd_v9_4_3;
mqd->load_mqd = load_mqd_v9_4_3;
mqd->update_mqd = update_mqd_v9_4_3;
@@ -909,8 +912,10 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
+ mqd->check_preemption_failed = check_preemption_failed;
if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) {
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
mqd->init_mqd = init_mqd_hiq_v9_4_3;
mqd->load_mqd = hiq_load_mqd_kiq_v9_4_3;
mqd->destroy_mqd = destroy_hiq_mqd_v9_4_3;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 37930629edc5..4984b41cd372 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -28,6 +28,10 @@
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
+#define OVER_SUBSCRIPTION_PROCESS_COUNT (1 << 0)
+#define OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT (1 << 1)
+#define OVER_SUBSCRIPTION_GWS_QUEUE_COUNT (1 << 2)
+
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
unsigned int buffer_size_bytes)
{
@@ -40,7 +44,7 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
static void pm_calc_rlib_size(struct packet_manager *pm,
unsigned int *rlib_size,
- bool *over_subscription)
+ int *over_subscription)
{
unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
unsigned int map_queue_size;
@@ -58,17 +62,20 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
* hws_max_conc_proc has been done in
* kgd2kfd_device_init().
*/
- *over_subscription = false;
+ *over_subscription = 0;
if (node->max_proc_per_quantum > 1)
max_proc_per_quantum = node->max_proc_per_quantum;
- if ((process_count > max_proc_per_quantum) ||
- compute_queue_count > get_cp_queues_num(pm->dqm) ||
- gws_queue_count > 1) {
- *over_subscription = true;
+ if (process_count > max_proc_per_quantum)
+ *over_subscription |= OVER_SUBSCRIPTION_PROCESS_COUNT;
+ if (compute_queue_count > get_cp_queues_num(pm->dqm))
+ *over_subscription |= OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT;
+ if (gws_queue_count > 1)
+ *over_subscription |= OVER_SUBSCRIPTION_GWS_QUEUE_COUNT;
+
+ if (*over_subscription)
dev_dbg(dev, "Over subscribed runlist\n");
- }
map_queue_size = pm->pmf->map_queues_size;
/* calculate run list ib allocation size */
@@ -89,7 +96,7 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
unsigned int **rl_buffer,
uint64_t *rl_gpu_buffer,
unsigned int *rl_buffer_size,
- bool *is_over_subscription)
+ int *is_over_subscription)
{
struct kfd_node *node = pm->dqm->dev;
struct device *dev = node->adev->dev;
@@ -134,7 +141,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
struct qcm_process_device *qpd;
struct queue *q;
struct kernel_queue *kq;
- bool is_over_subscription;
+ int is_over_subscription;
rl_wptr = retval = processes_mapped = 0;
@@ -213,15 +220,20 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
if (is_over_subscription) {
if (!pm->is_over_subscription)
- dev_warn(
- dev,
- "Runlist is getting oversubscribed. Expect reduced ROCm performance.\n");
+ dev_warn(dev, "Runlist is getting oversubscribed due to%s%s%s. Expect reduced ROCm performance.\n",
+ is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT ?
+ " too many processes." : "",
+ is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT ?
+ " too many queues." : "",
+ is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT ?
+ " multiple processes using cooperative launch." : "");
+
retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
*rl_gpu_addr,
alloc_size_bytes / sizeof(uint32_t),
true);
}
- pm->is_over_subscription = is_over_subscription;
+ pm->is_over_subscription = !!is_over_subscription;
for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
pr_debug("0x%2X ", rl_buffer[i]);
@@ -248,7 +260,8 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
default:
if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) ||
KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 5, 0))
pm->pmf = &kfd_aldebaran_pm_funcs;
else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1))
pm->pmf = &kfd_v9_pm_funcs;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 9e5ca0b93b2a..d8cd913aa772 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -32,7 +32,7 @@
#include <linux/atomic.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
-#include <linux/kfd_ioctl.h>
+#include <uapi/linux/kfd_ioctl.h>
#include <linux/idr.h>
#include <linux/kfifo.h>
#include <linux/seq_file.h>
@@ -207,7 +207,8 @@ enum cache_policy {
#define KFD_SUPPORT_XNACK_PER_PROCESS(dev)\
((KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) || \
(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) || \
- (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)))
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) || \
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)))
struct kfd_node;
@@ -273,7 +274,6 @@ struct kfd_node {
/* Interrupts */
struct kfifo ih_fifo;
- struct workqueue_struct *ih_wq;
struct work_struct interrupt_work;
spinlock_t interrupt_lock;
@@ -366,6 +366,8 @@ struct kfd_dev {
struct kfd_node *nodes[MAX_KFD_NODES];
unsigned int num_nodes;
+ struct workqueue_struct *ih_wq;
+
/* Kernel doorbells for KFD device */
struct amdgpu_bo *doorbells;
@@ -1002,6 +1004,9 @@ struct kfd_process {
struct semaphore runtime_enable_sema;
bool is_runtime_retry;
struct kfd_runtime_info runtime_info;
+
+ /* true if a GPU page fault has been sent to KFD */
+ bool gpu_page_fault;
};
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
@@ -1150,7 +1155,8 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev,
uint32_t i;
if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
- KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0))
return dev->nodes[0];
for (i = 0; i < dev->num_nodes; i++)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 87cd52cf4ee9..083f83c94531 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1076,7 +1076,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
kfd_free_process_doorbells(pdd->dev->kfd, pdd);
- if (pdd->dev->kfd->shared_resources.enable_mes)
+ if (pdd->dev->kfd->shared_resources.enable_mes &&
+ pdd->proc_ctx_cpu_ptr)
amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
&pdd->proc_ctx_bo);
/*
@@ -1159,7 +1160,8 @@ static void kfd_process_wq_release(struct work_struct *work)
*/
synchronize_rcu();
ef = rcu_access_pointer(p->ef);
- dma_fence_signal(ef);
+ if (ef)
+ dma_fence_signal(ef);
kfd_process_remove_sysfs(p);
@@ -1608,7 +1610,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
struct kfd_process *p)
{
struct kfd_process_device *pdd = NULL;
- int retval = 0;
if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
return NULL;
@@ -1632,21 +1633,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
pdd->user_gpu_id = dev->id;
atomic64_set(&pdd->evict_duration_counter, 0);
- if (dev->kfd->shared_resources.enable_mes) {
- retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
- AMDGPU_MES_PROC_CTX_SIZE,
- &pdd->proc_ctx_bo,
- &pdd->proc_ctx_gpu_addr,
- &pdd->proc_ctx_cpu_ptr,
- false);
- if (retval) {
- dev_err(dev->adev->dev,
- "failed to allocate process context bo\n");
- goto err_free_pdd;
- }
- memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
- }
-
p->pdds[p->n_pdds++] = pdd;
if (kfd_dbg_is_per_vmid_supported(pdd->dev))
pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap(
@@ -1658,10 +1644,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
idr_init(&pdd->alloc_idr);
return pdd;
-
-err_free_pdd:
- kfree(pdd);
- return NULL;
}
/**
@@ -2146,10 +2128,11 @@ int kfd_process_drain_interrupts(struct kfd_process_device *pdd)
irq_drain_fence[3] = pdd->process->pasid;
/*
- * For GFX 9.4.3, send the NodeId also in IH cookie DW[3]
+ * For GFX 9.4.3/9.4.4/9.5.0, send the NodeId also in IH cookie DW[3]
*/
if (KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 4)) {
+ KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 5, 0)) {
node_id = ffs(pdd->dev->interrupt_bitmap) - 1;
irq_drain_fence[3] |= node_id << 16;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index c76db22a1000..9df56f8e09f9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -131,8 +131,9 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
if (!gws && pdd->qpd.num_gws == 0)
return -EINVAL;
- if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
- KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+ if ((KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0)) &&
!dev->kfd->shared_resources.enable_mes) {
if (gws)
ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
@@ -197,6 +198,7 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
if (pqn->q->gws) {
if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 5, 0) &&
!dev->kfd->shared_resources.enable_mes)
amdgpu_amdkfd_remove_gws_from_process(
pqm->process->kgd_process_info, pqn->q->gws);
@@ -212,13 +214,17 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
void pqm_uninit(struct process_queue_manager *pqm)
{
struct process_queue_node *pqn, *next;
- struct kfd_process_device *pdd;
list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
if (pqn->q) {
- pdd = kfd_get_process_device_data(pqn->q->device, pqm->process);
- kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
- kfd_queue_release_buffers(pdd, &pqn->q->properties);
+ struct kfd_process_device *pdd = kfd_get_process_device_data(pqn->q->device,
+ pqm->process);
+ if (pdd) {
+ kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
+ kfd_queue_release_buffers(pdd, &pqn->q->properties);
+ } else {
+ WARN_ON(!pdd);
+ }
pqm_clean_queue_resource(pqm, pqn);
}
@@ -316,11 +322,12 @@ int pqm_create_queue(struct process_queue_manager *pqm,
unsigned int max_queues = 127; /* HWS limit */
/*
- * On GFX 9.4.3, increase the number of queues that
- * can be created to 255. No HWS limit on GFX 9.4.3.
+ * On GFX 9.4.3/9.4.4/9.5.0, increase the number of queues that
+ * can be created to 255. There is no HWS limit on these ASICs.
*/
if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0))
max_queues = 255;
q = NULL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
index ad29634f8b44..ecccd7adbab4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -394,7 +394,8 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
if ((gfxv / 100 * 100) == 90400 || /* GFX_VERSION_AQUA_VANJARAM */
gfxv == 90010 || /* GFX_VERSION_ALDEBARAN */
- gfxv == 90008) /* GFX_VERSION_ARCTURUS */
+ gfxv == 90008 || /* GFX_VERSION_ARCTURUS */
+ gfxv == 90500) /* GFX 9.5.0 */
vgpr_size = 0x80000;
else if (gfxv == 110000 || /* GFX_VERSION_PLUM_BONITO */
gfxv == 110001 || /* GFX_VERSION_WHEAT_NAS */
@@ -405,9 +406,10 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
return vgpr_size;
}
-#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv) \
+#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props) \
(kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\
- LDS_SIZE_PER_CU + HWREG_SIZE_PER_CU)
+ (((gfxv) == 90500) ? (props->lds_size_in_kb << 10) : LDS_SIZE_PER_CU) +\
+ HWREG_SIZE_PER_CU)
#define CNTL_STACK_BYTES_PER_WAVE(gfxv) \
((gfxv) >= 100100 ? 12 : 8) /* GFX_VERSION_NAVI10*/
@@ -431,7 +433,7 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev)
min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512)
: cu_num * 32;
- wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv), PAGE_SIZE);
+ wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), PAGE_SIZE);
ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8;
ctl_stack_size = ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size,
PAGE_SIZE);
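
With the change above, GC 9.5.0 (gfxv 90500) sizes the LDS portion of the per-CU context-save area from the topology-reported lds_size_in_kb instead of the fixed LDS_SIZE_PER_CU, while other ASICs are unchanged. A simplified sketch of the per-CU arithmetic; the SGPR/LDS/HWREG constants and the example LDS size below are placeholders, not the driver's actual values:

#include <stdint.h>
#include <stdio.h>

/* Placeholder constants for illustration; the driver defines its own
 * SGPR_SIZE_PER_CU, LDS_SIZE_PER_CU and HWREG_SIZE_PER_CU.
 */
#define SGPR_SIZE_PER_CU	0x4000
#define LDS_SIZE_PER_CU		0x10000
#define HWREG_SIZE_PER_CU	0x1000

/* Per-CU workgroup context data size, following the updated macro:
 * gfxv 90500 derives the LDS term from lds_size_in_kb (in KiB).
 */
static uint32_t wg_ctx_data_size_per_cu(uint32_t gfxv, uint32_t vgpr_size,
					uint32_t lds_size_in_kb)
{
	uint32_t lds = (gfxv == 90500) ? (lds_size_in_kb << 10)
				       : LDS_SIZE_PER_CU;

	return vgpr_size + SGPR_SIZE_PER_CU + lds + HWREG_SIZE_PER_CU;
}

int main(void)
{
	/* the diff sets 0x80000 bytes of VGPR per CU for gfxv 90500;
	 * 160 KiB LDS here is only an example value
	 */
	printf("per-CU save size: 0x%x bytes\n",
	       wg_ctx_data_size_per_cu(90500, 0x80000, 160));
	return 0;
}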
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 3e2911895c74..bd3e20d981e0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1195,6 +1195,7 @@ svm_range_get_pte_flags(struct kfd_node *node,
struct kfd_node *bo_node;
uint32_t flags = prange->flags;
uint32_t mapping_flags = 0;
+ uint32_t gc_ip_version = KFD_GC_VERSION(node);
uint64_t pte_flags;
bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | KFD_IOCTL_SVM_FLAG_EXT_COHERENT);
@@ -1204,7 +1205,7 @@ svm_range_get_pte_flags(struct kfd_node *node,
if (domain == SVM_RANGE_VRAM_DOMAIN)
bo_node = prange->svm_bo->node;
- switch (amdgpu_ip_version(node->adev, GC_HWIP, 0)) {
+ switch (gc_ip_version) {
case IP_VERSION(9, 4, 1):
if (domain == SVM_RANGE_VRAM_DOMAIN) {
if (bo_node == node) {
@@ -1241,8 +1242,10 @@ svm_range_get_pte_flags(struct kfd_node *node,
break;
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
if (ext_coherent)
- mtype_local = node->adev->rev_id ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_UC;
+ mtype_local = (gc_ip_version < IP_VERSION(9, 5, 0) && !node->adev->rev_id) ?
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_CC;
else
mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC :
amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
@@ -1257,9 +1260,13 @@ svm_range_get_pte_flags(struct kfd_node *node,
*/
else if (svm_nodes_in_same_hive(bo_node, node) && !ext_coherent)
mapping_flags |= AMDGPU_VM_MTYPE_NC;
- /* PCIe P2P or extended system scope coherence */
- else
+ /* PCIe P2P on GPUs pre-9.5.0 */
+ else if (gc_ip_version < IP_VERSION(9, 5, 0) &&
+ !svm_nodes_in_same_hive(bo_node, node))
mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ /* Other remote memory */
+ else
+ mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
/* system memory accessed by the APU */
} else if (node->adev->flags & AMD_IS_APU) {
/* On NUMA systems, locality is determined per-page
@@ -1271,7 +1278,10 @@ svm_range_get_pte_flags(struct kfd_node *node,
mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
/* system memory accessed by the dGPU */
} else {
- mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ if (gc_ip_version < IP_VERSION(9, 5, 0))
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ else
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
}
break;
case IP_VERSION(12, 0, 0):
@@ -1299,7 +1309,7 @@ svm_range_get_pte_flags(struct kfd_node *node,
pte_flags = AMDGPU_PTE_VALID;
pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM;
pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
- if (KFD_GC_VERSION(node) >= IP_VERSION(12, 0, 0))
+ if (gc_ip_version >= IP_VERSION(12, 0, 0))
pte_flags |= AMDGPU_PTE_IS_PTE;
pte_flags |= amdgpu_gem_va_map_flags(node->adev, mapping_flags);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 9476e30d6baa..ceb9fb475ef1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1714,7 +1714,8 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
pcache->cacheline_size = pcache_info[cache_type].cache_line_size;
if (KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(knode) == IP_VERSION(9, 5, 0))
mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
else
mode = UNKNOWN_MEMORY_PARTITION_MODE;
@@ -1776,7 +1777,7 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info;
struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config;
int gpu_processor_id;
- struct kfd_cache_properties *props_ext;
+ struct kfd_cache_properties *props_ext = NULL;
int num_of_entries = 0;
int num_of_cache_types = 0;
struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 11e3f2f3b174..abd3b6564373 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -8,6 +8,8 @@ config DRM_AMD_DC
bool "AMD DC - Enable new display engine"
default y
depends on BROKEN || !CC_IS_CLANG || ARM64 || LOONGARCH || RISCV || SPARC64 || X86_64
+ select CEC_CORE
+ select CEC_NOTIFIER
select SND_HDA_COMPONENT if SND_HDA_CORE
# !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752
select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && !(CC_IS_CLANG && (ARM64 || LOONGARCH || RISCV))
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index f0a6816709ca..0ec178ca7434 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -93,10 +93,12 @@
#include <drm/drm_fourcc.h>
#include <drm/drm_edid.h>
#include <drm/drm_eld.h>
+#include <drm/drm_utils.h>
#include <drm/drm_vblank.h>
#include <drm/drm_audio_component.h>
#include <drm/drm_gem_atomic_helper.h>
+#include <media/cec-notifier.h>
#include <acpi/video.h>
#include "ivsrcid/dcn/irqsrcs_dcn_1_0.h"
@@ -955,13 +957,13 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params)
}
}
-static int dm_set_clockgating_state(void *handle,
+static int dm_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int dm_set_powergating_state(void *handle,
+static int dm_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1036,8 +1038,10 @@ static int amdgpu_dm_audio_component_get_eld(struct device *kdev, int port,
continue;
*enabled = true;
+ mutex_lock(&connector->eld_mutex);
ret = drm_eld_size(connector->eld);
memcpy(buf, connector->eld, min(max_bytes, ret));
+ mutex_unlock(&connector->eld_mutex);
break;
}
@@ -2152,9 +2156,13 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
}
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
- adev->dm.secure_display_ctxs = amdgpu_dm_crtc_secure_display_create_contexts(adev);
- if (!adev->dm.secure_display_ctxs)
+ amdgpu_dm_crtc_secure_display_create_contexts(adev);
+ if (!adev->dm.secure_display_ctx.crtc_ctx)
DRM_ERROR("amdgpu: failed to initialize secure display contexts.\n");
+
+ if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(4, 0, 1))
+ adev->dm.secure_display_ctx.support_mul_roi = true;
+
#endif
DRM_DEBUG_DRIVER("KMS initialized.\n");
@@ -2197,15 +2205,15 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
amdgpu_dm_destroy_drm_device(&adev->dm);
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
- if (adev->dm.secure_display_ctxs) {
+ if (adev->dm.secure_display_ctx.crtc_ctx) {
for (i = 0; i < adev->mode_info.num_crtc; i++) {
- if (adev->dm.secure_display_ctxs[i].crtc) {
- flush_work(&adev->dm.secure_display_ctxs[i].notify_ta_work);
- flush_work(&adev->dm.secure_display_ctxs[i].forward_roi_work);
+ if (adev->dm.secure_display_ctx.crtc_ctx[i].crtc) {
+ flush_work(&adev->dm.secure_display_ctx.crtc_ctx[i].notify_ta_work);
+ flush_work(&adev->dm.secure_display_ctx.crtc_ctx[i].forward_roi_work);
}
}
- kfree(adev->dm.secure_display_ctxs);
- adev->dm.secure_display_ctxs = NULL;
+ kfree(adev->dm.secure_display_ctx.crtc_ctx);
+ adev->dm.secure_display_ctx.crtc_ctx = NULL;
}
#endif
if (adev->dm.hdcp_workqueue) {
@@ -2338,7 +2346,8 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
return 0;
}
- r = amdgpu_ucode_request(adev, &adev->dm.fw_dmcu, "%s", fw_name_dmcu);
+ r = amdgpu_ucode_request(adev, &adev->dm.fw_dmcu, AMDGPU_UCODE_REQUIRED,
+ "%s", fw_name_dmcu);
if (r == -ENODEV) {
/* DMCU firmware is not necessary, so don't raise a fuss if it's missing */
DRM_DEBUG_KMS("dm: DMCU firmware not found\n");
@@ -2746,6 +2755,48 @@ out_fail:
mutex_unlock(&mgr->lock);
}
+void hdmi_cec_unset_edid(struct amdgpu_dm_connector *aconnector)
+{
+ struct cec_notifier *n = aconnector->notifier;
+
+ if (!n)
+ return;
+
+ cec_notifier_phys_addr_invalidate(n);
+}
+
+void hdmi_cec_set_edid(struct amdgpu_dm_connector *aconnector)
+{
+ struct drm_connector *connector = &aconnector->base;
+ struct cec_notifier *n = aconnector->notifier;
+
+ if (!n)
+ return;
+
+ cec_notifier_set_phys_addr(n,
+ connector->display_info.source_physical_address);
+}
+
+static void s3_handle_hdmi_cec(struct drm_device *ddev, bool suspend)
+{
+ struct amdgpu_dm_connector *aconnector;
+ struct drm_connector *connector;
+ struct drm_connector_list_iter conn_iter;
+
+ drm_connector_list_iter_begin(ddev, &conn_iter);
+ drm_for_each_connector_iter(connector, &conn_iter) {
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
+ aconnector = to_amdgpu_dm_connector(connector);
+ if (suspend)
+ hdmi_cec_unset_edid(aconnector);
+ else
+ hdmi_cec_set_edid(aconnector);
+ }
+ drm_connector_list_iter_end(&conn_iter);
+}
+
static void s3_handle_mst(struct drm_device *dev, bool suspend)
{
struct amdgpu_dm_connector *aconnector;
@@ -3017,6 +3068,8 @@ static int dm_suspend(struct amdgpu_ip_block *ip_block)
if (IS_ERR(adev->dm.cached_state))
return PTR_ERR(adev->dm.cached_state);
+ s3_handle_hdmi_cec(adev_to_drm(adev), true);
+
s3_handle_mst(adev_to_drm(adev), true);
amdgpu_dm_irq_suspend(adev);
@@ -3289,6 +3342,8 @@ static int dm_resume(struct amdgpu_ip_block *ip_block)
*/
amdgpu_dm_irq_resume_early(adev);
+ s3_handle_hdmi_cec(ddev, false);
+
/* On resume we need to rewrite the MSTM control bits to enable MST*/
s3_handle_mst(ddev, false);
@@ -3457,6 +3512,7 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
struct drm_connector *conn_base;
struct amdgpu_device *adev;
struct drm_luminance_range_info *luminance_range;
+ int min_input_signal_override;
if (aconnector->bl_idx == -1 ||
aconnector->dc_link->connector_signal != SIGNAL_TYPE_EDP)
@@ -3481,6 +3537,8 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
caps->aux_support = false;
else if (amdgpu_backlight == 1)
caps->aux_support = true;
+ if (caps->aux_support)
+ aconnector->dc_link->backlight_control_type = BACKLIGHT_CONTROL_AMD_AUX;
luminance_range = &conn_base->display_info.luminance_range;
@@ -3491,6 +3549,10 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
caps->aux_min_input_signal = 0;
caps->aux_max_input_signal = 512;
}
+
+ min_input_signal_override = drm_get_panel_min_brightness_quirk(aconnector->drm_edid);
+ if (min_input_signal_override >= 0)
+ caps->min_input_signal = min_input_signal_override;
}
void amdgpu_dm_update_connector_after_detect(
@@ -3596,6 +3658,7 @@ void amdgpu_dm_update_connector_after_detect(
dc_sink_retain(aconnector->dc_sink);
if (sink->dc_edid.length == 0) {
aconnector->drm_edid = NULL;
+ hdmi_cec_unset_edid(aconnector);
if (aconnector->dc_link->aux_mode) {
drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux);
}
@@ -3605,6 +3668,7 @@ void amdgpu_dm_update_connector_after_detect(
aconnector->drm_edid = drm_edid_alloc(edid, sink->dc_edid.length);
drm_edid_connector_update(connector, aconnector->drm_edid);
+ hdmi_cec_set_edid(aconnector);
if (aconnector->dc_link->aux_mode)
drm_dp_cec_attach(&aconnector->dm_dp_aux.aux,
connector->display_info.source_physical_address);
@@ -3621,6 +3685,7 @@ void amdgpu_dm_update_connector_after_detect(
amdgpu_dm_update_freesync_caps(connector, aconnector->drm_edid);
update_connector_ext_caps(aconnector);
} else {
+ hdmi_cec_unset_edid(aconnector);
drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux);
amdgpu_dm_update_freesync_caps(connector, NULL);
aconnector->num_modes = 0;
@@ -5304,7 +5369,8 @@ static int dm_init_microcode(struct amdgpu_device *adev)
/* ASIC doesn't support DMUB. */
return 0;
}
- r = amdgpu_ucode_request(adev, &adev->dm.dmub_fw, "%s", fw_name_dmub);
+ r = amdgpu_ucode_request(adev, &adev->dm.dmub_fw, AMDGPU_UCODE_REQUIRED,
+ "%s", fw_name_dmub);
return r;
}
@@ -5520,8 +5586,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev,
const u64 tiling_flags,
struct dc_plane_info *plane_info,
struct dc_plane_address *address,
- bool tmz_surface,
- bool force_disable_dcc)
+ bool tmz_surface)
{
const struct drm_framebuffer *fb = plane_state->fb;
const struct amdgpu_framebuffer *afb =
@@ -5620,7 +5685,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev,
&plane_info->tiling_info,
&plane_info->plane_size,
&plane_info->dcc, address,
- tmz_surface, force_disable_dcc);
+ tmz_surface);
if (ret)
return ret;
@@ -5641,7 +5706,6 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
struct dc_scaling_info scaling_info;
struct dc_plane_info plane_info;
int ret;
- bool force_disable_dcc = false;
ret = amdgpu_dm_plane_fill_dc_scaling_info(adev, plane_state, &scaling_info);
if (ret)
@@ -5652,13 +5716,11 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
dc_plane_state->clip_rect = scaling_info.clip_rect;
dc_plane_state->scaling_quality = scaling_info.scaling_quality;
- force_disable_dcc = adev->asic_type == CHIP_RAVEN && adev->in_suspend;
ret = fill_dc_plane_info_and_addr(adev, plane_state,
afb->tiling_flags,
&plane_info,
&dc_plane_state->address,
- afb->tmz_surface,
- force_disable_dcc);
+ afb->tmz_surface);
if (ret)
return ret;
@@ -7040,6 +7102,7 @@ static void amdgpu_dm_connector_unregister(struct drm_connector *connector)
if (amdgpu_dm_should_create_sysfs(amdgpu_dm_connector))
sysfs_remove_group(&connector->kdev->kobj, &amdgpu_group);
+ cec_notifier_conn_unregister(amdgpu_dm_connector->notifier);
drm_dp_aux_unregister(&amdgpu_dm_connector->dm_dp_aux.aux);
}
@@ -8276,6 +8339,27 @@ create_i2c(struct ddc_service *ddc_service,
return i2c;
}
+int amdgpu_dm_initialize_hdmi_connector(struct amdgpu_dm_connector *aconnector)
+{
+ struct cec_connector_info conn_info;
+ struct drm_device *ddev = aconnector->base.dev;
+ struct device *hdmi_dev = ddev->dev;
+
+ if (amdgpu_dc_debug_mask & DC_DISABLE_HDMI_CEC) {
+ drm_info(ddev, "HDMI-CEC feature masked\n");
+ return -EINVAL;
+ }
+
+ cec_fill_conn_info_from_drm(&conn_info, &aconnector->base);
+ aconnector->notifier =
+ cec_notifier_conn_register(hdmi_dev, NULL, &conn_info);
+ if (!aconnector->notifier) {
+ drm_err(ddev, "Failed to create cec notifier\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
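The hdmi_cec_set_edid()/hdmi_cec_unset_edid() helpers called from the detect path above are declared in amdgpu_dm.h; their bodies live outside this hunk. A minimal sketch of what they are assumed to do with the in-kernel cec-notifier API (the _sketch names are hypothetical):

#include <media/cec-notifier.h>

static void hdmi_cec_set_edid_sketch(struct amdgpu_dm_connector *aconnector)
{
	/* Forward the EDID-derived source physical address to the CEC adapter. */
	if (aconnector->notifier)
		cec_notifier_set_phys_addr(aconnector->notifier,
			aconnector->base.display_info.source_physical_address);
}

static void hdmi_cec_unset_edid_sketch(struct amdgpu_dm_connector *aconnector)
{
	/* Invalidate the physical address once the sink is gone. */
	if (aconnector->notifier)
		cec_notifier_phys_addr_invalidate(aconnector->notifier);
}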
/*
* Note: this function assumes that dc_link_detect() was called for the
@@ -8339,6 +8423,10 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm,
drm_connector_attach_encoder(
&aconnector->base, &aencoder->base);
+ if (connector_type == DRM_MODE_CONNECTOR_HDMIA ||
+ connector_type == DRM_MODE_CONNECTOR_HDMIB)
+ amdgpu_dm_initialize_hdmi_connector(aconnector);
+
if (connector_type == DRM_MODE_CONNECTOR_DisplayPort
|| connector_type == DRM_MODE_CONNECTOR_eDP)
amdgpu_dm_initialize_dp_connector(dm, aconnector, link->link_index);
@@ -8398,16 +8486,6 @@ static void manage_dm_interrupts(struct amdgpu_device *adev,
struct amdgpu_crtc *acrtc,
struct dm_crtc_state *acrtc_state)
{
- /*
- * We have no guarantee that the frontend index maps to the same
- * backend index - some even map to more than one.
- *
- * TODO: Use a different interrupt or check DC itself for the mapping.
- */
- int irq_type =
- amdgpu_display_crtc_idx_to_irq_type(
- adev,
- acrtc->crtc_id);
struct drm_vblank_crtc_config config = {0};
struct dc_crtc_timing *timing;
int offdelay;
@@ -8433,28 +8511,7 @@ static void manage_dm_interrupts(struct amdgpu_device *adev,
drm_crtc_vblank_on_config(&acrtc->base,
&config);
-
- amdgpu_irq_get(
- adev,
- &adev->pageflip_irq,
- irq_type);
-#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
- amdgpu_irq_get(
- adev,
- &adev->vline0_irq,
- irq_type);
-#endif
} else {
-#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
- amdgpu_irq_put(
- adev,
- &adev->vline0_irq,
- irq_type);
-#endif
- amdgpu_irq_put(
- adev,
- &adev->pageflip_irq,
- irq_type);
drm_crtc_vblank_off(&acrtc->base);
}
}
@@ -8925,6 +8982,7 @@ static void amdgpu_dm_enable_self_refresh(struct amdgpu_crtc *acrtc_attach,
struct replay_settings *pr = &acrtc_state->stream->link->replay_settings;
struct amdgpu_dm_connector *aconn =
(struct amdgpu_dm_connector *)acrtc_state->stream->dm_stream_context;
+ bool vrr_active = amdgpu_dm_crtc_vrr_active(acrtc_state);
if (acrtc_state->update_type > UPDATE_TYPE_FAST) {
if (pr->config.replay_supported && !pr->replay_feature_enabled)
@@ -8951,14 +9009,15 @@ static void amdgpu_dm_enable_self_refresh(struct amdgpu_crtc *acrtc_attach,
* adequate number of fast atomic commits to notify KMD
* of update events. See `vblank_control_worker()`.
*/
- if (acrtc_attach->dm_irq_params.allow_sr_entry &&
+ if (!vrr_active &&
+ acrtc_attach->dm_irq_params.allow_sr_entry &&
#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
!amdgpu_dm_crc_window_is_activated(acrtc_state->base.crtc) &&
#endif
(current_ts - psr->psr_dirty_rects_change_timestamp_ns) > 500000000) {
if (pr->replay_feature_enabled && !pr->replay_allow_active)
amdgpu_dm_replay_enable(acrtc_state->stream, true);
- if (psr->psr_version >= DC_PSR_VERSION_SU_1 &&
+ if (psr->psr_version == DC_PSR_VERSION_SU_1 &&
!psr->psr_allow_active && !aconn->disallow_edp_enter_psr)
amdgpu_dm_psr_enable(acrtc_state->stream);
}
@@ -9095,7 +9154,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
afb->tiling_flags,
&bundle->plane_infos[planes_count],
&bundle->flip_addrs[planes_count].address,
- afb->tmz_surface, false);
+ afb->tmz_surface);
drm_dbg_state(state->dev, "plane: id=%d dcc_en=%d\n",
new_plane_state->plane->index,
@@ -9129,7 +9188,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
acrtc_state->stream->link->psr_settings.psr_dirty_rects_change_timestamp_ns =
timestamp_ns;
if (acrtc_state->stream->link->psr_settings.psr_allow_active)
- amdgpu_dm_psr_disable(acrtc_state->stream);
+ amdgpu_dm_psr_disable(acrtc_state->stream, true);
mutex_unlock(&dm->dc_lock);
}
}
@@ -9295,11 +9354,11 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
bundle->stream_update.abm_level = &acrtc_state->abm_level;
mutex_lock(&dm->dc_lock);
- if (acrtc_state->update_type > UPDATE_TYPE_FAST) {
+ if ((acrtc_state->update_type > UPDATE_TYPE_FAST) || vrr_active) {
if (acrtc_state->stream->link->replay_settings.replay_allow_active)
amdgpu_dm_replay_disable(acrtc_state->stream);
if (acrtc_state->stream->link->psr_settings.psr_allow_active)
- amdgpu_dm_psr_disable(acrtc_state->stream);
+ amdgpu_dm_psr_disable(acrtc_state->stream, true);
}
mutex_unlock(&dm->dc_lock);
@@ -10058,14 +10117,19 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
if (amdgpu_dm_is_valid_crc_source(cur_crc_src)) {
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
if (amdgpu_dm_crc_window_is_activated(crtc)) {
+ uint8_t cnt;
spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
- acrtc->dm_irq_params.window_param.update_win = true;
-
- /**
- * It takes 2 frames for HW to stably generate CRC when
- * resuming from suspend, so we set skip_frame_cnt 2.
- */
- acrtc->dm_irq_params.window_param.skip_frame_cnt = 2;
+ for (cnt = 0; cnt < MAX_CRC_WINDOW_NUM; cnt++) {
+ if (acrtc->dm_irq_params.window_param[cnt].enable) {
+ acrtc->dm_irq_params.window_param[cnt].update_win = true;
+
+ /**
+ * It takes 2 frames for HW to stably generate CRC when
+ * resuming from suspend, so we set skip_frame_cnt to 2.
+ */
+ acrtc->dm_irq_params.window_param[cnt].skip_frame_cnt = 2;
+ }
+ }
spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
}
#endif
@@ -11153,8 +11217,8 @@ dm_get_plane_scale(struct drm_plane_state *plane_state,
int plane_src_w, plane_src_h;
dm_get_oriented_plane_size(plane_state, &plane_src_w, &plane_src_h);
- *out_plane_scale_w = plane_state->crtc_w * 1000 / plane_src_w;
- *out_plane_scale_h = plane_state->crtc_h * 1000 / plane_src_h;
+ *out_plane_scale_w = plane_src_w ? plane_state->crtc_w * 1000 / plane_src_w : 0;
+ *out_plane_scale_h = plane_src_h ? plane_state->crtc_h * 1000 / plane_src_h : 0;
}
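A quick worked example of the per-mille scale computed above: a 1920-pixel-wide source shown in a 960-pixel-wide CRTC rectangle yields *out_plane_scale_w = 960 * 1000 / 1920 = 500, i.e. a 0.5x downscale; with the new guard, a zero-sized source now reports a scale of 0 instead of dividing by zero.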
/*
@@ -11408,6 +11472,25 @@ static int dm_crtc_get_cursor_mode(struct amdgpu_device *adev,
return 0;
}
+static bool amdgpu_dm_crtc_mem_type_changed(struct drm_device *dev,
+ struct drm_atomic_state *state,
+ struct drm_crtc_state *crtc_state)
+{
+ struct drm_plane *plane;
+ struct drm_plane_state *new_plane_state, *old_plane_state;
+
+ drm_for_each_plane_mask(plane, dev, crtc_state->plane_mask) {
+ new_plane_state = drm_atomic_get_plane_state(state, plane);
+ old_plane_state = drm_atomic_get_plane_state(state, plane);
+
+ if (old_plane_state->fb && new_plane_state->fb &&
+ get_mem_type(old_plane_state->fb) != get_mem_type(new_plane_state->fb))
+ return true;
+ }
+
+ return false;
+}
+
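get_mem_type() used above is defined earlier in amdgpu_dm.c and is not part of this hunk; it is assumed to report the TTM placement backing the framebuffer, roughly as in this sketch (the _sketch name is hypothetical):

static inline uint32_t get_mem_type_sketch(struct drm_framebuffer *fb)
{
	struct amdgpu_bo *abo = gem_to_amdgpu_bo(fb->obj[0]);

	/*
	 * TTM_PL_VRAM vs. TTM_PL_TT: an async flip onto a buffer that
	 * migrated between domains is rejected by the check above.
	 */
	return abo ? abo->tbo.resource->mem_type : 0;
}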
/**
* amdgpu_dm_atomic_check() - Atomic check implementation for AMDgpu DM.
*
@@ -11605,10 +11688,6 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
/* Remove existing planes if they are modified */
for_each_oldnew_plane_in_descending_zpos(state, plane, old_plane_state, new_plane_state) {
- if (old_plane_state->fb && new_plane_state->fb &&
- get_mem_type(old_plane_state->fb) !=
- get_mem_type(new_plane_state->fb))
- lock_and_validation_needed = true;
ret = dm_update_plane_state(dc, state, plane,
old_plane_state,
@@ -11903,9 +11982,11 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
/*
* Only allow async flips for fast updates that don't change
- * the FB pitch, the DCC state, rotation, etc.
+ * the FB pitch, the DCC state, rotation, mem_type, etc.
*/
- if (new_crtc_state->async_flip && lock_and_validation_needed) {
+ if (new_crtc_state->async_flip &&
+ (lock_and_validation_needed ||
+ amdgpu_dm_crtc_mem_type_changed(dev, state, new_crtc_state))) {
drm_dbg_atomic(crtc->dev,
"[CRTC:%d:%s] async flips are only supported for fast updates\n",
crtc->base.id, crtc->name);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 6464a8378387..d2703ca7dff3 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -541,12 +541,12 @@ struct amdgpu_display_manager {
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
/**
- * @secure_display_ctxs:
+ * @secure_display_ctx:
*
- * Store the ROI information and the work_struct to command dmub and psp for
- * all crtcs.
+ * Store secure display relevant info, e.g. the ROI information,
+ * the work_struct to command dmub, etc.
*/
- struct secure_display_context *secure_display_ctxs;
+ struct secure_display_context secure_display_ctx;
#endif
/**
* @hpd_rx_offload_wq:
@@ -671,6 +671,8 @@ struct amdgpu_dm_connector {
uint32_t connector_id;
int bl_idx;
+ struct cec_notifier *notifier;
+
/* we need to mind the EDID between detect
and get modes due to analog/digital/tvencoder */
const struct drm_edid *drm_edid;
@@ -697,6 +699,8 @@ struct amdgpu_dm_connector {
struct drm_dp_mst_port *mst_output_port;
struct amdgpu_dm_connector *mst_root;
struct drm_dp_aux *dsc_aux;
+ uint32_t mst_local_bw;
+ uint16_t vc_full_pbn;
struct mutex handle_mst_msg_ready;
/* TODO see if we can merge with ddc_bus or make a dm_connector */
@@ -1010,4 +1014,8 @@ void dm_free_gpu_mem(struct amdgpu_device *adev,
bool amdgpu_dm_is_headless(struct amdgpu_device *adev);
+void hdmi_cec_set_edid(struct amdgpu_dm_connector *aconnector);
+void hdmi_cec_unset_edid(struct amdgpu_dm_connector *aconnector);
+int amdgpu_dm_initialize_hdmi_connector(struct amdgpu_dm_connector *aconnector);
+
#endif /* __AMDGPU_DM_H__ */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
index f936a35fa9eb..033bd817d871 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
@@ -30,6 +30,7 @@
#include "amdgpu_dm.h"
#include "dc.h"
#include "amdgpu_securedisplay.h"
+#include "amdgpu_dm_psr.h"
static const char *const pipe_crc_sources[] = {
"none",
@@ -83,45 +84,274 @@ const char *const *amdgpu_dm_crtc_get_crc_sources(struct drm_crtc *crtc,
}
#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
+static void update_phy_id_mapping(struct amdgpu_device *adev)
+{
+ struct drm_device *ddev = adev_to_drm(adev);
+ struct amdgpu_display_manager *dm = &adev->dm;
+ struct drm_connector *connector;
+ struct amdgpu_dm_connector *aconnector;
+ struct amdgpu_dm_connector *sort_connector[AMDGPU_DM_MAX_CRTC] = {NULL};
+ struct drm_connector_list_iter iter;
+ uint8_t idx = 0, idx_2 = 0, connector_cnt = 0;
+
+ dm->secure_display_ctx.phy_mapping_updated = false;
+
+ mutex_lock(&ddev->mode_config.mutex);
+ drm_connector_list_iter_begin(ddev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+
+ if (connector->status != connector_status_connected)
+ continue;
+
+ if (idx >= AMDGPU_DM_MAX_CRTC) {
+ DRM_WARN("%s connected connectors exceed max crtc\n", __func__);
+ mutex_unlock(&ddev->mode_config.mutex);
+ return;
+ }
+
+ aconnector = to_amdgpu_dm_connector(connector);
+
+ sort_connector[idx] = aconnector;
+ idx++;
+ connector_cnt++;
+ }
+ drm_connector_list_iter_end(&iter);
+
+ /* sort connectors by link_enc_hw_instance first */
+ for (idx = connector_cnt; idx > 1 ; idx--) {
+ for (idx_2 = 0; idx_2 < (idx - 1); idx_2++) {
+ if (sort_connector[idx_2]->dc_link->link_enc_hw_inst >
+ sort_connector[idx_2 + 1]->dc_link->link_enc_hw_inst)
+ swap(sort_connector[idx_2], sort_connector[idx_2 + 1]);
+ }
+ }
+
+ /*
+ * Sort mst connectors by RAD. mst connectors with the same enc_hw_instance are already
+ * sorted together above.
+ */
+ for (idx = 0; idx < connector_cnt; /*Do nothing*/) {
+ if (sort_connector[idx]->mst_root) {
+ uint8_t i, j, k;
+ uint8_t mst_con_cnt = 1;
+
+ for (idx_2 = (idx + 1); idx_2 < connector_cnt; idx_2++) {
+ if (sort_connector[idx_2]->mst_root == sort_connector[idx]->mst_root)
+ mst_con_cnt++;
+ else
+ break;
+ }
+
+ for (i = mst_con_cnt; i > 1; i--) {
+ for (j = idx; j < (idx + i - 2); j++) {
+ int mstb_lct = sort_connector[j]->mst_output_port->parent->lct;
+ int next_mstb_lct = sort_connector[j + 1]->mst_output_port->parent->lct;
+ u8 *rad;
+ u8 *next_rad;
+ bool swap = false;
+
+ /* Sort by mst tree depth first. Then compare RAD if depth is the same */
+ if (mstb_lct > next_mstb_lct) {
+ swap = true;
+ } else if (mstb_lct == next_mstb_lct) {
+ if (mstb_lct == 1) {
+ if (sort_connector[j]->mst_output_port->port_num > sort_connector[j + 1]->mst_output_port->port_num)
+ swap = true;
+ } else if (mstb_lct > 1) {
+ rad = sort_connector[j]->mst_output_port->parent->rad;
+ next_rad = sort_connector[j + 1]->mst_output_port->parent->rad;
+
+ for (k = 0; k < mstb_lct - 1; k++) {
+ int shift = (k % 2) ? 0 : 4;
+ int port_num = (rad[k / 2] >> shift) & 0xf;
+ int next_port_num = (next_rad[k / 2] >> shift) & 0xf;
+
+ if (port_num > next_port_num) {
+ swap = true;
+ break;
+ }
+ }
+ } else {
+ DRM_ERROR("MST LCT shouldn't be set as < 1");
+ mutex_unlock(&ddev->mode_config.mutex);
+ return;
+ }
+ }
+
+ if (swap)
+ swap(sort_connector[j], sort_connector[j + 1]);
+ }
+ }
+
+ idx += mst_con_cnt;
+ } else {
+ idx++;
+ }
+ }
+
+ /* Complete sorting. Assign the relevant result to dm->secure_display_ctx.phy_id_mapping[] */
+ memset(dm->secure_display_ctx.phy_id_mapping, 0, sizeof(dm->secure_display_ctx.phy_id_mapping));
+ for (idx = 0; idx < connector_cnt; idx++) {
+ aconnector = sort_connector[idx];
+
+ dm->secure_display_ctx.phy_id_mapping[idx].assigned = true;
+ dm->secure_display_ctx.phy_id_mapping[idx].is_mst = false;
+ dm->secure_display_ctx.phy_id_mapping[idx].enc_hw_inst = aconnector->dc_link->link_enc_hw_inst;
+
+ if (sort_connector[idx]->mst_root) {
+ dm->secure_display_ctx.phy_id_mapping[idx].is_mst = true;
+ dm->secure_display_ctx.phy_id_mapping[idx].lct = aconnector->mst_output_port->parent->lct;
+ dm->secure_display_ctx.phy_id_mapping[idx].port_num = aconnector->mst_output_port->port_num;
+ memcpy(dm->secure_display_ctx.phy_id_mapping[idx].rad,
+ aconnector->mst_output_port->parent->rad, sizeof(aconnector->mst_output_port->parent->rad));
+ }
+ }
+ mutex_unlock(&ddev->mode_config.mutex);
+
+ dm->secure_display_ctx.phy_id_mapping_cnt = connector_cnt;
+ dm->secure_display_ctx.phy_mapping_updated = true;
+}
+
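Both the RAD comparison in the sort above and the matching in get_phy_id() below unpack the DP MST Relative Address the same way: one 4-bit port number per hop, two hops per byte, high nibble first. A small illustrative helper (not part of the patch) spells out the indexing:

static inline int rad_port_num_sketch(const u8 *rad, int hop)
{
	/* Even hops sit in the high nibble, odd hops in the low nibble. */
	int shift = (hop % 2) ? 0 : 4;

	return (rad[hop / 2] >> shift) & 0xf;
}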
+static bool get_phy_id(struct amdgpu_display_manager *dm,
+ struct amdgpu_dm_connector *aconnector, uint8_t *phy_id)
+{
+ int idx, idx_2;
+ bool found = false;
+
+ /*
+ * Assume secure display starts after all connectors are probed. The connection
+ * config is static as well.
+ */
+ if (!dm->secure_display_ctx.phy_mapping_updated) {
+ DRM_WARN("%s Should update the phy id table before get it's value", __func__);
+ return false;
+ }
+
+ for (idx = 0; idx < dm->secure_display_ctx.phy_id_mapping_cnt; idx++) {
+ if (!dm->secure_display_ctx.phy_id_mapping[idx].assigned) {
+ DRM_ERROR("phy_id_mapping[%d] should be assigned", idx);
+ return false;
+ }
+
+ if (aconnector->dc_link->link_enc_hw_inst ==
+ dm->secure_display_ctx.phy_id_mapping[idx].enc_hw_inst) {
+ if (!dm->secure_display_ctx.phy_id_mapping[idx].is_mst) {
+ found = true;
+ goto out;
+ } else {
+ /* Could be caused by wrongly passing the mst root connector */
+ if (!aconnector->mst_output_port) {
+ DRM_ERROR("%s Check mst case but connector without a port assigned", __func__);
+ return false;
+ }
+
+ if (aconnector->mst_root &&
+ aconnector->mst_root->mst_mgr.mst_primary == NULL) {
+ DRM_WARN("%s pass in a stale mst connector", __func__);
+ }
+
+ if (aconnector->mst_output_port->parent->lct == dm->secure_display_ctx.phy_id_mapping[idx].lct &&
+ aconnector->mst_output_port->port_num == dm->secure_display_ctx.phy_id_mapping[idx].port_num) {
+ if (aconnector->mst_output_port->parent->lct == 1) {
+ found = true;
+ goto out;
+ } else if (aconnector->mst_output_port->parent->lct > 1) {
+ /* Check RAD */
+ for (idx_2 = 0; idx_2 < aconnector->mst_output_port->parent->lct - 1; idx_2++) {
+ int shift = (idx_2 % 2) ? 0 : 4;
+ int port_num = (aconnector->mst_output_port->parent->rad[idx_2 / 2] >> shift) & 0xf;
+ int port_num2 = (dm->secure_display_ctx.phy_id_mapping[idx].rad[idx_2 / 2] >> shift) & 0xf;
+
+ if (port_num != port_num2)
+ break;
+ }
+
+ if (idx_2 == aconnector->mst_output_port->parent->lct - 1) {
+ found = true;
+ goto out;
+ }
+ } else {
+ DRM_ERROR("lCT should be >= 1");
+ return false;
+ }
+ }
+ }
+ }
+ }
+
+out:
+ if (found) {
+ DRM_DEBUG_DRIVER("Associated secure display PHY ID as %d", idx);
+ *phy_id = idx;
+ } else {
+ DRM_WARN("Can't find associated phy ID");
+ return false;
+ }
+
+ return true;
+}
+
static void amdgpu_dm_set_crc_window_default(struct drm_crtc *crtc, struct dc_stream_state *stream)
{
struct drm_device *drm_dev = crtc->dev;
struct amdgpu_display_manager *dm = &drm_to_adev(drm_dev)->dm;
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
+ struct amdgpu_dm_connector *aconnector;
bool was_activated;
+ uint8_t phy_id;
+ unsigned long flags;
+ int i;
- spin_lock_irq(&drm_dev->event_lock);
- was_activated = acrtc->dm_irq_params.window_param.activated;
- acrtc->dm_irq_params.window_param.x_start = 0;
- acrtc->dm_irq_params.window_param.y_start = 0;
- acrtc->dm_irq_params.window_param.x_end = 0;
- acrtc->dm_irq_params.window_param.y_end = 0;
- acrtc->dm_irq_params.window_param.activated = false;
- acrtc->dm_irq_params.window_param.update_win = false;
- acrtc->dm_irq_params.window_param.skip_frame_cnt = 0;
- spin_unlock_irq(&drm_dev->event_lock);
+ spin_lock_irqsave(&drm_dev->event_lock, flags);
+ was_activated = acrtc->dm_irq_params.crc_window_activated;
+ for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) {
+ acrtc->dm_irq_params.window_param[i].x_start = 0;
+ acrtc->dm_irq_params.window_param[i].y_start = 0;
+ acrtc->dm_irq_params.window_param[i].x_end = 0;
+ acrtc->dm_irq_params.window_param[i].y_end = 0;
+ acrtc->dm_irq_params.window_param[i].enable = false;
+ acrtc->dm_irq_params.window_param[i].update_win = false;
+ acrtc->dm_irq_params.window_param[i].skip_frame_cnt = 0;
+ }
+ acrtc->dm_irq_params.crc_window_activated = false;
+ spin_unlock_irqrestore(&drm_dev->event_lock, flags);
/* Disable secure_display if it was enabled */
- if (was_activated) {
+ if (was_activated && dm->secure_display_ctx.op_mode == LEGACY_MODE) {
/* stop ROI update on this crtc */
- flush_work(&dm->secure_display_ctxs[crtc->index].notify_ta_work);
- flush_work(&dm->secure_display_ctxs[crtc->index].forward_roi_work);
- dc_stream_forward_crc_window(stream, NULL, true);
+ flush_work(&dm->secure_display_ctx.crtc_ctx[crtc->index].notify_ta_work);
+ flush_work(&dm->secure_display_ctx.crtc_ctx[crtc->index].forward_roi_work);
+ aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+
+ if (aconnector && get_phy_id(dm, aconnector, &phy_id)) {
+ if (dm->secure_display_ctx.support_mul_roi)
+ dc_stream_forward_multiple_crc_window(stream, NULL, phy_id, true);
+ else
+ dc_stream_forward_crc_window(stream, NULL, phy_id, true);
+ } else {
+ DRM_DEBUG_DRIVER("%s Can't find matching phy id", __func__);
+ }
}
}
static void amdgpu_dm_crtc_notify_ta_to_read(struct work_struct *work)
{
- struct secure_display_context *secure_display_ctx;
+ struct secure_display_crtc_context *crtc_ctx;
struct psp_context *psp;
struct ta_securedisplay_cmd *securedisplay_cmd;
struct drm_crtc *crtc;
struct dc_stream_state *stream;
+ struct amdgpu_dm_connector *aconnector;
uint8_t phy_inst;
+ struct amdgpu_display_manager *dm;
+ struct crc_data crc_cpy[MAX_CRC_WINDOW_NUM];
+ unsigned long flags;
+ uint8_t roi_idx = 0;
int ret;
+ int i;
- secure_display_ctx = container_of(work, struct secure_display_context, notify_ta_work);
- crtc = secure_display_ctx->crtc;
+ crtc_ctx = container_of(work, struct secure_display_crtc_context, notify_ta_work);
+ crtc = crtc_ctx->crtc;
if (!crtc)
return;
@@ -133,21 +363,50 @@ static void amdgpu_dm_crtc_notify_ta_to_read(struct work_struct *work)
return;
}
+ dm = &drm_to_adev(crtc->dev)->dm;
stream = to_amdgpu_crtc(crtc)->dm_irq_params.stream;
- phy_inst = stream->link->link_enc_hw_inst;
-
- /* need lock for multiple crtcs to use the command buffer */
- mutex_lock(&psp->securedisplay_context.mutex);
+ aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+ if (!aconnector)
+ return;
- psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
- TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
+ mutex_lock(&crtc->dev->mode_config.mutex);
+ if (!get_phy_id(dm, aconnector, &phy_inst)) {
+ DRM_WARN("%s Can't find mapping phy id!", __func__);
+ mutex_unlock(&crtc->dev->mode_config.mutex);
+ return;
+ }
+ mutex_unlock(&crtc->dev->mode_config.mutex);
- securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id = phy_inst;
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+ memcpy(crc_cpy, crtc_ctx->crc_info.crc, sizeof(struct crc_data) * MAX_CRC_WINDOW_NUM);
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+ /* need lock for multiple crtcs to use the command buffer */
+ mutex_lock(&psp->securedisplay_context.mutex);
/* PSP TA is expected to finish data transmission over I2C within the current frame,
* even when there are up to 4 crtcs requesting to send in this frame.
*/
- ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
+ if (dm->secure_display_ctx.support_mul_roi) {
+ psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
+ TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2);
+
+ securedisplay_cmd->securedisplay_in_message.send_roi_crc_v2.phy_id = phy_inst;
+
+ for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) {
+ if (crc_cpy[i].crc_ready)
+ roi_idx |= 1 << i;
+ }
+ securedisplay_cmd->securedisplay_in_message.send_roi_crc_v2.roi_idx = roi_idx;
+
+ ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2);
+ } else {
+ psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
+ TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
+
+ securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id = phy_inst;
+
+ ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
+ }
if (!ret) {
if (securedisplay_cmd->status != TA_SECUREDISPLAY_STATUS__SUCCESS)
@@ -160,22 +419,47 @@ static void amdgpu_dm_crtc_notify_ta_to_read(struct work_struct *work)
static void
amdgpu_dm_forward_crc_window(struct work_struct *work)
{
- struct secure_display_context *secure_display_ctx;
+ struct secure_display_crtc_context *crtc_ctx;
struct amdgpu_display_manager *dm;
struct drm_crtc *crtc;
struct dc_stream_state *stream;
+ struct amdgpu_dm_connector *aconnector;
+ struct crc_window roi_cpy[MAX_CRC_WINDOW_NUM];
+ unsigned long flags;
+ uint8_t phy_id;
- secure_display_ctx = container_of(work, struct secure_display_context, forward_roi_work);
- crtc = secure_display_ctx->crtc;
+ crtc_ctx = container_of(work, struct secure_display_crtc_context, forward_roi_work);
+ crtc = crtc_ctx->crtc;
if (!crtc)
return;
dm = &drm_to_adev(crtc->dev)->dm;
stream = to_amdgpu_crtc(crtc)->dm_irq_params.stream;
+ aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
+
+ if (!aconnector)
+ return;
+
+ mutex_lock(&crtc->dev->mode_config.mutex);
+ if (!get_phy_id(dm, aconnector, &phy_id)) {
+ DRM_WARN("%s Can't find mapping phy id!", __func__);
+ mutex_unlock(&crtc->dev->mode_config.mutex);
+ return;
+ }
+ mutex_unlock(&crtc->dev->mode_config.mutex);
+
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+ memcpy(roi_cpy, crtc_ctx->roi, sizeof(struct crc_window) * MAX_CRC_WINDOW_NUM);
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
mutex_lock(&dm->dc_lock);
- dc_stream_forward_crc_window(stream, &secure_display_ctx->rect, false);
+ if (dm->secure_display_ctx.support_mul_roi)
+ dc_stream_forward_multiple_crc_window(stream, roi_cpy,
+ phy_id, false);
+ else
+ dc_stream_forward_crc_window(stream, &roi_cpy[0].rect,
+ phy_id, false);
mutex_unlock(&dm->dc_lock);
}
@@ -186,7 +470,7 @@ bool amdgpu_dm_crc_window_is_activated(struct drm_crtc *crtc)
bool ret = false;
spin_lock_irq(&drm_dev->event_lock);
- ret = acrtc->dm_irq_params.window_param.activated;
+ ret = acrtc->dm_irq_params.crc_window_activated;
spin_unlock_irq(&drm_dev->event_lock);
return ret;
@@ -224,10 +508,14 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc,
mutex_lock(&adev->dm.dc_lock);
+ /* For PSR1, check that the panel has exited PSR */
+ if (stream_state->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1)
+ amdgpu_dm_psr_wait_disable(stream_state);
+
/* Enable or disable CRTC CRC generation */
if (dm_is_crc_source_crtc(source) || source == AMDGPU_DM_PIPE_CRC_SOURCE_NONE) {
if (!dc_stream_configure_crc(stream_state->ctx->dc,
- stream_state, NULL, enable, enable)) {
+ stream_state, NULL, enable, enable, 0, true)) {
ret = -EINVAL;
goto unlock;
}
@@ -258,6 +546,10 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
struct drm_crtc_commit *commit;
struct dm_crtc_state *crtc_state;
struct drm_device *drm_dev = crtc->dev;
+#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ struct amdgpu_display_manager *dm = &adev->dm;
+#endif
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
struct drm_dp_aux *aux = NULL;
bool enable = false;
@@ -357,6 +649,17 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
}
+ /*
+ * Reading the CRC requires the vblank interrupt handler to be
+ * enabled. Keep a reference until CRC capture stops.
+ */
+ enabled = amdgpu_dm_is_valid_crc_source(cur_crc_src);
+ if (!enabled && enable) {
+ ret = drm_crtc_vblank_get(crtc);
+ if (ret)
+ goto cleanup;
+ }
+
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
/* Reset secure_display when we change crc source from debugfs */
amdgpu_dm_set_crc_window_default(crtc, crtc_state->stream);
@@ -367,16 +670,7 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
goto cleanup;
}
- /*
- * Reading the CRC requires the vblank interrupt handler to be
- * enabled. Keep a reference until CRC capture stops.
- */
- enabled = amdgpu_dm_is_valid_crc_source(cur_crc_src);
if (!enabled && enable) {
- ret = drm_crtc_vblank_get(crtc);
- if (ret)
- goto cleanup;
-
if (dm_is_crc_source_dprx(source)) {
if (drm_dp_start_crc(aux, crtc)) {
DRM_DEBUG_DRIVER("dp start crc failed\n");
@@ -402,6 +696,13 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
/* Reset crc_skipped on dm state */
crtc_state->crc_skip_count = 0;
+#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
+ /* Initialize phy id mapping table for secure display */
+ if (dm->secure_display_ctx.op_mode == LEGACY_MODE &&
+ !dm->secure_display_ctx.phy_mapping_updated)
+ update_phy_id_mapping(adev);
+#endif
+
cleanup:
if (commit)
drm_crtc_commit_put(commit);
@@ -456,7 +757,7 @@ void amdgpu_dm_crtc_handle_crc_irq(struct drm_crtc *crtc)
}
if (dm_is_crc_source_crtc(cur_crc_src)) {
- if (!dc_stream_get_crc(stream_state->ctx->dc, stream_state,
+ if (!dc_stream_get_crc(stream_state->ctx->dc, stream_state, 0,
&crcs[0], &crcs[1], &crcs[2]))
return;
@@ -472,8 +773,17 @@ void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc)
enum amdgpu_dm_pipe_crc_source cur_crc_src;
struct amdgpu_crtc *acrtc = NULL;
struct amdgpu_device *adev = NULL;
- struct secure_display_context *secure_display_ctx = NULL;
+ struct secure_display_crtc_context *crtc_ctx = NULL;
+ bool reset_crc_frame_count[MAX_CRC_WINDOW_NUM] = {false};
+ uint32_t crc_r[MAX_CRC_WINDOW_NUM] = {0};
+ uint32_t crc_g[MAX_CRC_WINDOW_NUM] = {0};
+ uint32_t crc_b[MAX_CRC_WINDOW_NUM] = {0};
unsigned long flags1;
+ bool forward_roi_change = false;
+ bool notify_ta = false;
+ bool all_crc_ready = true;
+ struct dc_stream_state *stream_state;
+ int i;
if (crtc == NULL)
return;
@@ -481,78 +791,160 @@ void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc)
acrtc = to_amdgpu_crtc(crtc);
adev = drm_to_adev(crtc->dev);
drm_dev = crtc->dev;
+ stream_state = to_dm_crtc_state(crtc->state)->stream;
spin_lock_irqsave(&drm_dev->event_lock, flags1);
cur_crc_src = acrtc->dm_irq_params.crc_src;
/* Early return if CRC capture is not enabled. */
if (!amdgpu_dm_is_valid_crc_source(cur_crc_src) ||
- !dm_is_crc_source_crtc(cur_crc_src))
- goto cleanup;
-
- if (!acrtc->dm_irq_params.window_param.activated)
- goto cleanup;
+ !dm_is_crc_source_crtc(cur_crc_src)) {
+ spin_unlock_irqrestore(&drm_dev->event_lock, flags1);
+ return;
+ }
- if (acrtc->dm_irq_params.window_param.skip_frame_cnt) {
- acrtc->dm_irq_params.window_param.skip_frame_cnt -= 1;
- goto cleanup;
+ if (!acrtc->dm_irq_params.crc_window_activated) {
+ spin_unlock_irqrestore(&drm_dev->event_lock, flags1);
+ return;
}
- secure_display_ctx = &adev->dm.secure_display_ctxs[acrtc->crtc_id];
- if (WARN_ON(secure_display_ctx->crtc != crtc)) {
- /* We have set the crtc when creating secure_display_context,
+ crtc_ctx = &adev->dm.secure_display_ctx.crtc_ctx[acrtc->crtc_id];
+ if (WARN_ON(crtc_ctx->crtc != crtc)) {
+ /* We have set the crtc when creating secure_display_crtc_context,
* don't expect it to be changed here.
*/
- secure_display_ctx->crtc = crtc;
+ crtc_ctx->crtc = crtc;
}
- if (acrtc->dm_irq_params.window_param.update_win) {
- /* prepare work for dmub to update ROI */
- secure_display_ctx->rect.x = acrtc->dm_irq_params.window_param.x_start;
- secure_display_ctx->rect.y = acrtc->dm_irq_params.window_param.y_start;
- secure_display_ctx->rect.width = acrtc->dm_irq_params.window_param.x_end -
- acrtc->dm_irq_params.window_param.x_start;
- secure_display_ctx->rect.height = acrtc->dm_irq_params.window_param.y_end -
- acrtc->dm_irq_params.window_param.y_start;
- schedule_work(&secure_display_ctx->forward_roi_work);
-
- acrtc->dm_irq_params.window_param.update_win = false;
+ for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) {
+ struct crc_params crc_window = {
+ .windowa_x_start = acrtc->dm_irq_params.window_param[i].x_start,
+ .windowa_y_start = acrtc->dm_irq_params.window_param[i].y_start,
+ .windowa_x_end = acrtc->dm_irq_params.window_param[i].x_end,
+ .windowa_y_end = acrtc->dm_irq_params.window_param[i].y_end,
+ .windowb_x_start = acrtc->dm_irq_params.window_param[i].x_start,
+ .windowb_y_start = acrtc->dm_irq_params.window_param[i].y_start,
+ .windowb_x_end = acrtc->dm_irq_params.window_param[i].x_end,
+ .windowb_y_end = acrtc->dm_irq_params.window_param[i].y_end,
+ };
+
+ crtc_ctx->roi[i].enable = acrtc->dm_irq_params.window_param[i].enable;
+
+ if (!acrtc->dm_irq_params.window_param[i].enable) {
+ crtc_ctx->crc_info.crc[i].crc_ready = false;
+ continue;
+ }
- /* Statically skip 1 frame, because we may need to wait below things
- * before sending ROI to dmub:
- * 1. We defer the work by using system workqueue.
- * 2. We may need to wait for dc_lock before accessing dmub.
- */
- acrtc->dm_irq_params.window_param.skip_frame_cnt = 1;
+ if (acrtc->dm_irq_params.window_param[i].skip_frame_cnt) {
+ acrtc->dm_irq_params.window_param[i].skip_frame_cnt -= 1;
+ crtc_ctx->crc_info.crc[i].crc_ready = false;
+ continue;
+ }
- } else {
- /* prepare work for psp to read ROI/CRC and send to I2C */
- schedule_work(&secure_display_ctx->notify_ta_work);
+ if (acrtc->dm_irq_params.window_param[i].update_win) {
+ crtc_ctx->roi[i].rect.x = crc_window.windowa_x_start;
+ crtc_ctx->roi[i].rect.y = crc_window.windowa_y_start;
+ crtc_ctx->roi[i].rect.width = crc_window.windowa_x_end -
+ crc_window.windowa_x_start;
+ crtc_ctx->roi[i].rect.height = crc_window.windowa_y_end -
+ crc_window.windowa_y_start;
+
+ if (adev->dm.secure_display_ctx.op_mode == LEGACY_MODE)
+ /* forward task to dmub to update ROI */
+ forward_roi_change = true;
+ else if (adev->dm.secure_display_ctx.op_mode == DISPLAY_CRC_MODE)
+ /* update ROI via dm */
+ dc_stream_configure_crc(stream_state->ctx->dc, stream_state,
+ &crc_window, true, true, i, false);
+
+ reset_crc_frame_count[i] = true;
+
+ acrtc->dm_irq_params.window_param[i].update_win = false;
+
+ /* Statically skip 1 frame, because we may need to wait for the following
+ * before sending the ROI to dmub:
+ * 1. We defer the work by using system workqueue.
+ * 2. We may need to wait for dc_lock before accessing dmub.
+ */
+ acrtc->dm_irq_params.window_param[i].skip_frame_cnt = 1;
+ crtc_ctx->crc_info.crc[i].crc_ready = false;
+ } else {
+ if (!dc_stream_get_crc(stream_state->ctx->dc, stream_state, i,
+ &crc_r[i], &crc_g[i], &crc_b[i]))
+ DRM_ERROR("Secure Display: fail to get crc from engine %d\n", i);
+
+ if (adev->dm.secure_display_ctx.op_mode == LEGACY_MODE)
+ /* forward task to psp to read ROI/CRC and output via I2C */
+ notify_ta = true;
+ else if (adev->dm.secure_display_ctx.op_mode == DISPLAY_CRC_MODE)
+ /* Avoid the ROI window being changed; keep overwriting. */
+ dc_stream_configure_crc(stream_state->ctx->dc, stream_state,
+ &crc_window, true, true, i, false);
+
+ /* crc ready for psp to read out */
+ crtc_ctx->crc_info.crc[i].crc_ready = true;
+ }
}
-cleanup:
spin_unlock_irqrestore(&drm_dev->event_lock, flags1);
+
+ if (forward_roi_change)
+ schedule_work(&crtc_ctx->forward_roi_work);
+
+ if (notify_ta)
+ schedule_work(&crtc_ctx->notify_ta_work);
+
+ spin_lock_irqsave(&crtc_ctx->crc_info.lock, flags1);
+ for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) {
+ crtc_ctx->crc_info.crc[i].crc_R = crc_r[i];
+ crtc_ctx->crc_info.crc[i].crc_G = crc_g[i];
+ crtc_ctx->crc_info.crc[i].crc_B = crc_b[i];
+
+ if (!crtc_ctx->roi[i].enable) {
+ crtc_ctx->crc_info.crc[i].frame_count = 0;
+ continue;
+ }
+
+ if (!crtc_ctx->crc_info.crc[i].crc_ready)
+ all_crc_ready = false;
+
+ if (reset_crc_frame_count[i] || crtc_ctx->crc_info.crc[i].frame_count == UINT_MAX)
+ /* Reset the reference frame count after the user updates the ROI
+ * or it reaches the maximum value.
+ */
+ crtc_ctx->crc_info.crc[i].frame_count = 0;
+ else
+ crtc_ctx->crc_info.crc[i].frame_count += 1;
+ }
+ spin_unlock_irqrestore(&crtc_ctx->crc_info.lock, flags1);
+
+ if (all_crc_ready)
+ complete_all(&crtc_ctx->crc_info.completion);
}
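complete_all() above signals that every enabled window has produced a fresh CRC. The consumer is outside this excerpt; a hedged sketch of how a reader is assumed to take a consistent snapshot of crc_info (function name and timeout are illustrative):

static int read_crc_snapshot_sketch(struct secure_display_crtc_context *crtc_ctx,
				    struct crc_data out[MAX_CRC_WINDOW_NUM])
{
	unsigned long flags;

	/* Wait until the vblank handler reports all enabled windows ready. */
	if (!wait_for_completion_timeout(&crtc_ctx->crc_info.completion,
					 msecs_to_jiffies(100)))
		return -ETIMEDOUT;

	spin_lock_irqsave(&crtc_ctx->crc_info.lock, flags);
	memcpy(out, crtc_ctx->crc_info.crc, sizeof(crtc_ctx->crc_info.crc));
	spin_unlock_irqrestore(&crtc_ctx->crc_info.lock, flags);

	/* Re-arm for the next frame's CRCs. */
	reinit_completion(&crtc_ctx->crc_info.completion);
	return 0;
}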
-struct secure_display_context *
-amdgpu_dm_crtc_secure_display_create_contexts(struct amdgpu_device *adev)
+void amdgpu_dm_crtc_secure_display_create_contexts(struct amdgpu_device *adev)
{
- struct secure_display_context *secure_display_ctxs = NULL;
+ struct secure_display_crtc_context *crtc_ctx = NULL;
int i;
- secure_display_ctxs = kcalloc(adev->mode_info.num_crtc,
- sizeof(struct secure_display_context),
+ crtc_ctx = kcalloc(adev->mode_info.num_crtc,
+ sizeof(struct secure_display_crtc_context),
GFP_KERNEL);
- if (!secure_display_ctxs)
- return NULL;
+ if (!crtc_ctx) {
+ adev->dm.secure_display_ctx.crtc_ctx = NULL;
+ return;
+ }
for (i = 0; i < adev->mode_info.num_crtc; i++) {
- INIT_WORK(&secure_display_ctxs[i].forward_roi_work, amdgpu_dm_forward_crc_window);
- INIT_WORK(&secure_display_ctxs[i].notify_ta_work, amdgpu_dm_crtc_notify_ta_to_read);
- secure_display_ctxs[i].crtc = &adev->mode_info.crtcs[i]->base;
+ INIT_WORK(&crtc_ctx[i].forward_roi_work, amdgpu_dm_forward_crc_window);
+ INIT_WORK(&crtc_ctx[i].notify_ta_work, amdgpu_dm_crtc_notify_ta_to_read);
+ crtc_ctx[i].crtc = &adev->mode_info.crtcs[i]->base;
+ spin_lock_init(&crtc_ctx[i].crc_info.lock);
}
- return secure_display_ctxs;
+ adev->dm.secure_display_ctx.crtc_ctx = crtc_ctx;
+
+ adev->dm.secure_display_ctx.op_mode = DISPLAY_CRC_MODE;
}
#endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h
index 748e80ef40d0..3da056c8d20b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h
@@ -40,20 +40,53 @@ enum amdgpu_dm_pipe_crc_source {
};
#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
+#define MAX_CRTC 6
+
+enum secure_display_mode {
+ /* via dmub + psp */
+ LEGACY_MODE = 0,
+ /* driver directly */
+ DISPLAY_CRC_MODE,
+ SECURE_DISPLAY_MODE_MAX,
+};
+
+struct phy_id_mapping {
+ bool assigned;
+ bool is_mst;
+ uint8_t enc_hw_inst;
+ u8 lct;
+ u8 port_num;
+ u8 rad[8];
+};
+
+struct crc_data {
+ uint32_t crc_R;
+ uint32_t crc_G;
+ uint32_t crc_B;
+ uint32_t frame_count;
+ bool crc_ready;
+};
+
+struct crc_info {
+ struct crc_data crc[MAX_CRC_WINDOW_NUM];
+ struct completion completion;
+ spinlock_t lock;
+};
+
struct crc_window_param {
uint16_t x_start;
uint16_t y_start;
uint16_t x_end;
uint16_t y_end;
/* CRC window is activated or not*/
- bool activated;
+ bool enable;
/* Update crc window during vertical blank or not */
bool update_win;
/* skip reading/writing for few frames */
int skip_frame_cnt;
};
-struct secure_display_context {
+struct secure_display_crtc_context {
/* work to notify PSP TA*/
struct work_struct notify_ta_work;
@@ -63,7 +96,20 @@ struct secure_display_context {
struct drm_crtc *crtc;
/* Region of Interest (ROI) */
- struct rect rect;
+ struct crc_window roi[MAX_CRC_WINDOW_NUM];
+
+ struct crc_info crc_info;
+};
+
+struct secure_display_context {
+
+ struct secure_display_crtc_context *crtc_ctx;
+ /* Whether dmub supports multiple ROI settings */
+ bool support_mul_roi;
+ enum secure_display_mode op_mode;
+ bool phy_mapping_updated;
+ int phy_id_mapping_cnt;
+ struct phy_id_mapping phy_id_mapping[MAX_CRTC];
};
#endif
@@ -95,8 +141,7 @@ void amdgpu_dm_crtc_handle_crc_irq(struct drm_crtc *crtc);
#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
bool amdgpu_dm_crc_window_is_activated(struct drm_crtc *crtc);
void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc);
-struct secure_display_context *amdgpu_dm_crtc_secure_display_create_contexts(
- struct amdgpu_device *adev);
+void amdgpu_dm_crtc_secure_display_create_contexts(struct amdgpu_device *adev);
#else
#define amdgpu_dm_crc_window_is_activated(x)
#define amdgpu_dm_crtc_handle_crc_window_irq(x)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index 64a041c2af05..36a830a7440f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -93,7 +93,7 @@ int amdgpu_dm_crtc_set_vupdate_irq(struct drm_crtc *crtc, bool enable)
return rc;
}
-bool amdgpu_dm_crtc_vrr_active(struct dm_crtc_state *dm_state)
+bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state)
{
return dm_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE ||
dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED;
@@ -142,7 +142,7 @@ static void amdgpu_dm_crtc_set_panel_sr_feature(
amdgpu_dm_replay_enable(vblank_work->stream, true);
} else if (vblank_enabled) {
if (link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 && is_sr_active)
- amdgpu_dm_psr_disable(vblank_work->stream);
+ amdgpu_dm_psr_disable(vblank_work->stream, false);
} else if (link->psr_settings.psr_feature_enabled &&
allow_sr_entry && !is_sr_active && !is_crc_window_active) {
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h
index 17e948753f59..c1212947a77b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h
@@ -37,7 +37,7 @@ int amdgpu_dm_crtc_set_vupdate_irq(struct drm_crtc *crtc, bool enable);
bool amdgpu_dm_crtc_vrr_active_irq(struct amdgpu_crtc *acrtc);
-bool amdgpu_dm_crtc_vrr_active(struct dm_crtc_state *dm_state);
+bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state);
int amdgpu_dm_crtc_enable_vblank(struct drm_crtc *crtc);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 6a97bb2d9160..049046c60462 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -25,6 +25,7 @@
#include <linux/string_helpers.h>
#include <linux/uaccess.h>
+#include <media/cec-notifier.h>
#include "dc.h"
#include "amdgpu.h"
@@ -258,7 +259,7 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf,
struct dc_link *link = connector->dc_link;
struct amdgpu_device *adev = drm_to_adev(connector->base.dev);
struct dc *dc = (struct dc *)link->dc;
- struct dc_link_settings prefer_link_settings;
+ struct dc_link_settings prefer_link_settings = {0};
char *wr_buf = NULL;
const uint32_t wr_buf_size = 40;
/* 0: lane_count; 1: link_rate */
@@ -389,7 +390,7 @@ static ssize_t dp_mst_link_setting(struct file *f, const char __user *buf,
struct dc_link *link = aconnector->dc_link;
struct amdgpu_device *adev = drm_to_adev(aconnector->base.dev);
struct dc *dc = (struct dc *)link->dc;
- struct dc_link_settings prefer_link_settings;
+ struct dc_link_settings prefer_link_settings = {0};
char *wr_buf = NULL;
const uint32_t wr_buf_size = 40;
/* 0: lane_count; 1: link_rate */
@@ -613,7 +614,7 @@ static ssize_t dp_phy_settings_write(struct file *f, const char __user *buf,
uint32_t wr_buf_size = 40;
long param[3];
bool use_prefer_link_setting;
- struct link_training_settings link_lane_settings;
+ struct link_training_settings link_lane_settings = {0};
int max_param_num = 3;
uint8_t param_nums = 0;
int r = 0;
@@ -768,7 +769,7 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us
LINK_RATE_UNKNOWN, LINK_SPREAD_DISABLED};
struct dc_link_settings cur_link_settings = {LANE_COUNT_UNKNOWN,
LINK_RATE_UNKNOWN, LINK_SPREAD_DISABLED};
- struct link_training_settings link_training_settings;
+ struct link_training_settings link_training_settings = {0};
int i;
if (size == 0)
@@ -902,9 +903,10 @@ static int dmub_tracebuffer_show(struct seq_file *m, void *data)
{
struct amdgpu_device *adev = m->private;
struct dmub_srv_fb_info *fb_info = adev->dm.dmub_fb_info;
+ struct dmub_fw_meta_info *fw_meta_info = NULL;
struct dmub_debugfs_trace_entry *entries;
uint8_t *tbuf_base;
- uint32_t tbuf_size, max_entries, num_entries, i;
+ uint32_t tbuf_size, max_entries, num_entries, first_entry, i;
if (!fb_info)
return 0;
@@ -913,20 +915,42 @@ static int dmub_tracebuffer_show(struct seq_file *m, void *data)
if (!tbuf_base)
return 0;
- tbuf_size = fb_info->fb[DMUB_WINDOW_5_TRACEBUFF].size;
+ if (adev->dm.dmub_srv)
+ fw_meta_info = &adev->dm.dmub_srv->meta_info;
+
+ tbuf_size = fw_meta_info ? fw_meta_info->trace_buffer_size :
+ DMUB_TRACE_BUFFER_SIZE;
max_entries = (tbuf_size - sizeof(struct dmub_debugfs_trace_header)) /
sizeof(struct dmub_debugfs_trace_entry);
num_entries =
((struct dmub_debugfs_trace_header *)tbuf_base)->entry_count;
+ /* DMCUB tracebuffer is a ring. If it rolled over, print a hint that
+ * entries are being overwritten.
+ */
+ if (num_entries > max_entries)
+ seq_printf(m, "...\n");
+
+ first_entry = num_entries % max_entries;
num_entries = min(num_entries, max_entries);
entries = (struct dmub_debugfs_trace_entry
*)(tbuf_base +
sizeof(struct dmub_debugfs_trace_header));
- for (i = 0; i < num_entries; ++i) {
+ /* To print entries chronologically, start from the first entry to the
+ * top of the buffer, then from the base of the buffer to the first entry.
+ */
+ for (i = first_entry; i < num_entries; ++i) {
+ struct dmub_debugfs_trace_entry *entry = &entries[i];
+
+ seq_printf(m,
+ "trace_code=%u tick_count=%u param0=%u param1=%u\n",
+ entry->trace_code, entry->tick_count, entry->param0,
+ entry->param1);
+ }
+ for (i = 0; i < first_entry; ++i) {
struct dmub_debugfs_trace_entry *entry = &entries[i];
seq_printf(m,
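A worked example of the wrap-around handling above, with illustrative numbers: if max_entries = 64 and the firmware's running entry_count is 150, then first_entry = 150 % 64 = 22, so slots [22..63] are printed first and [0..21] afterwards, which keeps the output chronological; the leading "..." line hints that the 150 - 64 = 86 oldest entries have already been overwritten.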
@@ -2825,6 +2849,67 @@ static int is_dpia_link_show(struct seq_file *m, void *data)
return 0;
}
+/**
+ * hdmi_cec_state_show - Read out the HDMI-CEC feature status
+ * @m: sequence file.
+ * @data: unused.
+ *
+ * Return: 0 on success
+ */
+static int hdmi_cec_state_show(struct seq_file *m, void *data)
+{
+ struct drm_connector *connector = m->private;
+ struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+
+ seq_printf(m, "%s:%d\n", connector->name, connector->base.id);
+ seq_printf(m, "HDMI-CEC status: %d\n", aconnector->notifier ? 1 : 0);
+
+ return 0;
+}
+
+/**
+ * hdmi_cec_state_write - Enable/Disable HDMI-CEC feature from driver side
+ * @f: file structure.
+ * @buf: userspace buffer. Set to '1' to enable; '0' to disable the CEC feature.
+ * @size: size of buffer from userspace.
+ * @pos: unused.
+ *
+ * Return: size on success, error code on failure
+ */
+static ssize_t hdmi_cec_state_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ int ret;
+ bool enable;
+ struct amdgpu_dm_connector *aconnector = file_inode(f)->i_private;
+ struct drm_device *ddev = aconnector->base.dev;
+
+ if (size == 0)
+ return -EINVAL;
+
+ ret = kstrtobool_from_user(buf, size, &enable);
+ if (ret) {
+ drm_dbg_driver(ddev, "invalid user data!\n");
+ return ret;
+ }
+
+ if (enable) {
+ if (aconnector->notifier)
+ return -EINVAL;
+ ret = amdgpu_dm_initialize_hdmi_connector(aconnector);
+ if (ret)
+ return ret;
+ hdmi_cec_set_edid(aconnector);
+ } else {
+ if (!aconnector->notifier)
+ return -EINVAL;
+ cec_notifier_conn_unregister(aconnector->notifier);
+ aconnector->notifier = NULL;
+ }
+
+ return size;
+}
+
DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support);
DEFINE_SHOW_ATTRIBUTE(dmub_fw_state);
DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer);
@@ -2837,6 +2922,7 @@ DEFINE_SHOW_ATTRIBUTE(psr_capability);
DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector);
DEFINE_SHOW_ATTRIBUTE(dp_mst_progress_status);
DEFINE_SHOW_ATTRIBUTE(is_dpia_link);
+DEFINE_SHOW_STORE_ATTRIBUTE(hdmi_cec_state);
static const struct file_operations dp_dsc_clock_en_debugfs_fops = {
.owner = THIS_MODULE,
@@ -2972,7 +3058,8 @@ static const struct {
char *name;
const struct file_operations *fops;
} hdmi_debugfs_entries[] = {
- {"hdcp_sink_capability", &hdcp_sink_capability_fops}
+ {"hdcp_sink_capability", &hdcp_sink_capability_fops},
+ {"hdmi_cec_state", &hdmi_cec_state_fops}
};
/*
@@ -3457,8 +3544,8 @@ static int crc_win_x_start_set(void *data, u64 val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- acrtc->dm_irq_params.window_param.x_start = (uint16_t) val;
- acrtc->dm_irq_params.window_param.update_win = false;
+ acrtc->dm_irq_params.window_param[0].x_start = (uint16_t) val;
+ acrtc->dm_irq_params.window_param[0].update_win = false;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -3474,7 +3561,7 @@ static int crc_win_x_start_get(void *data, u64 *val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- *val = acrtc->dm_irq_params.window_param.x_start;
+ *val = acrtc->dm_irq_params.window_param[0].x_start;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -3494,8 +3581,8 @@ static int crc_win_y_start_set(void *data, u64 val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- acrtc->dm_irq_params.window_param.y_start = (uint16_t) val;
- acrtc->dm_irq_params.window_param.update_win = false;
+ acrtc->dm_irq_params.window_param[0].y_start = (uint16_t) val;
+ acrtc->dm_irq_params.window_param[0].update_win = false;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -3511,7 +3598,7 @@ static int crc_win_y_start_get(void *data, u64 *val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- *val = acrtc->dm_irq_params.window_param.y_start;
+ *val = acrtc->dm_irq_params.window_param[0].y_start;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -3530,8 +3617,8 @@ static int crc_win_x_end_set(void *data, u64 val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- acrtc->dm_irq_params.window_param.x_end = (uint16_t) val;
- acrtc->dm_irq_params.window_param.update_win = false;
+ acrtc->dm_irq_params.window_param[0].x_end = (uint16_t) val;
+ acrtc->dm_irq_params.window_param[0].update_win = false;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -3547,7 +3634,7 @@ static int crc_win_x_end_get(void *data, u64 *val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- *val = acrtc->dm_irq_params.window_param.x_end;
+ *val = acrtc->dm_irq_params.window_param[0].x_end;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -3566,8 +3653,8 @@ static int crc_win_y_end_set(void *data, u64 val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- acrtc->dm_irq_params.window_param.y_end = (uint16_t) val;
- acrtc->dm_irq_params.window_param.update_win = false;
+ acrtc->dm_irq_params.window_param[0].y_end = (uint16_t) val;
+ acrtc->dm_irq_params.window_param[0].update_win = false;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -3583,7 +3670,7 @@ static int crc_win_y_end_get(void *data, u64 *val)
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
spin_lock_irq(&drm_dev->event_lock);
- *val = acrtc->dm_irq_params.window_param.y_end;
+ *val = acrtc->dm_irq_params.window_param[0].y_end;
spin_unlock_irq(&drm_dev->event_lock);
return 0;
@@ -3606,13 +3693,14 @@ static int crc_win_update_set(void *data, u64 val)
/* PSR may write to OTG CRC window control register,
* so close it before starting secure_display.
*/
- amdgpu_dm_psr_disable(acrtc->dm_irq_params.stream);
+ amdgpu_dm_psr_disable(acrtc->dm_irq_params.stream, true);
spin_lock_irq(&adev_to_drm(adev)->event_lock);
- acrtc->dm_irq_params.window_param.activated = true;
- acrtc->dm_irq_params.window_param.update_win = true;
- acrtc->dm_irq_params.window_param.skip_frame_cnt = 0;
+ acrtc->dm_irq_params.window_param[0].enable = true;
+ acrtc->dm_irq_params.window_param[0].update_win = true;
+ acrtc->dm_irq_params.window_param[0].skip_frame_cnt = 0;
+ acrtc->dm_irq_params.crc_window_activated = true;
spin_unlock_irq(&adev_to_drm(adev)->event_lock);
mutex_unlock(&adev->dm.dc_lock);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index b0fea0856866..fbd80d8545a8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -885,6 +885,12 @@ bool dm_helpers_dp_write_dsc_enable(
return ret;
}
+bool dm_helpers_dp_write_hblank_reduction(struct dc_context *ctx, const struct dc_stream_state *stream)
+{
+ // TODO
+ return false;
+}
+
bool dm_helpers_is_dp_sink_present(struct dc_link *link)
{
bool dp_sink_present;
@@ -907,14 +913,14 @@ dm_helpers_probe_acpi_edid(void *data, u8 *buf, unsigned int block, size_t len)
struct drm_connector *connector = data;
struct acpi_device *acpidev = ACPI_COMPANION(connector->dev->dev);
unsigned char start = block * EDID_LENGTH;
- void *edid;
+ struct edid *edid;
int r;
if (!acpidev)
return -ENODEV;
/* fetch the entire edid from BIOS */
- r = acpi_video_get_edid(acpidev, ACPI_VIDEO_DISPLAY_LCD, -1, &edid);
+ r = acpi_video_get_edid(acpidev, ACPI_VIDEO_DISPLAY_LCD, -1, (void *)&edid);
if (r < 0) {
drm_dbg(connector->dev, "Failed to get EDID from ACPI: %d\n", r);
return r;
@@ -924,7 +930,14 @@ dm_helpers_probe_acpi_edid(void *data, u8 *buf, unsigned int block, size_t len)
goto cleanup;
}
- memcpy(buf, edid + start, len);
+ /* sanity check */
+ if (edid->revision < 4 || !(edid->input & DRM_EDID_INPUT_DIGITAL) ||
+ (edid->input & DRM_EDID_DIGITAL_TYPE_MASK) == DRM_EDID_DIGITAL_TYPE_UNDEF) {
+ r = -EINVAL;
+ goto cleanup;
+ }
+
+ memcpy(buf, (void *)edid + start, len);
r = 0;
cleanup:
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h
index 6a7ecc1e4602..6c9de834455b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h
@@ -39,7 +39,9 @@ struct dm_irq_params {
#ifdef CONFIG_DEBUG_FS
enum amdgpu_dm_pipe_crc_source crc_src;
#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
- struct crc_window_param window_param;
+ struct crc_window_param window_param[MAX_CRC_WINDOW_NUM];
+ /* Whether at least one CRC window is activated */
+ bool crc_window_activated;
#endif
#endif
};
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 6e4359490613..07e744da7bf4 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -155,6 +155,17 @@ amdgpu_dm_mst_connector_late_register(struct drm_connector *connector)
return 0;
}
+
+static inline void
+amdgpu_dm_mst_reset_mst_connector_setting(struct amdgpu_dm_connector *aconnector)
+{
+ aconnector->drm_edid = NULL;
+ aconnector->dsc_aux = NULL;
+ aconnector->mst_output_port->passthrough_aux = NULL;
+ aconnector->mst_local_bw = 0;
+ aconnector->vc_full_pbn = 0;
+}
+
static void
amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector)
{
@@ -182,9 +193,7 @@ amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector)
dc_sink_release(dc_sink);
aconnector->dc_sink = NULL;
- aconnector->drm_edid = NULL;
- aconnector->dsc_aux = NULL;
- port->passthrough_aux = NULL;
+ amdgpu_dm_mst_reset_mst_connector_setting(aconnector);
}
aconnector->mst_status = MST_STATUS_DEFAULT;
@@ -504,9 +513,7 @@ dm_dp_mst_detect(struct drm_connector *connector,
dc_sink_release(aconnector->dc_sink);
aconnector->dc_sink = NULL;
- aconnector->drm_edid = NULL;
- aconnector->dsc_aux = NULL;
- port->passthrough_aux = NULL;
+ amdgpu_dm_mst_reset_mst_connector_setting(aconnector);
amdgpu_dm_set_mst_status(&aconnector->mst_status,
MST_REMOTE_EDID | MST_ALLOCATE_NEW_PAYLOAD | MST_CLEAR_ALLOCATED_PAYLOAD,
@@ -590,11 +597,12 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
amdgpu_dm_set_mst_status(&aconnector->mst_status,
MST_PROBE, true);
- if (drm_connector_init(
+ if (drm_connector_dynamic_init(
dev,
connector,
&dm_dp_mst_connector_funcs,
- DRM_MODE_CONNECTOR_DisplayPort)) {
+ DRM_MODE_CONNECTOR_DisplayPort,
+ NULL)) {
kfree(aconnector);
return NULL;
}
@@ -1688,16 +1696,16 @@ clean_exit:
return ret;
}
-static unsigned int kbps_from_pbn(unsigned int pbn)
+static uint32_t kbps_from_pbn(unsigned int pbn)
{
- unsigned int kbps = pbn;
+ uint64_t kbps = (uint64_t)pbn;
kbps *= (1000000 / PEAK_FACTOR_X1000);
kbps *= 8;
kbps *= 54;
kbps /= 64;
- return kbps;
+ return (uint32_t)kbps;
}
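The switch to 64-bit intermediate math above matters because the product can exceed 32 bits. Assuming PEAK_FACTOR_X1000 is about 1006 (a ~0.6% peak overhead factor defined elsewhere in this file), the conversion is roughly

	kbps = pbn * (1000000 / 1006) * 8 * 54 / 64  ~=  pbn * 6710

so the intermediate value pbn * 994 * 8 * 54 overflows an unsigned int once pbn exceeds roughly 10000, which large MST full_pbn values can reach; multiplying in u64 and truncating only at the return keeps the result exact.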
static bool is_dsc_common_config_possible(struct dc_stream_state *stream,
@@ -1819,9 +1827,18 @@ enum dc_status dm_dp_mst_is_port_support_mode(
struct drm_dp_mst_port *immediate_upstream_port = NULL;
uint32_t end_link_bw = 0;
- /*Get last DP link BW capability*/
- if (dp_get_link_current_set_bw(&aconnector->mst_output_port->aux, &end_link_bw)) {
- if (stream_kbps > end_link_bw) {
+ /* Get last DP link BW capability. The mode shall be supported by the legacy peer */
+ if (aconnector->mst_output_port->pdt != DP_PEER_DEVICE_DP_LEGACY_CONV &&
+ aconnector->mst_output_port->pdt != DP_PEER_DEVICE_NONE) {
+ if (aconnector->vc_full_pbn != aconnector->mst_output_port->full_pbn) {
+ dp_get_link_current_set_bw(&aconnector->mst_output_port->aux, &end_link_bw);
+ aconnector->vc_full_pbn = aconnector->mst_output_port->full_pbn;
+ aconnector->mst_local_bw = end_link_bw;
+ } else {
+ end_link_bw = aconnector->mst_local_bw;
+ }
+
+ if (end_link_bw > 0 && stream_kbps > end_link_bw) {
DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link."
"Mode required bw can't fit into last link\n");
return DC_FAIL_BANDWIDTH_VALIDATE;
@@ -1835,11 +1852,15 @@ enum dc_status dm_dp_mst_is_port_support_mode(
if (immediate_upstream_port) {
virtual_channel_bw_in_kbps = kbps_from_pbn(immediate_upstream_port->full_pbn);
virtual_channel_bw_in_kbps = min(root_link_bw_in_kbps, virtual_channel_bw_in_kbps);
- if (bw_range.min_kbps > virtual_channel_bw_in_kbps) {
- DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link."
- "Max dsc compression can't fit into MST available bw\n");
- return DC_FAIL_BANDWIDTH_VALIDATE;
- }
+ } else {
+ /* For topology LCT 1 case - only one mstb */
+ virtual_channel_bw_in_kbps = root_link_bw_in_kbps;
+ }
+
+ if (bw_range.min_kbps > virtual_channel_bw_in_kbps) {
+ DRM_DEBUG_DRIVER("MST_DSC dsc decode at last link."
+ "Max dsc compression can't fit into MST available bw\n");
+ return DC_FAIL_BANDWIDTH_VALIDATE;
}
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index 495e3cd70426..774cc3f4f3fd 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -26,6 +26,7 @@
#include <drm/drm_atomic_helper.h>
#include <drm/drm_blend.h>
+#include "drm/drm_framebuffer.h"
#include <drm/drm_gem_atomic_helper.h>
#include <drm/drm_plane_helper.h>
#include <drm/drm_gem_framebuffer_helper.h>
@@ -176,7 +177,7 @@ static unsigned int amdgpu_dm_plane_modifier_gfx9_swizzle_mode(uint64_t modifier
return AMD_FMT_MOD_GET(TILE, modifier);
}
-static void amdgpu_dm_plane_fill_gfx8_tiling_info_from_flags(union dc_tiling_info *tiling_info,
+static void amdgpu_dm_plane_fill_gfx8_tiling_info_from_flags(struct dc_tiling_info *tiling_info,
uint64_t tiling_flags)
{
/* Fill GFX8 params */
@@ -189,6 +190,7 @@ static void amdgpu_dm_plane_fill_gfx8_tiling_info_from_flags(union dc_tiling_inf
tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT);
num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
+ tiling_info->gfxversion = DcGfxVersion8;
/* XXX fix me for VI */
tiling_info->gfx8.num_banks = num_banks;
tiling_info->gfx8.array_mode =
@@ -209,7 +211,7 @@ static void amdgpu_dm_plane_fill_gfx8_tiling_info_from_flags(union dc_tiling_inf
}
static void amdgpu_dm_plane_fill_gfx9_tiling_info_from_device(const struct amdgpu_device *adev,
- union dc_tiling_info *tiling_info)
+ struct dc_tiling_info *tiling_info)
{
/* Fill GFX9 params */
tiling_info->gfx9.num_pipes =
@@ -230,7 +232,7 @@ static void amdgpu_dm_plane_fill_gfx9_tiling_info_from_device(const struct amdgp
}
static void amdgpu_dm_plane_fill_gfx9_tiling_info_from_modifier(const struct amdgpu_device *adev,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
uint64_t modifier)
{
unsigned int mod_bank_xor_bits = AMD_FMT_MOD_GET(BANK_XOR_BITS, modifier);
@@ -260,7 +262,7 @@ static void amdgpu_dm_plane_fill_gfx9_tiling_info_from_modifier(const struct amd
static int amdgpu_dm_plane_validate_dcc(struct amdgpu_device *adev,
const enum surface_pixel_format format,
const enum dc_rotation_angle rotation,
- const union dc_tiling_info *tiling_info,
+ const struct dc_tiling_info *tiling_info,
const struct dc_plane_dcc_param *dcc,
const struct dc_plane_address *address,
const struct plane_size *plane_size)
@@ -307,18 +309,18 @@ static int amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(struct amdg
const enum surface_pixel_format format,
const enum dc_rotation_angle rotation,
const struct plane_size *plane_size,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct dc_plane_dcc_param *dcc,
- struct dc_plane_address *address,
- const bool force_disable_dcc)
+ struct dc_plane_address *address)
{
const uint64_t modifier = afb->base.modifier;
int ret = 0;
amdgpu_dm_plane_fill_gfx9_tiling_info_from_modifier(adev, tiling_info, modifier);
tiling_info->gfx9.swizzle = amdgpu_dm_plane_modifier_gfx9_swizzle_mode(modifier);
+ tiling_info->gfxversion = DcGfxVersion9;
- if (amdgpu_dm_plane_modifier_has_dcc(modifier) && !force_disable_dcc) {
+ if (amdgpu_dm_plane_modifier_has_dcc(modifier)) {
uint64_t dcc_address = afb->address + afb->base.offsets[1];
bool independent_64b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier);
bool independent_128b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier);
@@ -358,10 +360,9 @@ static int amdgpu_dm_plane_fill_gfx12_plane_attributes_from_modifiers(struct amd
const enum surface_pixel_format format,
const enum dc_rotation_angle rotation,
const struct plane_size *plane_size,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct dc_plane_dcc_param *dcc,
- struct dc_plane_address *address,
- const bool force_disable_dcc)
+ struct dc_plane_address *address)
{
const uint64_t modifier = afb->base.modifier;
int ret = 0;
@@ -370,8 +371,9 @@ static int amdgpu_dm_plane_fill_gfx12_plane_attributes_from_modifiers(struct amd
amdgpu_dm_plane_fill_gfx9_tiling_info_from_device(adev, tiling_info);
tiling_info->gfx9.swizzle = amdgpu_dm_plane_modifier_gfx9_swizzle_mode(modifier);
+ tiling_info->gfxversion = DcGfxAddr3;
- if (amdgpu_dm_plane_modifier_has_dcc(modifier) && !force_disable_dcc) {
+ if (amdgpu_dm_plane_modifier_has_dcc(modifier)) {
int max_compressed_block = AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier);
dcc->enable = 1;
@@ -835,12 +837,11 @@ int amdgpu_dm_plane_fill_plane_buffer_attributes(struct amdgpu_device *adev,
const enum surface_pixel_format format,
const enum dc_rotation_angle rotation,
const uint64_t tiling_flags,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
struct dc_plane_dcc_param *dcc,
struct dc_plane_address *address,
- bool tmz_surface,
- bool force_disable_dcc)
+ bool tmz_surface)
{
const struct drm_framebuffer *fb = &afb->base;
int ret;
@@ -900,16 +901,14 @@ int amdgpu_dm_plane_fill_plane_buffer_attributes(struct amdgpu_device *adev,
ret = amdgpu_dm_plane_fill_gfx12_plane_attributes_from_modifiers(adev, afb, format,
rotation, plane_size,
tiling_info, dcc,
- address,
- force_disable_dcc);
+ address);
if (ret)
return ret;
} else if (adev->family >= AMDGPU_FAMILY_AI) {
ret = amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(adev, afb, format,
rotation, plane_size,
tiling_info, dcc,
- address,
- force_disable_dcc);
+ address);
if (ret)
return ret;
} else {
@@ -1000,14 +999,13 @@ static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane,
dm_plane_state_old->dc_state != dm_plane_state_new->dc_state) {
struct dc_plane_state *plane_state =
dm_plane_state_new->dc_state;
- bool force_disable_dcc = !plane_state->dcc.enable;
amdgpu_dm_plane_fill_plane_buffer_attributes(
adev, afb, plane_state->format, plane_state->rotation,
afb->tiling_flags,
&plane_state->tiling_info, &plane_state->plane_size,
&plane_state->dcc, &plane_state->address,
- afb->tmz_surface, force_disable_dcc);
+ afb->tmz_surface);
}
return 0;
@@ -1421,6 +1419,20 @@ static void amdgpu_dm_plane_atomic_async_update(struct drm_plane *plane,
amdgpu_dm_plane_handle_cursor_update(plane, old_state);
}
+static void amdgpu_dm_plane_panic_flush(struct drm_plane *plane)
+{
+ struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane->state);
+ struct drm_framebuffer *fb = plane->state->fb;
+ struct dc_plane_state *dc_plane_state;
+
+ if (!dm_plane_state || !dm_plane_state->dc_state)
+ return;
+
+ dc_plane_state = dm_plane_state->dc_state;
+
+ dc_plane_force_update_for_panic(dc_plane_state, fb->modifier ? true : false);
+}
+
static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
.prepare_fb = amdgpu_dm_plane_helper_prepare_fb,
.cleanup_fb = amdgpu_dm_plane_helper_cleanup_fb,
@@ -1429,6 +1441,16 @@ static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
.atomic_async_update = amdgpu_dm_plane_atomic_async_update
};
+static const struct drm_plane_helper_funcs dm_primary_plane_helper_funcs = {
+ .prepare_fb = amdgpu_dm_plane_helper_prepare_fb,
+ .cleanup_fb = amdgpu_dm_plane_helper_cleanup_fb,
+ .atomic_check = amdgpu_dm_plane_atomic_check,
+ .atomic_async_check = amdgpu_dm_plane_atomic_async_check,
+ .atomic_async_update = amdgpu_dm_plane_atomic_async_update,
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = amdgpu_dm_plane_panic_flush,
+};
+
static void amdgpu_dm_plane_drm_plane_reset(struct drm_plane *plane)
{
struct dm_plane_state *amdgpu_state = NULL;
@@ -1855,7 +1877,10 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
plane->type != DRM_PLANE_TYPE_CURSOR)
drm_plane_enable_fb_damage_clips(plane);
- drm_plane_helper_add(plane, &dm_plane_helper_funcs);
+ if (plane->type == DRM_PLANE_TYPE_PRIMARY)
+ drm_plane_helper_add(plane, &dm_primary_plane_helper_funcs);
+ else
+ drm_plane_helper_add(plane, &dm_plane_helper_funcs);
#ifdef AMD_PRIVATE_COLOR
dm_atomic_plane_attach_color_mgmt_properties(dm, plane);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h
index 6498359bff6f..615d2ab2b803 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h
@@ -47,12 +47,11 @@ int amdgpu_dm_plane_fill_plane_buffer_attributes(struct amdgpu_device *adev,
const enum surface_pixel_format format,
const enum dc_rotation_angle rotation,
const uint64_t tiling_flags,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
struct dc_plane_dcc_param *dcc,
struct dc_plane_address *address,
- bool tmz_surface,
- bool force_disable_dcc);
+ bool tmz_surface);
int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
struct drm_plane *plane,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
index f40240aafe98..45858bf1523d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
@@ -201,14 +201,13 @@ void amdgpu_dm_psr_enable(struct dc_stream_state *stream)
*
* Return: true if success
*/
-bool amdgpu_dm_psr_disable(struct dc_stream_state *stream)
+bool amdgpu_dm_psr_disable(struct dc_stream_state *stream, bool wait)
{
- unsigned int power_opt = 0;
bool psr_enable = false;
DRM_DEBUG_DRIVER("Disabling psr...\n");
- return dc_link_set_psr_allow_active(stream->link, &psr_enable, true, false, &power_opt);
+ return dc_link_set_psr_allow_active(stream->link, &psr_enable, wait, false, NULL);
}
/*
@@ -251,3 +250,33 @@ bool amdgpu_dm_psr_is_active_allowed(struct amdgpu_display_manager *dm)
return allow_active;
}
+
+/**
+ * amdgpu_dm_psr_wait_disable() - Wait for eDP panel to exit PSR
+ * @stream: stream state attached to the eDP link
+ *
+ * Waits for a max of 500ms for the eDP panel to exit PSR.
+ *
+ * Return: true if panel exited PSR, false otherwise.
+ */
+bool amdgpu_dm_psr_wait_disable(struct dc_stream_state *stream)
+{
+ enum dc_psr_state psr_state = PSR_STATE0;
+ struct dc_link *link = stream->link;
+ int retry_count;
+
+ if (link == NULL)
+ return false;
+
+ for (retry_count = 0; retry_count < 1000; retry_count++) {
+ dc_link_get_psr_state(link, &psr_state);
+ if (psr_state == PSR_STATE0)
+ break;
+ udelay(500);
+ }
+
+ if (retry_count == 1000)
+ return false;
+
+ return true;
+}
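The amdgpu_dm_psr_disable() signature change together with the new
amdgpu_dm_psr_wait_disable() lets callers split "request PSR exit" from "wait
for PSR exit". A hypothetical caller sketch follows; the function name and
surrounding flow are illustrative and not part of this patch:

/* Hypothetical usage: fire the exit request early and poll only where the
 * subsequent programming actually needs the panel out of PSR. */
static void example_exit_psr_for_update(struct dc_stream_state *stream)
{
	amdgpu_dm_psr_disable(stream, false);		/* non-blocking request */

	/* ... unrelated per-stream programming can run here ... */

	if (!amdgpu_dm_psr_wait_disable(stream))	/* polls up to ~500 ms */
		DRM_DEBUG_DRIVER("eDP panel did not leave PSR in time\n");
}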
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h
index cd2d45c2b5ef..e2366321a3c1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h
@@ -34,8 +34,9 @@
void amdgpu_dm_set_psr_caps(struct dc_link *link);
void amdgpu_dm_psr_enable(struct dc_stream_state *stream);
bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream);
-bool amdgpu_dm_psr_disable(struct dc_stream_state *stream);
+bool amdgpu_dm_psr_disable(struct dc_stream_state *stream, bool wait);
bool amdgpu_dm_psr_disable_all(struct amdgpu_display_manager *dm);
bool amdgpu_dm_psr_is_active_allowed(struct amdgpu_display_manager *dm);
+bool amdgpu_dm_psr_wait_disable(struct dc_stream_state *stream);
#endif /* AMDGPU_DM_AMDGPU_DM_PSR_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index c9a6de110b74..a62f6c51301c 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -3088,11 +3088,12 @@ static enum bp_result construct_integrated_info(
info->ext_disp_conn_info.path[i].ext_encoder_obj_id.id,
info->ext_disp_conn_info.path[i].caps
);
- if (info->ext_disp_conn_info.path[i].caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN)
- DC_LOG_BIOS("BIOS EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i);
+ if ((info->ext_disp_conn_info.path[i].caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN)
+ DC_LOG_BIOS("BIOS AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i);
else if (bp->base.ctx->dc->config.force_bios_fixed_vs) {
- info->ext_disp_conn_info.path[i].caps |= EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN;
- DC_LOG_BIOS("driver forced EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i);
+ info->ext_disp_conn_info.path[i].caps &= ~AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK;
+ info->ext_disp_conn_info.path[i].caps |= AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN;
+ DC_LOG_BIOS("driver forced AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN on path %d\n", i);
}
}
// Log the Checksum and Voltage Swing
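The bios_parser2.c hunk above stops treating DP_FIXED_VS_EN as a lone flag and
instead treats the external-chip caps as a multi-bit field: detection compares
the value masked with AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK, and the forced
path clears that field before OR-ing the FIXED_VS value in. A generic sketch of
the pattern; the names below are placeholders rather than the driver's defines:

#include <stdint.h>

static inline uint16_t set_ext_chip_caps(uint16_t caps, uint16_t field_mask,
					 uint16_t chip_type)
{
	caps &= ~field_mask;		/* drop whatever chip type was encoded */
	caps |= chip_type & field_mask;	/* then record the new one */
	return caps;
}

/* detection then becomes an equality test on the masked field:
 *	if ((caps & field_mask) == chip_type) ...
 */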
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
index ab1132bc896a..d9955c5d2e5e 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
@@ -174,7 +174,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN32)
###############################################################################
# DCN35
###############################################################################
-CLK_MGR_DCN35 = dcn35_smu.o dcn35_clk_mgr.o
+CLK_MGR_DCN35 = dcn35_smu.o dcn351_clk_mgr.o dcn35_clk_mgr.o
AMD_DAL_CLK_MGR_DCN35 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn35/,$(CLK_MGR_DCN35))
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
index 0e243f4344d0..4c3e58c730b1 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
@@ -355,8 +355,11 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
BREAK_TO_DEBUGGER();
return NULL;
}
+ if (ctx->dce_version == DCN_VERSION_3_51)
+ dcn351_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
+ else
+ dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
- dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
return &clk_mgr->base.base;
}
break;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c
index 7920f6f1aa62..76c612ecfe3c 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c
@@ -34,8 +34,8 @@
#include "dm_services.h"
#include "cyan_skillfish_ip_offset.h"
-#include "dcn/dcn_2_0_3_offset.h"
-#include "dcn/dcn_2_0_3_sh_mask.h"
+#include "dcn/dcn_2_0_1_offset.h"
+#include "dcn/dcn_2_0_1_sh_mask.h"
#include "clk/clk_11_0_1_offset.h"
#include "clk/clk_11_0_1_sh_mask.h"
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c
new file mode 100644
index 000000000000..6a6ae618650b
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "core_types.h"
+#include "dcn35_clk_mgr.h"
+
+#define DCN_BASE__INST0_SEG1 0x000000C0
+#define mmCLK1_CLK_PLL_REQ 0x16E37
+
+#define mmCLK1_CLK0_DFS_CNTL 0x16E69
+#define mmCLK1_CLK1_DFS_CNTL 0x16E6C
+#define mmCLK1_CLK2_DFS_CNTL 0x16E6F
+#define mmCLK1_CLK3_DFS_CNTL 0x16E72
+#define mmCLK1_CLK4_DFS_CNTL 0x16E75
+#define mmCLK1_CLK5_DFS_CNTL 0x16E78
+
+#define mmCLK1_CLK0_CURRENT_CNT 0x16EFC
+#define mmCLK1_CLK1_CURRENT_CNT 0x16EFD
+#define mmCLK1_CLK2_CURRENT_CNT 0x16EFE
+#define mmCLK1_CLK3_CURRENT_CNT 0x16EFF
+#define mmCLK1_CLK4_CURRENT_CNT 0x16F00
+#define mmCLK1_CLK5_CURRENT_CNT 0x16F01
+
+#define mmCLK1_CLK0_BYPASS_CNTL 0x16E8A
+#define mmCLK1_CLK1_BYPASS_CNTL 0x16E93
+#define mmCLK1_CLK2_BYPASS_CNTL 0x16E9C
+#define mmCLK1_CLK3_BYPASS_CNTL 0x16EA5
+#define mmCLK1_CLK4_BYPASS_CNTL 0x16EAE
+#define mmCLK1_CLK5_BYPASS_CNTL 0x16EB7
+
+#define mmCLK1_CLK0_DS_CNTL 0x16E83
+#define mmCLK1_CLK1_DS_CNTL 0x16E8C
+#define mmCLK1_CLK2_DS_CNTL 0x16E95
+#define mmCLK1_CLK3_DS_CNTL 0x16E9E
+#define mmCLK1_CLK4_DS_CNTL 0x16EA7
+#define mmCLK1_CLK5_DS_CNTL 0x16EB0
+
+#define mmCLK1_CLK0_ALLOW_DS 0x16E84
+#define mmCLK1_CLK1_ALLOW_DS 0x16E8D
+#define mmCLK1_CLK2_ALLOW_DS 0x16E96
+#define mmCLK1_CLK3_ALLOW_DS 0x16E9F
+#define mmCLK1_CLK4_ALLOW_DS 0x16EA8
+#define mmCLK1_CLK5_ALLOW_DS 0x16EB1
+
+#define mmCLK5_spll_field_8 0x1B04B
+#define mmDENTIST_DISPCLK_CNTL 0x0124
+#define regDENTIST_DISPCLK_CNTL 0x0064
+#define regDENTIST_DISPCLK_CNTL_BASE_IDX 1
+
+#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0
+#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc
+#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10
+#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL
+#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
+#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
+
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L
+
+// DENTIST_DISPCLK_CNTL
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER__SHIFT 0x0
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER__SHIFT 0x8
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE__SHIFT 0x13
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE__SHIFT 0x14
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER__SHIFT 0x18
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER_MASK 0x0000007FL
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER_MASK 0x00007F00L
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE_MASK 0x00080000L
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE_MASK 0x00100000L
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER_MASK 0x7F000000L
+
+#define CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L
+
+#define REG(reg) \
+ (clk_mgr->regs->reg)
+
+#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
+
+#define BASE(seg) BASE_INNER(seg)
+
+#define SR(reg_name)\
+ .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name
+
+#define CLK_SR_DCN35(reg_name)\
+ .reg_name = mm ## reg_name
+
+static const struct clk_mgr_registers clk_mgr_regs_dcn351 = {
+ CLK_REG_LIST_DCN35()
+};
+
+static const struct clk_mgr_shift clk_mgr_shift_dcn351 = {
+ CLK_COMMON_MASK_SH_LIST_DCN32(__SHIFT)
+};
+
+static const struct clk_mgr_mask clk_mgr_mask_dcn351 = {
+ CLK_COMMON_MASK_SH_LIST_DCN32(_MASK)
+};
+
+#define TO_CLK_MGR_DCN35(clk_mgr)\
+ container_of(clk_mgr, struct clk_mgr_dcn35, base)
+
+
+void dcn351_clk_mgr_construct(
+ struct dc_context *ctx,
+ struct clk_mgr_dcn35 *clk_mgr,
+ struct pp_smu_funcs *pp_smu,
+ struct dccg *dccg)
+{
+ /* DCN 3.51 register offsets differ from DCN 3.5 */
+ clk_mgr->base.regs = &clk_mgr_regs_dcn351;
+ clk_mgr->base.clk_mgr_shift = &clk_mgr_shift_dcn351;
+ clk_mgr->base.clk_mgr_mask = &clk_mgr_mask_dcn351;
+
+ dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
+
+}
+
+
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index b77333817f18..1f974ea3b0c6 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -36,15 +36,11 @@
#include "dcn20/dcn20_clk_mgr.h"
-
-
#include "reg_helper.h"
#include "core_types.h"
#include "dcn35_smu.h"
#include "dm_helpers.h"
-/* TODO: remove this include once we ported over remaining clk mgr functions*/
-#include "dcn30/dcn30_clk_mgr.h"
#include "dcn31/dcn31_clk_mgr.h"
#include "dc_dmub_srv.h"
@@ -55,35 +51,102 @@
#define DC_LOGGER \
clk_mgr->base.base.ctx->logger
+#define DCN_BASE__INST0_SEG1 0x000000C0
+#define mmCLK1_CLK_PLL_REQ 0x16E37
+
+#define mmCLK1_CLK0_DFS_CNTL 0x16E69
+#define mmCLK1_CLK1_DFS_CNTL 0x16E6C
+#define mmCLK1_CLK2_DFS_CNTL 0x16E6F
+#define mmCLK1_CLK3_DFS_CNTL 0x16E72
+#define mmCLK1_CLK4_DFS_CNTL 0x16E75
+#define mmCLK1_CLK5_DFS_CNTL 0x16E78
+
+#define mmCLK1_CLK0_CURRENT_CNT 0x16EFB
+#define mmCLK1_CLK1_CURRENT_CNT 0x16EFC
+#define mmCLK1_CLK2_CURRENT_CNT 0x16EFD
+#define mmCLK1_CLK3_CURRENT_CNT 0x16EFE
+#define mmCLK1_CLK4_CURRENT_CNT 0x16EFF
+#define mmCLK1_CLK5_CURRENT_CNT 0x16F00
+
+#define mmCLK1_CLK0_BYPASS_CNTL 0x16E8A
+#define mmCLK1_CLK1_BYPASS_CNTL 0x16E93
+#define mmCLK1_CLK2_BYPASS_CNTL 0x16E9C
+#define mmCLK1_CLK3_BYPASS_CNTL 0x16EA5
+#define mmCLK1_CLK4_BYPASS_CNTL 0x16EAE
+#define mmCLK1_CLK5_BYPASS_CNTL 0x16EB7
+
+#define mmCLK1_CLK0_DS_CNTL 0x16E83
+#define mmCLK1_CLK1_DS_CNTL 0x16E8C
+#define mmCLK1_CLK2_DS_CNTL 0x16E95
+#define mmCLK1_CLK3_DS_CNTL 0x16E9E
+#define mmCLK1_CLK4_DS_CNTL 0x16EA7
+#define mmCLK1_CLK5_DS_CNTL 0x16EB0
+
+#define mmCLK1_CLK0_ALLOW_DS 0x16E84
+#define mmCLK1_CLK1_ALLOW_DS 0x16E8D
+#define mmCLK1_CLK2_ALLOW_DS 0x16E96
+#define mmCLK1_CLK3_ALLOW_DS 0x16E9F
+#define mmCLK1_CLK4_ALLOW_DS 0x16EA8
+#define mmCLK1_CLK5_ALLOW_DS 0x16EB1
+
+#define mmCLK5_spll_field_8 0x1B04B
+#define mmDENTIST_DISPCLK_CNTL 0x0124
+#define regDENTIST_DISPCLK_CNTL 0x0064
+#define regDENTIST_DISPCLK_CNTL_BASE_IDX 1
+
+#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0
+#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc
+#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10
+#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL
+#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
+#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
+
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L
+#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L
+// DENTIST_DISPCLK_CNTL
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER__SHIFT 0x0
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER__SHIFT 0x8
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE__SHIFT 0x13
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE__SHIFT 0x14
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER__SHIFT 0x18
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER_MASK 0x0000007FL
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER_MASK 0x00007F00L
+#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE_MASK 0x00080000L
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE_MASK 0x00100000L
+#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER_MASK 0x7F000000L
+
+#define CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L
-#define regCLK1_CLK_PLL_REQ 0x0237
-#define regCLK1_CLK_PLL_REQ_BASE_IDX 0
+#define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0
+#undef FN
+#define FN(reg_name, field_name) \
+ clk_mgr->clk_mgr_shift->field_name, clk_mgr->clk_mgr_mask->field_name
-#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0
-#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc
-#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10
-#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL
-#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
-#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
+#define REG(reg) \
+ (clk_mgr->regs->reg)
-#define regCLK1_CLK2_BYPASS_CNTL 0x029c
-#define regCLK1_CLK2_BYPASS_CNTL_BASE_IDX 0
+#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
-#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL__SHIFT 0x0
-#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV__SHIFT 0x10
-#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L
-#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L
+#define BASE(seg) BASE_INNER(seg)
-#define regCLK5_0_CLK5_spll_field_8 0x464b
-#define regCLK5_0_CLK5_spll_field_8_BASE_IDX 0
+#define SR(reg_name)\
+ .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
+ reg ## reg_name
-#define CLK5_0_CLK5_spll_field_8__spll_ssc_en__SHIFT 0xd
-#define CLK5_0_CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L
+#define CLK_SR_DCN35(reg_name)\
+ .reg_name = mm ## reg_name
-#define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0
+static const struct clk_mgr_registers clk_mgr_regs_dcn35 = {
+ CLK_REG_LIST_DCN35()
+};
+
+static const struct clk_mgr_shift clk_mgr_shift_dcn35 = {
+ CLK_COMMON_MASK_SH_LIST_DCN32(__SHIFT)
+};
-#define REG(reg_name) \
- (ctx->clk_reg_offsets[reg ## reg_name ## _BASE_IDX] + reg ## reg_name)
+static const struct clk_mgr_mask clk_mgr_mask_dcn35 = {
+ CLK_COMMON_MASK_SH_LIST_DCN32(_MASK)
+};
#define TO_CLK_MGR_DCN35(clk_mgr)\
container_of(clk_mgr, struct clk_mgr_dcn35, base)
@@ -338,6 +401,7 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
if (clk_mgr_base->clks.dtbclk_en && !new_clocks->dtbclk_en) {
if (clk_mgr->base.ctx->dc->config.allow_0_dtb_clk)
dcn35_smu_set_dtbclk(clk_mgr, false);
+
clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
}
/* check that we're not already in lower */
@@ -355,11 +419,17 @@ void dcn35_update_clocks(struct clk_mgr *clk_mgr_base,
}
if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) {
- dcn35_smu_set_dtbclk(clk_mgr, true);
- clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
+ int actual_dtbclk = 0;
dcn35_update_clocks_update_dtb_dto(clk_mgr, context, new_clocks->ref_dtbclk_khz);
- clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz;
+ dcn35_smu_set_dtbclk(clk_mgr, true);
+
+ actual_dtbclk = REG_READ(CLK1_CLK4_CURRENT_CNT);
+
+ if (actual_dtbclk) {
+ clk_mgr_base->clks.ref_dtbclk_khz = new_clocks->ref_dtbclk_khz;
+ clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
+ }
}
/* check that we're not already in D0 */
@@ -452,7 +522,6 @@ static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
struct fixed31_32 pll_req;
unsigned int fbmult_frac_val = 0;
unsigned int fbmult_int_val = 0;
- struct dc_context *ctx = clk_mgr->base.ctx;
/*
* Register value of fbmult is in 8.16 format, we are converting to 314.32
@@ -512,22 +581,20 @@ static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs
static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base)
{
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
- struct dc_context *ctx = clk_mgr->base.ctx;
+
uint32_t ssc_enable;
- REG_GET(CLK5_0_CLK5_spll_field_8, spll_ssc_en, &ssc_enable);
+ ssc_enable = REG_READ(CLK5_spll_field_8) & CLK5_spll_field_8__spll_ssc_en_MASK;
- return ssc_enable == 1;
+ return ssc_enable != 0;
}
static void init_clk_states(struct clk_mgr *clk_mgr)
{
- struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz;
+
memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
- if (clk_mgr_int->smu_ver >= SMU_VER_THRESHOLD)
- clk_mgr->clks.dtbclk_en = true; // request DTBCLK disable on first commit
clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk; // restore ref_dtbclk
clk_mgr->clks.p_state_change_support = true;
clk_mgr->clks.prev_p_state_change_support = true;
@@ -538,6 +605,7 @@ static void init_clk_states(struct clk_mgr *clk_mgr)
void dcn35_init_clocks(struct clk_mgr *clk_mgr)
{
struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
+
init_clk_states(clk_mgr);
// to adjust dp_dto reference clock if ssc is enable otherwise to apply dprefclk
@@ -632,6 +700,7 @@ static struct wm_table lpddr5_wm_table = {
};
static DpmClocks_t_dcn35 dummy_clocks;
+static DpmClocks_t_dcn351 dummy_clocks_dcn351;
static struct dcn35_watermarks dummy_wms = { 0 };
@@ -642,10 +711,10 @@ static struct dcn35_ss_info_table ss_info_table = {
static void dcn35_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr)
{
- struct dc_context *ctx = clk_mgr->base.ctx;
- uint32_t clock_source;
+ uint32_t clock_source = 0;
+
+ clock_source = REG_READ(CLK1_CLK2_BYPASS_CNTL) & CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK;
- REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source);
// If it's DFS mode, clock_source is 0.
if (dcn35_is_spll_ssc_enabled(&clk_mgr->base) && (clock_source < ARRAY_SIZE(ss_info_table.ss_percentage))) {
clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source];
@@ -755,6 +824,22 @@ static void dcn35_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr,
dcn35_smu_transfer_dpm_table_smu_2_dram(clk_mgr);
}
+static void dcn351_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr,
+ struct dcn351_smu_dpm_clks *smu_dpm_clks)
+{
+ DpmClocks_t_dcn351 *table = smu_dpm_clks->dpm_clks;
+
+ if (!clk_mgr->smu_ver)
+ return;
+ if (!table || smu_dpm_clks->mc_address.quad_part == 0)
+ return;
+ memset(table, 0, sizeof(*table));
+ dcn35_smu_set_dram_addr_high(clk_mgr,
+ smu_dpm_clks->mc_address.high_part);
+ dcn35_smu_set_dram_addr_low(clk_mgr,
+ smu_dpm_clks->mc_address.low_part);
+ dcn35_smu_transfer_dpm_table_smu_2_dram(clk_mgr);
+}
static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks)
{
uint32_t max = 0;
@@ -1093,6 +1178,57 @@ struct clk_mgr_funcs dcn35_fpga_funcs = {
.get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
};
+static void translate_to_DpmClocks_t_dcn35(struct dcn351_smu_dpm_clks *smu_dpm_clks_a,
+ struct dcn35_smu_dpm_clks *smu_dpm_clks_b)
+{
+ /* translate between the two structures and copy only the clock tables that are needed */
+ uint8_t i;
+
+ if (smu_dpm_clks_a == NULL || smu_dpm_clks_b == NULL ||
+ smu_dpm_clks_a->dpm_clks == NULL || smu_dpm_clks_b->dpm_clks == NULL)
+ return;
+
+ for (i = 0; i < NUM_DCFCLK_DPM_LEVELS; i++)
+ smu_dpm_clks_b->dpm_clks->DcfClocks[i] = smu_dpm_clks_a->dpm_clks->DcfClocks[i];
+
+ for (i = 0; i < NUM_DISPCLK_DPM_LEVELS; i++)
+ smu_dpm_clks_b->dpm_clks->DispClocks[i] = smu_dpm_clks_a->dpm_clks->DispClocks[i];
+
+ for (i = 0; i < NUM_DPPCLK_DPM_LEVELS; i++)
+ smu_dpm_clks_b->dpm_clks->DppClocks[i] = smu_dpm_clks_a->dpm_clks->DppClocks[i];
+
+ for (i = 0; i < NUM_FCLK_DPM_LEVELS; i++) {
+ smu_dpm_clks_b->dpm_clks->FclkClocks_Freq[i] = smu_dpm_clks_a->dpm_clks->FclkClocks_Freq[i];
+ smu_dpm_clks_b->dpm_clks->FclkClocks_Voltage[i] = smu_dpm_clks_a->dpm_clks->FclkClocks_Voltage[i];
+ }
+ for (i = 0; i < NUM_MEM_PSTATE_LEVELS; i++) {
+ smu_dpm_clks_b->dpm_clks->MemPstateTable[i].MemClk =
+ smu_dpm_clks_a->dpm_clks->MemPstateTable[i].MemClk;
+ smu_dpm_clks_b->dpm_clks->MemPstateTable[i].UClk =
+ smu_dpm_clks_a->dpm_clks->MemPstateTable[i].UClk;
+ smu_dpm_clks_b->dpm_clks->MemPstateTable[i].Voltage =
+ smu_dpm_clks_a->dpm_clks->MemPstateTable[i].Voltage;
+ smu_dpm_clks_b->dpm_clks->MemPstateTable[i].WckRatio =
+ smu_dpm_clks_a->dpm_clks->MemPstateTable[i].WckRatio;
+ }
+ smu_dpm_clks_b->dpm_clks->MaxGfxClk = smu_dpm_clks_a->dpm_clks->MaxGfxClk;
+ smu_dpm_clks_b->dpm_clks->MinGfxClk = smu_dpm_clks_a->dpm_clks->MinGfxClk;
+ smu_dpm_clks_b->dpm_clks->NumDcfClkLevelsEnabled =
+ smu_dpm_clks_a->dpm_clks->NumDcfClkLevelsEnabled;
+ smu_dpm_clks_b->dpm_clks->NumDispClkLevelsEnabled =
+ smu_dpm_clks_a->dpm_clks->NumDispClkLevelsEnabled;
+ smu_dpm_clks_b->dpm_clks->NumFclkLevelsEnabled =
+ smu_dpm_clks_a->dpm_clks->NumFclkLevelsEnabled;
+ smu_dpm_clks_b->dpm_clks->NumMemPstatesEnabled =
+ smu_dpm_clks_a->dpm_clks->NumMemPstatesEnabled;
+ smu_dpm_clks_b->dpm_clks->NumSocClkLevelsEnabled =
+ smu_dpm_clks_a->dpm_clks->NumSocClkLevelsEnabled;
+
+ for (i = 0; i < NUM_SOC_VOLTAGE_LEVELS; i++) {
+ smu_dpm_clks_b->dpm_clks->SocClocks[i] = smu_dpm_clks_a->dpm_clks->SocClocks[i];
+ smu_dpm_clks_b->dpm_clks->SocVoltage[i] = smu_dpm_clks_a->dpm_clks->SocVoltage[i];
+ }
+}
void dcn35_clk_mgr_construct(
struct dc_context *ctx,
struct clk_mgr_dcn35 *clk_mgr,
@@ -1100,6 +1236,7 @@ void dcn35_clk_mgr_construct(
struct dccg *dccg)
{
struct dcn35_smu_dpm_clks smu_dpm_clks = { 0 };
+ struct dcn351_smu_dpm_clks smu_dpm_clks_dcn351 = { 0 };
clk_mgr->base.base.ctx = ctx;
clk_mgr->base.base.funcs = &dcn35_funcs;
@@ -1112,6 +1249,12 @@ void dcn35_clk_mgr_construct(
clk_mgr->base.dprefclk_ss_divider = 1000;
clk_mgr->base.ss_on_dprefclk = false;
clk_mgr->base.dfs_ref_freq_khz = 48000;
+ if (ctx->dce_version == DCN_VERSION_3_5) {
+ clk_mgr->base.regs = &clk_mgr_regs_dcn35;
+ clk_mgr->base.clk_mgr_shift = &clk_mgr_shift_dcn35;
+ clk_mgr->base.clk_mgr_mask = &clk_mgr_mask_dcn35;
+ }
+
clk_mgr->smu_wm_set.wm_set = (struct dcn35_watermarks *)dm_helpers_allocate_gpu_mem(
clk_mgr->base.base.ctx,
@@ -1130,14 +1273,24 @@ void dcn35_clk_mgr_construct(
DC_MEM_ALLOC_TYPE_GART,
sizeof(DpmClocks_t_dcn35),
&smu_dpm_clks.mc_address.quad_part);
-
if (smu_dpm_clks.dpm_clks == NULL) {
smu_dpm_clks.dpm_clks = &dummy_clocks;
smu_dpm_clks.mc_address.quad_part = 0;
}
-
ASSERT(smu_dpm_clks.dpm_clks);
+ if (ctx->dce_version == DCN_VERSION_3_51) {
+ smu_dpm_clks_dcn351.dpm_clks = (DpmClocks_t_dcn351 *)dm_helpers_allocate_gpu_mem(
+ clk_mgr->base.base.ctx,
+ DC_MEM_ALLOC_TYPE_GART,
+ sizeof(DpmClocks_t_dcn351),
+ &smu_dpm_clks_dcn351.mc_address.quad_part);
+ if (smu_dpm_clks_dcn351.dpm_clks == NULL) {
+ smu_dpm_clks_dcn351.dpm_clks = &dummy_clocks_dcn351;
+ smu_dpm_clks_dcn351.mc_address.quad_part = 0;
+ }
+ }
+
clk_mgr->base.smu_ver = dcn35_smu_get_smu_version(&clk_mgr->base);
if (clk_mgr->base.smu_ver)
@@ -1166,7 +1319,11 @@ void dcn35_clk_mgr_construct(
if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
int i;
- dcn35_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks);
+ if (ctx->dce_version == DCN_VERSION_3_51) {
+ dcn351_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks_dcn351);
+ translate_to_DpmClocks_t_dcn35(&smu_dpm_clks_dcn351, &smu_dpm_clks);
+ } else
+ dcn35_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks);
DC_LOG_SMU("NumDcfClkLevelsEnabled: %d\n"
"NumDispClkLevelsEnabled: %d\n"
"NumSocClkLevelsEnabled: %d\n"
@@ -1227,6 +1384,10 @@ void dcn35_clk_mgr_construct(
dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_GART,
smu_dpm_clks.dpm_clks);
+ if (smu_dpm_clks_dcn351.dpm_clks && smu_dpm_clks_dcn351.mc_address.quad_part != 0)
+ dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_GART,
+ smu_dpm_clks_dcn351.dpm_clks);
+
if (ctx->dc->config.disable_ips != DMUB_IPS_DISABLE_ALL) {
bool ips_support = false;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h
index 1203dc605b12..a12a9bf90806 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h
@@ -60,4 +60,8 @@ void dcn35_clk_mgr_construct(struct dc_context *ctx,
void dcn35_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int);
+void dcn351_clk_mgr_construct(struct dc_context *ctx,
+ struct clk_mgr_dcn35 *clk_mgr,
+ struct pp_smu_funcs *pp_smu,
+ struct dccg *dccg);
#endif //__DCN35_CLK_MGR_H__
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h
index 3fae13c73934..ab9d21ba0c43 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h
@@ -126,18 +126,31 @@ typedef struct {
uint32_t MaxGfxClk;
} DpmClocks_t_dcn35;
-
-// Throttler Status Bitmask
-
-
-
-
-
-
-
-
-
-
+typedef struct {
+ uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS];
+ uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS];
+ uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS];
+ uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS];
+ uint32_t VClocks0[NUM_VCN_DPM_LEVELS];
+ uint32_t VClocks1[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks0[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks1[NUM_VCN_DPM_LEVELS];
+ uint32_t VPEClocks[NUM_VPE_DPM_LEVELS];
+ uint32_t FclkClocks_Freq[NUM_FCLK_DPM_LEVELS];
+ uint32_t FclkClocks_Voltage[NUM_FCLK_DPM_LEVELS];
+ uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS];
+ MemPstateTable_t MemPstateTable[NUM_MEM_PSTATE_LEVELS];
+ uint8_t NumDcfClkLevelsEnabled;
+ uint8_t NumDispClkLevelsEnabled; // Applies to both Dispclk and Dppclk
+ uint8_t NumSocClkLevelsEnabled;
+ uint8_t Vcn0ClkLevelsEnabled; // Applies to both Vclk0 and Dclk0
+ uint8_t Vcn1ClkLevelsEnabled; // Applies to both Vclk1 and Dclk1
+ uint8_t VpeClkLevelsEnabled;
+ uint8_t NumMemPstatesEnabled;
+ uint8_t NumFclkLevelsEnabled;
+ uint32_t MinGfxClk;
+ uint32_t MaxGfxClk;
+} DpmClocks_t_dcn351;
#define TABLE_BIOS_IF 0 // Called by BIOS
#define TABLE_WATERMARKS 1 // Called by DAL through VBIOS
@@ -163,6 +176,10 @@ struct dcn35_smu_dpm_clks {
union large_integer mc_address;
};
+struct dcn351_smu_dpm_clks {
+ DpmClocks_t_dcn351 *dpm_clks;
+ union large_integer mc_address;
+};
/* TODO: taken from vgh, may not be correct */
struct display_idle_optimization {
unsigned int df_request_disabled : 1;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h
index dbfdd3487da5..2e0d34fd7512 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dalsmc.h
@@ -43,7 +43,9 @@
#define DALSMC_MSG_ActiveUclkFclk 0x18
#define DALSMC_MSG_IdleUclkFclk 0x19
#define DALSMC_MSG_SetUclkPstateAllow 0x1A
-#define DALSMC_Message_Count 0x1B
+#define DALSMC_MSG_SubvpUclkFclk 0x1B
+#define DALSMC_MSG_GetNumUmcChannels 0x1C
+#define DALSMC_Message_Count 0x1D
typedef enum {
FCLK_SWITCH_DISALLOW,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
index 8cfc5f435937..8082bb877611 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c
@@ -141,6 +141,20 @@ static bool dcn401_is_ppclk_idle_dpm_enabled(struct clk_mgr_internal *clk_mgr, P
return ppclk_idle_dpm_enabled;
}
+static bool dcn401_is_df_throttle_opt_enabled(struct clk_mgr_internal *clk_mgr)
+{
+ bool is_df_throttle_opt_enabled = false;
+
+ if (ASICREV_IS_GC_12_0_1_A0(clk_mgr->base.ctx->asic_id.hw_internal_rev) &&
+ clk_mgr->smu_ver >= 0x663500) {
+ is_df_throttle_opt_enabled = !clk_mgr->base.ctx->dc->debug.force_subvp_df_throttle;
+ }
+
+ is_df_throttle_opt_enabled &= clk_mgr->smu_present;
+
+ return is_df_throttle_opt_enabled;
+}
+
/* Query SMU for all clock states for a particular clock */
static void dcn401_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e clk, unsigned int *entry_0,
unsigned int *num_levels)
@@ -614,207 +628,6 @@ static void dcn401_update_clocks_update_dentist(
}
-static void dcn401_update_clocks_legacy(struct clk_mgr *clk_mgr_base,
- struct dc_state *context,
- bool safe_to_lower)
-{
- struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
- struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
- struct dc *dc = clk_mgr_base->ctx->dc;
- int display_count;
- bool update_dppclk = false;
- bool update_dispclk = false;
- bool enter_display_off = false;
- bool dpp_clock_lowered = false;
- struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu;
- bool force_reset = false;
- bool update_uclk = false, update_fclk = false;
- bool p_state_change_support;
- bool fclk_p_state_change_support;
- int total_plane_count;
-
- if (dc->work_arounds.skip_clock_update)
- return;
-
- if (clk_mgr_base->clks.dispclk_khz == 0 ||
- (dc->debug.force_clock_mode & 0x1)) {
- /* This is from resume or boot up, if forced_clock cfg option used,
- * we bypass program dispclk and DPPCLK, but need set them for S3.
- */
- force_reset = true;
-
- dcn2_read_clocks_from_hw_dentist(clk_mgr_base);
-
- /* Force_clock_mode 0x1: force reset the clock even it is the same clock
- * as long as it is in Passive level.
- */
- }
- display_count = clk_mgr_helper_get_active_display_cnt(dc, context);
-
- if (display_count == 0)
- enter_display_off = true;
-
- if (clk_mgr->smu_present) {
- if (enter_display_off == safe_to_lower)
- dcn401_smu_set_num_of_displays(clk_mgr, display_count);
-
- clk_mgr_base->clks.fclk_prev_p_state_change_support = clk_mgr_base->clks.fclk_p_state_change_support;
-
- total_plane_count = clk_mgr_helper_get_active_plane_cnt(dc, context);
- fclk_p_state_change_support = new_clocks->fclk_p_state_change_support || (total_plane_count == 0);
-
- if (should_update_pstate_support(safe_to_lower, fclk_p_state_change_support, clk_mgr_base->clks.fclk_p_state_change_support)) {
- clk_mgr_base->clks.fclk_p_state_change_support = fclk_p_state_change_support;
-
- /* To enable FCLK P-state switching, send PSTATE_SUPPORTED message to PMFW */
- if (clk_mgr_base->clks.fclk_p_state_change_support) {
- /* Handle the code for sending a message to PMFW that FCLK P-state change is supported */
- dcn401_smu_send_fclk_pstate_message(clk_mgr, true);
- }
- }
-
- if (dc->debug.force_min_dcfclk_mhz > 0)
- new_clocks->dcfclk_khz = (new_clocks->dcfclk_khz > (dc->debug.force_min_dcfclk_mhz * 1000)) ?
- new_clocks->dcfclk_khz : (dc->debug.force_min_dcfclk_mhz * 1000);
-
- if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr_base->clks.dcfclk_khz)) {
- clk_mgr_base->clks.dcfclk_khz = new_clocks->dcfclk_khz;
- if (dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DCFCLK))
- dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DCFCLK, khz_to_mhz_ceil(clk_mgr_base->clks.dcfclk_khz));
- }
-
- if (should_set_clock(safe_to_lower, new_clocks->dcfclk_deep_sleep_khz, clk_mgr_base->clks.dcfclk_deep_sleep_khz)) {
- clk_mgr_base->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz;
- if (dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DCFCLK))
- dcn401_smu_set_min_deep_sleep_dcef_clk(clk_mgr, khz_to_mhz_ceil(clk_mgr_base->clks.dcfclk_deep_sleep_khz));
- }
-
- if (should_set_clock(safe_to_lower, new_clocks->socclk_khz, clk_mgr_base->clks.socclk_khz))
- /* We don't actually care about socclk, don't notify SMU of hard min */
- clk_mgr_base->clks.socclk_khz = new_clocks->socclk_khz;
-
- clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support;
- clk_mgr_base->clks.prev_num_ways = clk_mgr_base->clks.num_ways;
-
- if (clk_mgr_base->clks.num_ways != new_clocks->num_ways &&
- clk_mgr_base->clks.num_ways < new_clocks->num_ways) {
- clk_mgr_base->clks.num_ways = new_clocks->num_ways;
- if (dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK))
- dcn401_smu_send_cab_for_uclk_message(clk_mgr, clk_mgr_base->clks.num_ways);
- }
-
-
- p_state_change_support = new_clocks->p_state_change_support || (total_plane_count == 0);
- if (should_update_pstate_support(safe_to_lower, p_state_change_support, clk_mgr_base->clks.prev_p_state_change_support)) {
- clk_mgr_base->clks.p_state_change_support = p_state_change_support;
- clk_mgr_base->clks.fw_based_mclk_switching = p_state_change_support && new_clocks->fw_based_mclk_switching;
-
- /* to disable P-State switching, set UCLK min = max */
- if (!clk_mgr_base->clks.p_state_change_support && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK))
- dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK,
- clk_mgr_base->bw_params->clk_table.entries[clk_mgr_base->bw_params->clk_table.num_entries_per_clk.num_memclk_levels - 1].memclk_mhz);
- }
-
- /* Always update saved value, even if new value not set due to P-State switching unsupported. Also check safe_to_lower for FCLK */
- if (safe_to_lower && (clk_mgr_base->clks.fclk_p_state_change_support != clk_mgr_base->clks.fclk_prev_p_state_change_support)) {
- update_fclk = true;
- }
-
- if (!clk_mgr_base->clks.fclk_p_state_change_support &&
- update_fclk &&
- dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_FCLK)) {
- /* Handle code for sending a message to PMFW that FCLK P-state change is not supported */
- dcn401_smu_send_fclk_pstate_message(clk_mgr, false);
- }
-
- /* Always update saved value, even if new value not set due to P-State switching unsupported */
- if (should_set_clock(safe_to_lower, new_clocks->dramclk_khz, clk_mgr_base->clks.dramclk_khz)) {
- clk_mgr_base->clks.dramclk_khz = new_clocks->dramclk_khz;
- update_uclk = true;
- }
-
- /* set UCLK to requested value if P-State switching is supported, or to re-enable P-State switching */
- if (clk_mgr_base->clks.p_state_change_support &&
- (update_uclk || !clk_mgr_base->clks.prev_p_state_change_support) &&
- dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK))
- dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_UCLK, khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz));
-
- if (clk_mgr_base->clks.num_ways != new_clocks->num_ways &&
- clk_mgr_base->clks.num_ways > new_clocks->num_ways) {
- clk_mgr_base->clks.num_ways = new_clocks->num_ways;
- if (dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_UCLK))
- dcn401_smu_send_cab_for_uclk_message(clk_mgr, clk_mgr_base->clks.num_ways);
- }
- }
-
- if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr_base->clks.dppclk_khz)) {
- if (clk_mgr_base->clks.dppclk_khz > new_clocks->dppclk_khz)
- dpp_clock_lowered = true;
-
- clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz;
- clk_mgr_base->clks.actual_dppclk_khz = new_clocks->dppclk_khz;
-
- if (clk_mgr->smu_present && !dpp_clock_lowered && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DPPCLK))
- clk_mgr_base->clks.actual_dppclk_khz = dcn401_set_hard_min_by_freq_optimized(clk_mgr, PPCLK_DPPCLK, clk_mgr_base->clks.dppclk_khz);
- update_dppclk = true;
- }
-
- if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
- clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
-
- if (clk_mgr->smu_present && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DISPCLK))
- clk_mgr_base->clks.actual_dispclk_khz = dcn401_set_hard_min_by_freq_optimized(clk_mgr, PPCLK_DISPCLK, clk_mgr_base->clks.dispclk_khz);
-
- update_dispclk = true;
- }
-
- if (!new_clocks->dtbclk_en && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DTBCLK)) {
- new_clocks->ref_dtbclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz * 1000;
- }
-
- /* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */
- if (!dc->debug.disable_dtb_ref_clk_switch &&
- should_set_clock(safe_to_lower, new_clocks->ref_dtbclk_khz / 1000, clk_mgr_base->clks.ref_dtbclk_khz / 1000) &&
- dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DTBCLK)) {
- /* DCCG requires KHz precision for DTBCLK */
- clk_mgr_base->clks.ref_dtbclk_khz =
- dcn401_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DTBCLK, khz_to_mhz_ceil(new_clocks->ref_dtbclk_khz));
-
- dcn401_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz);
- }
-
- if (dc->config.forced_clocks == false || (force_reset && safe_to_lower)) {
- if (dpp_clock_lowered) {
- /* if clock is being lowered, increase DTO before lowering refclk */
- dcn401_update_clocks_update_dpp_dto(clk_mgr, context,
- safe_to_lower, clk_mgr_base->clks.dppclk_khz);
- dcn401_update_clocks_update_dentist(clk_mgr, context);
- if (clk_mgr->smu_present && dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DPPCLK)) {
- clk_mgr_base->clks.actual_dppclk_khz = dcn401_set_hard_min_by_freq_optimized(clk_mgr, PPCLK_DPPCLK,
- clk_mgr_base->clks.dppclk_khz);
- dcn401_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower,
- clk_mgr_base->clks.actual_dppclk_khz);
- }
-
- } else {
- /* if clock is being raised, increase refclk before lowering DTO */
- if (update_dppclk || update_dispclk)
- dcn401_update_clocks_update_dentist(clk_mgr, context);
- /* There is a check inside dcn20_update_clocks_update_dpp_dto which ensures
- * that we do not lower dto when it is not safe to lower. We do not need to
- * compare the current and new dppclk before calling this function.
- */
- dcn401_update_clocks_update_dpp_dto(clk_mgr, context,
- safe_to_lower, clk_mgr_base->clks.actual_dppclk_khz);
- }
- }
-
- if (update_dispclk && dmcu && dmcu->funcs->is_dmcu_initialized(dmcu))
- /*update dmcu for wait_loop count*/
- dmcu->funcs->set_psr_wait_loop(dmcu,
- clk_mgr_base->clks.dispclk_khz / 1000 / 7);
-}
-
static void dcn401_execute_block_sequence(struct clk_mgr *clk_mgr_base, unsigned int num_steps)
{
struct clk_mgr_internal *clk_mgr_internal = TO_CLK_MGR_INTERNAL(clk_mgr_base);
@@ -869,6 +682,12 @@ static void dcn401_execute_block_sequence(struct clk_mgr *clk_mgr_base, unsigned
params->update_idle_hardmin_params.uclk_mhz,
params->update_idle_hardmin_params.fclk_mhz);
break;
+ case CLK_MGR401_UPDATE_SUBVP_HARDMINS:
+ dcn401_smu_set_subvp_uclk_fclk_hardmin(
+ clk_mgr_internal,
+ params->update_idle_hardmin_params.uclk_mhz,
+ params->update_idle_hardmin_params.fclk_mhz);
+ break;
case CLK_MGR401_UPDATE_DEEP_SLEEP_DCFCLK:
dcn401_smu_set_min_deep_sleep_dcef_clk(
clk_mgr_internal,
@@ -945,15 +764,21 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence(
bool update_active_uclk = false;
bool update_idle_fclk = false;
bool update_idle_uclk = false;
+ bool update_subvp_prefetch_dramclk = false;
+ bool update_subvp_prefetch_fclk = false;
bool is_idle_dpm_enabled = dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_UCLK) &&
dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK) &&
dcn401_is_ppclk_idle_dpm_enabled(clk_mgr_internal, PPCLK_UCLK) &&
dcn401_is_ppclk_idle_dpm_enabled(clk_mgr_internal, PPCLK_FCLK);
+ bool is_df_throttle_opt_enabled = is_idle_dpm_enabled &&
+ dcn401_is_df_throttle_opt_enabled(clk_mgr_internal);
int total_plane_count = clk_mgr_helper_get_active_plane_cnt(dc, context);
int active_uclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz);
int active_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.fclk_khz);
int idle_uclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.idle_dramclk_khz);
int idle_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.idle_fclk_khz);
+ int subvp_prefetch_dramclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_dramclk_khz);
+ int subvp_prefetch_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_fclk_khz);
unsigned int num_steps = 0;
@@ -982,15 +807,15 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence(
update_active_fclk = true;
update_idle_fclk = true;
- /* To enable FCLK P-state switching, send PSTATE_SUPPORTED message to PMFW */
- if (clk_mgr_base->clks.fclk_p_state_change_support) {
- /* Handle the code for sending a message to PMFW that FCLK P-state change is supported */
- if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) {
- block_sequence[num_steps].params.update_pstate_support_params.support = true;
- block_sequence[num_steps].func = CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT;
- num_steps++;
- }
- }
+ /* To enable FCLK P-state switching, send PSTATE_SUPPORTED message to PMFW (message not supported on DCN401) */
+ // if (clk_mgr_base->clks.fclk_p_state_change_support) {
+ // /* Handle the code for sending a message to PMFW that FCLK P-state change is supported */
+ // if (dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) {
+ // block_sequence[num_steps].params.update_pstate_support_params.support = true;
+ // block_sequence[num_steps].func = CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT;
+ // num_steps++;
+ // }
+ // }
}
if (!clk_mgr_base->clks.fclk_p_state_change_support && dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) {
@@ -1109,6 +934,12 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence(
}
}
+ if (should_set_clock(safe_to_lower, new_clocks->subvp_prefetch_dramclk_khz, clk_mgr_base->clks.subvp_prefetch_dramclk_khz)) {
+ clk_mgr_base->clks.subvp_prefetch_dramclk_khz = new_clocks->subvp_prefetch_dramclk_khz;
+ update_subvp_prefetch_dramclk = true;
+ subvp_prefetch_dramclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_dramclk_khz);
+ }
+
/* FCLK */
/* Always update saved value, even if new value not set due to P-State switching unsupported */
if (should_set_clock(safe_to_lower, new_clocks->fclk_khz, clk_mgr_base->clks.fclk_khz)) {
@@ -1129,6 +960,12 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence(
}
}
+ if (should_set_clock(safe_to_lower, new_clocks->subvp_prefetch_fclk_khz, clk_mgr_base->clks.subvp_prefetch_fclk_khz)) {
+ clk_mgr_base->clks.subvp_prefetch_fclk_khz = new_clocks->subvp_prefetch_fclk_khz;
+ update_subvp_prefetch_fclk = true;
+ subvp_prefetch_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_fclk_khz);
+ }
+
/* When idle DPM is enabled, need to send active and idle hardmins separately */
/* CLK_MGR401_UPDATE_ACTIVE_HARDMINS */
if ((update_active_uclk || update_active_fclk) && is_idle_dpm_enabled) {
@@ -1146,6 +983,14 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence(
num_steps++;
}
+ /* CLK_MGR401_UPDATE_SUBVP_HARDMINS */
+ if ((update_subvp_prefetch_dramclk || update_subvp_prefetch_fclk) && is_df_throttle_opt_enabled) {
+ block_sequence[num_steps].params.update_idle_hardmin_params.uclk_mhz = subvp_prefetch_dramclk_mhz;
+ block_sequence[num_steps].params.update_idle_hardmin_params.fclk_mhz = subvp_prefetch_fclk_mhz;
+ block_sequence[num_steps].func = CLK_MGR401_UPDATE_SUBVP_HARDMINS;
+ num_steps++;
+ }
+
/* set UCLK to requested value if P-State switching is supported, or to re-enable P-State switching */
if (update_active_uclk || update_idle_uclk) {
if (!is_idle_dpm_enabled) {
@@ -1178,14 +1023,14 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence(
// (*num_steps)++;
// }
- /* disable FCLK P-State support if needed */
- if (!fclk_p_state_change_support &&
- should_update_pstate_support(safe_to_lower, fclk_p_state_change_support, clk_mgr_base->clks.fclk_prev_p_state_change_support) &&
- dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) {
- block_sequence[num_steps].params.update_pstate_support_params.support = false;
- block_sequence[num_steps].func = CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT;
- num_steps++;
- }
+ /* disable FCLK P-State support if needed (message not supported on DCN401) */
+ // if (!fclk_p_state_change_support &&
+ // should_update_pstate_support(safe_to_lower, fclk_p_state_change_support, clk_mgr_base->clks.fclk_prev_p_state_change_support) &&
+ // dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK)) {
+ // block_sequence[num_steps].params.update_pstate_support_params.support = false;
+ // block_sequence[num_steps].func = CLK_MGR401_UPDATE_FCLK_PSTATE_SUPPORT;
+ // num_steps++;
+ // }
}
if (new_clocks->fw_based_mclk_switching != clk_mgr_base->clks.fw_based_mclk_switching &&
@@ -1366,11 +1211,6 @@ static void dcn401_update_clocks(struct clk_mgr *clk_mgr_base,
unsigned int num_steps = 0;
- if (dc->debug.enable_legacy_clock_update) {
- dcn401_update_clocks_legacy(clk_mgr_base, context, safe_to_lower);
- return;
- }
-
/* build bandwidth related clocks update sequence */
num_steps = dcn401_build_update_bandwidth_clocks_sequence(clk_mgr_base,
context,
@@ -1505,6 +1345,20 @@ static void dcn401_set_hard_min_memclk(struct clk_mgr *clk_mgr_base, bool curren
dcn401_execute_block_sequence(clk_mgr_base, num_steps);
}
+static int dcn401_get_hard_min_memclk(struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+ return clk_mgr->base.ctx->dc->current_state->bw_ctx.bw.dcn.clk.dramclk_khz;
+}
+
+static int dcn401_get_hard_min_fclk(struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+ return clk_mgr->base.ctx->dc->current_state->bw_ctx.bw.dcn.clk.fclk_khz;
+}
+
/* Get current memclk states, update bounding box */
static void dcn401_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base)
{
@@ -1549,6 +1403,15 @@ static void dcn401_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base)
if (clk_mgr->dpm_present && !num_levels)
clk_mgr->dpm_present = false;
+ clk_mgr_base->bw_params->num_channels = dcn401_smu_get_num_of_umc_channels(clk_mgr);
+ if (clk_mgr_base->ctx->dc_bios) {
+ /* use BIOS values if none provided by PMFW */
+ if (clk_mgr_base->bw_params->num_channels == 0) {
+ clk_mgr_base->bw_params->num_channels = clk_mgr_base->ctx->dc_bios->vram_info.num_chans;
+ }
+ clk_mgr_base->bw_params->dram_channel_width_bytes = clk_mgr_base->ctx->dc_bios->vram_info.dram_channel_width_bytes;
+ }
+
/* Refresh bounding box */
clk_mgr_base->ctx->dc->res_pool->funcs->update_bw_bounding_box(
clk_mgr->base.ctx->dc, clk_mgr_base->bw_params);
@@ -1638,6 +1501,8 @@ static struct clk_mgr_funcs dcn401_funcs = {
.enable_pme_wa = dcn401_enable_pme_wa,
.is_smu_present = dcn401_is_smu_present,
.get_dispclk_from_dentist = dcn401_get_dispclk_from_dentist,
+ .get_hard_min_memclk = dcn401_get_hard_min_memclk,
+ .get_hard_min_fclk = dcn401_get_hard_min_fclk,
};
struct clk_mgr_internal *dcn401_clk_mgr_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h
index 8b0461992b22..6c9ae5ca2c7e 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h
@@ -90,6 +90,7 @@ enum dcn401_clk_mgr_block_sequence_func {
CLK_MGR401_UPDATE_DTBCLK_DTO,
CLK_MGR401_UPDATE_DENTIST,
CLK_MGR401_UPDATE_PSR_WAIT_LOOP,
+ CLK_MGR401_UPDATE_SUBVP_HARDMINS,
};
struct dcn401_clk_mgr_block_sequence {
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c
index 7700477d019b..21c35528f61f 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c
@@ -21,6 +21,14 @@
#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
+/* temporary define */
+#ifndef DALSMC_MSG_SubvpUclkFclk
+#define DALSMC_MSG_SubvpUclkFclk 0x1B
+#endif
+#ifndef DALSMC_MSG_GetNumUmcChannels
+#define DALSMC_MSG_GetNumUmcChannels 0x1C
+#endif
+
/*
* Function to be used instead of REG_WAIT macro because the wait ends when
* the register is NOT EQUAL to zero, and because the translation in msg_if.h
@@ -296,6 +304,24 @@ bool dcn401_smu_set_active_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
return success;
}
+bool dcn401_smu_set_subvp_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
+ uint16_t uclk_freq_mhz,
+ uint16_t fclk_freq_mhz)
+{
+ uint32_t response = 0;
+ bool success;
+
+ /* 15:0 for uclk, 31:16 for fclk */
+ uint32_t param = (fclk_freq_mhz << 16) | uclk_freq_mhz;
+
+ smu_print("SMU Set active hardmin by freq: uclk_freq_mhz = %d MHz, fclk_freq_mhz = %d MHz\n", uclk_freq_mhz, fclk_freq_mhz);
+
+ success = dcn401_smu_send_msg_with_param(clk_mgr,
+ DALSMC_MSG_SubvpUclkFclk, param, &response);
+
+ return success;
+}
+
void dcn401_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz)
{
smu_print("SMU Set min deep sleep dcef clk: freq_mhz = %d MHz\n", freq_mhz);
@@ -311,3 +337,14 @@ void dcn401_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t n
dcn401_smu_send_msg_with_param(clk_mgr,
DALSMC_MSG_NumOfDisplays, num_displays, NULL);
}
+
+unsigned int dcn401_smu_get_num_of_umc_channels(struct clk_mgr_internal *clk_mgr)
+{
+ unsigned int response = 0;
+
+ dcn401_smu_send_msg_with_param(clk_mgr, DALSMC_MSG_GetNumUmcChannels, 0, &response);
+
+ smu_print("SMU Get Num UMC Channels: num_umc_channels = %d\n", response);
+
+ return response;
+}
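dcn401_smu_set_subvp_uclk_fclk_hardmin() packs both clock targets into a single 32-bit SMU parameter. A small sketch (illustrative only) of the packing and the matching unpack, assuming the bit layout documented above (uclk in bits 15:0, fclk in bits 31:16):

/* Illustrative sketch only. */
static uint32_t example_pack_uclk_fclk(uint16_t uclk_freq_mhz, uint16_t fclk_freq_mhz)
{
	return ((uint32_t)fclk_freq_mhz << 16) | uclk_freq_mhz;
}

static void example_unpack_uclk_fclk(uint32_t param, uint16_t *uclk_freq_mhz, uint16_t *fclk_freq_mhz)
{
	*uclk_freq_mhz = param & 0xFFFF;
	*fclk_freq_mhz = (param >> 16) & 0xFFFF;
}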
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h
index 651fb8d62864..e02eb1294b37 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h
@@ -23,7 +23,11 @@ bool dcn401_smu_set_idle_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
bool dcn401_smu_set_active_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
uint16_t uclk_freq_mhz,
uint16_t fclk_freq_mhz);
+bool dcn401_smu_set_subvp_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr,
+ uint16_t uclk_freq_mhz,
+ uint16_t fclk_freq_mhz);
void dcn401_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz);
void dcn401_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t num_displays);
+unsigned int dcn401_smu_get_num_of_umc_channels(struct clk_mgr_internal *clk_mgr);
#endif /* __DCN401_CLK_MGR_SMU_MSG_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 1dd26d5df6b9..cecaadf741ad 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -579,7 +579,7 @@ dc_stream_forward_dmcu_crc_window(struct dmcu *dmcu,
bool
dc_stream_forward_crc_window(struct dc_stream_state *stream,
- struct rect *rect, bool is_stop)
+ struct rect *rect, uint8_t phy_id, bool is_stop)
{
struct dmcu *dmcu;
struct dc_dmub_srv *dmub_srv;
@@ -598,7 +598,7 @@ dc_stream_forward_crc_window(struct dc_stream_state *stream,
if (i == MAX_PIPES)
return false;
- mux_mapping.phy_output_num = stream->link->link_enc_hw_inst;
+ mux_mapping.phy_output_num = phy_id;
mux_mapping.otg_output_num = pipe->stream_res.tg->inst;
dmcu = dc->res_pool->dmcu;
@@ -615,6 +615,68 @@ dc_stream_forward_crc_window(struct dc_stream_state *stream,
return true;
}
+
+static void
+dc_stream_forward_dmub_multiple_crc_window(struct dc_dmub_srv *dmub_srv,
+ struct crc_window *window, struct otg_phy_mux *mux_mapping, bool stop)
+{
+ int i;
+ union dmub_rb_cmd cmd = {0};
+
+ cmd.secure_display.mul_roi_ctl.phy_id = mux_mapping->phy_output_num;
+ cmd.secure_display.mul_roi_ctl.otg_id = mux_mapping->otg_output_num;
+
+ cmd.secure_display.header.type = DMUB_CMD__SECURE_DISPLAY;
+
+ if (stop) {
+ cmd.secure_display.header.sub_type = DMUB_CMD__SECURE_DISPLAY_MULTIPLE_CRC_STOP_UPDATE;
+ } else {
+ cmd.secure_display.header.sub_type = DMUB_CMD__SECURE_DISPLAY_MULTIPLE_CRC_WIN_NOTIFY;
+ for (i = 0; i < MAX_CRC_WINDOW_NUM; i++) {
+ cmd.secure_display.mul_roi_ctl.roi_ctl[i].x_start = window[i].rect.x;
+ cmd.secure_display.mul_roi_ctl.roi_ctl[i].y_start = window[i].rect.y;
+ cmd.secure_display.mul_roi_ctl.roi_ctl[i].x_end = window[i].rect.x + window[i].rect.width;
+ cmd.secure_display.mul_roi_ctl.roi_ctl[i].y_end = window[i].rect.y + window[i].rect.height;
+ cmd.secure_display.mul_roi_ctl.roi_ctl[i].enable = window[i].enable;
+ }
+ }
+
+ dc_wake_and_execute_dmub_cmd(dmub_srv->ctx, &cmd, DM_DMUB_WAIT_TYPE_NO_WAIT);
+}
+
+bool
+dc_stream_forward_multiple_crc_window(struct dc_stream_state *stream,
+ struct crc_window *window, uint8_t phy_id, bool stop)
+{
+ struct dc_dmub_srv *dmub_srv;
+ struct otg_phy_mux mux_mapping;
+ struct pipe_ctx *pipe;
+ int i;
+ struct dc *dc = stream->ctx->dc;
+
+ for (i = 0; i < MAX_PIPES; i++) {
+ pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ if (pipe->stream == stream && !pipe->top_pipe && !pipe->prev_odm_pipe)
+ break;
+ }
+
+ /* Stream not found */
+ if (i == MAX_PIPES)
+ return false;
+
+ mux_mapping.phy_output_num = phy_id;
+ mux_mapping.otg_output_num = pipe->stream_res.tg->inst;
+
+ dmub_srv = dc->ctx->dmub_srv;
+
+ /* forward to DMUB only; no DMCU support */
+ if (dmub_srv)
+ dc_stream_forward_dmub_multiple_crc_window(dmub_srv, window, &mux_mapping, stop);
+ else
+ return false;
+
+ return true;
+}
#endif /* CONFIG_DRM_AMD_SECURE_DISPLAY */
/**
@@ -625,15 +687,17 @@ dc_stream_forward_crc_window(struct dc_stream_state *stream,
* @enable: Enable CRC if true, disable otherwise.
* @continuous: Capture CRC on every frame if true. Otherwise, only capture
* once.
+ * @idx: Index of the CRC engine instance to capture on
+ * @reset: Reset the CRC engine before configuring it
*
- * By default, only CRC0 is configured, and the entire frame is used to
- * calculate the CRC.
+ * By default, the entire frame is used to calculate the CRC.
*
* Return: %false if the stream is not found or CRC capture is not supported;
* %true if the stream has been configured.
*/
bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream,
- struct crc_params *crc_window, bool enable, bool continuous)
+ struct crc_params *crc_window, bool enable, bool continuous,
+ uint8_t idx, bool reset)
{
struct pipe_ctx *pipe;
struct crc_params param;
@@ -677,6 +741,9 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream,
param.continuous_mode = continuous;
param.enable = enable;
+ param.crc_eng_inst = idx;
+ param.reset = reset;
+
tg = pipe->stream_res.tg;
/* Only call if supported */
@@ -691,6 +758,7 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream,
*
* @dc: DC object.
* @stream: The DC stream state of the stream to get CRCs from.
+ * @idx: Index of the CRC engine to read the CRC from
* @r_cr: CRC value for the red component.
* @g_y: CRC value for the green component.
* @b_cb: CRC value for the blue component.
@@ -700,7 +768,7 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream,
* Return:
* %false if stream is not found, or if CRCs are not enabled.
*/
-bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream,
+bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream, uint8_t idx,
uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb)
{
int i;
@@ -721,7 +789,7 @@ bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream,
tg = pipe->stream_res.tg;
if (tg->funcs->get_crc)
- return tg->funcs->get_crc(tg, r_cr, g_y, b_cb);
+ return tg->funcs->get_crc(tg, idx, r_cr, g_y, b_cb);
DC_LOG_WARNING("CRC capture not supported.");
return false;
}
@@ -1173,6 +1241,8 @@ static void dc_update_visual_confirm_color(struct dc *dc, struct dc_state *conte
get_mclk_switch_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
else if (dc->debug.visual_confirm == VISUAL_CONFIRM_FAMS2)
get_fams2_visual_confirm_color(dc, context, pipe_ctx, &(pipe_ctx->visual_confirm_color));
+ else if (dc->debug.visual_confirm == VISUAL_CONFIRM_VABC)
+ get_vabc_visual_confirm_color(pipe_ctx, &(pipe_ctx->visual_confirm_color));
}
}
}
@@ -2153,6 +2223,11 @@ enum dc_status dc_commit_streams(struct dc *dc, struct dc_commit_streams_params
struct dc_stream_state *stream = params->streams[i];
struct dc_stream_status *status = dc_stream_get_status(stream);
+ /* revalidate streams */
+ res = dc_validate_stream(dc, stream);
+ if (res != DC_OK)
+ return res;
+
dc_stream_log(dc, stream);
set[i].stream = stream;
@@ -2487,7 +2562,7 @@ static enum surface_update_type get_plane_info_update_type(const struct dc *dc,
if (memcmp(&u->plane_info->tiling_info, &u->surface->tiling_info,
- sizeof(union dc_tiling_info)) != 0) {
+ sizeof(struct dc_tiling_info)) != 0) {
update_flags->bits.swizzle_change = 1;
elevate_update_type(&update_type, UPDATE_TYPE_MED);
@@ -2982,6 +3057,10 @@ static void copy_surface_update_to_plane(
if (srf_update->cursor_csc_color_matrix)
surface->cursor_csc_color_matrix =
*srf_update->cursor_csc_color_matrix;
+
+ if (srf_update->bias_and_scale.bias_and_scale_valid)
+ surface->bias_and_scale =
+ srf_update->bias_and_scale;
}
static void copy_stream_update_to_stream(struct dc *dc,
@@ -4510,7 +4589,7 @@ static bool commit_minimal_transition_based_on_current_context(struct dc *dc,
struct pipe_split_policy_backup policy;
struct dc_state *intermediate_context;
struct dc_state *old_current_state = dc->current_state;
- struct dc_surface_update srf_updates[MAX_SURFACE_NUM] = {0};
+ struct dc_surface_update srf_updates[MAX_SURFACES] = {0};
int surface_count;
/*
@@ -5307,11 +5386,9 @@ void dc_set_power_state(struct dc *dc, enum dc_acpi_cm_power_state power_state)
dc->vm_pa_config.valid) {
dc->hwss.init_sys_ctx(dc->hwseq, dc, &dc->vm_pa_config);
}
-
break;
default:
ASSERT(dc->current_state->stream_count == 0);
-
dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, power_state);
dc_state_destruct(dc->current_state);
@@ -5435,6 +5512,11 @@ bool dc_set_ips_disable(struct dc *dc, unsigned int disable_ips)
void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, char const *caller_name)
{
+ int idle_fclk_khz = 0, idle_dramclk_khz = 0, i = 0;
+ enum mall_stream_type subvp_pipe_type[MAX_PIPES] = {0};
+ struct pipe_ctx *pipe = NULL;
+ struct dc_state *context = dc->current_state;
+
if (dc->debug.disable_idle_power_optimizations) {
DC_LOG_DEBUG("%s: disabled\n", __func__);
return;
@@ -5459,6 +5541,23 @@ void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, char const
dc->idle_optimizations_allowed = allow;
DC_LOG_DEBUG("%s: %s\n", __func__, allow ? "enabled" : "disabled");
}
+
+ // log idle clocks and SubVP pipe types at idle optimization time
+ if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->get_hard_min_fclk)
+ idle_fclk_khz = dc->clk_mgr->funcs->get_hard_min_fclk(dc->clk_mgr);
+
+ if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->get_hard_min_memclk)
+ idle_dramclk_khz = dc->clk_mgr->funcs->get_hard_min_memclk(dc->clk_mgr);
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ subvp_pipe_type[i] = dc_state_get_pipe_subvp_type(context, pipe);
+ }
+
+ DC_LOG_DC("%s: allow_idle=%d\n HardMinUClk_Khz=%d HardMinDramclk_Khz=%d\n Pipe_0=%d Pipe_1=%d Pipe_2=%d Pipe_3=%d Pipe_4=%d Pipe_5=%d (caller=%s)\n",
+ __func__, allow, idle_fclk_khz, idle_dramclk_khz, subvp_pipe_type[0], subvp_pipe_type[1], subvp_pipe_type[2],
+ subvp_pipe_type[3], subvp_pipe_type[4], subvp_pipe_type[5], caller_name);
+
}
void dc_exit_ips_for_hw_access_internal(struct dc *dc, const char *caller_name)
@@ -6056,7 +6155,7 @@ void dc_query_current_properties(struct dc *dc, struct dc_current_properties *pr
bool subvp_sw_cursor_req = false;
for (i = 0; i < dc->current_state->stream_count; i++) {
- if (check_subvp_sw_cursor_fallback_req(dc, dc->current_state->streams[i])) {
+ if (check_subvp_sw_cursor_fallback_req(dc, dc->current_state->streams[i]) && !dc->current_state->streams[i]->hw_cursor_req) {
subvp_sw_cursor_req = true;
break;
}
@@ -6109,3 +6208,21 @@ struct dc_power_profile dc_get_power_profile_for_dc_state(const struct dc_state
profile.power_level = dc->res_pool->funcs->get_power_profile(context);
return profile;
}
+
+/*
+ **********************************************************************************
+ * dc_get_det_buffer_size_from_state() - extracts detile buffer size from dc state
+ *
+ * Called when DM wants to log detile buffer size from dc_state
+ *
+ **********************************************************************************
+ */
+unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context)
+{
+ struct dc *dc = context->clk_mgr->ctx->dc;
+
+ if (dc->res_pool->funcs->get_det_buffer_size)
+ return dc->res_pool->funcs->get_det_buffer_size(context);
+ else
+ return 0;
+}
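dc_stream_forward_multiple_crc_window() takes an array of MAX_CRC_WINDOW_NUM regions and forwards them to DMUB. A hedged usage sketch (illustrative only), assuming the stream and phy_id come from the DM layer and CONFIG_DRM_AMD_SECURE_DISPLAY is enabled:

#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
/* Illustrative sketch only: forward one enabled ROI, leave the second engine idle. */
static bool example_forward_two_crc_windows(struct dc_stream_state *stream, uint8_t phy_id)
{
	struct crc_window windows[MAX_CRC_WINDOW_NUM] = {0};

	windows[0].rect.x = 0;
	windows[0].rect.y = 0;
	windows[0].rect.width = 256;
	windows[0].rect.height = 256;
	windows[0].enable = true;
	windows[1].enable = false;

	return dc_stream_forward_multiple_crc_window(stream, windows, phy_id, false /* stop */);
}
#endif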
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index 252af83e34a5..6eb9bae3af91 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -425,6 +425,44 @@ void get_hdr_visual_confirm_color(
}
}
+/* Visual Confirm color definition for VABC */
+void get_vabc_visual_confirm_color(
+ struct pipe_ctx *pipe_ctx,
+ struct tg_color *color)
+{
+ uint32_t color_value = MAX_TG_COLOR_VALUE;
+ struct dc_link *edp_link = NULL;
+
+ if (pipe_ctx && pipe_ctx->stream && pipe_ctx->stream->link) {
+ if (pipe_ctx->stream->link->connector_signal == SIGNAL_TYPE_EDP)
+ edp_link = pipe_ctx->stream->link;
+ }
+
+ if (edp_link) {
+ switch (edp_link->backlight_control_type) {
+ case BACKLIGHT_CONTROL_PWM:
+ color->color_r_cr = color_value;
+ color->color_g_y = 0;
+ color->color_b_cb = 0;
+ break;
+ case BACKLIGHT_CONTROL_AMD_AUX:
+ color->color_r_cr = 0;
+ color->color_g_y = color_value;
+ color->color_b_cb = 0;
+ break;
+ case BACKLIGHT_CONTROL_VESA_AUX:
+ color->color_r_cr = 0;
+ color->color_g_y = 0;
+ color->color_b_cb = color_value;
+ break;
+ }
+ } else {
+ color->color_r_cr = 0;
+ color->color_g_y = 0;
+ color->color_b_cb = 0;
+ }
+}
+
void get_subvp_visual_confirm_color(
struct pipe_ctx *pipe_ctx,
struct tg_color *color)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
index 457d60eeb486..c1b79b379447 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
@@ -125,6 +125,14 @@ uint32_t dc_link_bandwidth_kbps(
return link->dc->link_srv->dp_link_bandwidth_kbps(link, link_settings);
}
+uint32_t dc_link_required_hblank_size_bytes(
+ const struct dc_link *link,
+ struct dp_audio_bandwidth_params *audio_params)
+{
+ return link->dc->link_srv->dp_required_hblank_size_bytes(link,
+ audio_params);
+}
+
void dc_get_cur_link_res_map(const struct dc *dc, uint32_t *map)
{
dc->link_srv->get_cur_res_map(dc, map);
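dc_link_required_hblank_size_bytes() is a thin wrapper over link_srv->dp_required_hblank_size_bytes(). A sketch of a caller (illustrative only); the timing pointer, 128b/132b encoding and the 8-channel/192 kHz audio figures are placeholders chosen purely for illustration:

/* Illustrative sketch only: returns 0 for 8b/10b SST, where the calculation is unsupported. */
static uint32_t example_min_hblank_bytes(const struct dc_link *link,
		const struct dc_crtc_timing *timing)
{
	struct dp_audio_bandwidth_params params = {
		.crtc_timing = timing,
		.link_encoding = DP_128b_132b_ENCODING,
		.channel_count = 8,
		.sample_rate_hz = 192000,
	};

	return dc_link_required_hblank_size_bytes(link, &params);
}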
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 619fad17de55..520a34a42827 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -2094,7 +2094,8 @@ int resource_get_odm_slice_dst_width(struct pipe_ctx *otg_master,
count = resource_get_odm_slice_count(otg_master);
h_active = timing->h_addressable +
timing->h_border_left +
- timing->h_border_right;
+ timing->h_border_right +
+ otg_master->hblank_borrow;
width = h_active / count;
if (otg_master->stream_res.tg)
@@ -4027,6 +4028,41 @@ fail:
}
/**
+ * decide_hblank_borrow - Decides the horizontal blanking borrow value for a given pipe context.
+ * @pipe_ctx: Pointer to the pipe context structure.
+ *
+ * This function calculates the horizontal blanking borrow value for a given pipe context based on the
+ * display stream compression (DSC) configuration. If the horizontal active pixels (hactive) are less
+ * than the total width of the DSC slices, it sets the hblank_borrow value to the difference. If the
+ * total horizontal timing minus the hblank_borrow value is less than 32, it resets the hblank_borrow
+ * value to 0.
+ */
+static void decide_hblank_borrow(struct pipe_ctx *pipe_ctx)
+{
+ uint32_t hactive;
+ uint32_t ceil_slice_width;
+ struct dc_stream_state *stream = NULL;
+
+ if (!pipe_ctx)
+ return;
+
+ stream = pipe_ctx->stream;
+
+ if (stream->timing.flags.DSC) {
+ hactive = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right;
+
+ /* Assume hblank borrow is needed for padding if the slice count does not divide hactive evenly */
+ if (hactive % stream->timing.dsc_cfg.num_slices_h != 0) {
+ ceil_slice_width = (hactive / stream->timing.dsc_cfg.num_slices_h) + 1;
+ pipe_ctx->hblank_borrow = ceil_slice_width * stream->timing.dsc_cfg.num_slices_h - hactive;
+
+ if (stream->timing.h_total - hactive - pipe_ctx->hblank_borrow < 32)
+ pipe_ctx->hblank_borrow = 0;
+ }
+ }
+}
+
+/**
* dc_validate_global_state() - Determine if hardware can support a given state
*
* @dc: dc struct for this driver
@@ -4064,6 +4100,10 @@ enum dc_status dc_validate_global_state(
if (pipe_ctx->stream != stream)
continue;
+ /* Decide whether hblank borrow is needed and save it in pipe_ctx */
+ if (dc->debug.enable_hblank_borrow)
+ decide_hblank_borrow(pipe_ctx);
+
if (dc->res_pool->funcs->patch_unknown_plane_state &&
pipe_ctx->plane_state &&
pipe_ctx->plane_state->tiling_info.gfx9.swizzle == DC_SW_UNKNOWN) {
@@ -4438,7 +4478,7 @@ static void set_hfvs_info_packet(
static void adaptive_sync_override_dp_info_packets_sdp_line_num(
const struct dc_crtc_timing *timing,
struct enc_sdp_line_num *sdp_line_num,
- struct _vcs_dpi_display_pipe_dest_params_st *pipe_dlg_param)
+ unsigned int vstartup_start)
{
uint32_t asic_blank_start = 0;
uint32_t asic_blank_end = 0;
@@ -4453,8 +4493,8 @@ static void adaptive_sync_override_dp_info_packets_sdp_line_num(
asic_blank_end = (asic_blank_start - tg->v_border_bottom -
tg->v_addressable - tg->v_border_top);
- if (pipe_dlg_param->vstartup_start > asic_blank_end) {
- v_update = (tg->v_total - (pipe_dlg_param->vstartup_start - asic_blank_end));
+ if (vstartup_start > asic_blank_end) {
+ v_update = (tg->v_total - (vstartup_start - asic_blank_end));
sdp_line_num->adaptive_sync_line_num_valid = true;
sdp_line_num->adaptive_sync_line_num = (tg->v_total - v_update - 1);
} else {
@@ -4467,7 +4507,7 @@ static void set_adaptive_sync_info_packet(
struct dc_info_packet *info_packet,
const struct dc_stream_state *stream,
struct encoder_info_frame *info_frame,
- struct _vcs_dpi_display_pipe_dest_params_st *pipe_dlg_param)
+ unsigned int vstartup_start)
{
if (!stream->adaptive_sync_infopacket.valid)
return;
@@ -4475,7 +4515,7 @@ static void set_adaptive_sync_info_packet(
adaptive_sync_override_dp_info_packets_sdp_line_num(
&stream->timing,
&info_frame->sdp_line_num,
- pipe_dlg_param);
+ vstartup_start);
*info_packet = stream->adaptive_sync_infopacket;
}
@@ -4508,6 +4548,7 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx)
{
enum signal_type signal = SIGNAL_TYPE_NONE;
struct encoder_info_frame *info = &pipe_ctx->stream_res.encoder_info_frame;
+ unsigned int vstartup_start = 0;
/* default all packets to invalid */
info->avi.valid = false;
@@ -4521,6 +4562,9 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx)
info->adaptive_sync.valid = false;
signal = pipe_ctx->stream->signal;
+ if (pipe_ctx->stream->ctx->dc->res_pool->funcs->get_vstartup_for_pipe)
+ vstartup_start = pipe_ctx->stream->ctx->dc->res_pool->funcs->get_vstartup_for_pipe(pipe_ctx);
+
/* HDMi and DP have different info packets*/
if (dc_is_hdmi_signal(signal)) {
set_avi_info_frame(&info->avi, pipe_ctx);
@@ -4542,7 +4586,7 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx)
set_adaptive_sync_info_packet(&info->adaptive_sync,
pipe_ctx->stream,
info,
- &pipe_ctx->pipe_dlg_param);
+ vstartup_start);
}
patch_gamut_packet_checksum(&info->gamut);
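decide_hblank_borrow() pads each DSC slice up to a whole pixel width and borrows the difference from hblank. A worked sketch (illustrative only) with hypothetical numbers, 2522 active pixels across 4 slices, showing the same arithmetic:

/* Illustrative sketch only. */
static uint32_t example_hblank_borrow(void)
{
	const uint32_t hactive = 2522;      /* hypothetical h_addressable + borders */
	const uint32_t num_slices_h = 4;    /* hypothetical DSC slice count */
	uint32_t ceil_slice_width, borrow = 0;

	if (hactive % num_slices_h != 0) {
		ceil_slice_width = (hactive / num_slices_h) + 1;    /* 631 */
		borrow = ceil_slice_width * num_slices_h - hactive; /* 2524 - 2522 = 2 */
	}

	/* The real code drops the borrow again if fewer than 32 hblank pixels remain. */
	return borrow;
}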
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
index e006f816ff2f..1b2cce127981 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -483,9 +483,9 @@ bool dc_state_add_plane(
if (stream_status == NULL) {
dm_error("Existing stream not found; failed to attach surface!\n");
goto out;
- } else if (stream_status->plane_count == MAX_SURFACE_NUM) {
+ } else if (stream_status->plane_count == MAX_SURFACES) {
dm_error("Surface: can not attach plane_state %p! Maximum is: %d\n",
- plane_state, MAX_SURFACE_NUM);
+ plane_state, MAX_SURFACES);
goto out;
} else if (!otg_master_pipe) {
goto out;
@@ -600,7 +600,7 @@ bool dc_state_rem_all_planes_for_stream(
{
int i, old_plane_count;
struct dc_stream_status *stream_status = NULL;
- struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 };
+ struct dc_plane_state *del_planes[MAX_SURFACES] = { 0 };
for (i = 0; i < state->stream_count; i++)
if (state->streams[i] == stream) {
@@ -875,7 +875,7 @@ bool dc_state_rem_all_phantom_planes_for_stream(
{
int i, old_plane_count;
struct dc_stream_status *stream_status = NULL;
- struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 };
+ struct dc_plane_state *del_planes[MAX_SURFACES] = { 0 };
for (i = 0; i < state->stream_count; i++)
if (state->streams[i] == phantom_stream) {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 55dc482d9b36..e8134c47fe0d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -37,6 +37,8 @@
#define DC_LOGGER dc->ctx->logger
#ifndef MIN
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+#endif
+#ifndef MAX
#define MAX(x, y) ((x > y) ? x : y)
#endif
@@ -605,17 +607,6 @@ bool dc_stream_remove_writeback(struct dc *dc,
return true;
}
-bool dc_stream_warmup_writeback(struct dc *dc,
- int num_dwb,
- struct dc_writeback_info *wb_info)
-{
- dc_exit_ips_for_hw_access(dc);
-
- if (dc->hwss.mmhubbub_warmup)
- return dc->hwss.mmhubbub_warmup(dc, num_dwb, wb_info);
- else
- return false;
-}
uint32_t dc_stream_get_vblank_counter(const struct dc_stream_state *stream)
{
uint8_t i;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
index ccbb15f1638c..f3471d45b312 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
@@ -83,13 +83,6 @@ uint8_t dc_plane_get_pipe_mask(struct dc_state *dc_state, const struct dc_plane
/*******************************************************************************
* Public functions
******************************************************************************/
-void enable_surface_flip_reporting(struct dc_plane_state *plane_state,
- uint32_t controller_id)
-{
- plane_state->irq_source = controller_id + DC_IRQ_SOURCE_PFLIP1 - 1;
- /*register_flip_interrupt(surface);*/
-}
-
struct dc_plane_state *dc_create_plane_state(const struct dc *dc)
{
struct dc_plane_state *plane_state = kvzalloc(sizeof(*plane_state),
@@ -277,4 +270,50 @@ void dc_3dlut_func_retain(struct dc_3dlut *lut)
kref_get(&lut->refcount);
}
+void dc_plane_force_update_for_panic(struct dc_plane_state *plane_state,
+ bool clear_tiling)
+{
+ struct dc *dc;
+ int i;
+
+ if (!plane_state)
+ return;
+
+ dc = plane_state->ctx->dc;
+ if (!dc || !dc->current_state)
+ return;
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ if (!pipe_ctx)
+ continue;
+
+ if (dc->ctx->dce_version >= DCE_VERSION_MAX) {
+ struct hubp *hubp = pipe_ctx->plane_res.hubp;
+ if (!hubp)
+ continue;
+ /* if framebuffer is tiled, disable tiling */
+ if (clear_tiling && hubp->funcs->hubp_clear_tiling)
+ hubp->funcs->hubp_clear_tiling(hubp);
+
+ /* force page flip to see the new content of the framebuffer */
+ hubp->funcs->hubp_program_surface_flip_and_addr(hubp,
+ &plane_state->address,
+ true);
+ } else {
+ struct mem_input *mi = pipe_ctx->plane_res.mi;
+ if (!mi)
+ continue;
+ /* if framebuffer is tiled, disable tiling */
+ if (clear_tiling && mi->funcs->mem_input_clear_tiling)
+ mi->funcs->mem_input_clear_tiling(mi);
+
+ /* force page flip to see the new content of the framebuffer */
+ mi->funcs->mem_input_program_surface_flip_and_addr(mi,
+ &plane_state->address,
+ true);
+ }
+ }
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 104051935884..053481ab69ef 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -55,9 +55,9 @@ struct aux_payload;
struct set_config_cmd_payload;
struct dmub_notification;
-#define DC_VER "3.2.310"
+#define DC_VER "3.2.316"
-#define MAX_SURFACES 3
+#define MAX_SURFACES 4
#define MAX_PLANES 6
#define MAX_STREAMS 6
#define MIN_VIEWPORT_SIZE 12
@@ -290,6 +290,7 @@ struct dc_caps {
uint16_t subvp_vertical_int_margin_us;
bool seamless_odm;
uint32_t max_v_total;
+ bool vtotal_limited_by_fp2;
uint32_t max_disp_clock_khz_at_vmin;
uint8_t subvp_drr_vblank_start_margin_us;
bool cursor_not_scaled;
@@ -462,6 +463,7 @@ struct dc_config {
bool enable_auto_dpm_test_logs;
unsigned int disable_ips;
unsigned int disable_ips_in_vpb;
+ bool disable_ips_in_dpms_off;
bool usb4_bw_alloc_support;
bool allow_0_dtb_clk;
bool use_assr_psp_message;
@@ -470,6 +472,7 @@ struct dc_config {
bool disable_hbr_audio_dp2;
bool consolidated_dpia_dp_lt;
bool set_pipe_unlock_order;
+ bool enable_dpia_pre_training;
};
enum visual_confirm {
@@ -486,6 +489,7 @@ enum visual_confirm {
VISUAL_CONFIRM_MCLK_SWITCH = 16,
VISUAL_CONFIRM_FAMS2 = 19,
VISUAL_CONFIRM_HW_CURSOR = 20,
+ VISUAL_CONFIRM_VABC = 21,
};
enum dc_psr_power_opts {
@@ -627,6 +631,8 @@ struct dc_clocks {
int bw_dispclk_khz;
int idle_dramclk_khz;
int idle_fclk_khz;
+ int subvp_prefetch_dramclk_khz;
+ int subvp_prefetch_fclk_khz;
};
struct dc_bw_validation_profile {
@@ -771,7 +777,8 @@ union dpia_debug_options {
uint32_t enable_force_tbt3_work_around:1; /* bit 4 */
uint32_t disable_usb4_pm_support:1; /* bit 5 */
uint32_t enable_consolidated_dpia_dp_lt:1; /* bit 6 */
- uint32_t reserved:25;
+ uint32_t enable_dpia_pre_training:1; /* bit 7 */
+ uint32_t reserved:24;
} bits;
uint32_t raw;
};
@@ -1054,8 +1061,8 @@ struct dc_debug_options {
bool dml21_force_pstate_method;
uint32_t dml21_force_pstate_method_values[MAX_PIPES];
uint32_t dml21_disable_pstate_method_mask;
+ union fw_assisted_mclk_switch_version fams_version;
union dmub_fams2_global_feature_config fams2_config;
- bool enable_legacy_clock_update;
unsigned int force_cositing;
unsigned int disable_spl;
unsigned int force_easf;
@@ -1068,6 +1075,8 @@ struct dc_debug_options {
unsigned int scale_to_sharpness_policy;
bool skip_full_updated_if_possible;
unsigned int enable_oled_edp_power_up_opt;
+ bool enable_hblank_borrow;
+ bool force_subvp_df_throttle;
};
@@ -1298,7 +1307,7 @@ struct dc_plane_state {
struct rect clip_rect;
struct plane_size plane_size;
- union dc_tiling_info tiling_info;
+ struct dc_tiling_info tiling_info;
struct dc_plane_dcc_param dcc;
@@ -1369,7 +1378,7 @@ struct dc_plane_state {
struct dc_plane_info {
struct plane_size plane_size;
- union dc_tiling_info tiling_info;
+ struct dc_tiling_info tiling_info;
struct dc_plane_dcc_param dcc;
enum surface_pixel_format format;
enum dc_rotation_angle rotation;
@@ -1396,7 +1405,7 @@ struct dc_scratch_space {
* store current value in plane states so we can still recover
* a valid current state during dc update.
*/
- struct dc_plane_state plane_states[MAX_SURFACE_NUM];
+ struct dc_plane_state plane_states[MAX_SURFACES];
struct dc_stream_state stream_state;
};
@@ -1524,6 +1533,7 @@ struct dc_surface_update {
const struct dc_cm2_parameters *cm2_params;
const struct dc_csc_transform *cursor_csc_color_matrix;
unsigned int sdr_white_level_nits;
+ struct dc_bias_and_scale bias_and_scale;
};
/*
@@ -2017,6 +2027,24 @@ uint32_t dc_link_bandwidth_kbps(
const struct dc_link *link,
const struct dc_link_settings *link_setting);
+struct dp_audio_bandwidth_params {
+ const struct dc_crtc_timing *crtc_timing;
+ enum dp_link_encoding link_encoding;
+ uint32_t channel_count;
+ uint32_t sample_rate_hz;
+};
+
+/* The function calculates the minimum size of hblank (in bytes) needed to
+ * support the specified channel count and sample rate combination, given the
+ * link encoding and timing to be used. This calculation is not supported
+ * for 8b/10b SST.
+ *
+ * return - min hblank size in bytes, 0 if 8b/10b SST.
+ */
+uint32_t dc_link_required_hblank_size_bytes(
+ const struct dc_link *link,
+ struct dp_audio_bandwidth_params *audio_params);
+
/* The function takes a snapshot of current link resource allocation state
* @dc: pointer to dc of the dm calling this
* @map: a dc link resource snapshot defined internally to dc.
@@ -2376,6 +2404,13 @@ struct dc_sink_dsc_caps {
struct dsc_dec_dpcd_caps dsc_dec_caps;
};
+struct dc_sink_hblank_expansion_caps {
+ // 'true' if these are virtual DPCD's HBlank expansion caps (immediately upstream of sink in MST topology),
+ // 'false' if they are sink's HBlank expansion caps
+ bool is_virtual_dpcd_hblank_expansion;
+ struct hblank_expansion_dpcd_caps dpcd_caps;
+};
+
struct dc_sink_fec_caps {
bool is_rx_fec_supported;
bool is_topology_fec_supported;
@@ -2402,6 +2437,7 @@ struct dc_sink {
struct scdc_caps scdc_caps;
struct dc_sink_dsc_caps dsc_caps;
struct dc_sink_fec_caps fec_caps;
+ struct dc_sink_hblank_expansion_caps hblank_expansion_caps;
bool is_vsc_sdp_colorimetry_supported;
@@ -2550,6 +2586,8 @@ struct dc_power_profile {
struct dc_power_profile dc_get_power_profile_for_dc_state(const struct dc_state *context);
+unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context);
+
/* DSC Interfaces */
#include "dc_dsc.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index f90fc154549a..44ff9abe2880 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -1245,7 +1245,7 @@ static int count_active_streams(const struct dc *dc)
for (i = 0; i < dc->current_state->stream_count; ++i) {
struct dc_stream_state *stream = dc->current_state->streams[i];
- if (stream && !stream->dpms_off)
+ if (stream && (!stream->dpms_off || dc->config.disable_ips_in_dpms_off))
count += 1;
}
@@ -1694,10 +1694,10 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc,
{
uint8_t num_cmds = 1;
uint32_t i;
- union dmub_rb_cmd cmd[MAX_STREAMS + 1];
+ union dmub_rb_cmd cmd[2 * MAX_STREAMS + 1];
struct dmub_rb_cmd_fams2 *global_cmd = &cmd[0].fams2_config;
- memset(cmd, 0, sizeof(union dmub_rb_cmd) * (MAX_STREAMS + 1));
+ memset(cmd, 0, sizeof(union dmub_rb_cmd) * (2 * MAX_STREAMS + 1));
/* fill in generic command header */
global_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
global_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG;
@@ -1714,17 +1714,26 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc,
/* construct per-stream configs */
for (i = 0; i < context->bw_ctx.bw.dcn.fams2_global_config.num_streams; i++) {
- struct dmub_rb_cmd_fams2 *stream_cmd = &cmd[i+1].fams2_config;
+ struct dmub_rb_cmd_fams2 *stream_base_cmd = &cmd[i+1].fams2_config;
+ struct dmub_rb_cmd_fams2 *stream_sub_state_cmd = &cmd[i+1+context->bw_ctx.bw.dcn.fams2_global_config.num_streams].fams2_config;
/* configure command header */
- stream_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
- stream_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG;
- stream_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header);
- stream_cmd->header.multi_cmd_pending = 1;
- /* copy stream static state */
- memcpy(&stream_cmd->config.stream,
- &context->bw_ctx.bw.dcn.fams2_stream_params[i],
- sizeof(struct dmub_fams2_stream_static_state));
+ stream_base_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
+ stream_base_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG;
+ stream_base_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header);
+ stream_base_cmd->header.multi_cmd_pending = 1;
+ stream_sub_state_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH;
+ stream_sub_state_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG;
+ stream_sub_state_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header);
+ stream_sub_state_cmd->header.multi_cmd_pending = 1;
+ /* copy stream static base state */
+ memcpy(&stream_base_cmd->config,
+ &context->bw_ctx.bw.dcn.fams2_stream_base_params[i],
+ sizeof(union dmub_cmd_fams2_config));
+ /* copy stream static sub state */
+ memcpy(&stream_sub_state_cmd->config,
+ &context->bw_ctx.bw.dcn.fams2_stream_sub_params[i],
+ sizeof(union dmub_cmd_fams2_config));
}
}
@@ -1735,8 +1744,8 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc,
if (enable && context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable) {
/* set multi pending for global, and unset for last stream cmd */
global_cmd->header.multi_cmd_pending = 1;
- cmd[context->bw_ctx.bw.dcn.fams2_global_config.num_streams].fams2_config.header.multi_cmd_pending = 0;
- num_cmds += context->bw_ctx.bw.dcn.fams2_global_config.num_streams;
+ cmd[2 * context->bw_ctx.bw.dcn.fams2_global_config.num_streams].fams2_config.header.multi_cmd_pending = 0;
+ num_cmds += 2 * context->bw_ctx.bw.dcn.fams2_global_config.num_streams;
}
dm_execute_dmub_cmd_list(dc->ctx, num_cmds, cmd, DM_DMUB_WAIT_TYPE_WAIT);
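With the per-stream state split into base and sub-state commands, the FAMS2 command array doubles in size. A sketch (illustrative only) of the resulting index layout for num_streams = N, matching the indexing used above:

/*
 * Illustrative sketch only:
 *   cmd[0]              global config
 *   cmd[1 .. N]         per-stream base state
 *   cmd[N + 1 .. 2 * N] per-stream sub state
 * cmd[2 * N] is the final command, so its multi_cmd_pending bit is cleared.
 */
static inline unsigned int example_fams2_sub_state_index(unsigned int stream_idx,
		unsigned int num_streams)
{
	return 1 + stream_idx + num_streams;
}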
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
index 8dd6eb044829..94ce8fe74481 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
@@ -969,6 +969,21 @@ union dp_sink_video_fallback_formats {
uint8_t raw;
};
+union dp_receive_port0_cap {
+ struct {
+ uint8_t RESERVED :1;
+ uint8_t LOCAL_EDID_PRESENT :1;
+ uint8_t ASSOCIATED_TO_PRECEDING_PORT:1;
+ uint8_t HBLANK_EXPANSION_CAPABLE :1;
+ uint8_t BUFFER_SIZE_UNIT :1;
+ uint8_t BUFFER_SIZE_PER_PORT :1;
+ uint8_t HBLANK_REDUCTION_CAPABLE :1;
+ uint8_t RESERVED2:1;
+ uint8_t BUFFER_SIZE:8;
+ } bits;
+ uint8_t raw[2];
+};
+
union dpcd_max_uncompressed_pixel_rate_cap {
struct {
uint16_t max_uncompressed_pixel_rate_cap :15;
@@ -1193,6 +1208,7 @@ struct dpcd_caps {
struct replay_info pr_info;
uint16_t edp_oled_emission_rate;
+ union dp_receive_port0_cap receive_port0_cap;
};
union dpcd_sink_ext_caps {
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dsc.h b/drivers/gpu/drm/amd/display/dc/dc_dsc.h
index 9014c2409817..9d18f1c08079 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dsc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dsc.h
@@ -94,6 +94,11 @@ uint32_t dc_dsc_stream_bandwidth_overhead_in_kbps(
const int num_slices_h,
const bool is_dp);
+void dc_dsc_dump_decoder_caps(const struct display_stream_compressor *dsc,
+ const struct dsc_dec_dpcd_caps *dsc_sink_caps);
+void dc_dsc_dump_encoder_caps(const struct display_stream_compressor *dsc,
+ const struct dc_crtc_timing *timing);
+
/* TODO - Hardware/specs limitation should be owned by dc dsc and returned to DM,
* and DM can choose to OVERRIDE the limitation on CASE BY CASE basis.
* Hardware/specs limitation should not be writable by DM.
diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
index c10567ec1c81..5ac55601a6da 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
@@ -341,89 +341,101 @@ enum swizzle_mode_addr3_values {
DC_ADDR3_SW_UNKNOWN = DC_ADDR3_SW_MAX
};
-union dc_tiling_info {
-
- struct {
- /* Specifies the number of memory banks for tiling
- * purposes.
- * Only applies to 2D and 3D tiling modes.
- * POSSIBLE VALUES: 2,4,8,16
- */
- unsigned int num_banks;
- /* Specifies the number of tiles in the x direction
- * to be incorporated into the same bank.
- * Only applies to 2D and 3D tiling modes.
- * POSSIBLE VALUES: 1,2,4,8
- */
- unsigned int bank_width;
- unsigned int bank_width_c;
- /* Specifies the number of tiles in the y direction to
- * be incorporated into the same bank.
- * Only applies to 2D and 3D tiling modes.
- * POSSIBLE VALUES: 1,2,4,8
- */
- unsigned int bank_height;
- unsigned int bank_height_c;
- /* Specifies the macro tile aspect ratio. Only applies
- * to 2D and 3D tiling modes.
- */
- unsigned int tile_aspect;
- unsigned int tile_aspect_c;
- /* Specifies the number of bytes that will be stored
- * contiguously for each tile.
- * If the tile data requires more storage than this
- * amount, it is split into multiple slices.
- * This field must not be larger than
- * GB_ADDR_CONFIG.DRAM_ROW_SIZE.
- * Only applies to 2D and 3D tiling modes.
- * For color render targets, TILE_SPLIT >= 256B.
- */
- enum tile_split_values tile_split;
- enum tile_split_values tile_split_c;
- /* Specifies the addressing within a tile.
- * 0x0 - DISPLAY_MICRO_TILING
- * 0x1 - THIN_MICRO_TILING
- * 0x2 - DEPTH_MICRO_TILING
- * 0x3 - ROTATED_MICRO_TILING
- */
- enum tile_mode_values tile_mode;
- enum tile_mode_values tile_mode_c;
- /* Specifies the number of pipes and how they are
- * interleaved in the surface.
- * Refer to memory addressing document for complete
- * details and constraints.
- */
- unsigned int pipe_config;
- /* Specifies the tiling mode of the surface.
- * THIN tiles use an 8x8x1 tile size.
- * THICK tiles use an 8x8x4 tile size.
- * 2D tiling modes rotate banks for successive Z slices
- * 3D tiling modes rotate pipes and banks for Z slices
- * Refer to memory addressing document for complete
- * details and constraints.
- */
- enum array_mode_values array_mode;
- } gfx8;
+enum dc_gfxversion {
+ DcGfxVersion7 = 0,
+ DcGfxVersion8,
+ DcGfxVersion9,
+ DcGfxVersion10,
+ DcGfxVersion11,
+ DcGfxAddr3,
+ DcGfxVersionUnknown
+};
+
+ struct dc_tiling_info {
+ unsigned int gfxversion; // Specifies which part of the union to use; must be a value from the dc_gfxversion enum above
+ union {
+ struct {
+ /* Specifies the number of memory banks for tiling
+ * purposes.
+ * Only applies to 2D and 3D tiling modes.
+ * POSSIBLE VALUES: 2,4,8,16
+ */
+ unsigned int num_banks;
+ /* Specifies the number of tiles in the x direction
+ * to be incorporated into the same bank.
+ * Only applies to 2D and 3D tiling modes.
+ * POSSIBLE VALUES: 1,2,4,8
+ */
+ unsigned int bank_width;
+ unsigned int bank_width_c;
+ /* Specifies the number of tiles in the y direction to
+ * be incorporated into the same bank.
+ * Only applies to 2D and 3D tiling modes.
+ * POSSIBLE VALUES: 1,2,4,8
+ */
+ unsigned int bank_height;
+ unsigned int bank_height_c;
+ /* Specifies the macro tile aspect ratio. Only applies
+ * to 2D and 3D tiling modes.
+ */
+ unsigned int tile_aspect;
+ unsigned int tile_aspect_c;
+ /* Specifies the number of bytes that will be stored
+ * contiguously for each tile.
+ * If the tile data requires more storage than this
+ * amount, it is split into multiple slices.
+ * This field must not be larger than
+ * GB_ADDR_CONFIG.DRAM_ROW_SIZE.
+ * Only applies to 2D and 3D tiling modes.
+ * For color render targets, TILE_SPLIT >= 256B.
+ */
+ enum tile_split_values tile_split;
+ enum tile_split_values tile_split_c;
+ /* Specifies the addressing within a tile.
+ * 0x0 - DISPLAY_MICRO_TILING
+ * 0x1 - THIN_MICRO_TILING
+ * 0x2 - DEPTH_MICRO_TILING
+ * 0x3 - ROTATED_MICRO_TILING
+ */
+ enum tile_mode_values tile_mode;
+ enum tile_mode_values tile_mode_c;
+ /* Specifies the number of pipes and how they are
+ * interleaved in the surface.
+ * Refer to memory addressing document for complete
+ * details and constraints.
+ */
+ unsigned int pipe_config;
+ /* Specifies the tiling mode of the surface.
+ * THIN tiles use an 8x8x1 tile size.
+ * THICK tiles use an 8x8x4 tile size.
+ * 2D tiling modes rotate banks for successive Z slices
+ * 3D tiling modes rotate pipes and banks for Z slices
+ * Refer to memory addressing document for complete
+ * details and constraints.
+ */
+ enum array_mode_values array_mode;
+ } gfx8;
- struct {
- enum swizzle_mode_values swizzle;
- unsigned int num_pipes;
- unsigned int max_compressed_frags;
- unsigned int pipe_interleave;
-
- unsigned int num_banks;
- unsigned int num_shader_engines;
- unsigned int num_rb_per_se;
- bool shaderEnable;
-
- bool meta_linear;
- bool rb_aligned;
- bool pipe_aligned;
- unsigned int num_pkrs;
- } gfx9;/*gfx9, gfx10 and above*/
- struct {
- enum swizzle_mode_addr3_values swizzle;
- } gfx_addr3;/*gfx with addr3 and above*/
+ struct {
+ enum swizzle_mode_values swizzle;
+ unsigned int num_pipes;
+ unsigned int max_compressed_frags;
+ unsigned int pipe_interleave;
+
+ unsigned int num_banks;
+ unsigned int num_shader_engines;
+ unsigned int num_rb_per_se;
+ bool shaderEnable;
+
+ bool meta_linear;
+ bool rb_aligned;
+ bool pipe_aligned;
+ unsigned int num_pkrs;
+ } gfx9;/*gfx9, gfx10 and above*/
+ struct {
+ enum swizzle_mode_addr3_values swizzle;
+ } gfx_addr3;/*gfx with addr3 and above*/
+ };
};
/* Rotation angle */
@@ -975,6 +987,9 @@ struct dc_crtc_timing {
struct dc_crtc_timing_flags flags;
uint32_t dsc_fixed_bits_per_pixel_x16; /* DSC target bitrate in 1/16 of bpp (e.g. 128 -> 8bpp) */
struct dc_dsc_config dsc_cfg;
+
+ /* The number of pixels that HBlank has been expanded by from the original EDID timing. */
+ uint32_t expanded_hblank;
};
enum trigger_delay {
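Because dc_tiling_info now wraps the old union behind an explicit gfxversion tag, producers are expected to set the tag before filling the matching sub-struct. A minimal sketch (illustrative only) for a GFX9-style surface; the numeric values are placeholders:

/* Illustrative sketch only. */
static void example_fill_gfx9_tiling(struct dc_tiling_info *ti)
{
	ti->gfxversion = DcGfxVersion9;
	ti->gfx9.swizzle = DC_SW_LINEAR;
	ti->gfx9.num_pipes = 4;         /* placeholder ASIC values */
	ti->gfx9.num_banks = 8;
	ti->gfx9.pipe_interleave = 256;
}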
diff --git a/drivers/gpu/drm/amd/display/dc/dc_plane.h b/drivers/gpu/drm/amd/display/dc/dc_plane.h
index bd37ec82b42d..fabcefeda288 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_plane.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_plane.h
@@ -34,4 +34,7 @@ const struct dc_plane_status *dc_plane_get_status(
void dc_plane_state_retain(struct dc_plane_state *plane_state);
void dc_plane_state_release(struct dc_plane_state *plane_state);
+void dc_plane_force_update_for_panic(struct dc_plane_state *plane_state,
+ bool clear_tiling);
+
#endif /* _DC_PLANE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c
index c8d8e335fa37..3518eb1b8cd1 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c
@@ -64,6 +64,13 @@ static void populate_inits_from_splinits(struct scl_inits *inits,
inits->h_c = dc_fixpt_from_int_dy(spl_inits->h_filter_init_int_c, spl_inits->h_filter_init_frac_c >> 5, 0, 19);
inits->v_c = dc_fixpt_from_int_dy(spl_inits->v_filter_init_int_c, spl_inits->v_filter_init_frac_c >> 5, 0, 19);
}
+static void populate_splformat_from_format(enum spl_pixel_format *spl_pixel_format, const enum pixel_format pixel_format)
+{
+ if (pixel_format < PIXEL_FORMAT_INVALID)
+ *spl_pixel_format = (enum spl_pixel_format)pixel_format;
+ else
+ *spl_pixel_format = SPL_PIXEL_FORMAT_INVALID;
+}
/// @brief Translate SPL input parameters from pipe context
/// @param pipe_ctx
/// @param spl_in
@@ -89,7 +96,7 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl
spl_in->callbacks = dcn2_spl_callbacks;
}
// Make format field from spl_in point to plane_res scl_data format
- spl_in->basic_in.format = (enum spl_pixel_format)pipe_ctx->plane_res.scl_data.format;
+ populate_splformat_from_format(&spl_in->basic_in.format, pipe_ctx->plane_res.scl_data.format);
// Make view_format from basic_out point to view_format from stream
spl_in->basic_out.view_format = (enum spl_view_3d)stream->view_format;
// Populate spl input basic input clip rect from plane state clip rect
@@ -108,19 +115,21 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl
spl_in->basic_in.horizontal_mirror = plane_state->horizontal_mirror;
// Calculate horizontal splits and split index
- spl_in->basic_in.mpc_combine_h = resource_get_mpc_slice_count(pipe_ctx);
+ spl_in->basic_in.num_h_slices_recout_width_align.use_recout_width_aligned = false;
+ spl_in->basic_in.num_h_slices_recout_width_align.num_slices_recout_width.mpc_num_h_slices =
+ resource_get_mpc_slice_count(pipe_ctx);
if (stream->view_format == VIEW_3D_FORMAT_SIDE_BY_SIDE)
- spl_in->basic_in.mpc_combine_v = 0;
+ spl_in->basic_in.mpc_h_slice_index = 0;
else
- spl_in->basic_in.mpc_combine_v = resource_get_mpc_slice_index(pipe_ctx);
+ spl_in->basic_in.mpc_h_slice_index = resource_get_mpc_slice_index(pipe_ctx);
populate_splrect_from_rect(&spl_in->basic_out.odm_slice_rect, &odm_slice_src);
spl_in->basic_out.odm_combine_factor = 0;
spl_in->odm_slice_index = resource_get_odm_slice_index(pipe_ctx);
// Make spl input basic out info output_size width point to stream h active
spl_in->basic_out.output_size.width =
- stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right;
+ stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right + pipe_ctx->hblank_borrow;
// Make spl input basic out info output_size height point to v active
spl_in->basic_out.output_size.height =
stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index 413970588a26..3e303c7808fb 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -56,7 +56,7 @@ struct dc_stream_status {
int plane_count;
int audio_inst;
struct timing_sync_info timing_sync_info;
- struct dc_plane_state *plane_states[MAX_SURFACE_NUM];
+ struct dc_plane_state *plane_states[MAX_SURFACES];
bool is_abm_supported;
struct mall_stream_config mall_stream_config;
bool fpo_in_use;
@@ -447,10 +447,6 @@ enum dc_status dc_stream_add_dsc_to_resource(struct dc *dc,
struct dc_state *state,
struct dc_stream_state *stream);
-bool dc_stream_warmup_writeback(struct dc *dc,
- int num_dwb,
- struct dc_writeback_info *wb_info);
-
bool dc_stream_dmdata_status_done(struct dc *dc, struct dc_stream_state *stream);
bool dc_stream_set_dynamic_metadata(struct dc *dc,
@@ -541,17 +537,26 @@ bool dc_stream_get_crtc_position(struct dc *dc,
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
bool dc_stream_forward_crc_window(struct dc_stream_state *stream,
struct rect *rect,
+ uint8_t phy_id,
bool is_stop);
+
+bool dc_stream_forward_multiple_crc_window(struct dc_stream_state *stream,
+ struct crc_window *window,
+ uint8_t phy_id,
+ bool stop);
#endif
bool dc_stream_configure_crc(struct dc *dc,
struct dc_stream_state *stream,
struct crc_params *crc_window,
bool enable,
- bool continuous);
+ bool continuous,
+ uint8_t idx,
+ bool reset);
bool dc_stream_get_crc(struct dc *dc,
struct dc_stream_state *stream,
+ uint8_t idx,
uint32_t *r_cr,
uint32_t *g_y,
uint32_t *b_cb);
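dc_stream_configure_crc() and dc_stream_get_crc() now take a CRC engine index plus an explicit reset flag. A hedged usage sketch (illustrative only) that configures engine 1 for continuous full-frame capture and reads it back, with dc and stream assumed to come from the DM:

/* Illustrative sketch only. */
static bool example_crc_on_engine_1(struct dc *dc, struct dc_stream_state *stream)
{
	uint32_t r_cr, g_y, b_cb;

	if (!dc_stream_configure_crc(dc, stream, NULL /* full frame */,
				     true /* enable */, true /* continuous */,
				     1 /* idx */, true /* reset */))
		return false;

	return dc_stream_get_crc(dc, stream, 1 /* idx */, &r_cr, &g_y, &b_cb);
}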
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index edf4df1d03b5..0c2aa91f0a11 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -76,7 +76,6 @@ struct dc_perf_trace {
unsigned long last_entry_write;
};
-#define MAX_SURFACE_NUM 6
#define NUM_PIXEL_FORMATS 10
enum tiling_mode {
@@ -875,6 +874,14 @@ struct dsc_dec_dpcd_caps {
bool is_dp; /* Decoded format */
};
+struct hblank_expansion_dpcd_caps {
+ bool expansion_supported;
+ bool reduction_supported;
+ bool buffer_unit_bytes; /* True: buffer size in bytes. False: buffer size in pixels*/
+ bool buffer_per_port; /* True: buffer size per port. False: buffer size per lane*/
+ uint32_t buffer_size; /* Add 1 to value and multiply by 32 */
+};
+
struct dc_golden_table {
uint16_t dc_golden_table_ver;
uint32_t aux_dphy_rx_control0_val;
@@ -932,10 +939,17 @@ enum backlight_control_type {
};
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
+#define MAX_CRC_WINDOW_NUM 2
+
struct otg_phy_mux {
uint8_t phy_output_num;
uint8_t otg_output_num;
};
+
+struct crc_window {
+ struct rect rect;
+ bool enable;
+};
#endif
enum dc_detect_reason {
@@ -1052,10 +1066,13 @@ enum replay_FW_Message_type {
union replay_error_status {
struct {
- unsigned char STATE_TRANSITION_ERROR :1;
- unsigned char LINK_CRC_ERROR :1;
- unsigned char DESYNC_ERROR :1;
- unsigned char RESERVED :5;
+ unsigned int STATE_TRANSITION_ERROR :1;
+ unsigned int LINK_CRC_ERROR :1;
+ unsigned int DESYNC_ERROR :1;
+ unsigned int RESERVED_3 :1;
+ unsigned int LOW_RR_INCORRECT_VTOTAL :1;
+ unsigned int NO_DOUBLED_RR :1;
+ unsigned int RESERVED_6_7 :2;
} bits;
unsigned char raw;
};
@@ -1102,6 +1119,8 @@ struct replay_config {
union replay_error_status replay_error_status;
/* Replay Low Hz enable Options */
union replay_low_refresh_rate_enable_options low_rr_enable_options;
+ /* Replay coasting vtotal is within low refresh rate range. */
+ bool low_rr_activated;
};
/* Replay feature flags*/
@@ -1126,10 +1145,12 @@ struct replay_settings {
uint32_t defer_update_coasting_vtotal_table[PR_COASTING_TYPE_NUM];
/* Maximum link off frame count */
uint32_t link_off_frame_count;
- /* Replay pseudo vtotal for abm + ips on full screen video which can improve ips residency */
- uint16_t abm_with_ips_on_full_screen_video_pseudo_vtotal;
+ /* Replay pseudo vtotal for low refresh rate*/
+ uint16_t low_rr_full_screen_video_pseudo_vtotal;
/* Replay last pseudo vtotal set to DMUB */
uint16_t last_pseudo_vtotal;
+ /* Replay desync error */
+ uint32_t replay_desync_error_fail_count;
};
/* To split out "global" and "per-panel" config settings.
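The new hblank_expansion_dpcd_caps.buffer_size field keeps the raw DPCD encoding, which per the comment above is decoded by adding one and multiplying by 32. A one-line sketch of that decode (illustrative only):

/* Illustrative sketch only: raw DPCD value -> buffer size in bytes or pixels. */
static uint32_t example_decode_hblank_buffer_size(uint32_t raw_value)
{
	return (raw_value + 1) * 32;
}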
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
index b700608e4240..077337698e0a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
@@ -1105,6 +1105,9 @@ static bool dcn401_program_pix_clk(
&dto_params);
} else {
+ if (pll_settings->actual_pix_clk_100hz > 6000000UL)
+ return false;
+
/* disables DP DTO when provided with TMDS signal type */
clock_source->ctx->dc->res_pool->dccg->funcs->set_dp_dto(
clock_source->ctx->dc->res_pool->dccg,
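The new guard in dcn401_program_pix_clk() rejects requests above 6000000 on the non-DP-DTO path. As the field name suggests, actual_pix_clk_100hz is in units of 100 Hz, so the bound works out as follows (worked conversion, illustrative only):

/* Illustrative note only: 6000000UL * 100 Hz = 600,000,000 Hz = 600 MHz. */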
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
index f5e1d9caee4c..1c2009e38aa1 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
@@ -98,7 +98,7 @@ static enum mi_bits_per_pixel get_mi_bpp(
}
static enum mi_tiling_format get_mi_tiling(
- union dc_tiling_info *tiling_info)
+ struct dc_tiling_info *tiling_info)
{
switch (tiling_info->gfx8.array_mode) {
case DC_ARRAY_1D_TILED_THIN1:
@@ -133,7 +133,7 @@ static bool is_vert_scan(enum dc_rotation_angle rotation)
static void dce_mi_program_pte_vm(
struct mem_input *mi,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
enum dc_rotation_angle rotation)
{
struct dce_mem_input *dce_mi = TO_DCE_MEM_INPUT(mi);
@@ -430,7 +430,7 @@ static void dce120_mi_program_display_marks(struct mem_input *mi,
}
static void program_tiling(
- struct dce_mem_input *dce_mi, const union dc_tiling_info *info)
+ struct dce_mem_input *dce_mi, const struct dc_tiling_info *info)
{
if (dce_mi->masks->GRPH_SW_MODE) { /* GFX9 */
REG_UPDATE_6(GRPH_CONTROL,
@@ -481,7 +481,6 @@ static void program_tiling(
}
}
-
static void program_size_and_rotation(
struct dce_mem_input *dce_mi,
enum dc_rotation_angle rotation,
@@ -627,10 +626,31 @@ static void program_grph_pixel_format(
GRPH_PRESCALE_B_SIGN, sign);
}
+static void dce_mi_clear_tiling(
+ struct mem_input *mi)
+{
+ struct dce_mem_input *dce_mi = TO_DCE_MEM_INPUT(mi);
+
+ if (dce_mi->masks->GRPH_SW_MODE) { /* GFX9 */
+ REG_UPDATE(GRPH_CONTROL,
+ GRPH_SW_MODE, DC_SW_LINEAR);
+ }
+
+ if (dce_mi->masks->GRPH_MICRO_TILE_MODE) { /* GFX8 */
+ REG_UPDATE(GRPH_CONTROL,
+ GRPH_ARRAY_MODE, DC_SW_LINEAR);
+ }
+
+ if (dce_mi->masks->GRPH_ARRAY_MODE) { /* GFX6 but reuses gfx8 struct */
+ REG_UPDATE(GRPH_CONTROL,
+ GRPH_ARRAY_MODE, DC_SW_LINEAR);
+ }
+}
+
static void dce_mi_program_surface_config(
struct mem_input *mi,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -650,7 +670,7 @@ static void dce_mi_program_surface_config(
static void dce60_mi_program_surface_config(
struct mem_input *mi,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation, /* not used in DCE6 */
struct dc_plane_dcc_param *dcc,
@@ -884,7 +904,8 @@ static const struct mem_input_funcs dce_mi_funcs = {
.mem_input_program_pte_vm = dce_mi_program_pte_vm,
.mem_input_program_surface_config =
dce_mi_program_surface_config,
- .mem_input_is_flip_pending = dce_mi_is_flip_pending
+ .mem_input_is_flip_pending = dce_mi_is_flip_pending,
+ .mem_input_clear_tiling = dce_mi_clear_tiling,
};
#if defined(CONFIG_DRM_AMD_DC_SI)
@@ -897,7 +918,8 @@ static const struct mem_input_funcs dce60_mi_funcs = {
.mem_input_program_pte_vm = dce_mi_program_pte_vm,
.mem_input_program_surface_config =
dce60_mi_program_surface_config,
- .mem_input_is_flip_pending = dce_mi_is_flip_pending
+ .mem_input_is_flip_pending = dce_mi_is_flip_pending,
+ .mem_input_clear_tiling = dce_mi_clear_tiling,
};
#endif
@@ -910,7 +932,8 @@ static const struct mem_input_funcs dce112_mi_funcs = {
.mem_input_program_pte_vm = dce_mi_program_pte_vm,
.mem_input_program_surface_config =
dce_mi_program_surface_config,
- .mem_input_is_flip_pending = dce_mi_is_flip_pending
+ .mem_input_is_flip_pending = dce_mi_is_flip_pending,
+ .mem_input_clear_tiling = dce_mi_clear_tiling,
};
static const struct mem_input_funcs dce120_mi_funcs = {
@@ -922,7 +945,8 @@ static const struct mem_input_funcs dce120_mi_funcs = {
.mem_input_program_pte_vm = dce_mi_program_pte_vm,
.mem_input_program_surface_config =
dce_mi_program_surface_config,
- .mem_input_is_flip_pending = dce_mi_is_flip_pending
+ .mem_input_is_flip_pending = dce_mi_is_flip_pending,
+ .mem_input_clear_tiling = dce_mi_clear_tiling,
};
void dce_mem_input_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
index bf636b28e3e1..5bb8b78bf250 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c
@@ -63,7 +63,8 @@ void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv,
bool should_use_dmub_lock(struct dc_link *link)
{
- if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1)
+ if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1 ||
+ link->psr_settings.psr_version == DC_PSR_VERSION_1)
return true;
if (link->replay_settings.replay_feature_enabled)
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
index cae18f8c1c9a..88c75c243bf8 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
@@ -390,8 +390,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub,
!memcmp(link->dpcd_caps.sink_dev_id_str, DP_SINK_DEVICE_STR_ID_1,
sizeof(DP_SINK_DEVICE_STR_ID_1)))
link->psr_settings.force_ffu_mode = 1;
- else
- link->psr_settings.force_ffu_mode = 0;
+
copy_settings_data->force_ffu_mode = link->psr_settings.force_ffu_mode;
if (((link->dpcd_caps.fec_cap.bits.FEC_CAPABLE &&
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c
index 8a3fbf95c48f..2c43c2422638 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c
@@ -162,7 +162,7 @@ static void enable(struct dce_mem_input *mem_input110)
static void program_tiling(
struct dce_mem_input *mem_input110,
- const union dc_tiling_info *info,
+ const struct dc_tiling_info *info,
const enum surface_pixel_format pixel_format)
{
uint32_t value = 0;
@@ -523,7 +523,7 @@ static const unsigned int dvmm_Hw_Setting_Linear[4][9] = {
/* Helper to get table entry from surface info */
static const unsigned int *get_dvmm_hw_setting(
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
enum surface_pixel_format format,
bool chroma)
{
@@ -563,7 +563,7 @@ static const unsigned int *get_dvmm_hw_setting(
static void dce_mem_input_v_program_pte_vm(
struct mem_input *mem_input,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
enum dc_rotation_angle rotation)
{
struct dce_mem_input *mem_input110 = TO_DCE_MEM_INPUT(mem_input);
@@ -636,7 +636,7 @@ static void dce_mem_input_v_program_pte_vm(
static void dce_mem_input_v_program_surface_config(
struct mem_input *mem_input,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
index fa422a8cbced..61b0807693fb 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
@@ -2127,70 +2127,131 @@ bool dce110_configure_crc(struct timing_generator *tg,
cntl_addr = CRTC_REG(mmCRTC_CRC_CNTL);
- /* First, disable CRC before we configure it. */
- dm_write_reg(tg->ctx, cntl_addr, 0);
+ if (!params->enable || params->reset)
+ /* First, disable CRC before we configure it. */
+ dm_write_reg(tg->ctx, cntl_addr, 0);
if (!params->enable)
return true;
/* Program frame boundaries */
- /* Window A x axis start and end. */
- value = 0;
- addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_X_CONTROL);
- set_reg_field_value(value, params->windowa_x_start,
- CRTC_CRC0_WINDOWA_X_CONTROL,
- CRTC_CRC0_WINDOWA_X_START);
- set_reg_field_value(value, params->windowa_x_end,
- CRTC_CRC0_WINDOWA_X_CONTROL,
- CRTC_CRC0_WINDOWA_X_END);
- dm_write_reg(tg->ctx, addr, value);
-
- /* Window A y axis start and end. */
- value = 0;
- addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_Y_CONTROL);
- set_reg_field_value(value, params->windowa_y_start,
- CRTC_CRC0_WINDOWA_Y_CONTROL,
- CRTC_CRC0_WINDOWA_Y_START);
- set_reg_field_value(value, params->windowa_y_end,
- CRTC_CRC0_WINDOWA_Y_CONTROL,
- CRTC_CRC0_WINDOWA_Y_END);
- dm_write_reg(tg->ctx, addr, value);
-
- /* Window B x axis start and end. */
- value = 0;
- addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_X_CONTROL);
- set_reg_field_value(value, params->windowb_x_start,
- CRTC_CRC0_WINDOWB_X_CONTROL,
- CRTC_CRC0_WINDOWB_X_START);
- set_reg_field_value(value, params->windowb_x_end,
- CRTC_CRC0_WINDOWB_X_CONTROL,
- CRTC_CRC0_WINDOWB_X_END);
- dm_write_reg(tg->ctx, addr, value);
-
- /* Window B y axis start and end. */
- value = 0;
- addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_Y_CONTROL);
- set_reg_field_value(value, params->windowb_y_start,
- CRTC_CRC0_WINDOWB_Y_CONTROL,
- CRTC_CRC0_WINDOWB_Y_START);
- set_reg_field_value(value, params->windowb_y_end,
- CRTC_CRC0_WINDOWB_Y_CONTROL,
- CRTC_CRC0_WINDOWB_Y_END);
- dm_write_reg(tg->ctx, addr, value);
-
- /* Set crc mode and selection, and enable. Only using CRC0*/
- value = 0;
- set_reg_field_value(value, params->continuous_mode ? 1 : 0,
- CRTC_CRC_CNTL, CRTC_CRC_CONT_EN);
- set_reg_field_value(value, params->selection,
- CRTC_CRC_CNTL, CRTC_CRC0_SELECT);
- set_reg_field_value(value, 1, CRTC_CRC_CNTL, CRTC_CRC_EN);
- dm_write_reg(tg->ctx, cntl_addr, value);
+ switch (params->crc_eng_inst) {
+ case 0:
+ /* Window A x axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_X_CONTROL);
+ set_reg_field_value(value, params->windowa_x_start,
+ CRTC_CRC0_WINDOWA_X_CONTROL,
+ CRTC_CRC0_WINDOWA_X_START);
+ set_reg_field_value(value, params->windowa_x_end,
+ CRTC_CRC0_WINDOWA_X_CONTROL,
+ CRTC_CRC0_WINDOWA_X_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Window A y axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC0_WINDOWA_Y_CONTROL);
+ set_reg_field_value(value, params->windowa_y_start,
+ CRTC_CRC0_WINDOWA_Y_CONTROL,
+ CRTC_CRC0_WINDOWA_Y_START);
+ set_reg_field_value(value, params->windowa_y_end,
+ CRTC_CRC0_WINDOWA_Y_CONTROL,
+ CRTC_CRC0_WINDOWA_Y_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Window B x axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_X_CONTROL);
+ set_reg_field_value(value, params->windowb_x_start,
+ CRTC_CRC0_WINDOWB_X_CONTROL,
+ CRTC_CRC0_WINDOWB_X_START);
+ set_reg_field_value(value, params->windowb_x_end,
+ CRTC_CRC0_WINDOWB_X_CONTROL,
+ CRTC_CRC0_WINDOWB_X_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Window B y axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC0_WINDOWB_Y_CONTROL);
+ set_reg_field_value(value, params->windowb_y_start,
+ CRTC_CRC0_WINDOWB_Y_CONTROL,
+ CRTC_CRC0_WINDOWB_Y_START);
+ set_reg_field_value(value, params->windowb_y_end,
+ CRTC_CRC0_WINDOWB_Y_CONTROL,
+ CRTC_CRC0_WINDOWB_Y_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Set crc mode and selection, and enable.*/
+ value = 0;
+ set_reg_field_value(value, params->continuous_mode ? 1 : 0,
+ CRTC_CRC_CNTL, CRTC_CRC_CONT_EN);
+ set_reg_field_value(value, params->selection,
+ CRTC_CRC_CNTL, CRTC_CRC0_SELECT);
+ set_reg_field_value(value, 1, CRTC_CRC_CNTL, CRTC_CRC_EN);
+ dm_write_reg(tg->ctx, cntl_addr, value);
+ break;
+ case 1:
+ /* Window A x axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC1_WINDOWA_X_CONTROL);
+ set_reg_field_value(value, params->windowa_x_start,
+ CRTC_CRC1_WINDOWA_X_CONTROL,
+ CRTC_CRC1_WINDOWA_X_START);
+ set_reg_field_value(value, params->windowa_x_end,
+ CRTC_CRC1_WINDOWA_X_CONTROL,
+ CRTC_CRC1_WINDOWA_X_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Window A y axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC1_WINDOWA_Y_CONTROL);
+ set_reg_field_value(value, params->windowa_y_start,
+ CRTC_CRC1_WINDOWA_Y_CONTROL,
+ CRTC_CRC1_WINDOWA_Y_START);
+ set_reg_field_value(value, params->windowa_y_end,
+ CRTC_CRC1_WINDOWA_Y_CONTROL,
+ CRTC_CRC1_WINDOWA_Y_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Window B x axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC1_WINDOWB_X_CONTROL);
+ set_reg_field_value(value, params->windowb_x_start,
+ CRTC_CRC1_WINDOWB_X_CONTROL,
+ CRTC_CRC1_WINDOWB_X_START);
+ set_reg_field_value(value, params->windowb_x_end,
+ CRTC_CRC1_WINDOWB_X_CONTROL,
+ CRTC_CRC1_WINDOWB_X_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Window B y axis start and end. */
+ value = 0;
+ addr = CRTC_REG(mmCRTC_CRC1_WINDOWB_Y_CONTROL);
+ set_reg_field_value(value, params->windowb_y_start,
+ CRTC_CRC1_WINDOWB_Y_CONTROL,
+ CRTC_CRC1_WINDOWB_Y_START);
+ set_reg_field_value(value, params->windowb_y_end,
+ CRTC_CRC1_WINDOWB_Y_CONTROL,
+ CRTC_CRC1_WINDOWB_Y_END);
+ dm_write_reg(tg->ctx, addr, value);
+
+ /* Set crc mode and selection, and enable.*/
+ value = 0;
+ set_reg_field_value(value, params->continuous_mode ? 1 : 0,
+ CRTC_CRC_CNTL, CRTC_CRC_CONT_EN);
+ set_reg_field_value(value, params->selection,
+ CRTC_CRC_CNTL, CRTC_CRC1_SELECT);
+ set_reg_field_value(value, 1, CRTC_CRC_CNTL, CRTC_CRC_EN);
+ dm_write_reg(tg->ctx, cntl_addr, value);
+ break;
+ default:
+ return false;
+ }
return true;
}
-bool dce110_get_crc(struct timing_generator *tg,
+bool dce110_get_crc(struct timing_generator *tg, uint8_t idx,
uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb)
{
uint32_t addr = 0;
@@ -2206,14 +2267,30 @@ bool dce110_get_crc(struct timing_generator *tg,
if (!field)
return false;
- addr = CRTC_REG(mmCRTC_CRC0_DATA_RG);
- value = dm_read_reg(tg->ctx, addr);
- *r_cr = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_R_CR);
- *g_y = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_G_Y);
+ switch (idx) {
+ case 0:
+ addr = CRTC_REG(mmCRTC_CRC0_DATA_RG);
+ value = dm_read_reg(tg->ctx, addr);
+ *r_cr = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_R_CR);
+ *g_y = get_reg_field_value(value, CRTC_CRC0_DATA_RG, CRC0_G_Y);
- addr = CRTC_REG(mmCRTC_CRC0_DATA_B);
- value = dm_read_reg(tg->ctx, addr);
- *b_cb = get_reg_field_value(value, CRTC_CRC0_DATA_B, CRC0_B_CB);
+ addr = CRTC_REG(mmCRTC_CRC0_DATA_B);
+ value = dm_read_reg(tg->ctx, addr);
+ *b_cb = get_reg_field_value(value, CRTC_CRC0_DATA_B, CRC0_B_CB);
+ break;
+ case 1:
+ addr = CRTC_REG(mmCRTC_CRC1_DATA_RG);
+ value = dm_read_reg(tg->ctx, addr);
+ *r_cr = get_reg_field_value(value, CRTC_CRC1_DATA_RG, CRC1_R_CR);
+ *g_y = get_reg_field_value(value, CRTC_CRC1_DATA_RG, CRC1_G_Y);
+
+ addr = CRTC_REG(mmCRTC_CRC1_DATA_B);
+ value = dm_read_reg(tg->ctx, addr);
+ *b_cb = get_reg_field_value(value, CRTC_CRC1_DATA_B, CRC1_B_CB);
+ break;
+ default:
+ return false;
+ }
return true;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
index ee4de740aceb..e4f5cad64f32 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
@@ -286,7 +286,7 @@ bool dce110_arm_vert_intr(
bool dce110_configure_crc(struct timing_generator *tg,
const struct crc_params *params);
-bool dce110_get_crc(struct timing_generator *tg,
+bool dce110_get_crc(struct timing_generator *tg, uint8_t idx,
uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb);
bool dce110_is_two_pixels_per_container(const struct dc_crtc_timing *timing);
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
index fcf59348eb62..31c4f44ceaac 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
@@ -1100,45 +1100,79 @@ static bool dce120_configure_crc(struct timing_generator *tg,
if (!dce120_is_tg_enabled(tg))
return false;
- /* First, disable CRC before we configure it. */
- dm_write_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL,
- tg110->offsets.crtc, 0);
+ if (!params->enable || params->reset)
+ /* First, disable CRC before we configure it. */
+ dm_write_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL,
+ tg110->offsets.crtc, 0);
if (!params->enable)
return true;
/* Program frame boundaries */
- /* Window A x axis start and end. */
- CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_X_CONTROL,
- CRTC_CRC0_WINDOWA_X_START, params->windowa_x_start,
- CRTC_CRC0_WINDOWA_X_END, params->windowa_x_end);
-
- /* Window A y axis start and end. */
- CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_Y_CONTROL,
- CRTC_CRC0_WINDOWA_Y_START, params->windowa_y_start,
- CRTC_CRC0_WINDOWA_Y_END, params->windowa_y_end);
-
- /* Window B x axis start and end. */
- CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_X_CONTROL,
- CRTC_CRC0_WINDOWB_X_START, params->windowb_x_start,
- CRTC_CRC0_WINDOWB_X_END, params->windowb_x_end);
-
- /* Window B y axis start and end. */
- CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_Y_CONTROL,
- CRTC_CRC0_WINDOWB_Y_START, params->windowb_y_start,
- CRTC_CRC0_WINDOWB_Y_END, params->windowb_y_end);
-
- /* Set crc mode and selection, and enable. Only using CRC0*/
- CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL,
- CRTC_CRC_EN, params->continuous_mode ? 1 : 0,
- CRTC_CRC0_SELECT, params->selection,
- CRTC_CRC_EN, 1);
+ switch (params->crc_eng_inst) {
+ case 0:
+ /* Window A x axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_X_CONTROL,
+ CRTC_CRC0_WINDOWA_X_START, params->windowa_x_start,
+ CRTC_CRC0_WINDOWA_X_END, params->windowa_x_end);
+
+ /* Window A y axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_Y_CONTROL,
+ CRTC_CRC0_WINDOWA_Y_START, params->windowa_y_start,
+ CRTC_CRC0_WINDOWA_Y_END, params->windowa_y_end);
+
+ /* Window B x axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_X_CONTROL,
+ CRTC_CRC0_WINDOWB_X_START, params->windowb_x_start,
+ CRTC_CRC0_WINDOWB_X_END, params->windowb_x_end);
+
+ /* Window B y axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_Y_CONTROL,
+ CRTC_CRC0_WINDOWB_Y_START, params->windowb_y_start,
+ CRTC_CRC0_WINDOWB_Y_END, params->windowb_y_end);
+
+ /* Set crc mode and selection, and enable.*/
+ CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL,
+ CRTC_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
+ CRTC_CRC0_SELECT, params->selection,
+ CRTC_CRC_EN, 1);
+ break;
+ case 1:
+ /* Window A x axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWA_X_CONTROL,
+ CRTC_CRC1_WINDOWA_X_START, params->windowa_x_start,
+ CRTC_CRC1_WINDOWA_X_END, params->windowa_x_end);
+
+ /* Window A y axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWA_Y_CONTROL,
+ CRTC_CRC1_WINDOWA_Y_START, params->windowa_y_start,
+ CRTC_CRC1_WINDOWA_Y_END, params->windowa_y_end);
+
+ /* Window B x axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWB_X_CONTROL,
+ CRTC_CRC1_WINDOWB_X_START, params->windowb_x_start,
+ CRTC_CRC1_WINDOWB_X_END, params->windowb_x_end);
+
+ /* Window B y axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC1_WINDOWB_Y_CONTROL,
+ CRTC_CRC1_WINDOWB_Y_START, params->windowb_y_start,
+ CRTC_CRC1_WINDOWB_Y_END, params->windowb_y_end);
+
+ /* Set crc mode and selection, and enable */
+ CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL,
+ CRTC_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
+ CRTC_CRC1_SELECT, params->selection,
+ CRTC_CRC_EN, 1);
+ break;
+ default:
+ return false;
+ }
return true;
}
-static bool dce120_get_crc(struct timing_generator *tg, uint32_t *r_cr,
- uint32_t *g_y, uint32_t *b_cb)
+static bool dce120_get_crc(struct timing_generator *tg, uint8_t idx,
+ uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb)
{
struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg);
uint32_t value, field;
@@ -1151,14 +1185,30 @@ static bool dce120_get_crc(struct timing_generator *tg, uint32_t *r_cr,
if (!field)
return false;
- value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_RG,
- tg110->offsets.crtc);
- *r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_R_CR);
- *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_G_Y);
+ switch (idx) {
+ case 0:
+ value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_RG,
+ tg110->offsets.crtc);
+ *r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_R_CR);
+ *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_G_Y);
- value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_B,
- tg110->offsets.crtc);
- *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_B, CRC0_B_CB);
+ value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_B,
+ tg110->offsets.crtc);
+ *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_B, CRC0_B_CB);
+ break;
+ case 1:
+ value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC1_DATA_RG,
+ tg110->offsets.crtc);
+ *r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC1_DATA_RG, CRC1_R_CR);
+ *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC1_DATA_RG, CRC1_G_Y);
+
+ value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC1_DATA_B,
+ tg110->offsets.crtc);
+ *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC1_DATA_B, CRC1_B_CB);
+ break;
+ default:
+ return false;
+ }
return true;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
index 573898984726..f9961a6446f3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
@@ -168,31 +168,33 @@ void dcn31_panel_cntl_construct(
struct dcn31_panel_cntl *dcn31_panel_cntl,
const struct panel_cntl_init_data *init_data)
{
- uint8_t pwrseq_inst = 0xF;
dcn31_panel_cntl->base.funcs = &dcn31_link_panel_cntl_funcs;
dcn31_panel_cntl->base.ctx = init_data->ctx;
dcn31_panel_cntl->base.inst = init_data->inst;
- switch (init_data->eng_id) {
- case ENGINE_ID_DIGA:
- pwrseq_inst = 0;
- break;
- case ENGINE_ID_DIGB:
- pwrseq_inst = 1;
- break;
- default:
- DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", init_data->eng_id);
- ASSERT(false);
- break;
- }
-
- if (dcn31_panel_cntl->base.ctx->dc->config.support_edp0_on_dp1)
+ if (dcn31_panel_cntl->base.ctx->dc->config.support_edp0_on_dp1) {
//If supported, power sequencer mapping shall follow the DIG instance
+ uint8_t pwrseq_inst = 0xF;
+
+ switch (init_data->eng_id) {
+ case ENGINE_ID_DIGA:
+ pwrseq_inst = 0;
+ break;
+ case ENGINE_ID_DIGB:
+ pwrseq_inst = 1;
+ break;
+ default:
+ DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", init_data->eng_id);
+ ASSERT(false);
+ break;
+ }
+
dcn31_panel_cntl->base.pwrseq_inst = pwrseq_inst;
- else
+ } else {
/* If not supported, pwrseq will be assigned in order,
* so first pwrseq will be assigned to first panel instance (legacy behavior)
*/
dcn31_panel_cntl->base.pwrseq_inst = dcn31_panel_cntl->base.inst;
+ }
}
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c
index b2cea59ba5d4..9a92f73d5b7f 100644
--- a/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn31/dcn31_dio_link_encoder.c
@@ -653,8 +653,9 @@ void dcn31_link_encoder_get_max_link_cap(struct link_encoder *enc, struct dc_lin
if (!query_dp_alt_from_dmub(enc, &cmd))
return;
- if (cmd.query_dp_alt.data.is_usb &&
- cmd.query_dp_alt.data.is_dp4 == 0)
+ if (cmd.query_dp_alt.data.is_dp_alt_disable == 0 &&
+ cmd.query_dp_alt.data.is_usb &&
+ cmd.query_dp_alt.data.is_dp4 == 0)
link_settings->lane_count = MIN(LANE_COUNT_TWO, link_settings->lane_count);
return;
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c
index d4a3e811aa39..ea0c9a9d0bd6 100644
--- a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.c
@@ -28,6 +28,7 @@
#include "link_encoder.h"
#include "dcn31/dcn31_dio_link_encoder.h"
#include "dcn35_dio_link_encoder.h"
+#include "dc_dmub_srv.h"
#define CTX \
enc10->base.ctx
#define DC_LOGGER \
@@ -159,6 +160,8 @@ static const struct link_encoder_funcs dcn35_link_enc_funcs = {
.is_in_alt_mode = dcn31_link_encoder_is_in_alt_mode,
.get_max_link_cap = dcn31_link_encoder_get_max_link_cap,
.set_dio_phy_mux = dcn31_link_encoder_set_dio_phy_mux,
+ .enable_dpia_output = dcn35_link_encoder_enable_dpia_output,
+ .disable_dpia_output = dcn35_link_encoder_disable_dpia_output,
};
void dcn35_link_encoder_construct(
@@ -265,3 +268,80 @@ void dcn35_link_encoder_construct(
enc10->base.features.flags.bits.HDMI_6GB_EN = 0;
}
+
+/* DPIA equivalent of link_transmitter_control. */
+static bool link_dpia_control(struct dc_context *dc_ctx,
+ struct dmub_cmd_dig_dpia_control_data *dpia_control)
+{
+ union dmub_rb_cmd cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+
+ cmd.dig1_dpia_control.header.type = DMUB_CMD__DPIA;
+ cmd.dig1_dpia_control.header.sub_type =
+ DMUB_CMD__DPIA_DIG1_DPIA_CONTROL;
+ cmd.dig1_dpia_control.header.payload_bytes =
+ sizeof(cmd.dig1_dpia_control) -
+ sizeof(cmd.dig1_dpia_control.header);
+
+ cmd.dig1_dpia_control.dpia_control = *dpia_control;
+
+ dc_wake_and_execute_dmub_cmd(dc_ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT);
+
+ return true;
+}
+
+static void link_encoder_disable(struct dcn10_link_encoder *enc10)
+{
+ /* reset training complete */
+ REG_UPDATE(DP_LINK_CNTL, DP_LINK_TRAINING_COMPLETE, 0);
+}
+
+void dcn35_link_encoder_enable_dpia_output(
+ struct link_encoder *enc,
+ const struct dc_link_settings *link_settings,
+ uint8_t dpia_id,
+ uint8_t digmode,
+ uint8_t fec_rdy)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ struct dmub_cmd_dig_dpia_control_data dpia_control = { 0 };
+
+ enc1_configure_encoder(enc10, link_settings);
+
+ dpia_control.action = (uint8_t)TRANSMITTER_CONTROL_ENABLE;
+ dpia_control.enc_id = enc->preferred_engine;
+ dpia_control.mode_laneset.digmode = digmode;
+ dpia_control.lanenum = (uint8_t)link_settings->lane_count;
+ dpia_control.symclk_10khz = link_settings->link_rate *
+ LINK_RATE_REF_FREQ_IN_KHZ / 10;
+ /* DIG_BE_CNTL.DIG_HPD_SELECT set to 5 (hpdsel - 1) to indicate HPD pin unused by DPIA. */
+ dpia_control.hpdsel = 6;
+ dpia_control.dpia_id = dpia_id;
+ dpia_control.fec_rdy = fec_rdy;
+
+ DC_LOG_DEBUG("%s: DPIA(%d) - enc_id(%d)\n", __func__, dpia_control.dpia_id, dpia_control.enc_id);
+ link_dpia_control(enc->ctx, &dpia_control);
+}
+
+void dcn35_link_encoder_disable_dpia_output(
+ struct link_encoder *enc,
+ uint8_t dpia_id,
+ uint8_t digmode)
+{
+ struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+ struct dmub_cmd_dig_dpia_control_data dpia_control = { 0 };
+
+ if (enc->funcs->is_dig_enabled && !enc->funcs->is_dig_enabled(enc))
+ return;
+
+ dpia_control.action = (uint8_t)TRANSMITTER_CONTROL_DISABLE;
+ dpia_control.enc_id = enc->preferred_engine;
+ dpia_control.mode_laneset.digmode = digmode;
+ dpia_control.dpia_id = dpia_id;
+
+ DC_LOG_DEBUG("%s: DPIA(%d) - enc_id(%d)\n", __func__, dpia_control.dpia_id, dpia_control.enc_id);
+ link_dpia_control(enc->ctx, &dpia_control);
+
+ link_encoder_disable(enc10);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h
index d546a3676304..f9d4221f4b43 100644
--- a/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dio/dcn35/dcn35_dio_link_encoder.h
@@ -144,4 +144,22 @@ bool dcn35_is_dig_enabled(struct link_encoder *enc);
enum signal_type dcn35_get_dig_mode(struct link_encoder *enc);
void dcn35_link_encoder_setup(struct link_encoder *enc, enum signal_type signal);
+/*
+ * Enable DP transmitter and its encoder for dpia port.
+ */
+void dcn35_link_encoder_enable_dpia_output(
+ struct link_encoder *enc,
+ const struct dc_link_settings *link_settings,
+ uint8_t dpia_id,
+ uint8_t digmode,
+ uint8_t fec_rdy);
+
+/*
+ * Disable transmitter and its encoder for dpia port.
+ */
+void dcn35_link_encoder_disable_dpia_output(
+ struct link_encoder *enc,
+ uint8_t dpia_id,
+ uint8_t digmode);
+
#endif /* __DC_LINK_ENCODER__DCN35_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h
index 2e4a46f1b499..5efddd48d5c5 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h
@@ -158,6 +158,11 @@ bool dm_helpers_dp_write_dsc_enable(
const struct dc_stream_state *stream,
bool enable
);
+
+bool dm_helpers_dp_write_hblank_reduction(
+ struct dc_context *ctx,
+ const struct dc_stream_state *stream);
+
bool dm_helpers_is_dp_sink_present(
struct dc_link *link);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c
index 39525721c976..f1235bf9a596 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c
@@ -1312,138 +1312,6 @@ bool dcn_validate_bandwidth(
return false;
}
-static unsigned int dcn_find_normalized_clock_vdd_Level(
- const struct dc *dc,
- enum dm_pp_clock_type clocks_type,
- int clocks_in_khz)
-{
- int vdd_level = dcn_bw_v_min0p65;
-
- if (clocks_in_khz == 0)/*todo some clock not in the considerations*/
- return vdd_level;
-
- switch (clocks_type) {
- case DM_PP_CLOCK_TYPE_DISPLAY_CLK:
- if (clocks_in_khz > dc->dcn_soc->max_dispclk_vmax0p9*1000) {
- vdd_level = dcn_bw_v_max0p91;
- BREAK_TO_DEBUGGER();
- } else if (clocks_in_khz > dc->dcn_soc->max_dispclk_vnom0p8*1000) {
- vdd_level = dcn_bw_v_max0p9;
- } else if (clocks_in_khz > dc->dcn_soc->max_dispclk_vmid0p72*1000) {
- vdd_level = dcn_bw_v_nom0p8;
- } else if (clocks_in_khz > dc->dcn_soc->max_dispclk_vmin0p65*1000) {
- vdd_level = dcn_bw_v_mid0p72;
- } else
- vdd_level = dcn_bw_v_min0p65;
- break;
- case DM_PP_CLOCK_TYPE_DISPLAYPHYCLK:
- if (clocks_in_khz > dc->dcn_soc->phyclkv_max0p9*1000) {
- vdd_level = dcn_bw_v_max0p91;
- BREAK_TO_DEBUGGER();
- } else if (clocks_in_khz > dc->dcn_soc->phyclkv_nom0p8*1000) {
- vdd_level = dcn_bw_v_max0p9;
- } else if (clocks_in_khz > dc->dcn_soc->phyclkv_mid0p72*1000) {
- vdd_level = dcn_bw_v_nom0p8;
- } else if (clocks_in_khz > dc->dcn_soc->phyclkv_min0p65*1000) {
- vdd_level = dcn_bw_v_mid0p72;
- } else
- vdd_level = dcn_bw_v_min0p65;
- break;
-
- case DM_PP_CLOCK_TYPE_DPPCLK:
- if (clocks_in_khz > dc->dcn_soc->max_dppclk_vmax0p9*1000) {
- vdd_level = dcn_bw_v_max0p91;
- BREAK_TO_DEBUGGER();
- } else if (clocks_in_khz > dc->dcn_soc->max_dppclk_vnom0p8*1000) {
- vdd_level = dcn_bw_v_max0p9;
- } else if (clocks_in_khz > dc->dcn_soc->max_dppclk_vmid0p72*1000) {
- vdd_level = dcn_bw_v_nom0p8;
- } else if (clocks_in_khz > dc->dcn_soc->max_dppclk_vmin0p65*1000) {
- vdd_level = dcn_bw_v_mid0p72;
- } else
- vdd_level = dcn_bw_v_min0p65;
- break;
-
- case DM_PP_CLOCK_TYPE_MEMORY_CLK:
- {
- unsigned factor = (ddr4_dram_factor_single_Channel * dc->dcn_soc->number_of_channels);
-
- if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9*1000000/factor) {
- vdd_level = dcn_bw_v_max0p91;
- BREAK_TO_DEBUGGER();
- } else if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8*1000000/factor) {
- vdd_level = dcn_bw_v_max0p9;
- } else if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72*1000000/factor) {
- vdd_level = dcn_bw_v_nom0p8;
- } else if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65*1000000/factor) {
- vdd_level = dcn_bw_v_mid0p72;
- } else
- vdd_level = dcn_bw_v_min0p65;
- }
- break;
-
- case DM_PP_CLOCK_TYPE_DCFCLK:
- if (clocks_in_khz > dc->dcn_soc->dcfclkv_max0p9*1000) {
- vdd_level = dcn_bw_v_max0p91;
- BREAK_TO_DEBUGGER();
- } else if (clocks_in_khz > dc->dcn_soc->dcfclkv_nom0p8*1000) {
- vdd_level = dcn_bw_v_max0p9;
- } else if (clocks_in_khz > dc->dcn_soc->dcfclkv_mid0p72*1000) {
- vdd_level = dcn_bw_v_nom0p8;
- } else if (clocks_in_khz > dc->dcn_soc->dcfclkv_min0p65*1000) {
- vdd_level = dcn_bw_v_mid0p72;
- } else
- vdd_level = dcn_bw_v_min0p65;
- break;
-
- default:
- break;
- }
- return vdd_level;
-}
-
-unsigned int dcn_find_dcfclk_suits_all(
- const struct dc *dc,
- struct dc_clocks *clocks)
-{
- unsigned vdd_level, vdd_level_temp;
- unsigned dcf_clk;
-
- /*find a common supported voltage level*/
- vdd_level = dcn_find_normalized_clock_vdd_Level(
- dc, DM_PP_CLOCK_TYPE_DISPLAY_CLK, clocks->dispclk_khz);
- vdd_level_temp = dcn_find_normalized_clock_vdd_Level(
- dc, DM_PP_CLOCK_TYPE_DISPLAYPHYCLK, clocks->phyclk_khz);
-
- vdd_level = dcn_bw_max(vdd_level, vdd_level_temp);
- vdd_level_temp = dcn_find_normalized_clock_vdd_Level(
- dc, DM_PP_CLOCK_TYPE_DPPCLK, clocks->dppclk_khz);
- vdd_level = dcn_bw_max(vdd_level, vdd_level_temp);
-
- vdd_level_temp = dcn_find_normalized_clock_vdd_Level(
- dc, DM_PP_CLOCK_TYPE_MEMORY_CLK, clocks->fclk_khz);
- vdd_level = dcn_bw_max(vdd_level, vdd_level_temp);
- vdd_level_temp = dcn_find_normalized_clock_vdd_Level(
- dc, DM_PP_CLOCK_TYPE_DCFCLK, clocks->dcfclk_khz);
-
- /*find that level conresponding dcfclk*/
- vdd_level = dcn_bw_max(vdd_level, vdd_level_temp);
- if (vdd_level == dcn_bw_v_max0p91) {
- BREAK_TO_DEBUGGER();
- dcf_clk = dc->dcn_soc->dcfclkv_max0p9*1000;
- } else if (vdd_level == dcn_bw_v_max0p9)
- dcf_clk = dc->dcn_soc->dcfclkv_max0p9*1000;
- else if (vdd_level == dcn_bw_v_nom0p8)
- dcf_clk = dc->dcn_soc->dcfclkv_nom0p8*1000;
- else if (vdd_level == dcn_bw_v_mid0p72)
- dcf_clk = dc->dcn_soc->dcfclkv_mid0p72*1000;
- else
- dcf_clk = dc->dcn_soc->dcfclkv_min0p65*1000;
-
- DC_LOG_BANDWIDTH_CALCS("\tdcf_clk for voltage = %d\n", dcf_clk);
- return dcf_clk;
-}
-
void dcn_bw_update_from_pplib_fclks(
struct dc *dc,
struct dm_pp_clock_levels_with_voltage *fclks)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
index 76d3bb3c9155..8d4873f80df0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c
@@ -1562,6 +1562,7 @@ static void dml_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib,
dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank);
dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip);
dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip);
+
disp_dlg_regs->refcyc_per_pte_group_vblank_l =
(unsigned int)(dst_y_per_row_vblank * (double)htotal
* ref_freq_to_pix_freq / (double)dpte_groups_per_row_ub_l);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index 86ac7d59fd32..0748ef36a16a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -1595,6 +1595,7 @@ double dml32_TruncToValidBPP(
unsigned int NonDSCBPP0;
unsigned int NonDSCBPP1;
unsigned int NonDSCBPP2;
+ unsigned int NonDSCBPP3 = BPP_INVALID;
if (Format == dm_420) {
NonDSCBPP0 = 12;
@@ -1603,6 +1604,7 @@ double dml32_TruncToValidBPP(
MinDSCBPP = 6;
MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
} else if (Format == dm_444) {
+ NonDSCBPP3 = 18;
NonDSCBPP0 = 24;
NonDSCBPP1 = 30;
NonDSCBPP2 = 36;
@@ -1667,6 +1669,8 @@ double dml32_TruncToValidBPP(
return NonDSCBPP1;
else if (MaxLinkBPP >= NonDSCBPP0)
return 16.0;
+ else if ((Output == dm_dp2p0 || Output == dm_dp) && NonDSCBPP3 != BPP_INVALID && MaxLinkBPP >= NonDSCBPP3)
+ return NonDSCBPP3; // Special case to allow 6bpc RGB for DP connections.
else
return BPP_INVALID;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
index beed7adbbd43..47d785204f29 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -195,9 +195,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
.dcn_downspread_percent = 0.5,
.gpuvm_min_page_size_bytes = 4096,
.hostvm_min_page_size_bytes = 4096,
- .do_urgent_latency_adjustment = 1,
+ .do_urgent_latency_adjustment = 0,
.urgent_latency_adjustment_fabric_clock_component_us = 0,
- .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
};
void dcn35_build_wm_range_table_fpu(struct clk_mgr *clk_mgr)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
index 072bd0539605..6b2ab4ec2b5f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
@@ -66,11 +66,15 @@ static inline double dml_max5(double a, double b, double c, double d, double e)
static inline double dml_ceil(double a, double granularity)
{
+ if (granularity == 0)
+ return 0;
return (double) dcn_bw_ceil2(a, granularity);
}
static inline double dml_floor(double a, double granularity)
{
+ if (granularity == 0)
+ return 0;
return (double) dcn_bw_floor2(a, granularity);
}
@@ -114,11 +118,15 @@ static inline double dml_ceil_2(double f)
static inline double dml_ceil_ex(double x, double granularity)
{
+ if (granularity == 0)
+ return 0;
return (double) dcn_bw_ceil2(x, granularity);
}
static inline double dml_floor_ex(double x, double granularity)
{
+ if (granularity == 0)
+ return 0;
return (double) dcn_bw_floor2(x, granularity);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
index c4378e620cbf..91c4f3b4bd5f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
@@ -29,7 +29,11 @@ dml2_rcflags := $(CC_FLAGS_NO_FPU)
ifneq ($(CONFIG_FRAME_WARN),0)
ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y)
+ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_COMPILE_TEST),yy)
+frame_warn_flag := -Wframe-larger-than=4096
+else
frame_warn_flag := -Wframe-larger-than=3072
+endif
else
frame_warn_flag := -Wframe-larger-than=2048
endif
@@ -73,9 +77,8 @@ AMD_DAL_DML2 = $(addprefix $(AMDDALPATH)/dc/dml2/,$(DML2))
AMD_DISPLAY_FILES += $(AMD_DAL_DML2)
-CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml_top.o := $(dml2_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml_top_mcache.o := $(dml2_ccflags)
-CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_optimization := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_ccflags)
+CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_ccflags) $(frame_warn_flag)
CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_ccflags)
@@ -94,9 +97,8 @@ CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/dml21_translation_helper.o := $(dml2_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/dml21_utils.o := $(dml2_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/inc/dml2_debug.o := $(dml2_ccflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml_top.o := $(dml2_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml_top_mcache.o := $(dml2_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_rcflags)
@@ -113,9 +115,8 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/dml21_translation_helper.o := $(dml2_r
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/dml21_utils.o := $(dml2_rcflags)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/inc/dml2_debug.o := $(dml2_rcflags)
-DML21 := src/dml2_top/dml_top.o
-DML21 += src/dml2_top/dml_top_mcache.o
-DML21 += src/dml2_top/dml2_top_optimization.o
+DML21 := src/dml2_top/dml2_top_interfaces.o
+DML21 += src/dml2_top/dml2_top_soc15.o
DML21 += src/inc/dml2_debug.o
DML21 += src/dml2_core/dml2_core_dcn4.o
DML21 += src/dml2_core/dml2_core_factory.o
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
index d851c081e376..35bc917631ae 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
@@ -1222,6 +1222,7 @@ static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st *
s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto;
s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + dml_max(p->TWait + p->TCalc, *p->Tdmdl)) / s->LineTime - (*p->DSTYAfterScaler + (dml_float_t) *p->DSTXAfterScaler / (dml_float_t)p->myPipe->HTotal);
+ s->dst_y_prefetch_equ = dml_min(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
#ifdef __DML_VBA_DEBUG__
dml_print("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal);
@@ -6300,9 +6301,9 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
mode_lib->ms.meta_row_bandwidth_this_state,
mode_lib->ms.dpte_row_bandwidth_this_state,
mode_lib->ms.NoOfDPPThisState,
- mode_lib->ms.UrgentBurstFactorLuma,
- mode_lib->ms.UrgentBurstFactorChroma,
- mode_lib->ms.UrgentBurstFactorCursor);
+ mode_lib->ms.UrgentBurstFactorLuma[j],
+ mode_lib->ms.UrgentBurstFactorChroma[j],
+ mode_lib->ms.UrgentBurstFactorCursor[j]);
s->VMDataOnlyReturnBWPerState = dml_get_return_bw_mbps_vm_only(
&mode_lib->ms.soc,
@@ -6433,7 +6434,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
/* Output */
&mode_lib->ms.UrgentBurstFactorCursorPre[k],
&mode_lib->ms.UrgentBurstFactorLumaPre[k],
- &mode_lib->ms.UrgentBurstFactorChroma[k],
+ &mode_lib->ms.UrgentBurstFactorChromaPre[k],
&mode_lib->ms.NotUrgentLatencyHidingPre[k]);
mode_lib->ms.cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] *
@@ -6457,9 +6458,9 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
mode_lib->ms.cursor_bw_pre,
mode_lib->ms.prefetch_vmrow_bw,
mode_lib->ms.NoOfDPPThisState,
- mode_lib->ms.UrgentBurstFactorLuma,
- mode_lib->ms.UrgentBurstFactorChroma,
- mode_lib->ms.UrgentBurstFactorCursor,
+ mode_lib->ms.UrgentBurstFactorLuma[j],
+ mode_lib->ms.UrgentBurstFactorChroma[j],
+ mode_lib->ms.UrgentBurstFactorCursor[j],
mode_lib->ms.UrgentBurstFactorLumaPre,
mode_lib->ms.UrgentBurstFactorChromaPre,
mode_lib->ms.UrgentBurstFactorCursorPre,
@@ -6516,9 +6517,9 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
mode_lib->ms.cursor_bw,
mode_lib->ms.cursor_bw_pre,
mode_lib->ms.NoOfDPPThisState,
- mode_lib->ms.UrgentBurstFactorLuma,
- mode_lib->ms.UrgentBurstFactorChroma,
- mode_lib->ms.UrgentBurstFactorCursor,
+ mode_lib->ms.UrgentBurstFactorLuma[j],
+ mode_lib->ms.UrgentBurstFactorChroma[j],
+ mode_lib->ms.UrgentBurstFactorCursor[j],
mode_lib->ms.UrgentBurstFactorLumaPre,
mode_lib->ms.UrgentBurstFactorChromaPre,
mode_lib->ms.UrgentBurstFactorCursorPre);
@@ -6585,9 +6586,9 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
mode_lib->ms.cursor_bw_pre,
mode_lib->ms.prefetch_vmrow_bw,
mode_lib->ms.NoOfDPP[j], // VBA_ERROR DPPPerSurface is not assigned at this point, should use NoOfDpp here
- mode_lib->ms.UrgentBurstFactorLuma,
- mode_lib->ms.UrgentBurstFactorChroma,
- mode_lib->ms.UrgentBurstFactorCursor,
+ mode_lib->ms.UrgentBurstFactorLuma[j],
+ mode_lib->ms.UrgentBurstFactorChroma[j],
+ mode_lib->ms.UrgentBurstFactorCursor[j],
mode_lib->ms.UrgentBurstFactorLumaPre,
mode_lib->ms.UrgentBurstFactorChromaPre,
mode_lib->ms.UrgentBurstFactorCursorPre,
@@ -7808,9 +7809,9 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib)
mode_lib->ms.DETBufferSizeYThisState[k],
mode_lib->ms.DETBufferSizeCThisState[k],
/* Output */
- &mode_lib->ms.UrgentBurstFactorCursor[k],
- &mode_lib->ms.UrgentBurstFactorLuma[k],
- &mode_lib->ms.UrgentBurstFactorChroma[k],
+ &mode_lib->ms.UrgentBurstFactorCursor[j][k],
+ &mode_lib->ms.UrgentBurstFactorLuma[j][k],
+ &mode_lib->ms.UrgentBurstFactorChroma[j][k],
&mode_lib->ms.NotUrgentLatencyHiding[k]);
}
@@ -8317,7 +8318,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
if (clk_cfg->dcfclk_option != dml_use_override_freq)
locals->Dcfclk = mode_lib->ms.DCFCLK;
else
- locals->Dcfclk = clk_cfg->dcfclk_freq_mhz;
+ locals->Dcfclk = clk_cfg->dcfclk_mhz;
#ifdef __DML_VBA_DEBUG__
dml_print_dml_policy(&mode_lib->ms.policy);
@@ -8370,7 +8371,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
if (clk_cfg->dispclk_option == dml_use_required_freq)
locals->Dispclk = locals->Dispclk_calculated;
else if (clk_cfg->dispclk_option == dml_use_override_freq)
- locals->Dispclk = clk_cfg->dispclk_freq_mhz;
+ locals->Dispclk = clk_cfg->dispclk_mhz;
else
locals->Dispclk = mode_lib->ms.state.dispclk_mhz;
#ifdef __DML_VBA_DEBUG__
@@ -8411,7 +8412,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
if (clk_cfg->dppclk_option[k] == dml_use_required_freq)
locals->Dppclk[k] = locals->Dppclk_calculated[k];
else if (clk_cfg->dppclk_option[k] == dml_use_override_freq)
- locals->Dppclk[k] = clk_cfg->dppclk_freq_mhz[k];
+ locals->Dppclk[k] = clk_cfg->dppclk_mhz[k];
else
locals->Dppclk[k] = mode_lib->ms.state.dppclk_mhz;
#ifdef __DML_VBA_DEBUG__
@@ -9189,6 +9190,8 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
&locals->FractionOfUrgentBandwidth,
&s->dummy_boolean[0]); // dml_bool_t *PrefetchBandwidthSupport
+
+
if (s->VRatioPrefetchMoreThanMax != false || s->DestinationLineTimesForPrefetchLessThan2 != false) {
dml_print("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
dml_print("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2);
@@ -9203,6 +9206,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc
}
}
+
if (locals->PrefetchModeSupported == true && mode_lib->ms.support.ImmediateFlipSupport == true) {
locals->BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip(
mode_lib->ms.num_active_planes,
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h
index f951936bb579..dd3f43181a6e 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h
@@ -28,6 +28,7 @@
#define __DISPLAY_MODE_CORE_STRUCT_H__
#include "display_mode_lib_defines.h"
+#include "dml_top_display_cfg_types.h"
enum dml_project_id {
dml_project_invalid = 0,
@@ -49,7 +50,9 @@ enum dml_use_mall_for_pstate_change_mode {
dml_use_mall_pstate_change_disable = 0,
dml_use_mall_pstate_change_full_frame = 1,
dml_use_mall_pstate_change_sub_viewport = 2,
- dml_use_mall_pstate_change_phantom_pipe = 3
+ dml_use_mall_pstate_change_phantom_pipe = 3,
+ dml_use_mall_pstate_change_phantom_pipe_no_data_return = 4,
+ dml_use_mall_pstate_change_imall = 5
};
enum dml_use_mall_for_static_screen_mode {
dml_use_mall_static_screen_disable = 0,
@@ -171,7 +174,11 @@ enum dml_swizzle_mode {
dml_sw_256kb_z_x = 28,
dml_sw_256kb_s_x = 29,
dml_sw_256kb_d_x = 30,
- dml_sw_256kb_r_x = 31
+ dml_sw_256kb_r_x = 31,
+ dml_sw_256b_2d = 32,
+ dml_sw_4kb_2d = 33,
+ dml_sw_64kb_2d = 34,
+ dml_sw_256kb_2d = 35
};
enum dml_lb_depth {
dml_lb_6 = 0,
@@ -223,24 +230,28 @@ enum dml_mpc_use_policy {
dml_mpc_disabled = 0,
dml_mpc_as_possible = 1,
dml_mpc_as_needed_for_voltage = 2,
- dml_mpc_as_needed_for_pstate_and_voltage = 3
+ dml_mpc_as_needed_for_pstate_and_voltage = 3,
+ dml_mpc_as_needed = 4,
+ dml_mpc_2to1 = 5
};
enum dml_odm_use_policy {
dml_odm_use_policy_bypass = 0,
dml_odm_use_policy_combine_as_needed = 1,
dml_odm_use_policy_combine_2to1 = 2,
- dml_odm_use_policy_combine_4to1 = 3,
- dml_odm_use_policy_split_1to2 = 4,
- dml_odm_use_policy_mso_1to2 = 5,
- dml_odm_use_policy_mso_1to4 = 6
+ dml_odm_use_policy_combine_3to1 = 3,
+ dml_odm_use_policy_combine_4to1 = 4,
+ dml_odm_use_policy_split_1to2 = 5,
+ dml_odm_use_policy_mso_1to2 = 6,
+ dml_odm_use_policy_mso_1to4 = 7
};
enum dml_odm_mode {
dml_odm_mode_bypass = 0,
dml_odm_mode_combine_2to1 = 1,
- dml_odm_mode_combine_4to1 = 2,
- dml_odm_mode_split_1to2 = 3,
- dml_odm_mode_mso_1to2 = 4,
- dml_odm_mode_mso_1to4 = 5
+ dml_odm_mode_combine_3to1 = 2,
+ dml_odm_mode_combine_4to1 = 3,
+ dml_odm_mode_split_1to2 = 4,
+ dml_odm_mode_mso_1to2 = 5,
+ dml_odm_mode_mso_1to4 = 6
};
enum dml_writeback_configuration {
dml_whole_buffer_for_single_stream_no_interleave = 0,
@@ -289,6 +300,17 @@ struct soc_state_bounding_box_st {
dml_float_t fclk_change_latency_us;
dml_float_t usr_retraining_latency_us;
dml_bool_t use_ideal_dram_bw_strobe;
+ dml_float_t g6_temp_read_blackout_us;
+
+ struct {
+ dml_uint_t urgent_ramp_uclk_cycles;
+ dml_uint_t trip_to_memory_uclk_cycles;
+ dml_uint_t meta_trip_to_memory_uclk_cycles;
+ dml_uint_t maximum_latency_when_urgent_uclk_cycles;
+ dml_uint_t average_latency_when_urgent_uclk_cycles;
+ dml_uint_t maximum_latency_when_non_urgent_uclk_cycles;
+ dml_uint_t average_latency_when_non_urgent_uclk_cycles;
+ } dml_dcn401_uclk_dpm_dependent_soc_qos_params;
};
struct soc_bounding_box_st {
@@ -297,7 +319,7 @@ struct soc_bounding_box_st {
dml_float_t pcierefclk_mhz;
dml_float_t refclk_mhz;
dml_float_t amclk_mhz;
- dml_float_t max_outstanding_reqs;
+ dml_uint_t max_outstanding_reqs;
dml_float_t pct_ideal_sdp_bw_after_urgent;
dml_float_t pct_ideal_fabric_bw_after_urgent;
dml_float_t pct_ideal_dram_bw_after_urgent_pixel_only;
@@ -308,6 +330,16 @@ struct soc_bounding_box_st {
dml_float_t max_avg_fabric_bw_use_normal_percent;
dml_float_t max_avg_dram_bw_use_normal_percent;
dml_float_t max_avg_dram_bw_use_normal_strobe_percent;
+
+ dml_float_t svp_prefetch_pct_ideal_sdp_bw_after_urgent;
+ dml_float_t svp_prefetch_pct_ideal_fabric_bw_after_urgent;
+ dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_pixel_only;
+ dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_pixel_and_vm;
+ dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_vm_only;
+ dml_float_t svp_prefetch_max_avg_sdp_bw_use_normal_percent;
+ dml_float_t svp_prefetch_max_avg_fabric_bw_use_normal_percent;
+ dml_float_t svp_prefetch_max_avg_dram_bw_use_normal_percent;
+
dml_uint_t round_trip_ping_latency_dcfclk_cycles;
dml_uint_t urgent_out_of_order_return_per_channel_pixel_only_bytes;
dml_uint_t urgent_out_of_order_return_per_channel_pixel_and_vm_bytes;
@@ -324,6 +356,26 @@ struct soc_bounding_box_st {
dml_uint_t mall_allocated_for_dcn_mbytes;
dml_float_t dispclk_dppclk_vco_speed_mhz;
dml_bool_t do_urgent_latency_adjustment;
+
+ dml_uint_t mem_word_bytes;
+ dml_uint_t num_dcc_mcaches;
+ dml_uint_t mcache_size_bytes;
+ dml_uint_t mcache_line_size_bytes;
+
+ struct {
+ dml_bool_t UseNewDCN401SOCParameters;
+ dml_uint_t df_qos_response_time_fclk_cycles;
+ dml_uint_t max_round_trip_to_furthest_cs_fclk_cycles;
+ dml_uint_t mall_overhead_fclk_cycles;
+ dml_uint_t meta_trip_adder_fclk_cycles;
+ dml_uint_t average_transport_distance_fclk_cycles;
+ dml_float_t umc_urgent_ramp_latency_margin;
+ dml_float_t umc_max_latency_margin;
+ dml_float_t umc_average_latency_margin;
+ dml_float_t fabric_max_transport_latency_margin;
+ dml_float_t fabric_average_transport_latency_margin;
+ } dml_dcn401_soc_qos_params;
+
};
struct ip_params_st {
@@ -515,6 +567,10 @@ struct dml_plane_cfg_st {
dml_uint_t CursorWidth[__DML_NUM_PLANES__];
dml_uint_t CursorBPP[__DML_NUM_PLANES__];
+ dml_bool_t setup_for_tdlut[__DML_NUM_PLANES__];
+ enum dml2_tdlut_addressing_mode tdlut_addressing_mode[__DML_NUM_PLANES__];
+ enum dml2_tdlut_width_mode tdlut_width_mode[__DML_NUM_PLANES__];
+
enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[__DML_NUM_PLANES__];
enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[__DML_NUM_PLANES__];
@@ -604,6 +660,17 @@ struct dml_hw_resource_st {
dml_float_t DLGRefClkFreqMHz; /// <brief DLG Global Reference timer
};
+/// @brief To control the clk usage for model programming
+struct dml_clk_cfg_st {
+ enum dml_clk_cfg_policy dcfclk_option; ///< brief Use for mode_program; user can select between use the min require clk req as calculated by DML or use the test-specific freq
+ enum dml_clk_cfg_policy dispclk_option; ///< brief Use for mode_program; user can select between use the min require clk req as calculated by DML or use the test-specific freq
+ enum dml_clk_cfg_policy dppclk_option[__DML_NUM_PLANES__];
+
+ dml_float_t dcfclk_mhz;
+ dml_float_t dispclk_mhz;
+ dml_float_t dppclk_mhz[__DML_NUM_PLANES__];
+}; // dml_clk_cfg_st
+
/// @brief DML display configuration.
/// Describe how to display a surface in multi-plane setup and output to different output and writeback using the specified timgin
struct dml_display_cfg_st {
@@ -616,19 +683,9 @@ struct dml_display_cfg_st {
unsigned int num_timings;
struct dml_hw_resource_st hw; //< brief for mode programming
+ struct dml_clk_cfg_st clk_overrides; //< brief for mode programming clk override
}; // dml_display_cfg_st
-/// @brief To control the clk usage for model programming
-struct dml_clk_cfg_st {
- enum dml_clk_cfg_policy dcfclk_option; ///< brief Use for mode_program; user can select between use the min require clk req as calculated by DML or use the test-specific freq
- enum dml_clk_cfg_policy dispclk_option; ///< brief Use for mode_program; user can select between use the min require clk req as calculated by DML or use the test-specific freq
- enum dml_clk_cfg_policy dppclk_option[__DML_NUM_PLANES__];
-
- dml_float_t dcfclk_freq_mhz;
- dml_float_t dispclk_freq_mhz;
- dml_float_t dppclk_freq_mhz[__DML_NUM_PLANES__];
-}; // dml_clk_cfg_st
-
/// @brief DML mode evaluation and programming policy
/// Those knobs that affect mode support and mode programming
struct dml_mode_eval_policy_st {
@@ -884,11 +941,11 @@ struct mode_support_st {
dml_uint_t meta_row_height[__DML_NUM_PLANES__];
dml_uint_t meta_row_height_chroma[__DML_NUM_PLANES__];
dml_float_t UrgLatency;
- dml_float_t UrgentBurstFactorCursor[__DML_NUM_PLANES__];
+ dml_float_t UrgentBurstFactorCursor[2][__DML_NUM_PLANES__];
dml_float_t UrgentBurstFactorCursorPre[__DML_NUM_PLANES__];
- dml_float_t UrgentBurstFactorLuma[__DML_NUM_PLANES__];
+ dml_float_t UrgentBurstFactorLuma[2][__DML_NUM_PLANES__];
dml_float_t UrgentBurstFactorLumaPre[__DML_NUM_PLANES__];
- dml_float_t UrgentBurstFactorChroma[__DML_NUM_PLANES__];
+ dml_float_t UrgentBurstFactorChroma[2][__DML_NUM_PLANES__];
dml_float_t UrgentBurstFactorChromaPre[__DML_NUM_PLANES__];
dml_float_t MaximumSwathWidthInLineBufferLuma;
dml_float_t MaximumSwathWidthInLineBufferChroma;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.c
index c247aee89caf..89890c88fd66 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.c
@@ -690,12 +690,12 @@ __DML_DLL_EXPORT__ void dml_print_clk_cfg(const struct dml_clk_cfg_st *clk_cfg)
dml_print("DML: clk_cfg: dcfclk_option = %d\n", clk_cfg->dcfclk_option);
dml_print("DML: clk_cfg: dispclk_option = %d\n", clk_cfg->dispclk_option);
- dml_print("DML: clk_cfg: dcfclk_freq_mhz = %f\n", clk_cfg->dcfclk_freq_mhz);
- dml_print("DML: clk_cfg: dispclk_freq_mhz = %f\n", clk_cfg->dispclk_freq_mhz);
+ dml_print("DML: clk_cfg: dcfclk_mhz = %f\n", clk_cfg->dcfclk_mhz);
+ dml_print("DML: clk_cfg: dispclk_mhz = %f\n", clk_cfg->dispclk_mhz);
for (dml_uint_t i = 0; i < DCN_DML__NUM_PLANE; i++) {
dml_print("DML: clk_cfg: i=%d, dppclk_option = %d\n", i, clk_cfg->dppclk_option[i]);
- dml_print("DML: clk_cfg: i=%d, dppclk_freq_mhz = %f\n", i, clk_cfg->dppclk_freq_mhz[i]);
+ dml_print("DML: clk_cfg: i=%d, dppclk_mhz = %f\n", i, clk_cfg->dppclk_mhz[i]);
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
index 138b4b1e42ed..b9c6b45f6872 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
@@ -10,7 +10,6 @@
#include "dml21_utils.h"
#include "dml21_translation_helper.h"
#include "bounding_boxes/dcn4_soc_bb.h"
-#include "bounding_boxes/dcn3_soc_bb.h"
static void dml21_init_socbb_params(struct dml2_initialize_instance_in_out *dml_init,
const struct dml2_configuration_options *config,
@@ -20,10 +19,6 @@ static void dml21_init_socbb_params(struct dml2_initialize_instance_in_out *dml_
const struct dml2_soc_qos_parameters *qos_params;
switch (in_dc->ctx->dce_version) {
- case DCN_VERSION_3_2: // TODO : Temporary for N-1 validation. Remove this after N-1 validation phase is complete.
- soc_bb = &dml2_socbb_dcn31;
- qos_params = &dml_dcn31_soc_qos_params;
- break;
case DCN_VERSION_4_01:
default:
if (config->bb_from_dmub)
@@ -60,9 +55,6 @@ static void dml21_init_ip_params(struct dml2_initialize_instance_in_out *dml_ini
const struct dml2_ip_capabilities *ip_caps;
switch (in_dc->ctx->dce_version) {
- case DCN_VERSION_3_2: // TODO : Temporary for N-1 validation. Remove this after N-1 validation phase is complete.
- ip_caps = &dml2_dcn31_max_ip_caps;
- break;
case DCN_VERSION_4_01:
default:
ip_caps = &dml2_dcn401_max_ip_caps;
@@ -302,12 +294,17 @@ void dml21_apply_soc_bb_overrides(struct dml2_initialize_instance_in_out *dml_in
dml_soc_bb->power_management_parameters.stutter_exit_latency_us =
(in_dc->ctx->dc_bios->bb_info.dram_sr_exit_latency_100ns + 9) / 10;
- if (in_dc->ctx->dc_bios->vram_info.num_chans) {
+ if (dc_bw_params->num_channels) {
+ dml_clk_table->dram_config.channel_count = dc_bw_params->num_channels;
+ dml_soc_bb->mall_allocated_for_dcn_mbytes = in_dc->caps.mall_size_total / 1048576;
+ } else if (in_dc->ctx->dc_bios->vram_info.num_chans) {
dml_clk_table->dram_config.channel_count = in_dc->ctx->dc_bios->vram_info.num_chans;
dml_soc_bb->mall_allocated_for_dcn_mbytes = in_dc->caps.mall_size_total / 1048576;
}
- if (in_dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) {
+ if (dc_bw_params->dram_channel_width_bytes) {
+ dml_clk_table->dram_config.channel_width_bytes = dc_bw_params->dram_channel_width_bytes;
+ } else if (in_dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) {
dml_clk_table->dram_config.channel_width_bytes = in_dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
}
@@ -339,11 +336,22 @@ void dml21_apply_soc_bb_overrides(struct dml2_initialize_instance_in_out *dml_in
// }
}
+static unsigned int calc_max_hardware_v_total(const struct dc_stream_state *stream)
+{
+ unsigned int max_hw_v_total = stream->ctx->dc->caps.max_v_total;
+
+ if (stream->ctx->dc->caps.vtotal_limited_by_fp2) {
+ max_hw_v_total -= stream->timing.v_front_porch + 1;
+ }
+
+ return max_hw_v_total;
+}
+
static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cfg *timing,
struct dc_stream_state *stream,
struct dml2_context *dml_ctx)
{
- unsigned int hblank_start, vblank_start;
+ unsigned int hblank_start, vblank_start, min_hardware_refresh_in_uhz;
timing->h_active = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right;
timing->v_active = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top;
@@ -371,11 +379,23 @@ static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cf
- stream->timing.v_border_top - stream->timing.v_border_bottom;
timing->drr_config.enabled = stream->ignore_msa_timing_param;
- timing->drr_config.min_refresh_uhz = stream->timing.min_refresh_in_uhz;
timing->drr_config.drr_active_variable = stream->vrr_active_variable;
timing->drr_config.drr_active_fixed = stream->vrr_active_fixed;
timing->drr_config.disallowed = !stream->allow_freesync;
+ /* limit min refresh rate to DC cap */
+ min_hardware_refresh_in_uhz = stream->timing.min_refresh_in_uhz;
+ if (stream->ctx->dc->caps.max_v_total != 0) {
+ min_hardware_refresh_in_uhz = div64_u64((stream->timing.pix_clk_100hz * 100000000ULL),
+ (stream->timing.h_total * (long long)calc_max_hardware_v_total(stream)));
+ }
+
+ if (stream->timing.min_refresh_in_uhz > min_hardware_refresh_in_uhz) {
+ timing->drr_config.min_refresh_uhz = stream->timing.min_refresh_in_uhz;
+ } else {
+ timing->drr_config.min_refresh_uhz = min_hardware_refresh_in_uhz;
+ }
+
if (dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase &&
stream->ctx->dc->config.enable_fpo_flicker_detection == 1)
timing->drr_config.max_instant_vtotal_delta = dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase(stream, false);
@@ -422,6 +442,21 @@ static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cf
timing->vblank_nom = timing->v_total - timing->v_active;
}
+/**
+ * adjust_dml21_hblank_timing_config_from_pipe_ctx - Adjusts the horizontal blanking timing configuration
+ * based on the pipe context.
+ * @timing: Pointer to the dml2_timing_cfg structure to be adjusted.
+ * @pipe: Pointer to the pipe_ctx structure containing the horizontal blanking borrow value.
+ *
+ * This function modifies the horizontal active and blank end timings by adding and subtracting
+ * the horizontal blanking borrow value from the pipe context, respectively.
+ */
+static void adjust_dml21_hblank_timing_config_from_pipe_ctx(struct dml2_timing_cfg *timing, struct pipe_ctx *pipe)
+{
+ timing->h_active += pipe->hblank_borrow;
+ timing->h_blank_end -= pipe->hblank_borrow;
+}
+
static void populate_dml21_output_config_from_stream_state(struct dml2_link_output_cfg *output,
struct dc_stream_state *stream, const struct pipe_ctx *pipe)
{
@@ -683,11 +718,21 @@ static void populate_dml21_surface_config_from_plane_state(
surface->dcc.informative.fraction_of_zero_size_request_plane1 = plane_state->dcc.independent_64b_blks_c;
surface->dcc.plane0.pitch = plane_state->dcc.meta_pitch;
surface->dcc.plane1.pitch = plane_state->dcc.meta_pitch_c;
- if (in_dc->ctx->dce_version < DCN_VERSION_4_01) {
- /* needed for N-1 testing */
+
+ // Update swizzle / array mode based on the gfx_format
+ switch (plane_state->tiling_info.gfxversion) {
+ case DcGfxVersion7:
+ case DcGfxVersion8:
+ // Placeholder for programming the array_mode
+ break;
+ case DcGfxVersion9:
+ case DcGfxVersion10:
+ case DcGfxVersion11:
surface->tiling = gfx9_to_dml2_swizzle_mode(plane_state->tiling_info.gfx9.swizzle);
- } else {
+ break;
+ case DcGfxAddr3:
surface->tiling = gfx_addr3_to_dml2_swizzle_mode(plane_state->tiling_info.gfx_addr3.swizzle);
+ break;
}
}
@@ -709,6 +754,7 @@ static const struct scaler_data *get_scaler_data_for_plane(
temp_pipe->plane_state = pipe->plane_state;
temp_pipe->plane_res.scl_data.taps = pipe->plane_res.scl_data.taps;
temp_pipe->stream_res = pipe->stream_res;
+ temp_pipe->hblank_borrow = pipe->hblank_borrow;
dml_ctx->config.callbacks.build_scaling_params(temp_pipe);
break;
}
@@ -973,6 +1019,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s
ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__);
populate_dml21_timing_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, context->streams[stream_index], dml_ctx);
+ adjust_dml21_hblank_timing_config_from_pipe_ctx(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, &context->res_ctx.pipe_ctx[stream_index]);
populate_dml21_output_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].output, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index]);
populate_dml21_stream_overrides_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location], context->streams[stream_index]);
@@ -1037,28 +1084,8 @@ void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state
context->bw_ctx.bw.dcn.clk.dtbclk_en = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz > 0;
context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz;
context->bw_ctx.bw.dcn.clk.socclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.socclk_khz;
-}
-
-void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_watermarks *watermark, enum dml2_dchub_watermark_reg_set_index reg_set_idx, struct dml2_context *in_ctx)
-{
- struct dml2_core_internal_display_mode_lib *mode_lib = &in_ctx->v21.dml_init.dml2_instance->core_instance.clean_me_up.mode_lib;
- double refclk_freq_in_mhz = (in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz > 0) ? (double)in_ctx->v21.display_config.overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;
-
- if (reg_set_idx >= DML2_DCHUB_WATERMARK_SET_NUM) {
- /* invalid register set index */
- return;
- }
-
- /* convert to legacy format (time in ns) */
- watermark->urgent_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].urgent / refclk_freq_in_mhz) * 1000.0;
- watermark->pte_meta_urgent_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].urgent / refclk_freq_in_mhz) * 1000.0;
- watermark->cstate_pstate.cstate_enter_plus_exit_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].sr_enter / refclk_freq_in_mhz) * 1000.0;
- watermark->cstate_pstate.cstate_exit_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].sr_exit / refclk_freq_in_mhz) * 1000.0;
- watermark->cstate_pstate.pstate_change_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].uclk_pstate / refclk_freq_in_mhz) * 1000.0;
- watermark->urgent_latency_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].urgent / refclk_freq_in_mhz) * 1000.0;
- watermark->cstate_pstate.fclk_pstate_change_ns = ((double)in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].fclk_pstate / refclk_freq_in_mhz) * 1000.0;
- watermark->frac_urg_bw_flip = in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].frac_urg_bw_flip;
- watermark->frac_urg_bw_nom = in_ctx->v21.mode_programming.programming->global_regs.wm_regs[reg_set_idx].frac_urg_bw_nom;
+ context->bw_ctx.bw.dcn.clk.subvp_prefetch_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz;
+ context->bw_ctx.bw.dcn.clk.subvp_prefetch_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz;
}
static struct dml2_dchub_watermark_regs *wm_set_index_to_dc_wm_set(union dcn_watermark_set *watermarks, const enum dml2_dchub_watermark_reg_set_index wm_index)
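For reference, the legacy watermark helper removed above converted register values expressed in dchub refclk cycles into nanoseconds: cycles divided by a clock in MHz gives microseconds, and the extra factor of 1000.0 gives nanoseconds (e.g. 472 cycles at a 50 MHz dchub refclk is 9440 ns). A hedged sketch of that arithmetic, with an illustrative function name that is not part of the driver:

/* Illustrative only: mirrors the "(cycles / refclk_mhz) * 1000.0" pattern removed above. */
static double refclk_cycles_to_ns(unsigned int cycles, double refclk_mhz)
{
	return ((double)cycles / refclk_mhz) * 1000.0;
}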
@@ -1104,53 +1131,6 @@ void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_se
}
}
-
-void dml21_populate_pipe_ctx_dlg_params(struct dml2_context *dml_ctx, struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming)
-{
- unsigned int hactive, vactive, hblank_start, vblank_start, hblank_end, vblank_end;
- struct dc_crtc_timing *timing = &pipe_ctx->stream->timing;
- union dml2_global_sync_programming *global_sync = &stream_programming->global_sync;
-
- hactive = timing->h_addressable + timing->h_border_left + timing->h_border_right;
- vactive = timing->v_addressable + timing->v_border_bottom + timing->v_border_top;
- hblank_start = pipe_ctx->stream->timing.h_total - pipe_ctx->stream->timing.h_front_porch;
- vblank_start = pipe_ctx->stream->timing.v_total - pipe_ctx->stream->timing.v_front_porch;
-
- hblank_end = hblank_start - timing->h_addressable - timing->h_border_left - timing->h_border_right;
- vblank_end = vblank_start - timing->v_addressable - timing->v_border_top - timing->v_border_bottom;
-
- if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) {
- /* phantom has its own global sync */
- global_sync = &stream_programming->phantom_stream.global_sync;
- }
-
- pipe_ctx->pipe_dlg_param.vstartup_start = global_sync->dcn4x.vstartup_lines;
- pipe_ctx->pipe_dlg_param.vupdate_offset = global_sync->dcn4x.vupdate_offset_pixels;
- pipe_ctx->pipe_dlg_param.vupdate_width = global_sync->dcn4x.vupdate_vupdate_width_pixels;
- pipe_ctx->pipe_dlg_param.vready_offset = global_sync->dcn4x.vready_offset_pixels;
- pipe_ctx->pipe_dlg_param.pstate_keepout = global_sync->dcn4x.pstate_keepout_start_lines;
-
- pipe_ctx->pipe_dlg_param.otg_inst = pipe_ctx->stream_res.tg->inst;
-
- pipe_ctx->pipe_dlg_param.hactive = hactive;
- pipe_ctx->pipe_dlg_param.vactive = vactive;
- pipe_ctx->pipe_dlg_param.htotal = pipe_ctx->stream->timing.h_total;
- pipe_ctx->pipe_dlg_param.vtotal = pipe_ctx->stream->timing.v_total;
- pipe_ctx->pipe_dlg_param.hblank_end = hblank_end;
- pipe_ctx->pipe_dlg_param.vblank_end = vblank_end;
- pipe_ctx->pipe_dlg_param.hblank_start = hblank_start;
- pipe_ctx->pipe_dlg_param.vblank_start = vblank_start;
- pipe_ctx->pipe_dlg_param.vfront_porch = pipe_ctx->stream->timing.v_front_porch;
- pipe_ctx->pipe_dlg_param.pixel_rate_mhz = pipe_ctx->stream->timing.pix_clk_100hz / 10000.00;
- pipe_ctx->pipe_dlg_param.refresh_rate = ((timing->pix_clk_100hz * 100) / timing->h_total) / timing->v_total;
- pipe_ctx->pipe_dlg_param.vtotal_max = pipe_ctx->stream->adjust.v_total_max;
- pipe_ctx->pipe_dlg_param.vtotal_min = pipe_ctx->stream->adjust.v_total_min;
- pipe_ctx->pipe_dlg_param.recout_height = pipe_ctx->plane_res.scl_data.recout.height;
- pipe_ctx->pipe_dlg_param.recout_width = pipe_ctx->plane_res.scl_data.recout.width;
- pipe_ctx->pipe_dlg_param.full_recout_height = pipe_ctx->plane_res.scl_data.recout.height;
- pipe_ctx->pipe_dlg_param.full_recout_width = pipe_ctx->plane_res.scl_data.recout.width;
-}
-
void dml21_map_hw_resources(struct dml2_context *dml_ctx)
{
unsigned int i = 0;
@@ -1186,22 +1166,22 @@ void dml21_set_dc_p_state_type(
bool sub_vp_enabled)
{
switch (stream_programming->uclk_pstate_method) {
- case dml2_uclk_pstate_support_method_vactive:
- case dml2_uclk_pstate_support_method_fw_vactive_drr:
+ case dml2_pstate_method_vactive:
+ case dml2_pstate_method_fw_vactive_drr:
pipe_ctx->p_state_type = P_STATE_V_ACTIVE;
break;
- case dml2_uclk_pstate_support_method_vblank:
- case dml2_uclk_pstate_support_method_fw_vblank_drr:
+ case dml2_pstate_method_vblank:
+ case dml2_pstate_method_fw_vblank_drr:
if (sub_vp_enabled)
pipe_ctx->p_state_type = P_STATE_V_BLANK_SUB_VP;
else
pipe_ctx->p_state_type = P_STATE_V_BLANK;
break;
- case dml2_uclk_pstate_support_method_fw_subvp_phantom:
- case dml2_uclk_pstate_support_method_fw_subvp_phantom_drr:
+ case dml2_pstate_method_fw_svp:
+ case dml2_pstate_method_fw_svp_drr:
pipe_ctx->p_state_type = P_STATE_SUB_VP;
break;
- case dml2_uclk_pstate_support_method_fw_drr:
+ case dml2_pstate_method_fw_drr:
if (sub_vp_enabled)
pipe_ctx->p_state_type = P_STATE_DRR_SUB_VP;
else
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h
index 476a7f6e4875..069b939c672a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.h
@@ -21,8 +21,6 @@ void dml21_initialize_soc_bb_params(struct dml2_initialize_instance_in_out *dml_
void dml21_initialize_ip_params(struct dml2_initialize_instance_in_out *dml_init, const struct dml2_configuration_options *config, const struct dc *in_dc);
bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_state *context, struct dml2_context *dml_ctx);
void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state *context);
-void dml21_populate_pipe_ctx_dlg_params(struct dml2_context *dml_ctx, struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_stream_programming *stream_programming);
-void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_watermarks *watermark, enum dml2_dchub_watermark_reg_set_index reg_set_idx, struct dml2_context *in_ctx);
void dml21_extract_watermark_sets(const struct dc *in_dc, union dcn_watermark_set *watermarks, struct dml2_context *in_ctx);
void dml21_map_hw_resources(struct dml2_context *dml_ctx);
void dml21_get_pipe_mcache_config(struct dc_state *context, struct pipe_ctx *pipe_ctx, struct dml2_per_plane_programming *pln_prog, struct dml2_pipe_configuration_descriptor *mcache_pipe_config);
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c
index 51d491bffa32..1e56d995cd0e 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c
@@ -142,108 +142,21 @@ int dml21_find_dc_pipes_for_plane(const struct dc *in_dc,
return num_pipes;
}
-
-void dml21_update_pipe_ctx_dchub_regs(struct dml2_display_rq_regs *rq_regs,
- struct dml2_display_dlg_regs *disp_dlg_regs,
- struct dml2_display_ttu_regs *disp_ttu_regs,
- struct pipe_ctx *out)
+void dml21_pipe_populate_global_sync(struct dml2_context *dml_ctx,
+ struct dc_state *context,
+ struct pipe_ctx *pipe_ctx,
+ struct dml2_per_stream_programming *stream_programming)
{
- memset(&out->rq_regs, 0, sizeof(out->rq_regs));
- out->rq_regs.rq_regs_l.chunk_size = rq_regs->rq_regs_l.chunk_size;
- out->rq_regs.rq_regs_l.min_chunk_size = rq_regs->rq_regs_l.min_chunk_size;
- //out->rq_regs.rq_regs_l.meta_chunk_size = rq_regs->rq_regs_l.meta_chunk_size;
- //out->rq_regs.rq_regs_l.min_meta_chunk_size = rq_regs->rq_regs_l.min_meta_chunk_size;
- out->rq_regs.rq_regs_l.dpte_group_size = rq_regs->rq_regs_l.dpte_group_size;
- out->rq_regs.rq_regs_l.mpte_group_size = rq_regs->rq_regs_l.mpte_group_size;
- out->rq_regs.rq_regs_l.swath_height = rq_regs->rq_regs_l.swath_height;
- out->rq_regs.rq_regs_l.pte_row_height_linear = rq_regs->rq_regs_l.pte_row_height_linear;
-
- out->rq_regs.rq_regs_c.chunk_size = rq_regs->rq_regs_c.chunk_size;
- out->rq_regs.rq_regs_c.min_chunk_size = rq_regs->rq_regs_c.min_chunk_size;
- //out->rq_regs.rq_regs_c.meta_chunk_size = rq_regs->rq_regs_c.meta_chunk_size;
- //out->rq_regs.rq_regs_c.min_meta_chunk_size = rq_regs->rq_regs_c.min_meta_chunk_size;
- out->rq_regs.rq_regs_c.dpte_group_size = rq_regs->rq_regs_c.dpte_group_size;
- out->rq_regs.rq_regs_c.mpte_group_size = rq_regs->rq_regs_c.mpte_group_size;
- out->rq_regs.rq_regs_c.swath_height = rq_regs->rq_regs_c.swath_height;
- out->rq_regs.rq_regs_c.pte_row_height_linear = rq_regs->rq_regs_c.pte_row_height_linear;
-
- out->rq_regs.drq_expansion_mode = rq_regs->drq_expansion_mode;
- out->rq_regs.prq_expansion_mode = rq_regs->prq_expansion_mode;
- //out->rq_regs.mrq_expansion_mode = rq_regs->mrq_expansion_mode;
- out->rq_regs.crq_expansion_mode = rq_regs->crq_expansion_mode;
- out->rq_regs.plane1_base_address = rq_regs->plane1_base_address;
- out->unbounded_req = rq_regs->unbounded_request_enabled;
-
- memset(&out->dlg_regs, 0, sizeof(out->dlg_regs));
- out->dlg_regs.refcyc_h_blank_end = disp_dlg_regs->refcyc_h_blank_end;
- out->dlg_regs.dlg_vblank_end = disp_dlg_regs->dlg_vblank_end;
- out->dlg_regs.min_dst_y_next_start = disp_dlg_regs->min_dst_y_next_start;
- out->dlg_regs.refcyc_per_htotal = disp_dlg_regs->refcyc_per_htotal;
- out->dlg_regs.refcyc_x_after_scaler = disp_dlg_regs->refcyc_x_after_scaler;
- out->dlg_regs.dst_y_after_scaler = disp_dlg_regs->dst_y_after_scaler;
- out->dlg_regs.dst_y_prefetch = disp_dlg_regs->dst_y_prefetch;
- out->dlg_regs.dst_y_per_vm_vblank = disp_dlg_regs->dst_y_per_vm_vblank;
- out->dlg_regs.dst_y_per_row_vblank = disp_dlg_regs->dst_y_per_row_vblank;
- out->dlg_regs.dst_y_per_vm_flip = disp_dlg_regs->dst_y_per_vm_flip;
- out->dlg_regs.dst_y_per_row_flip = disp_dlg_regs->dst_y_per_row_flip;
- out->dlg_regs.ref_freq_to_pix_freq = disp_dlg_regs->ref_freq_to_pix_freq;
- out->dlg_regs.vratio_prefetch = disp_dlg_regs->vratio_prefetch;
- out->dlg_regs.vratio_prefetch_c = disp_dlg_regs->vratio_prefetch_c;
- out->dlg_regs.refcyc_per_tdlut_group = disp_dlg_regs->refcyc_per_tdlut_group;
- out->dlg_regs.refcyc_per_pte_group_vblank_l = disp_dlg_regs->refcyc_per_pte_group_vblank_l;
- out->dlg_regs.refcyc_per_pte_group_vblank_c = disp_dlg_regs->refcyc_per_pte_group_vblank_c;
- //out->dlg_regs.refcyc_per_meta_chunk_vblank_l = disp_dlg_regs->refcyc_per_meta_chunk_vblank_l;
- //out->dlg_regs.refcyc_per_meta_chunk_vblank_c = disp_dlg_regs->refcyc_per_meta_chunk_vblank_c;
- out->dlg_regs.refcyc_per_pte_group_flip_l = disp_dlg_regs->refcyc_per_pte_group_flip_l;
- out->dlg_regs.refcyc_per_pte_group_flip_c = disp_dlg_regs->refcyc_per_pte_group_flip_c;
- //out->dlg_regs.refcyc_per_meta_chunk_flip_l = disp_dlg_regs->refcyc_per_meta_chunk_flip_l;
- //out->dlg_regs.refcyc_per_meta_chunk_flip_c = disp_dlg_regs->refcyc_per_meta_chunk_flip_c;
- out->dlg_regs.dst_y_per_pte_row_nom_l = disp_dlg_regs->dst_y_per_pte_row_nom_l;
- out->dlg_regs.dst_y_per_pte_row_nom_c = disp_dlg_regs->dst_y_per_pte_row_nom_c;
- out->dlg_regs.refcyc_per_pte_group_nom_l = disp_dlg_regs->refcyc_per_pte_group_nom_l;
- out->dlg_regs.refcyc_per_pte_group_nom_c = disp_dlg_regs->refcyc_per_pte_group_nom_c;
- //out->dlg_regs.dst_y_per_meta_row_nom_l = disp_dlg_regs->dst_y_per_meta_row_nom_l;
- //out->dlg_regs.dst_y_per_meta_row_nom_c = disp_dlg_regs->dst_y_per_meta_row_nom_c;
- //out->dlg_regs.refcyc_per_meta_chunk_nom_l = disp_dlg_regs->refcyc_per_meta_chunk_nom_l;
- //out->dlg_regs.refcyc_per_meta_chunk_nom_c = disp_dlg_regs->refcyc_per_meta_chunk_nom_c;
- out->dlg_regs.refcyc_per_line_delivery_pre_l = disp_dlg_regs->refcyc_per_line_delivery_pre_l;
- out->dlg_regs.refcyc_per_line_delivery_pre_c = disp_dlg_regs->refcyc_per_line_delivery_pre_c;
- out->dlg_regs.refcyc_per_line_delivery_l = disp_dlg_regs->refcyc_per_line_delivery_l;
- out->dlg_regs.refcyc_per_line_delivery_c = disp_dlg_regs->refcyc_per_line_delivery_c;
- out->dlg_regs.refcyc_per_vm_group_vblank = disp_dlg_regs->refcyc_per_vm_group_vblank;
- out->dlg_regs.refcyc_per_vm_group_flip = disp_dlg_regs->refcyc_per_vm_group_flip;
- out->dlg_regs.refcyc_per_vm_req_vblank = disp_dlg_regs->refcyc_per_vm_req_vblank;
- out->dlg_regs.refcyc_per_vm_req_flip = disp_dlg_regs->refcyc_per_vm_req_flip;
- out->dlg_regs.dst_y_offset_cur0 = disp_dlg_regs->dst_y_offset_cur0;
- out->dlg_regs.chunk_hdl_adjust_cur0 = disp_dlg_regs->chunk_hdl_adjust_cur0;
- //out->dlg_regs.dst_y_offset_cur1 = disp_dlg_regs->dst_y_offset_cur1;
- //out->dlg_regs.chunk_hdl_adjust_cur1 = disp_dlg_regs->chunk_hdl_adjust_cur1;
- out->dlg_regs.vready_after_vcount0 = disp_dlg_regs->vready_after_vcount0;
- out->dlg_regs.dst_y_delta_drq_limit = disp_dlg_regs->dst_y_delta_drq_limit;
- out->dlg_regs.refcyc_per_vm_dmdata = disp_dlg_regs->refcyc_per_vm_dmdata;
- out->dlg_regs.dmdata_dl_delta = disp_dlg_regs->dmdata_dl_delta;
-
- memset(&out->ttu_regs, 0, sizeof(out->ttu_regs));
- out->ttu_regs.qos_level_low_wm = disp_ttu_regs->qos_level_low_wm;
- out->ttu_regs.qos_level_high_wm = disp_ttu_regs->qos_level_high_wm;
- out->ttu_regs.min_ttu_vblank = disp_ttu_regs->min_ttu_vblank;
- out->ttu_regs.qos_level_flip = disp_ttu_regs->qos_level_flip;
- out->ttu_regs.refcyc_per_req_delivery_l = disp_ttu_regs->refcyc_per_req_delivery_l;
- out->ttu_regs.refcyc_per_req_delivery_c = disp_ttu_regs->refcyc_per_req_delivery_c;
- out->ttu_regs.refcyc_per_req_delivery_cur0 = disp_ttu_regs->refcyc_per_req_delivery_cur0;
- //out->ttu_regs.refcyc_per_req_delivery_cur1 = disp_ttu_regs->refcyc_per_req_delivery_cur1;
- out->ttu_regs.refcyc_per_req_delivery_pre_l = disp_ttu_regs->refcyc_per_req_delivery_pre_l;
- out->ttu_regs.refcyc_per_req_delivery_pre_c = disp_ttu_regs->refcyc_per_req_delivery_pre_c;
- out->ttu_regs.refcyc_per_req_delivery_pre_cur0 = disp_ttu_regs->refcyc_per_req_delivery_pre_cur0;
- //out->ttu_regs.refcyc_per_req_delivery_pre_cur1 = disp_ttu_regs->refcyc_per_req_delivery_pre_cur1;
- out->ttu_regs.qos_level_fixed_l = disp_ttu_regs->qos_level_fixed_l;
- out->ttu_regs.qos_level_fixed_c = disp_ttu_regs->qos_level_fixed_c;
- out->ttu_regs.qos_level_fixed_cur0 = disp_ttu_regs->qos_level_fixed_cur0;
- //out->ttu_regs.qos_level_fixed_cur1 = disp_ttu_regs->qos_level_fixed_cur1;
- out->ttu_regs.qos_ramp_disable_l = disp_ttu_regs->qos_ramp_disable_l;
- out->ttu_regs.qos_ramp_disable_c = disp_ttu_regs->qos_ramp_disable_c;
- out->ttu_regs.qos_ramp_disable_cur0 = disp_ttu_regs->qos_ramp_disable_cur0;
- //out->ttu_regs.qos_ramp_disable_cur1 = disp_ttu_regs->qos_ramp_disable_cur1;
+ union dml2_global_sync_programming *global_sync = &stream_programming->global_sync;
+
+ if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) {
+ /* phantom has its own global sync */
+ global_sync = &stream_programming->phantom_stream.global_sync;
+ }
+
+ memcpy(&pipe_ctx->global_sync,
+ global_sync,
+ sizeof(union dml2_global_sync_programming));
}
void dml21_populate_mall_allocation_size(struct dc_state *context,
@@ -301,28 +214,16 @@ void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *contex
{
unsigned int pipe_reg_index = 0;
- dml21_populate_pipe_ctx_dlg_params(dml_ctx, context, pipe_ctx, stream_prog);
+ dml21_pipe_populate_global_sync(dml_ctx, context, pipe_ctx, stream_prog);
find_pipe_regs_idx(dml_ctx, pipe_ctx, &pipe_reg_index);
if (dml_ctx->config.svp_pstate.callbacks.get_pipe_subvp_type(context, pipe_ctx) == SUBVP_PHANTOM) {
memcpy(&pipe_ctx->hubp_regs, pln_prog->phantom_plane.pipe_regs[pipe_reg_index], sizeof(struct dml2_dchub_per_pipe_register_set));
pipe_ctx->unbounded_req = false;
-
- /* legacy only, should be removed later */
- dml21_update_pipe_ctx_dchub_regs(&pln_prog->phantom_plane.pipe_regs[pipe_reg_index]->rq_regs,
- &pln_prog->phantom_plane.pipe_regs[pipe_reg_index]->dlg_regs,
- &pln_prog->phantom_plane.pipe_regs[pipe_reg_index]->ttu_regs, pipe_ctx);
-
pipe_ctx->det_buffer_size_kb = 0;
} else {
memcpy(&pipe_ctx->hubp_regs, pln_prog->pipe_regs[pipe_reg_index], sizeof(struct dml2_dchub_per_pipe_register_set));
pipe_ctx->unbounded_req = pln_prog->pipe_regs[pipe_reg_index]->rq_regs.unbounded_request_enabled;
-
- /* legacy only, should be removed later */
- dml21_update_pipe_ctx_dchub_regs(&pln_prog->pipe_regs[pipe_reg_index]->rq_regs,
- &pln_prog->pipe_regs[pipe_reg_index]->dlg_regs,
- &pln_prog->pipe_regs[pipe_reg_index]->ttu_regs, pipe_ctx);
-
pipe_ctx->det_buffer_size_kb = pln_prog->pipe_regs[pipe_reg_index]->det_size * 64;
}
@@ -482,7 +383,8 @@ void dml21_build_fams2_programming(const struct dc *dc,
unsigned int num_fams2_streams = 0;
/* reset fams2 data */
- memset(&context->bw_ctx.bw.dcn.fams2_stream_params, 0, sizeof(struct dmub_fams2_stream_static_state) * DML2_MAX_PLANES);
+ memset(&context->bw_ctx.bw.dcn.fams2_stream_base_params, 0, sizeof(union dmub_cmd_fams2_config) * DML2_MAX_PLANES);
+ memset(&context->bw_ctx.bw.dcn.fams2_stream_sub_params, 0, sizeof(union dmub_cmd_fams2_config) * DML2_MAX_PLANES);
memset(&context->bw_ctx.bw.dcn.fams2_global_config, 0, sizeof(struct dmub_cmd_fams2_global_config));
if (dml_ctx->v21.mode_programming.programming->fams2_required) {
@@ -490,8 +392,10 @@ void dml21_build_fams2_programming(const struct dc *dc,
int dml_stream_idx;
struct dc_stream_state *phantom_stream;
struct dc_stream_status *phantom_status;
+ enum fams2_stream_type type = 0;
- struct dmub_fams2_stream_static_state *static_state = &context->bw_ctx.bw.dcn.fams2_stream_params[num_fams2_streams];
+ union dmub_cmd_fams2_config *static_base_state = &context->bw_ctx.bw.dcn.fams2_stream_base_params[num_fams2_streams];
+ union dmub_cmd_fams2_config *static_sub_state = &context->bw_ctx.bw.dcn.fams2_stream_sub_params[num_fams2_streams];
struct dc_stream_state *stream = context->streams[i];
@@ -508,28 +412,38 @@ void dml21_build_fams2_programming(const struct dc *dc,
}
/* copy static state from PMO */
- memcpy(static_state,
- &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_params,
- sizeof(struct dmub_fams2_stream_static_state));
-
- /* get information from context */
- static_state->num_planes = context->stream_status[i].plane_count;
- static_state->otg_inst = context->stream_status[i].primary_otg_inst;
-
- /* populate pipe masks for planes */
- for (j = 0; j < context->stream_status[i].plane_count; j++) {
- for (k = 0; k < dc->res_pool->pipe_count; k++) {
- if (context->res_ctx.pipe_ctx[k].stream &&
- context->res_ctx.pipe_ctx[k].stream->stream_id == stream->stream_id &&
- context->res_ctx.pipe_ctx[k].plane_state == context->stream_status[i].plane_states[j]) {
- static_state->pipe_mask |= (1 << k);
- static_state->plane_pipe_masks[j] |= (1 << k);
+ memcpy(static_base_state,
+ &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_base_params,
+ sizeof(union dmub_cmd_fams2_config));
+ memcpy(static_sub_state,
+ &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_sub_params,
+ sizeof(union dmub_cmd_fams2_config));
+
+ switch (dc->debug.fams_version.minor) {
+ case 1:
+ default:
+ type = static_base_state->stream_v1.base.type;
+
+ /* get information from context */
+ static_base_state->stream_v1.base.num_planes = context->stream_status[i].plane_count;
+ static_base_state->stream_v1.base.otg_inst = context->stream_status[i].primary_otg_inst;
+
+ /* populate pipe masks for planes */
+ for (j = 0; j < context->stream_status[i].plane_count; j++) {
+ for (k = 0; k < dc->res_pool->pipe_count; k++) {
+ if (context->res_ctx.pipe_ctx[k].stream &&
+ context->res_ctx.pipe_ctx[k].stream->stream_id == stream->stream_id &&
+ context->res_ctx.pipe_ctx[k].plane_state == context->stream_status[i].plane_states[j]) {
+ static_base_state->stream_v1.base.pipe_mask |= (1 << k);
+ static_base_state->stream_v1.base.plane_pipe_masks[j] |= (1 << k);
+ }
}
}
}
+
/* get per method programming */
- switch (static_state->type) {
+ switch (type) {
case FAMS2_STREAM_TYPE_VBLANK:
case FAMS2_STREAM_TYPE_VACTIVE:
case FAMS2_STREAM_TYPE_DRR:
@@ -543,16 +457,27 @@ void dml21_build_fams2_programming(const struct dc *dc,
/* phantom status should always be present */
ASSERT(phantom_status);
- static_state->sub_state.subvp.phantom_otg_inst = phantom_status->primary_otg_inst;
+ if (!phantom_status)
+ break;
- /* populate pipe masks for phantom planes */
- for (j = 0; j < phantom_status->plane_count; j++) {
- for (k = 0; k < dc->res_pool->pipe_count; k++) {
- if (context->res_ctx.pipe_ctx[k].stream &&
- context->res_ctx.pipe_ctx[k].stream->stream_id == phantom_stream->stream_id &&
- context->res_ctx.pipe_ctx[k].plane_state == phantom_status->plane_states[j]) {
- static_state->sub_state.subvp.phantom_pipe_mask |= (1 << k);
- static_state->sub_state.subvp.phantom_plane_pipe_masks[j] |= (1 << k);
+ switch (dc->debug.fams_version.minor) {
+ case 1:
+ default:
+ static_sub_state->stream_v1.sub_state.subvp.phantom_otg_inst = phantom_status->primary_otg_inst;
+
+ /* populate pipe masks for phantom planes */
+ for (j = 0; j < phantom_status->plane_count; j++) {
+ for (k = 0; k < dc->res_pool->pipe_count; k++) {
+ if (context->res_ctx.pipe_ctx[k].stream &&
+ context->res_ctx.pipe_ctx[k].stream->stream_id == phantom_stream->stream_id &&
+ context->res_ctx.pipe_ctx[k].plane_state == phantom_status->plane_states[j]) {
+ switch (dc->debug.fams_version.minor) {
+ case 1:
+ default:
+ static_sub_state->stream_v1.sub_state.subvp.phantom_pipe_mask |= (1 << k);
+ static_sub_state->stream_v1.sub_state.subvp.phantom_plane_pipe_masks[j] |= (1 << k);
+ }
+ }
}
}
}
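The rework above replaces the single dmub_fams2_stream_static_state with versioned base/sub config unions selected by dc->debug.fams_version.minor. A rough, self-contained sketch of that access pattern, using invented stand-in types rather than the real dmub definitions:

/* Stand-in types only; the real union dmub_cmd_fams2_config differs. */
struct fake_fams2_base {
	unsigned int num_planes;
	unsigned int otg_inst;
	unsigned int pipe_mask;
};

union fake_fams2_config {
	struct { struct fake_fams2_base base; } stream_v1;
	/* future minor versions would add stream_v2, ... here */
};

static void fill_base(union fake_fams2_config *cfg, unsigned int minor,
		      unsigned int num_planes, unsigned int otg_inst,
		      unsigned int pipe_idx)
{
	switch (minor) {
	case 1:
	default:
		/* v1 layout: per-stream info lives under stream_v1.base */
		cfg->stream_v1.base.num_planes = num_planes;
		cfg->stream_v1.base.otg_inst = otg_inst;
		cfg->stream_v1.base.pipe_mask |= 1u << pipe_idx;
		break;
	}
}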
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h
index d5153fbac921..4bff52eaaef8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.h
@@ -18,10 +18,10 @@ struct dml2_display_ttu_regs;
int dml21_helper_find_dml_pipe_idx_by_stream_id(struct dml2_context *ctx, unsigned int stream_id);
int dml21_find_dml_pipe_idx_by_plane_id(struct dml2_context *ctx, unsigned int plane_id);
bool dml21_get_plane_id(const struct dc_state *state, const struct dc_plane_state *plane, unsigned int *plane_id);
-void dml21_update_pipe_ctx_dchub_regs(struct dml2_display_rq_regs *rq_regs,
- struct dml2_display_dlg_regs *disp_dlg_regs,
- struct dml2_display_ttu_regs *disp_ttu_regs,
- struct pipe_ctx *out);
+void dml21_pipe_populate_global_sync(struct dml2_context *dml_ctx,
+ struct dc_state *context,
+ struct pipe_ctx *pipe_ctx,
+ struct dml2_per_stream_programming *stream_programming);
void dml21_populate_mall_allocation_size(struct dc_state *context,
struct dml2_context *in_ctx,
struct dml2_per_plane_programming *pln_prog,
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c
index bbc28b9a15a3..fb80ba9287b6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c
@@ -75,7 +75,6 @@ static void dml21_init(const struct dc *in_dc, struct dml2_context **dml_ctx, co
{
switch (in_dc->ctx->dce_version) {
case DCN_VERSION_4_01:
- case DCN_VERSION_3_2: // TODO : Temporary for N-1 validation. Remove this after N-1 validation phase is complete.
(*dml_ctx)->v21.dml_init.options.project_id = dml2_project_dcn4x_stage2_auto_drr_svp;
break;
default:
@@ -233,13 +232,6 @@ static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_s
dml21_calculate_rq_and_dlg_params(in_dc, context, &context->res_ctx, dml_ctx, in_dc->res_pool->pipe_count);
dml21_copy_clocks_to_dc_state(dml_ctx, context);
dml21_extract_watermark_sets(in_dc, &context->bw_ctx.bw.dcn.watermarks, dml_ctx);
- if (in_dc->ctx->dce_version == DCN_VERSION_3_2) {
- dml21_extract_legacy_watermark_set(in_dc, &context->bw_ctx.bw.dcn.watermarks.a, DML2_DCHUB_WATERMARK_SET_A, dml_ctx);
- dml21_extract_legacy_watermark_set(in_dc, &context->bw_ctx.bw.dcn.watermarks.b, DML2_DCHUB_WATERMARK_SET_A, dml_ctx);
- dml21_extract_legacy_watermark_set(in_dc, &context->bw_ctx.bw.dcn.watermarks.c, DML2_DCHUB_WATERMARK_SET_A, dml_ctx);
- dml21_extract_legacy_watermark_set(in_dc, &context->bw_ctx.bw.dcn.watermarks.d, DML2_DCHUB_WATERMARK_SET_A, dml_ctx);
- }
-
dml21_build_fams2_programming(in_dc, context, dml_ctx);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h
deleted file mode 100644
index d82c681a5402..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn3_soc_bb.h
+++ /dev/null
@@ -1,401 +0,0 @@
-/*
- * Copyright 2022 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#ifndef __DML_DML_DCN3_SOC_BB__
-#define __DML_DML_DCN3_SOC_BB__
-
-#include "dml_top_soc_parameter_types.h"
-
-static const struct dml2_soc_qos_parameters dml_dcn31_soc_qos_params = {
- .derate_table = {
- .system_active_urgent = {
- .dram_derate_percent_pixel = 22,
- .dram_derate_percent_vm = 0,
- .dram_derate_percent_pixel_and_vm = 0,
- .fclk_derate_percent = 76,
- .dcfclk_derate_percent = 100,
- },
- .system_active_average = {
- .dram_derate_percent_pixel = 17,
- .dram_derate_percent_vm = 0,
- .dram_derate_percent_pixel_and_vm = 0,
- .fclk_derate_percent = 57,
- .dcfclk_derate_percent = 75,
- },
- .dcn_mall_prefetch_urgent = {
- .dram_derate_percent_pixel = 22,
- .dram_derate_percent_vm = 0,
- .dram_derate_percent_pixel_and_vm = 0,
- .fclk_derate_percent = 76,
- .dcfclk_derate_percent = 100,
- },
- .dcn_mall_prefetch_average = {
- .dram_derate_percent_pixel = 17,
- .dram_derate_percent_vm = 0,
- .dram_derate_percent_pixel_and_vm = 0,
- .fclk_derate_percent = 57,
- .dcfclk_derate_percent = 75,
- },
- .system_idle_average = {
- .dram_derate_percent_pixel = 17,
- .dram_derate_percent_vm = 0,
- .dram_derate_percent_pixel_and_vm = 0,
- .fclk_derate_percent = 57,
- .dcfclk_derate_percent = 100,
- },
- },
- .writeback = {
- .base_latency_us = 12,
- .scaling_factor_us = 0,
- .scaling_factor_mhz = 0,
- },
- .qos_params = {
- .dcn4x = {
- .df_qos_response_time_fclk_cycles = 300,
- .max_round_trip_to_furthest_cs_fclk_cycles = 350,
- .mall_overhead_fclk_cycles = 50,
- .meta_trip_adder_fclk_cycles = 36,
- .average_transport_distance_fclk_cycles = 257,
- .umc_urgent_ramp_latency_margin = 50,
- .umc_max_latency_margin = 30,
- .umc_average_latency_margin = 20,
- .fabric_max_transport_latency_margin = 20,
- .fabric_average_transport_latency_margin = 10,
-
- .per_uclk_dpm_params = {
- {
- .minimum_uclk_khz = 97,
- .urgent_ramp_uclk_cycles = 472,
- .trip_to_memory_uclk_cycles = 827,
- .meta_trip_to_memory_uclk_cycles = 827,
- .maximum_latency_when_urgent_uclk_cycles = 72,
- .average_latency_when_urgent_uclk_cycles = 61,
- .maximum_latency_when_non_urgent_uclk_cycles = 827,
- .average_latency_when_non_urgent_uclk_cycles = 118,
- },
- {
- .minimum_uclk_khz = 435,
- .urgent_ramp_uclk_cycles = 546,
- .trip_to_memory_uclk_cycles = 848,
- .meta_trip_to_memory_uclk_cycles = 848,
- .maximum_latency_when_urgent_uclk_cycles = 146,
- .average_latency_when_urgent_uclk_cycles = 90,
- .maximum_latency_when_non_urgent_uclk_cycles = 848,
- .average_latency_when_non_urgent_uclk_cycles = 135,
- },
- {
- .minimum_uclk_khz = 731,
- .urgent_ramp_uclk_cycles = 632,
- .trip_to_memory_uclk_cycles = 874,
- .meta_trip_to_memory_uclk_cycles = 874,
- .maximum_latency_when_urgent_uclk_cycles = 232,
- .average_latency_when_urgent_uclk_cycles = 124,
- .maximum_latency_when_non_urgent_uclk_cycles = 874,
- .average_latency_when_non_urgent_uclk_cycles = 155,
- },
- {
- .minimum_uclk_khz = 1187,
- .urgent_ramp_uclk_cycles = 716,
- .trip_to_memory_uclk_cycles = 902,
- .meta_trip_to_memory_uclk_cycles = 902,
- .maximum_latency_when_urgent_uclk_cycles = 316,
- .average_latency_when_urgent_uclk_cycles = 160,
- .maximum_latency_when_non_urgent_uclk_cycles = 902,
- .average_latency_when_non_urgent_uclk_cycles = 177,
- },
- },
- },
- },
- .qos_type = dml2_qos_param_type_dcn4x,
-};
-
-static const struct dml2_soc_bb dml2_socbb_dcn31 = {
- .clk_table = {
- .uclk = {
- .clk_values_khz = {97000, 435000, 731000, 1187000},
- .num_clk_values = 4,
- },
- .fclk = {
- .clk_values_khz = {300000, 2500000},
- .num_clk_values = 2,
- },
- .dcfclk = {
- .clk_values_khz = {200000, 1800000},
- .num_clk_values = 2,
- },
- .dispclk = {
- .clk_values_khz = {100000, 2000000},
- .num_clk_values = 2,
- },
- .dppclk = {
- .clk_values_khz = {100000, 2000000},
- .num_clk_values = 2,
- },
- .dtbclk = {
- .clk_values_khz = {100000, 2000000},
- .num_clk_values = 2,
- },
- .phyclk = {
- .clk_values_khz = {810000, 810000},
- .num_clk_values = 2,
- },
- .socclk = {
- .clk_values_khz = {300000, 1600000},
- .num_clk_values = 2,
- },
- .dscclk = {
- .clk_values_khz = {666667, 666667},
- .num_clk_values = 2,
- },
- .phyclk_d18 = {
- .clk_values_khz = {625000, 625000},
- .num_clk_values = 2,
- },
- .phyclk_d32 = {
- .clk_values_khz = {2000000, 2000000},
- .num_clk_values = 2,
- },
- .dram_config = {
- .channel_width_bytes = 2,
- .channel_count = 16,
- .transactions_per_clock = 16,
- },
- },
-
- .qos_parameters = {
- .derate_table = {
- .system_active_urgent = {
- .dram_derate_percent_pixel = 22,
- .dram_derate_percent_vm = 0,
- .dram_derate_percent_pixel_and_vm = 0,
- .fclk_derate_percent = 76,
- .dcfclk_derate_percent = 100,
- },
- .system_active_average = {
- .dram_derate_percent_pixel = 17,
- .dram_derate_percent_vm = 0,
- .dram_derate_percent_pixel_and_vm = 0,
- .fclk_derate_percent = 57,
- .dcfclk_derate_percent = 75,
- },
- .dcn_mall_prefetch_urgent = {
- .dram_derate_percent_pixel = 22,
- .dram_derate_percent_vm = 0,
- .dram_derate_percent_pixel_and_vm = 0,
- .fclk_derate_percent = 76,
- .dcfclk_derate_percent = 100,
- },
- .dcn_mall_prefetch_average = {
- .dram_derate_percent_pixel = 17,
- .dram_derate_percent_vm = 0,
- .dram_derate_percent_pixel_and_vm = 0,
- .fclk_derate_percent = 57,
- .dcfclk_derate_percent = 75,
- },
- .system_idle_average = {
- .dram_derate_percent_pixel = 17,
- .dram_derate_percent_vm = 0,
- .dram_derate_percent_pixel_and_vm = 0,
- .fclk_derate_percent = 57,
- .dcfclk_derate_percent = 100,
- },
- },
- .writeback = {
- .base_latency_us = 0,
- .scaling_factor_us = 0,
- .scaling_factor_mhz = 0,
- },
- .qos_params = {
- .dcn4x = {
- .df_qos_response_time_fclk_cycles = 300,
- .max_round_trip_to_furthest_cs_fclk_cycles = 350,
- .mall_overhead_fclk_cycles = 50,
- .meta_trip_adder_fclk_cycles = 36,
- .average_transport_distance_fclk_cycles = 260,
- .umc_urgent_ramp_latency_margin = 50,
- .umc_max_latency_margin = 30,
- .umc_average_latency_margin = 20,
- .fabric_max_transport_latency_margin = 20,
- .fabric_average_transport_latency_margin = 10,
-
- .per_uclk_dpm_params = {
- {
- // State 1
- .minimum_uclk_khz = 0,
- .urgent_ramp_uclk_cycles = 472,
- .trip_to_memory_uclk_cycles = 827,
- .meta_trip_to_memory_uclk_cycles = 827,
- .maximum_latency_when_urgent_uclk_cycles = 72,
- .average_latency_when_urgent_uclk_cycles = 72,
- .maximum_latency_when_non_urgent_uclk_cycles = 827,
- .average_latency_when_non_urgent_uclk_cycles = 117,
- },
- {
- // State 2
- .minimum_uclk_khz = 0,
- .urgent_ramp_uclk_cycles = 546,
- .trip_to_memory_uclk_cycles = 848,
- .meta_trip_to_memory_uclk_cycles = 848,
- .maximum_latency_when_urgent_uclk_cycles = 146,
- .average_latency_when_urgent_uclk_cycles = 146,
- .maximum_latency_when_non_urgent_uclk_cycles = 848,
- .average_latency_when_non_urgent_uclk_cycles = 133,
- },
- {
- // State 3
- .minimum_uclk_khz = 0,
- .urgent_ramp_uclk_cycles = 564,
- .trip_to_memory_uclk_cycles = 853,
- .meta_trip_to_memory_uclk_cycles = 853,
- .maximum_latency_when_urgent_uclk_cycles = 164,
- .average_latency_when_urgent_uclk_cycles = 164,
- .maximum_latency_when_non_urgent_uclk_cycles = 853,
- .average_latency_when_non_urgent_uclk_cycles = 136,
- },
- {
- // State 4
- .minimum_uclk_khz = 0,
- .urgent_ramp_uclk_cycles = 613,
- .trip_to_memory_uclk_cycles = 869,
- .meta_trip_to_memory_uclk_cycles = 869,
- .maximum_latency_when_urgent_uclk_cycles = 213,
- .average_latency_when_urgent_uclk_cycles = 213,
- .maximum_latency_when_non_urgent_uclk_cycles = 869,
- .average_latency_when_non_urgent_uclk_cycles = 149,
- },
- {
- // State 5
- .minimum_uclk_khz = 0,
- .urgent_ramp_uclk_cycles = 632,
- .trip_to_memory_uclk_cycles = 874,
- .meta_trip_to_memory_uclk_cycles = 874,
- .maximum_latency_when_urgent_uclk_cycles = 232,
- .average_latency_when_urgent_uclk_cycles = 232,
- .maximum_latency_when_non_urgent_uclk_cycles = 874,
- .average_latency_when_non_urgent_uclk_cycles = 153,
- },
- {
- // State 6
- .minimum_uclk_khz = 0,
- .urgent_ramp_uclk_cycles = 665,
- .trip_to_memory_uclk_cycles = 885,
- .meta_trip_to_memory_uclk_cycles = 885,
- .maximum_latency_when_urgent_uclk_cycles = 265,
- .average_latency_when_urgent_uclk_cycles = 265,
- .maximum_latency_when_non_urgent_uclk_cycles = 885,
- .average_latency_when_non_urgent_uclk_cycles = 161,
- },
- {
- // State 7
- .minimum_uclk_khz = 0,
- .urgent_ramp_uclk_cycles = 689,
- .trip_to_memory_uclk_cycles = 895,
- .meta_trip_to_memory_uclk_cycles = 895,
- .maximum_latency_when_urgent_uclk_cycles = 289,
- .average_latency_when_urgent_uclk_cycles = 289,
- .maximum_latency_when_non_urgent_uclk_cycles = 895,
- .average_latency_when_non_urgent_uclk_cycles = 167,
- },
- {
- // State 8
- .minimum_uclk_khz = 0,
- .urgent_ramp_uclk_cycles = 716,
- .trip_to_memory_uclk_cycles = 902,
- .meta_trip_to_memory_uclk_cycles = 902,
- .maximum_latency_when_urgent_uclk_cycles = 316,
- .average_latency_when_urgent_uclk_cycles = 316,
- .maximum_latency_when_non_urgent_uclk_cycles = 902,
- .average_latency_when_non_urgent_uclk_cycles = 174,
- },
- },
- },
- },
- .qos_type = dml2_qos_param_type_dcn4x,
- },
-
- .power_management_parameters = {
- .dram_clk_change_blackout_us = 400,
- .fclk_change_blackout_us = 0,
- .g7_ppt_blackout_us = 0,
- .stutter_enter_plus_exit_latency_us = 50,
- .stutter_exit_latency_us = 43,
- .z8_stutter_enter_plus_exit_latency_us = 0,
- .z8_stutter_exit_latency_us = 0,
- },
-
- .vmin_limit = {
- .dispclk_khz = 600 * 1000,
- },
-
- .dprefclk_mhz = 700,
- .xtalclk_mhz = 100,
- .pcie_refclk_mhz = 100,
- .dchub_refclk_mhz = 50,
- .mall_allocated_for_dcn_mbytes = 64,
- .max_outstanding_reqs = 512,
- .fabric_datapath_to_dcn_data_return_bytes = 64,
- .return_bus_width_bytes = 64,
- .hostvm_min_page_size_kbytes = 0,
- .gpuvm_min_page_size_kbytes = 256,
- .phy_downspread_percent = 0,
- .dcn_downspread_percent = 0,
- .dispclk_dppclk_vco_speed_mhz = 4500,
- .do_urgent_latency_adjustment = 0,
- .mem_word_bytes = 32,
- .num_dcc_mcaches = 8,
- .mcache_size_bytes = 2048,
- .mcache_line_size_bytes = 32,
- .max_fclk_for_uclk_dpm_khz = 1250 * 1000,
-};
-
-static const struct dml2_ip_capabilities dml2_dcn31_max_ip_caps = {
- .pipe_count = 4,
- .otg_count = 4,
- .num_dsc = 4,
- .max_num_dp2p0_streams = 4,
- .max_num_hdmi_frl_outputs = 1,
- .max_num_dp2p0_outputs = 4,
- .rob_buffer_size_kbytes = 192,
- .config_return_buffer_size_in_kbytes = 1152,
- .meta_fifo_size_in_kentries = 22,
- .compressed_buffer_segment_size_in_kbytes = 64,
- .subvp_drr_scheduling_margin_us = 100,
- .subvp_prefetch_end_to_mall_start_us = 15,
- .subvp_fw_processing_delay = 15,
-
- .fams2 = {
- .max_allow_delay_us = 100 * 1000,
- .scheduling_delay_us = 50,
- .vertical_interrupt_ack_delay_us = 18,
- .allow_programming_delay_us = 18,
- .min_allow_width_us = 20,
- .subvp_df_throttle_delay_us = 100,
- .subvp_programming_delay_us = 18,
- .subvp_prefetch_to_mall_delay_us = 18,
- .drr_programming_delay_us = 18,
- },
-};
-
-#endif /* __DML_DML_DCN3_SOC_BB__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h
index 8ef7977841de..793e1c038efd 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h
@@ -344,6 +344,7 @@ static const struct dml2_ip_capabilities dml2_dcn401_max_ip_caps = {
.config_return_buffer_segment_size_in_kbytes = 64,
.meta_fifo_size_in_kentries = 22,
.compressed_buffer_segment_size_in_kbytes = 64,
+ .cursor_buffer_size = 24,
.max_flip_time_us = 80,
.max_flip_time_lines = 32,
.hostvm_mode = 0,
@@ -354,7 +355,7 @@ static const struct dml2_ip_capabilities dml2_dcn401_max_ip_caps = {
.fams2 = {
.max_allow_delay_us = 100 * 1000,
- .scheduling_delay_us = 125,
+ .scheduling_delay_us = 550,
.vertical_interrupt_ack_delay_us = 40,
.allow_programming_delay_us = 18,
.min_allow_width_us = 20,
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h
index b132f676a68d..5e1ab6d97640 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h
@@ -10,9 +10,10 @@
#define DML2_MAX_PLANES 8
#define DML2_MAX_DCN_PIPES 8
#define DML2_MAX_MCACHES 8 // assume plane is going to be supported by a max of 8 mcaches
+#define DML2_MAX_WRITEBACK 3
enum dml2_swizzle_mode {
- dml2_sw_linear,
+ dml2_sw_linear, // SW_LINEAR accepts 256 byte aligned pitch and also 128 byte aligned pitch if DCC is not enabled
dml2_sw_256b_2d,
dml2_sw_4kb_2d,
dml2_sw_64kb_2d,
@@ -24,7 +25,8 @@ enum dml2_swizzle_mode {
dml2_gfx11_sw_64kb_d_x,
dml2_gfx11_sw_64kb_r_x,
dml2_gfx11_sw_256kb_d_x,
- dml2_gfx11_sw_256kb_r_x
+ dml2_gfx11_sw_256kb_r_x,
+
};
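The new comment on dml2_sw_linear above encodes a pitch-alignment rule: a 256-byte-aligned pitch is always accepted, and a 128-byte-aligned pitch is accepted when DCC is disabled. A hypothetical helper (not in the driver) expressing that rule:

/* Hypothetical check based on the SW_LINEAR comment above. */
#include <stdbool.h>

static bool linear_pitch_ok(unsigned int pitch_bytes, bool dcc_enabled)
{
	unsigned int required_align = dcc_enabled ? 256 : 128;

	return (pitch_bytes % required_align) == 0;
}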
enum dml2_source_format_class {
@@ -38,7 +40,13 @@ enum dml2_source_format_class {
dml2_rgbe_alpha = 9,
dml2_rgbe = 10,
dml2_mono_8 = 11,
- dml2_mono_16 = 12
+ dml2_mono_16 = 12,
+ dml2_422_planar_8 = 13,
+ dml2_422_planar_10 = 14,
+ dml2_422_planar_12 = 15,
+ dml2_422_packed_8 = 16,
+ dml2_422_packed_10 = 17,
+ dml2_422_packed_12 = 18
};
enum dml2_rotation_angle {
@@ -121,15 +129,6 @@ enum dml2_dsc_enable_option {
dml2_dsc_enable_if_necessary = 2
};
-enum dml2_pstate_support_method {
- dml2_pstate_method_uninitialized,
- dml2_pstate_method_not_supported,
- dml2_pstate_method_vactive,
- dml2_pstate_method_vblank,
- dml2_pstate_method_svp,
- dml2_pstate_method_drr
-};
-
enum dml2_tdlut_addressing_mode {
dml2_tdlut_sw_linear = 0,
dml2_tdlut_simple_linear = 1
@@ -287,22 +286,23 @@ struct dml2_link_output_cfg {
bool validate_output; // Do not validate the link configuration for this display stream.
};
-struct dml2_writeback_cfg {
- bool enable;
+struct dml2_writeback_info {
enum dml2_source_format_class pixel_format;
- unsigned int active_writebacks_per_surface;
+ unsigned long input_width;
+ unsigned long input_height;
+ unsigned long output_width;
+ unsigned long output_height;
+ unsigned long v_taps;
+ unsigned long h_taps;
+ unsigned long v_taps_chroma;
+ unsigned long h_taps_chroma;
+ double h_ratio;
+ double v_ratio;
+};
- struct {
- bool enabled;
- unsigned long input_width;
- unsigned long input_height;
- unsigned long output_width;
- unsigned long output_height;
- unsigned long v_taps;
- unsigned long h_taps;
- double h_ratio;
- double v_ratio;
- } scaling_info;
+struct dml2_writeback_cfg {
+ unsigned int active_writebacks_per_stream;
+ struct dml2_writeback_info writeback_stream[DML2_MAX_WRITEBACK];
};
struct dml2_plane_parameters {
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h
index ebd8abe894a9..5f0bc42d1d2f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h
@@ -167,11 +167,13 @@ struct dml2_ip_capabilities {
unsigned int max_num_dp2p0_streams;
unsigned int max_num_hdmi_frl_outputs;
unsigned int max_num_dp2p0_outputs;
+ unsigned int max_num_wb;
unsigned int rob_buffer_size_kbytes;
unsigned int config_return_buffer_size_in_kbytes;
unsigned int config_return_buffer_segment_size_in_kbytes;
unsigned int meta_fifo_size_in_kentries;
unsigned int compressed_buffer_segment_size_in_kbytes;
+ unsigned int cursor_buffer_size;
unsigned int max_flip_time_us;
unsigned int max_flip_time_lines;
unsigned int hostvm_mode;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h
index eeb96c455658..d2d053f2354d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h
@@ -26,20 +26,14 @@ enum dml2_project_id {
dml2_project_dcn4x_stage2_auto_drr_svp = 3,
};
-enum dml2_dram_clock_change_support {
- dml2_dram_clock_change_vactive = 0,
- dml2_dram_clock_change_vblank = 1,
- dml2_dram_clock_change_vblank_and_vactive = 2,
- dml2_dram_clock_change_drr = 3,
- dml2_dram_clock_change_mall_svp = 4,
- dml2_dram_clock_change_mall_full_frame = 6,
- dml2_dram_clock_change_unsupported = 7
-};
-
-enum dml2_fclock_change_support {
- dml2_fclock_change_vactive = 0,
- dml2_fclock_change_vblank = 1,
- dml2_fclock_change_unsupported = 2
+enum dml2_pstate_change_support {
+ dml2_pstate_change_vactive = 0,
+ dml2_pstate_change_vblank = 1,
+ dml2_pstate_change_vblank_and_vactive = 2,
+ dml2_pstate_change_drr = 3,
+ dml2_pstate_change_mall_svp = 4,
+ dml2_pstate_change_mall_full_frame = 6,
+ dml2_pstate_change_unsupported = 7
};
enum dml2_output_type_and_rate__type {
@@ -202,24 +196,23 @@ struct dml2_mcache_surface_allocation {
} informative;
};
-enum dml2_uclk_pstate_support_method {
- dml2_uclk_pstate_support_method_not_supported = 0,
- /* hw */
- dml2_uclk_pstate_support_method_vactive = 1,
- dml2_uclk_pstate_support_method_vblank = 2,
- dml2_uclk_pstate_support_method_reserved_hw = 5,
- /* fw */
- dml2_uclk_pstate_support_method_fw_subvp_phantom = 6,
- dml2_uclk_pstate_support_method_reserved_fw = 10,
- /* fw w/drr */
- dml2_uclk_pstate_support_method_fw_vactive_drr = 11,
- dml2_uclk_pstate_support_method_fw_vblank_drr = 12,
- dml2_uclk_pstate_support_method_fw_subvp_phantom_drr = 13,
- dml2_uclk_pstate_support_method_reserved_fw_drr_fixed = 20,
- dml2_uclk_pstate_support_method_fw_drr = 21,
- dml2_uclk_pstate_support_method_reserved_fw_drr_var = 22,
-
- dml2_uclk_pstate_support_method_count
+enum dml2_pstate_method {
+ dml2_pstate_method_na = 0,
+ /* hw exclusive modes */
+ dml2_pstate_method_vactive = 1,
+ dml2_pstate_method_vblank = 2,
+ dml2_pstate_method_reserved_hw = 5,
+ /* fw assisted exclusive modes */
+ dml2_pstate_method_fw_svp = 6,
+ dml2_pstate_method_reserved_fw = 10,
+ /* fw assisted modes requiring drr modulation */
+ dml2_pstate_method_fw_vactive_drr = 11,
+ dml2_pstate_method_fw_vblank_drr = 12,
+ dml2_pstate_method_fw_svp_drr = 13,
+ dml2_pstate_method_reserved_fw_drr_clamped = 20,
+ dml2_pstate_method_fw_drr = 21,
+ dml2_pstate_method_reserved_fw_drr_var = 22,
+ dml2_pstate_method_count
};
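The renamed dml2_pstate_method enum above keeps an explicit numeric layout, so hw-exclusive methods (values 1-5) and fw-assisted methods (values 6-22, including the DRR-modulated ones) can be told apart by value range. A small hypothetical helper, not part of the driver, illustrating that grouping:

/* Hypothetical classification helper based on the enum values above. */
static bool dml2_pstate_method_is_fw_assisted(enum dml2_pstate_method method)
{
	return method >= dml2_pstate_method_fw_svp &&
	       method < dml2_pstate_method_count;
}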
struct dml2_per_plane_programming {
@@ -241,7 +234,7 @@ struct dml2_per_plane_programming {
// If a stream is using odm split, then this value is always 1
unsigned int num_dpps_required;
- enum dml2_uclk_pstate_support_method uclk_pstate_support_method;
+ enum dml2_pstate_method uclk_pstate_support_method;
// MALL size requirements for MALL SS and SubVP
unsigned int surface_size_mall_bytes;
@@ -281,7 +274,7 @@ struct dml2_per_stream_programming {
unsigned int num_odms_required;
- enum dml2_uclk_pstate_support_method uclk_pstate_method;
+ enum dml2_pstate_method uclk_pstate_method;
struct {
bool enabled;
@@ -289,7 +282,8 @@ struct dml2_per_stream_programming {
union dml2_global_sync_programming global_sync;
} phantom_stream;
- struct dmub_fams2_stream_static_state fams2_params;
+ union dmub_cmd_fams2_config fams2_base_params;
+ union dmub_cmd_fams2_config fams2_sub_params;
};
//-----------------
@@ -339,7 +333,7 @@ struct dml2_mode_support_info {
bool DCCMetaBufferSizeNotExceeded;
bool TotalVerticalActiveBandwidthSupport;
bool VActiveBandwidthSupport;
- enum dml2_fclock_change_support FCLKChangeSupport[DML2_MAX_PLANES];
+ enum dml2_pstate_change_support FCLKChangeSupport[DML2_MAX_PLANES];
bool USRRetrainingSupport;
bool PrefetchSupported;
bool DynamicMetadataSupported;
@@ -361,6 +355,7 @@ struct dml2_mode_support_info {
unsigned int AlignedYPitch[DML2_MAX_PLANES];
unsigned int AlignedCPitch[DML2_MAX_PLANES];
bool g6_temp_read_support;
+ bool temp_read_or_ppt_support;
}; // dml2_mode_support_info
struct dml2_display_cfg_programming {
@@ -392,6 +387,11 @@ struct dml2_display_cfg_programming {
unsigned long fclk_khz;
unsigned long dcfclk_khz;
} svp_prefetch;
+ struct {
+ unsigned long uclk_khz;
+ unsigned long fclk_khz;
+ unsigned long dcfclk_khz;
+ } svp_prefetch_no_throttle;
unsigned long deepsleep_dcfclk_khz;
unsigned long dispclk_khz;
@@ -444,7 +444,7 @@ struct dml2_display_cfg_programming {
double pstate_change_us;
double fclk_pstate_change_us;
double usr_retraining_us;
- double g6_temp_read_watermark_us;
+ double temp_read_or_ppt_watermark_us;
} watermarks;
struct {
@@ -653,6 +653,7 @@ struct dml2_display_cfg_programming {
double DisplayPipeLineDeliveryTimeLumaPrefetch[DML2_MAX_PLANES];
double DisplayPipeLineDeliveryTimeChromaPrefetch[DML2_MAX_PLANES];
+ double WritebackRequiredBandwidth;
double WritebackAllowDRAMClockChangeEndPosition[DML2_MAX_PLANES];
double WritebackAllowFCLKChangeEndPosition[DML2_MAX_PLANES];
double DSCCLK_calculated[DML2_MAX_PLANES];
@@ -662,6 +663,7 @@ struct dml2_display_cfg_programming {
double MaxActiveDRAMClockChangeLatencySupported[DML2_MAX_PLANES];
unsigned int PrefetchMode[DML2_MAX_PLANES]; // LEGACY_ONLY
bool ROBUrgencyAvoidance;
+ double LowestPrefetchMargin;
} misc;
struct dml2_mode_support_info mode_support_info;
@@ -675,6 +677,7 @@ struct dml2_display_cfg_programming {
bool failed_mcache_validation;
bool failed_dpmm;
bool failed_mode_programming;
+ bool failed_map_watermarks;
} informative;
};
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c
index 3d41ffde91c1..d68b4567e218 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c
@@ -9,7 +9,7 @@
#include "dml2_debug.h"
#include "lib_float_math.h"
-static const struct dml2_core_ip_params core_dcn4_ip_caps_base = {
+struct dml2_core_ip_params core_dcn4_ip_caps_base = {
// Hardcoded values for DCN3x
.vblank_nom_default_us = 668,
.remote_iommu_outstanding_translations = 256,
@@ -90,6 +90,7 @@ static void patch_ip_caps_with_explicit_ip_params(struct dml2_ip_capabilities *i
ip_caps->config_return_buffer_segment_size_in_kbytes = ip_params->config_return_buffer_segment_size_in_kbytes;
ip_caps->meta_fifo_size_in_kentries = ip_params->meta_fifo_size_in_kentries;
ip_caps->compressed_buffer_segment_size_in_kbytes = ip_params->compressed_buffer_segment_size_in_kbytes;
+ ip_caps->cursor_buffer_size = ip_params->cursor_buffer_size;
ip_caps->max_flip_time_us = ip_params->max_flip_time_us;
ip_caps->max_flip_time_lines = ip_params->max_flip_time_lines;
ip_caps->hostvm_mode = ip_params->hostvm_mode;
@@ -114,6 +115,7 @@ static void patch_ip_params_with_ip_caps(struct dml2_core_ip_params *ip_params,
ip_params->config_return_buffer_segment_size_in_kbytes = ip_caps->config_return_buffer_segment_size_in_kbytes;
ip_params->meta_fifo_size_in_kentries = ip_caps->meta_fifo_size_in_kentries;
ip_params->compressed_buffer_segment_size_in_kbytes = ip_caps->compressed_buffer_segment_size_in_kbytes;
+ ip_params->cursor_buffer_size = ip_caps->cursor_buffer_size;
ip_params->max_flip_time_us = ip_caps->max_flip_time_us;
ip_params->max_flip_time_lines = ip_caps->max_flip_time_lines;
ip_params->hostvm_mode = ip_caps->hostvm_mode;
@@ -316,28 +318,9 @@ static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_in
// Setup the appropriate p-state strategy
if (display_cfg->stage3.performed && display_cfg->stage3.success) {
- switch (display_cfg->stage3.pstate_switch_modes[plane_index]) {
- case dml2_uclk_pstate_support_method_vactive:
- case dml2_uclk_pstate_support_method_vblank:
- case dml2_uclk_pstate_support_method_fw_subvp_phantom:
- case dml2_uclk_pstate_support_method_fw_drr:
- case dml2_uclk_pstate_support_method_fw_vactive_drr:
- case dml2_uclk_pstate_support_method_fw_vblank_drr:
- case dml2_uclk_pstate_support_method_fw_subvp_phantom_drr:
- programming->plane_programming[plane_index].uclk_pstate_support_method = display_cfg->stage3.pstate_switch_modes[plane_index];
- break;
- case dml2_uclk_pstate_support_method_reserved_hw:
- case dml2_uclk_pstate_support_method_reserved_fw:
- case dml2_uclk_pstate_support_method_reserved_fw_drr_fixed:
- case dml2_uclk_pstate_support_method_reserved_fw_drr_var:
- case dml2_uclk_pstate_support_method_not_supported:
- case dml2_uclk_pstate_support_method_count:
- default:
- programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_not_supported;
- break;
- }
+ programming->plane_programming[plane_index].uclk_pstate_support_method = display_cfg->stage3.pstate_switch_modes[plane_index];
} else {
- programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_not_supported;
+ programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_na;
}
dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index);
@@ -360,7 +343,8 @@ static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_in
/* unconditionally populate fams2 params */
dml2_core_calcs_get_stream_fams2_programming(&core->clean_me_up.mode_lib,
display_cfg,
- &programming->stream_programming[main_plane->stream_index].fams2_params,
+ &programming->stream_programming[main_plane->stream_index].fams2_base_params,
+ &programming->stream_programming[main_plane->stream_index].fams2_sub_params,
programming->stream_programming[main_plane->stream_index].uclk_pstate_method,
plane_index);
@@ -572,18 +556,18 @@ bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out
in_out->programming->plane_programming[plane_index].num_dpps_required = core->clean_me_up.mode_lib.mp.NoOfDPP[plane_index];
if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe)
- in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom;
+ in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp;
else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe)
- in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom;
+ in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp;
else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return)
- in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom;
+ in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp;
else {
if (core->clean_me_up.mode_lib.mp.MaxActiveDRAMClockChangeLatencySupported[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us)
- in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vactive;
+ in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_vactive;
else if (core->clean_me_up.mode_lib.mp.TWait[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us)
- in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vblank;
+ in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_vblank;
else
- in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_not_supported;
+ in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_na;
}
dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &in_out->programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index);
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
index 601320b1be81..c4dbf27abaf8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
@@ -11,6 +11,9 @@
#define DML2_MAX_FMT_420_BUFFER_WIDTH 4096
#define DML_MAX_NUM_OF_SLICES_PER_DSC 4
+#define DML_MAX_COMPRESSION_RATIO 4
+//#define DML_MODE_SUPPORT_USE_DPM_DRAM_BW
+//#define DML_GLOBAL_PREFETCH_CHECK
#define ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE
const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type)
@@ -132,9 +135,9 @@ static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_su
dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported);
if (!fail_only || support->VRatioInPrefetchSupported == 0)
dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported);
- if (!fail_only || support->PTEBufferSizeNotExceeded == 1)
+ if (!fail_only || support->PTEBufferSizeNotExceeded == 0)
dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded);
- if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 1)
+ if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0)
dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded);
if (!fail_only || support->ExceededMALLSize == 1)
dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize);
@@ -315,12 +318,11 @@ dml_get_var_func(meta_trip_memory_us, double, mode_lib->mp.MetaTripToMemory);
dml_get_var_func(wm_fclk_change, double, mode_lib->mp.Watermark.FCLKChangeWatermark);
dml_get_var_func(wm_usr_retraining, double, mode_lib->mp.Watermark.USRRetrainingWatermark);
-dml_get_var_func(wm_g6_temp_read, double, mode_lib->mp.Watermark.g6_temp_read_watermark_us);
+dml_get_var_func(wm_temp_read_or_ppt, double, mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us);
dml_get_var_func(wm_dram_clock_change, double, mode_lib->mp.Watermark.DRAMClockChangeWatermark);
dml_get_var_func(fraction_of_urgent_bandwidth, double, mode_lib->mp.FractionOfUrgentBandwidth);
dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, double, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip);
dml_get_var_func(fraction_of_urgent_bandwidth_mall, double, mode_lib->mp.FractionOfUrgentBandwidthMALL);
-dml_get_var_func(urgent_latency, double, mode_lib->mp.UrgentLatency);
dml_get_var_func(wm_writeback_dram_clock_change, double, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
dml_get_var_func(wm_writeback_fclk_change, double, mode_lib->mp.Watermark.WritebackFCLKChangeWatermark);
dml_get_var_func(stutter_efficiency, double, mode_lib->mp.StutterEfficiency);
@@ -355,7 +357,9 @@ dml_get_var_func(svp_prefetch_urg_bw_available_sdp, double, mode_lib->mp.urg_ban
dml_get_var_func(svp_prefetch_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
dml_get_var_func(svp_prefetch_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_svp_prefetch]);
+dml_get_var_func(urgent_latency, double, mode_lib->mp.UrgentLatency);
dml_get_var_func(max_urgent_latency_us, double, mode_lib->ms.support.max_urgent_latency_us);
+dml_get_var_func(max_non_urgent_latency_us, double, mode_lib->ms.support.max_non_urgent_latency_us);
dml_get_var_func(avg_non_urgent_latency_us, double, mode_lib->ms.support.avg_non_urgent_latency_us);
dml_get_var_func(avg_urgent_latency_us, double, mode_lib->ms.support.avg_urgent_latency_us);
@@ -466,6 +470,24 @@ static bool dml_is_420(enum dml2_source_format_class source_format)
case dml2_420_12:
val = 1;
break;
+ case dml2_422_planar_8:
+ val = 0;
+ break;
+ case dml2_422_planar_10:
+ val = 0;
+ break;
+ case dml2_422_planar_12:
+ val = 0;
+ break;
+ case dml2_422_packed_8:
+ val = 0;
+ break;
+ case dml2_422_packed_10:
+ val = 0;
+ break;
+ case dml2_422_packed_12:
+ val = 0;
+ break;
case dml2_rgbe_alpha:
val = 0;
break;
@@ -487,32 +509,31 @@ static bool dml_is_420(enum dml2_source_format_class source_format)
static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode)
{
- switch (sw_mode) {
- case (dml2_sw_linear):
- return 256; break;
- case (dml2_sw_256b_2d):
- return 256; break;
- case (dml2_sw_4kb_2d):
- return 4096; break;
- case (dml2_sw_64kb_2d):
- return 65536; break;
- case (dml2_sw_256kb_2d):
- return 262144; break;
- case (dml2_gfx11_sw_linear):
- return 256; break;
- case (dml2_gfx11_sw_64kb_d):
- return 65536; break;
- case (dml2_gfx11_sw_64kb_d_t):
- return 65536; break;
- case (dml2_gfx11_sw_64kb_d_x):
- return 65536; break;
- case (dml2_gfx11_sw_64kb_r_x):
- return 65536; break;
- case (dml2_gfx11_sw_256kb_d_x):
- return 262144; break;
- case (dml2_gfx11_sw_256kb_r_x):
- return 262144; break;
- default:
+ if (sw_mode == dml2_sw_linear)
+ return 256;
+ else if (sw_mode == dml2_sw_256b_2d)
+ return 256;
+ else if (sw_mode == dml2_sw_4kb_2d)
+ return 4096;
+ else if (sw_mode == dml2_sw_64kb_2d)
+ return 65536;
+ else if (sw_mode == dml2_sw_256kb_2d)
+ return 262144;
+ else if (sw_mode == dml2_gfx11_sw_linear)
+ return 256;
+ else if (sw_mode == dml2_gfx11_sw_64kb_d)
+ return 65536;
+ else if (sw_mode == dml2_gfx11_sw_64kb_d_t)
+ return 65536;
+ else if (sw_mode == dml2_gfx11_sw_64kb_d_x)
+ return 65536;
+ else if (sw_mode == dml2_gfx11_sw_64kb_r_x)
+ return 65536;
+ else if (sw_mode == dml2_gfx11_sw_256kb_d_x)
+ return 262144;
+ else if (sw_mode == dml2_gfx11_sw_256kb_r_x)
+ return 262144;
+ else {
DML2_ASSERT(0);
return 256;
}
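// Illustration - a standalone sketch (not part of this patch) of the same swizzle-mode to
// tile-block-size relation expressed as a lookup table; only a subset of modes is shown and
// the enum below is a hypothetical stand-in for dml2_swizzle_mode.
#include <stdio.h>

enum sw_mode { SW_LINEAR, SW_256B_2D, SW_4KB_2D, SW_64KB_2D, SW_256KB_2D, SW_MODE_COUNT };

static const unsigned int tile_block_size_bytes[SW_MODE_COUNT] = {
	[SW_LINEAR]   = 256,
	[SW_256B_2D]  = 256,
	[SW_4KB_2D]   = 4096,
	[SW_64KB_2D]  = 65536,
	[SW_256KB_2D] = 262144,
};

int main(void)
{
	printf("64KB 2D tile block = %u bytes\n", tile_block_size_bytes[SW_64KB_2D]);
	return 0;
}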
@@ -579,8 +600,8 @@ static void CalculateBytePerPixelAndBlockSizes(
{
*BytePerPixelDETY = 0;
*BytePerPixelDETC = 0;
- *BytePerPixelY = 0;
- *BytePerPixelC = 0;
+ *BytePerPixelY = 1;
+ *BytePerPixelC = 1;
if (SourcePixelFormat == dml2_444_64) {
*BytePerPixelDETY = 8;
@@ -820,7 +841,7 @@ static void CalculateSwathWidth(
// Output
unsigned int req_per_swath_ub_l[],
unsigned int req_per_swath_ub_c[],
- unsigned int SwathWidthSingleDPPY[],
+ unsigned int SwathWidthSingleDPPY[], // post-rotated plane width
unsigned int SwathWidthSingleDPPC[],
unsigned int SwathWidthY[], // per-pipe
unsigned int SwathWidthC[], // per-pipe
@@ -1403,7 +1424,6 @@ static unsigned int dscceComputeDelay(
// N422/N420 operate at 2 pixels per clock
unsigned int pixelsPerClock, padding_pixels, ssm_group_priming_delay, ssm_pipeline_delay, obsm_pipeline_delay, slice_padded_pixels, ixd_plus_padding, ixd_plus_padding_groups, cycles_per_group, group_delay, pipeline_delay, pixels, additional_group_delay, lines_to_reach_ixd, groups_to_reach_ixd, slice_width_groups, initial_xmit_delay, number_of_lines_to_reach_ixd, slice_width_modified;
-
if (pixelFormat == dml2_420)
pixelsPerClock = 2;
// #all other modes operate at 1 pixel per clock
@@ -1428,7 +1448,6 @@ static unsigned int dscceComputeDelay(
}
}
-
//sub-stream multiplexer balance fifo priming delay in groups as per dsc standard
if (bpc == 8)
ssm_group_priming_delay = 83;
@@ -1447,9 +1466,6 @@ static unsigned int dscceComputeDelay(
//determine number of padded pixels in the last group of a slice line, computed as
slice_padded_pixels = 3 * slice_width_groups - slice_width_modified;
-
-
-
//determine integer number of complete slice lines required to reach initial transmit delay without ssm delay considered
number_of_lines_to_reach_ixd = initial_xmit_delay / slice_width_modified;
@@ -1463,7 +1479,6 @@ static unsigned int dscceComputeDelay(
//number of groups required for a slice to reach initial transmit delay is the sum of the padded initial transmit delay plus the ssm group priming delay
groups_to_reach_ixd = ixd_plus_padding_groups + ssm_group_priming_delay;
-
//number of lines required to reach padded initial transmit delay in groups in slices to the left of the last horizontal slice
//needs to be rounded up as a complete slice lines are buffered prior to initial transmit delay being reached in the last horizontal slice
lines_to_reach_ixd = (groups_to_reach_ixd + slice_width_groups - 1) / slice_width_groups; //round up lines to reach ixd to next
@@ -1506,7 +1521,6 @@ static unsigned int dscceComputeDelay(
return pixels;
}
-
//updated in dcn4
static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, enum dml2_output_encoder_class Output)
{
@@ -2090,7 +2104,6 @@ static void CalculateDCCConfiguration(
yuv420 = 1;
else
yuv420 = 0;
-
horz_div_l = 1;
horz_div_c = 1;
vert_div_l = 1;
@@ -2561,8 +2574,7 @@ static void calculate_mcache_setting(
if (*p->num_mcaches_l) {
l->avg_mcache_element_size_l = l->meta_row_width_l / *p->num_mcaches_l;
}
-
- if (l->is_dual_plane && *p->num_mcaches_c) {
+ if (l->is_dual_plane) {
l->avg_mcache_element_size_c = l->meta_row_width_c / *p->num_mcaches_c;
if (!p->imall_enable || (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c)) {
@@ -2682,12 +2694,12 @@ static double dml_get_return_bandwidth_available(
bool is_avg_bw,
bool is_hvm_en,
bool is_hvm_only,
- double dcflk_mhz,
+ double dcfclk_mhz,
double fclk_mhz,
double dram_bw_mbps)
{
double return_bw_mbps = 0.;
- double ideal_sdp_bandwidth = (double)soc->return_bus_width_bytes * dcflk_mhz;
+ double ideal_sdp_bandwidth = (double)soc->return_bus_width_bytes * dcfclk_mhz;
double ideal_fabric_bandwidth = fclk_mhz * (double)soc->fabric_datapath_to_dcn_data_return_bytes;
double ideal_dram_bandwidth = dram_bw_mbps; //dram_speed_mts * soc->clk_table.dram_config.channel_count * soc->clk_table.dram_config.channel_width_bytes;
@@ -2753,7 +2765,7 @@ static double dml_get_return_bandwidth_available(
dml2_printf("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only);
dml2_printf("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type));
dml2_printf("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type));
- dml2_printf("DML::%s: dcflk_mhz = %f\n", __func__, dcflk_mhz);
+ dml2_printf("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz);
dml2_printf("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz);
dml2_printf("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth);
dml2_printf("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth);
@@ -3516,10 +3528,9 @@ static void CalculateUrgentBurstFactor(
dml2_printf("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma);
dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding);
#endif
-
}
-static void CalculateDCFCLKDeepSleep(
+static void CalculateDCFCLKDeepSleepTdlut(
const struct dml2_display_cfg *display_cfg,
unsigned int NumberOfActiveSurfaces,
unsigned int BytePerPixelY[],
@@ -3534,6 +3545,10 @@ static void CalculateDCFCLKDeepSleep(
double ReadBandwidthChroma[],
unsigned int ReturnBusWidth,
+ double dispclk,
+ unsigned int tdlut_bytes_to_deliver[],
+ double prefetch_swath_time_us[],
+
// Output
double *DCFClkDeepSleep)
{
@@ -3568,6 +3583,22 @@ static void CalculateDCFCLKDeepSleep(
}
DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], pixel_rate_mhz / 16);
+ // adjust for 3dlut delivery time
+ if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && tdlut_bytes_to_deliver[k] > 0) {
+ double tdlut_required_deepsleep_dcfclk = (double) tdlut_bytes_to_deliver[k] / 64.0 / prefetch_swath_time_us[k];
+
+ dml2_printf("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
+ dml2_printf("DML::%s: k=%d, tdlut_bytes_to_deliver = %d\n", __func__, k, tdlut_bytes_to_deliver[k]);
+ dml2_printf("DML::%s: k=%d, prefetch_swath_time_us = %f\n", __func__, k, prefetch_swath_time_us[k]);
+ dml2_printf("DML::%s: k=%d, tdlut_required_deepsleep_dcfclk = %f\n", __func__, k, tdlut_required_deepsleep_dcfclk);
+
+ // increase the deepsleep dcfclk to match the original dispclk throughput rate
+ if (tdlut_required_deepsleep_dcfclk > DCFClkDeepSleepPerSurface[k]) {
+ DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], tdlut_required_deepsleep_dcfclk);
+ DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], dispclk / 4.0);
+ }
+ }
+
#ifdef __DML_VBA_DEBUG__
dml2_printf("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz);
dml2_printf("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
@@ -3590,9 +3621,56 @@ static void CalculateDCFCLKDeepSleep(
for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
*DCFClkDeepSleep = math_max2(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
}
+
dml2_printf("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
}
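// Illustration - a standalone numeric sketch (not part of this patch) of the 3D LUT adjustment
// added above: the per-surface deep-sleep DCFCLK is raised so tdlut_bytes_to_deliver can be
// returned at 64 bytes per cycle within the prefetch swath time, and additionally floored at
// dispclk/4 when the LUT requirement dominates. All values below are made up.
#include <stdio.h>

static double max2(double a, double b) { return a > b ? a : b; }

int main(void)
{
	double dcfclk_deepsleep_mhz = 150.0;		/* baseline per-surface requirement */
	double dispclk_mhz = 1200.0;
	unsigned int tdlut_bytes_to_deliver = 65536;	/* e.g. cursor_buffer_size * 1024 */
	double prefetch_swath_time_us = 5.0;

	/* bytes delivered at 64B per deep-sleep DCFCLK cycle must fit in the prefetch swath time */
	double tdlut_required_mhz = (double)tdlut_bytes_to_deliver / 64.0 / prefetch_swath_time_us;

	if (tdlut_required_mhz > dcfclk_deepsleep_mhz) {
		dcfclk_deepsleep_mhz = max2(dcfclk_deepsleep_mhz, tdlut_required_mhz);
		dcfclk_deepsleep_mhz = max2(dcfclk_deepsleep_mhz, dispclk_mhz / 4.0);
	}

	printf("tdlut_required = %.1f MHz, deepsleep dcfclk = %.1f MHz\n",
	       tdlut_required_mhz, dcfclk_deepsleep_mhz);	/* 204.8 MHz -> 300.0 MHz (dispclk/4 floor) */
	return 0;
}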
+static void CalculateDCFCLKDeepSleep(
+ const struct dml2_display_cfg *display_cfg,
+ unsigned int NumberOfActiveSurfaces,
+ unsigned int BytePerPixelY[],
+ unsigned int BytePerPixelC[],
+ unsigned int SwathWidthY[],
+ unsigned int SwathWidthC[],
+ unsigned int DPPPerSurface[],
+ double PSCL_THROUGHPUT[],
+ double PSCL_THROUGHPUT_CHROMA[],
+ double Dppclk[],
+ double ReadBandwidthLuma[],
+ double ReadBandwidthChroma[],
+ unsigned int ReturnBusWidth,
+
+ // Output
+ double *DCFClkDeepSleep)
+{
+ double zero_double[DML2_MAX_PLANES];
+ unsigned int zero_integer[DML2_MAX_PLANES];
+
+ memset(zero_double, 0, DML2_MAX_PLANES * sizeof(double));
+ memset(zero_integer, 0, DML2_MAX_PLANES * sizeof(unsigned int));
+
+ CalculateDCFCLKDeepSleepTdlut(
+ display_cfg,
+ NumberOfActiveSurfaces,
+ BytePerPixelY,
+ BytePerPixelC,
+ SwathWidthY,
+ SwathWidthC,
+ DPPPerSurface,
+ PSCL_THROUGHPUT,
+ PSCL_THROUGHPUT_CHROMA,
+ Dppclk,
+ ReadBandwidthLuma,
+ ReadBandwidthChroma,
+ ReturnBusWidth,
+ 0,
+ zero_integer, //tdlut_bytes_to_deliver,
+ zero_double, //prefetch_swath_time_us,
+
+ // Output
+ DCFClkDeepSleep);
+}
+
static double CalculateWriteBackDelay(
enum dml2_source_format_class WritebackPixelFormat,
double WritebackHRatio,
@@ -3816,8 +3894,8 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch
p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2;
RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2;
- p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
- p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
+ p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
+ p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
}
if (p->SwathHeightC[k] == 0)
@@ -4592,6 +4670,7 @@ static void calculate_tdlut_setting(
*p->tdlut_groups_per_2row_ub = 0;
*p->tdlut_opt_time = 0;
*p->tdlut_drain_time = 0;
+ *p->tdlut_bytes_to_deliver = 0;
*p->tdlut_bytes_per_group = 0;
*p->tdlut_pte_bytes_per_frame = 0;
*p->tdlut_bytes_per_frame = 0;
@@ -4660,6 +4739,7 @@ static void calculate_tdlut_setting(
*p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1);
*p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate;
*p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate;
+ *p->tdlut_bytes_to_deliver = (unsigned int) (p->cursor_buffer_size * 1024.0);
}
#ifdef __DML_VBA_DEBUG__
@@ -4680,6 +4760,7 @@ static void calculate_tdlut_setting(
dml2_printf("DML::%s: tdlut_delivery_cycles = %u\n", __func__, tdlut_delivery_cycles);
dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time);
dml2_printf("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time);
+ dml2_printf("DML::%s: tdlut_bytes_to_deliver = %d\n", __func__, *p->tdlut_bytes_to_deliver);
dml2_printf("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub);
#endif
}
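// Illustration - a standalone sketch (not part of this patch) of the quantities reported above:
// the cursor-buffer-sized tail of the LUT is drained at tdlut_drain_rate during tdlut_drain_time,
// the rest of the frame during tdlut_opt_time, and the new tdlut_bytes_to_deliver exposes that
// tail so the deep-sleep DCFCLK code can budget for it. The drain rate below is a placeholder.
#include <stdio.h>

int main(void)
{
	double tdlut_bytes_per_frame = 196608.0;	/* hypothetical LUT footprint */
	double cursor_buffer_size_kb = 16.0;		/* KB, as used by p->cursor_buffer_size */
	double tdlut_drain_rate = 4096.0;		/* bytes/us, placeholder for the real drain rate */

	double tail_bytes = cursor_buffer_size_kb * 1024.0;
	double tdlut_opt_time = (tdlut_bytes_per_frame - tail_bytes) / tdlut_drain_rate;
	double tdlut_drain_time = tail_bytes / tdlut_drain_rate;
	unsigned int tdlut_bytes_to_deliver = (unsigned int)tail_bytes;

	printf("opt_time = %.2f us, drain_time = %.2f us, bytes_to_deliver = %u\n",
	       tdlut_opt_time, tdlut_drain_time, tdlut_bytes_to_deliver);	/* 44.00, 4.00, 16384 */
	return 0;
}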
@@ -5069,20 +5150,18 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
s->trip_to_mem = 0.0;
*p->Tvm_trips = 0.0;
*p->Tr0_trips = 0.0;
- s->Tvm_no_trip_oto = 0.0;
- s->Tr0_no_trip_oto = 0.0;
s->Tvm_trips_rounded = 0.0;
s->Tr0_trips_rounded = 0.0;
s->max_Tsw = 0.0;
s->Lsw_oto = 0.0;
- s->Tpre_rounded = 0.0;
+ *p->Tpre_rounded = 0.0;
s->prefetch_bw_equ = 0.0;
s->Tvm_equ = 0.0;
s->Tr0_equ = 0.0;
s->Tdmbf = 0.0;
s->Tdmec = 0.0;
s->Tdmsks = 0.0;
- s->prefetch_sw_bytes = 0.0;
+ *p->prefetch_sw_bytes = 0.0;
s->prefetch_bw_pr = 0.0;
s->bytes_pp = 0.0;
s->dep_bytes = 0.0;
@@ -5207,6 +5286,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
dml2_printf("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut);
dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time);
dml2_printf("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame);
+ dml2_printf("DML::%s: tdlut_drain_time = %f\n", __func__, p->tdlut_drain_time);
#endif
if (p->OutputFormat == dml2_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP))
@@ -5277,23 +5357,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC;
}
- s->prefetch_bw_pr = s->bytes_pp * p->myPipe->PixelClock / (double)p->myPipe->DPPPerSurface;
- if (p->myPipe->VRatio < 1.0)
- s->prefetch_bw_pr = p->myPipe->VRatio * s->prefetch_bw_pr;
- s->max_Tsw = (math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime);
-
- s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC;
- s->prefetch_bw_pr = s->prefetch_bw_pr * p->mall_prefetch_sdp_overhead_factor;
- s->prefetch_sw_bytes = s->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor;
- s->prefetch_bw_oto = math_max2(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw);
-
- s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__;
- s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0);
- s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime);
-
- s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__;
- s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0);
- s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime);
+ *p->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC;
+ *p->prefetch_sw_bytes = *p->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor;
vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes;
extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128);
@@ -5302,57 +5367,103 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
vm_bytes = vm_bytes + p->tdlut_pte_bytes_per_frame + (p->display_cfg->gpuvm_enable ? extra_tdpe_bytes : 0);
tdlut_row_bytes = (unsigned long) math_ceil2(p->tdlut_bytes_per_frame/2.0, 1.0);
+
+ s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__;
+ s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime);
+ s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0);
+
+ // use vactive swath bw for prefetch oto and also cap prefetch_bw_oto to max_vratio_oto
+ // Note: in prefetch calculation, accounting is done mostly per-pipe.
+ // vactive swath bw represents the per-surface (aka per dml plane) bw to move vratio_l/c lines of bytes_l/c per line time
+ s->per_pipe_vactive_sw_bw = p->vactive_sw_bw_l / (double)p->myPipe->DPPPerSurface;
+
+ // one-to-one prefetch bw as one line of bytes per line time (as per vratio_pre_l/c = 1)
+ s->prefetch_bw_oto = (p->swath_width_luma_ub * p->myPipe->BytePerPixelY) / s->LineTime;
+
+ if (p->myPipe->BytePerPixelC > 0) {
+ s->per_pipe_vactive_sw_bw += p->vactive_sw_bw_c / (double)p->myPipe->DPPPerSurface;
+ s->prefetch_bw_oto += (p->swath_width_chroma_ub * p->myPipe->BytePerPixelC) / s->LineTime;
+ }
+
+ s->prefetch_bw_oto = math_max2(s->per_pipe_vactive_sw_bw, s->prefetch_bw_oto) * p->mall_prefetch_sdp_overhead_factor;
+
+ s->prefetch_bw_oto = math_min2(s->prefetch_bw_oto, *p->prefetch_sw_bytes/(s->min_Lsw_oto*s->LineTime));
+
+ s->Lsw_oto = math_ceil2(4.0 * *p->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, 1.0) / 4.0;
+
s->prefetch_bw_oto = math_max3(s->prefetch_bw_oto,
p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
(p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
- s->Lsw_oto = math_ceil2(4.0 * math_max2(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0;
+
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l);
+ dml2_printf("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c);
+ dml2_printf("DML::%s: per_pipe_vactive_sw_bw = %f\n", __func__, s->per_pipe_vactive_sw_bw);
+#endif
if (p->display_cfg->gpuvm_enable == true) {
- s->Tvm_no_trip_oto = math_max2(
+ s->Tvm_oto = math_max3(
+ *p->Tvm_trips,
*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto,
s->LineTime / 4.0);
- s->Tvm_oto = math_max2(
- *p->Tvm_trips,
- s->Tvm_no_trip_oto);
+
#ifdef __DML_VBA_DEBUG__
dml2_printf("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips);
dml2_printf("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto);
dml2_printf("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0);
#endif
} else {
- s->Tvm_no_trip_oto = s->Tvm_trips_rounded;
s->Tvm_oto = s->Tvm_trips_rounded;
}
if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) {
- s->Tr0_no_trip_oto = math_max2(
+ s->Tr0_oto = math_max3(
+ *p->Tr0_trips,
(p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto,
s->LineTime / 4.0);
- s->Tr0_oto = math_max2(
- *p->Tr0_trips,
- s->Tr0_no_trip_oto);
#ifdef __DML_VBA_DEBUG__
dml2_printf("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips);
dml2_printf("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto);
dml2_printf("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4);
#endif
- } else {
- s->Tr0_no_trip_oto = (s->LineTime - s->Tvm_oto) / 4.0;
- s->Tr0_oto = s->Tr0_no_trip_oto;
- }
+ } else
+ s->Tr0_oto = s->LineTime / 4.0;
s->Tvm_oto_lines = math_ceil2(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0;
s->Tr0_oto_lines = math_ceil2(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0;
s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto;
+#ifdef DML_GLOBAL_PREFETCH_CHECK
+ dml2_printf("DML::%s: impacted_Tpre = %f\n", __func__, p->impacted_dst_y_pre);
+ if (p->impacted_dst_y_pre > 0) {
+ dml2_printf("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
+ s->dst_y_prefetch_oto = math_max2(s->dst_y_prefetch_oto, p->impacted_dst_y_pre);
+ dml2_printf("DML::%s: dst_y_prefetch_oto = %f (impacted)\n", __func__, s->dst_y_prefetch_oto);
+ }
+#endif
+ *p->Tpre_oto = s->dst_y_prefetch_oto * s->LineTime;
+
//To (time for delay after scaler) in line time
Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / (double)p->myPipe->HTotal);
+ s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__;
+ s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime);
+ s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0);
//Tpre_equ in line time
if (p->DynamicMetadataVMEnabled && p->DynamicMetadataEnable)
s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, *p->Tvm_trips) + s->TWait_p) / s->LineTime - Lo;
else
s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, p->ExtraLatencyPrefetch) + s->TWait_p) / s->LineTime - Lo;
+
+#ifdef DML_GLOBAL_PREFETCH_CHECK
+ s->dst_y_prefetch_equ_impacted = math_max2(p->impacted_dst_y_pre, s->dst_y_prefetch_equ);
+
+ s->dst_y_prefetch_equ_impacted = math_min2(s->dst_y_prefetch_equ_impacted, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
+
+ if (s->dst_y_prefetch_equ_impacted > s->dst_y_prefetch_equ)
+ s->dst_y_prefetch_equ -= s->dst_y_prefetch_equ_impacted - s->dst_y_prefetch_equ;
+#endif
+
s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
#ifdef __DML_VBA_DEBUG__
@@ -5370,7 +5481,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
dml2_printf("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC);
dml2_printf("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
dml2_printf("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub);
- dml2_printf("DML::%s: prefetch_sw_bytes = %f\n", __func__, s->prefetch_sw_bytes);
+ dml2_printf("DML::%s: prefetch_sw_bytes = %f\n", __func__, *p->prefetch_sw_bytes);
dml2_printf("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw);
dml2_printf("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp);
dml2_printf("DML::%s: vm_bytes = %u\n", __func__, vm_bytes);
@@ -5394,7 +5505,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
#endif
double Tpre = s->dst_y_prefetch_equ * s->LineTime;
s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0;
- s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime;
+ *p->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime;
#ifdef __DML_VBA_DEBUG__
dml2_printf("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ);
@@ -5420,7 +5531,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
dml2_printf("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor);
dml2_printf("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes);
dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
- dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, s->Tpre_rounded, (s->Tpre_rounded - Tpre));
+ dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, *p->Tpre_rounded, (*p->Tpre_rounded - Tpre));
dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips));
#endif
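// Illustration - a standalone sketch (not part of this patch) of the fixed-point handling above:
// dst_y_prefetch_equ is clamped to the U6.2 limit of the DST_Y_PREFETCH register (63.75 lines),
// rounded to the nearest quarter line, and Tpre_rounded is the rounded line count times the line
// time. The numbers are illustrative.
#include <math.h>
#include <stdio.h>

int main(void)
{
	double line_time_us = 8.0;
	double dst_y_prefetch_equ = 23.37;	/* hypothetical unrounded prefetch lines */

	if (dst_y_prefetch_equ > 63.75)		/* U6.2 register limit */
		dst_y_prefetch_equ = 63.75;

	/* round to the nearest 0.25 line: floor(4*(x + 0.125)) / 4 */
	dst_y_prefetch_equ = floor(4.0 * (dst_y_prefetch_equ + 0.125)) / 4.0;

	double tpre_rounded_us = dst_y_prefetch_equ * line_time_us;

	printf("dst_y_prefetch = %.2f lines, Tpre_rounded = %.2f us\n",
	       dst_y_prefetch_equ, tpre_rounded_us);	/* 23.25 lines, 186.00 us */
	return 0;
}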
@@ -5434,78 +5545,85 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
// Tpre_rounded is Tpre rounding to 2-bit fraction
// Tvm_trips_rounded is Tvm_trips ceiling to 1/4 line time
// Tr0_trips_rounded is Tr0_trips ceiling to 1/4 line time
- // So that means prefetch bw calculated can be higher since the total time availabe for prefetch is less
- bool min_Lsw_equ_ok = s->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime;
+ // So that means prefetch bw calculated can be higher since the total time available for prefetch is less
+ bool min_Lsw_equ_ok = *p->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime;
+ bool tpre_gt_req_latency = true;
+#if 0
+ // Check that Tpre_rounded is big enough if all of the stages of the prefetch are time constrained.
+ // The terms Tvm_trips_rounded and Tr0_trips_rounded represent the min time constraints for the VM and row stages.
+ // Normally, these terms cover the overall time constraint for Tpre >= (Tex + max{Ttrip, Turg}), but if these terms are at their minimum, an explicit check is necessary.
+ tpre_gt_req_latency = *p->Tpre_rounded > (math_max2(p->Turg, s->trip_to_mem) + p->ExtraLatencyPrefetch);
+#endif
- if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok) {
+ if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok && tpre_gt_req_latency) {
s->prefetch_bw1 = 0.;
s->prefetch_bw2 = 0.;
s->prefetch_bw3 = 0.;
s->prefetch_bw4 = 0.;
// prefetch_bw1: VM + 2*R0 + SW
- if (s->Tpre_rounded - *p->Tno_bw > 0) {
+ if (*p->Tpre_rounded - *p->Tno_bw > 0) {
s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor
+ 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)
- + s->prefetch_sw_bytes)
- / (s->Tpre_rounded - *p->Tno_bw);
- s->Tsw_est1 = s->prefetch_sw_bytes / s->prefetch_bw1;
+ + *p->prefetch_sw_bytes)
+ / (*p->Tpre_rounded - *p->Tno_bw);
+ s->Tsw_est1 = *p->prefetch_sw_bytes / s->prefetch_bw1;
} else
s->prefetch_bw1 = 0;
dml2_printf("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1);
- if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) {
+ if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) {
s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) /
- (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw);
+ (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw);
#ifdef __DML_VBA_DEBUG__
dml2_printf("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)));
- dml2_printf("DML::%s: Tpre_rounded = %f\n", __func__, s->Tpre_rounded);
+ dml2_printf("DML::%s: Tpre_rounded = %f\n", __func__, *p->Tpre_rounded);
dml2_printf("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw);
dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ);
dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime);
dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw);
- dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw));
+ dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw));
dml2_printf("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1);
#endif
}
// prefetch_bw2: VM + SW
- if (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) {
- s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) /
- (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded);
- s->Tsw_est2 = s->prefetch_sw_bytes / s->prefetch_bw2;
+ if (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) {
+ s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + *p->prefetch_sw_bytes) /
+ (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded);
+ s->Tsw_est2 = *p->prefetch_sw_bytes / s->prefetch_bw2;
} else
s->prefetch_bw2 = 0;
dml2_printf("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2);
- if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) {
- s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime);
+ if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) {
+ s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime);
dml2_printf("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2);
}
// prefetch_bw3: 2*R0 + SW
- if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) {
- s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + s->prefetch_sw_bytes) /
- (s->Tpre_rounded - s->Tvm_trips_rounded);
- s->Tsw_est3 = s->prefetch_sw_bytes / s->prefetch_bw3;
+ if (*p->Tpre_rounded - s->Tvm_trips_rounded > 0) {
+ s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + *p->prefetch_sw_bytes) /
+ (*p->Tpre_rounded - s->Tvm_trips_rounded);
+ s->Tsw_est3 = *p->prefetch_sw_bytes / s->prefetch_bw3;
} else
s->prefetch_bw3 = 0;
dml2_printf("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3);
- if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) {
- s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
+ if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) {
+ s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
dml2_printf("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3);
}
// prefetch_bw4: SW
- if (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0)
- s->prefetch_bw4 = s->prefetch_sw_bytes / (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded);
+ if (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0)
+ s->prefetch_bw4 = *p->prefetch_sw_bytes / (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded);
else
s->prefetch_bw4 = 0;
#ifdef __DML_VBA_DEBUG__
dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
- dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, s->Tpre_rounded, (s->Tpre_rounded - Tpre));
+ dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, *p->Tpre_rounded, (*p->Tpre_rounded - Tpre));
dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips));
dml2_printf("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips));
dml2_printf("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1);
@@ -5617,9 +5735,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
dml2_printf("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ);
dml2_printf("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ);
#endif
- // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank)
- s->Lsw_equ = s->dst_y_prefetch_equ - math_ceil2(4.0 * (s->Tvm_equ + 2 * s->Tr0_equ) / s->LineTime, 1.0) / 4.0;
-
// Use the more stressful prefetch schedule
if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) {
*p->dst_y_prefetch = s->dst_y_prefetch_oto;
@@ -5628,31 +5743,33 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
*p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0;
*p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
- s->dst_y_per_vm_no_trip_vblank = math_ceil2(4.0 * s->Tvm_no_trip_oto / s->LineTime, 1.0) / 4.0;
- s->dst_y_per_row_no_trip_vblank = math_ceil2(4.0 * s->Tr0_no_trip_oto / s->LineTime, 1.0) / 4.0;
#ifdef __DML_VBA_DEBUG__
dml2_printf("DML::%s: Using oto scheduling for prefetch\n", __func__);
#endif
+
} else {
*p->dst_y_prefetch = s->dst_y_prefetch_equ;
+
+ if (s->dst_y_prefetch_equ < s->dst_y_prefetch_equ_impacted)
+ *p->dst_y_prefetch = s->dst_y_prefetch_equ_impacted;
+
s->TimeForFetchingVM = s->Tvm_equ;
s->TimeForFetchingRowInVBlank = s->Tr0_equ;
- *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0;
- *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
- s->dst_y_per_vm_no_trip_vblank = *p->dst_y_per_vm_vblank;
- s->dst_y_per_row_no_trip_vblank = *p->dst_y_per_row_vblank;
+ *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0;
+ *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
#ifdef __DML_VBA_DEBUG__
dml2_printf("DML::%s: Using equ bw scheduling for prefetch\n", __func__);
#endif
}
- /* take worst case Lsw to calculate bandwidth requirement regardless of schedule */
- s->LinesToRequestPrefetchPixelData = math_min2(s->Lsw_equ, s->Lsw_oto); // Lsw
+ // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank)
+ s->LinesToRequestPrefetchPixelData = *p->dst_y_prefetch - *p->dst_y_per_vm_vblank - 2 * *p->dst_y_per_row_vblank; // Lsw
s->cursor_prefetch_bytes = (unsigned int)math_max2(p->cursor_bytes_per_chunk, 4 * p->cursor_bytes_per_line);
*p->prefetch_cursor_bw = p->num_cursors * s->cursor_prefetch_bytes / (s->LinesToRequestPrefetchPixelData * s->LineTime);
+ *p->prefetch_swath_time_us = (s->LinesToRequestPrefetchPixelData * s->LineTime);
#ifdef __DML_VBA_DEBUG__
dml2_printf("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM);
@@ -5663,6 +5780,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank);
dml2_printf("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData);
dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
+ dml2_printf("DML::%s: prefetch_swath_time_us = %f\n", __func__, *p->prefetch_swath_time_us);
dml2_printf("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk);
dml2_printf("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line);
@@ -5749,8 +5867,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
} else {
dml2_printf("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ);
- dml2_printf("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n",
- __func__, min_Lsw_equ_ok, s->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime);
+ dml2_printf("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n",
+ __func__, min_Lsw_equ_ok, *p->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime);
+ dml2_printf("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded+Tvm_trips_rounded+2.0*Tr0_trips_rounded+min_Tsw_equ (%f) should be > \n",
+ __func__, tpre_gt_req_latency, (s->min_Lsw_equ*s->LineTime + s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded), p->Turg, s->trip_to_mem, p->ExtraLatencyPrefetch);
s->NoTimeToPrefetch = true;
s->TimeForFetchingVM = 0;
s->TimeForFetchingRowInVBlank = 0;
@@ -5769,13 +5889,13 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
if (vm_bytes == 0) {
prefetch_vm_bw = 0;
- } else if (s->dst_y_per_vm_no_trip_vblank > 0) {
+ } else if (*p->dst_y_per_vm_vblank > 0) {
#ifdef __DML_VBA_DEBUG__
dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank);
dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime);
#endif
- prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (s->dst_y_per_vm_no_trip_vblank * s->LineTime);
+ prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (*p->dst_y_per_vm_vblank * s->LineTime);
#ifdef __DML_VBA_DEBUG__
dml2_printf("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
#endif
@@ -5787,8 +5907,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) {
prefetch_row_bw = 0;
- } else if (s->dst_y_per_row_no_trip_vblank > 0) {
- prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (s->dst_y_per_row_no_trip_vblank * s->LineTime);
+ } else if (*p->dst_y_per_row_vblank > 0) {
+ prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (*p->dst_y_per_row_vblank * s->LineTime);
#ifdef __DML_VBA_DEBUG__
dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
@@ -5828,6 +5948,171 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
return s->NoTimeToPrefetch;
}
+static unsigned int get_num_lb_source_lines(unsigned int max_line_buffer_lines,
+ unsigned int line_buffer_size_bits,
+ unsigned int num_pipes,
+ unsigned int vp_width,
+ unsigned int vp_height,
+ double h_ratio,
+ enum dml2_rotation_angle rotation_angle)
+{
+ unsigned int num_lb_source_lines = 0;
+ double lb_bit_per_pixel = 57.0;
+ unsigned int recin_width = vp_width/num_pipes;
+
+ if (dml_is_vertical_rotation(rotation_angle))
+ recin_width = vp_height/num_pipes;
+
+ num_lb_source_lines = (unsigned int) math_min2((double) max_line_buffer_lines,
+ math_floor2(line_buffer_size_bits / lb_bit_per_pixel / (recin_width / math_max2(h_ratio, 1.0)), 1.0));
+
+ return num_lb_source_lines;
+}
+
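// Illustration - a standalone sketch (not part of this patch) of what get_num_lb_source_lines()
// computes: how many source lines fit in the line buffer for one pipe's share of the viewport,
// capped by the hardware line count limit. The 57 bits/pixel constant mirrors the helper; the
// other numbers are made up.
#include <math.h>
#include <stdio.h>

int main(void)
{
	unsigned int max_line_buffer_lines = 32;
	unsigned int line_buffer_size_bits = 986880;	/* hypothetical line buffer size */
	unsigned int num_pipes = 2;
	unsigned int vp_width = 3840;
	double h_ratio = 1.5;
	double lb_bit_per_pixel = 57.0;

	double recin_width = (double)vp_width / num_pipes;		/* 1920 pixels per pipe */
	double lines = floor(line_buffer_size_bits / lb_bit_per_pixel /
			     (recin_width / fmax(h_ratio, 1.0)));
	if (lines > max_line_buffer_lines)
		lines = max_line_buffer_lines;

	printf("lb source lines per pipe = %.0f\n", lines);	/* 13 */
	return 0;
}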
+static unsigned int find_max_impact_plane(unsigned int this_plane_idx, unsigned int num_planes, unsigned int Trpd_dcfclk_cycles[])
+{
+ int max_value = -1;
+ int max_idx = -1;
+ for (unsigned int i = 0; i < num_planes; i++) {
+ if (i != this_plane_idx && (int) Trpd_dcfclk_cycles[i] > max_value) {
+ max_value = Trpd_dcfclk_cycles[i];
+ max_idx = i;
+ }
+ }
+ if (max_idx < 0) {
+ dml2_assert(max_idx >= 0);
+ max_idx = this_plane_idx;
+ }
+
+ return max_idx;
+}
+
+static double calculate_impacted_Tsw(unsigned int exclude_plane_idx, unsigned int num_planes, double *prefetch_swath_bytes, double bw_mbps)
+{
+ double sum = 0.;
+ for (unsigned int i = 0; i < num_planes; i++) {
+ if (i != exclude_plane_idx) {
+ sum += prefetch_swath_bytes[i];
+ }
+ }
+ return sum / bw_mbps;
+}
+
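// Illustration - a standalone sketch (not part of this patch) of calculate_impacted_Tsw() above:
// the swath bytes of every other plane are assumed to be in flight on the return path, so the
// extra time seen by the excluded plane is their sum divided by the estimated urgent bandwidth.
// Values are made up.
#include <stdio.h>

static double impacted_tsw_us(unsigned int exclude, unsigned int n,
			      const double *prefetch_sw_bytes, double bw_mbps)
{
	double sum = 0.0;
	for (unsigned int i = 0; i < n; i++)
		if (i != exclude)
			sum += prefetch_sw_bytes[i];
	return sum / bw_mbps;	/* bytes / (bytes per us) */
}

int main(void)
{
	double sw_bytes[3] = { 1.0e6, 2.0e6, 0.5e6 };	/* hypothetical per-plane swath bytes */
	double urg_bw_mbps = 25000.0;			/* estimated urgent return bandwidth */

	printf("Tsw impact on plane 0 = %.1f us\n",
	       impacted_tsw_us(0, 3, sw_bytes, urg_bw_mbps));	/* (2.0e6 + 0.5e6) / 25000 = 100.0 us */
	return 0;
}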
+// a global check against the aggregate effect of the per plane prefetch schedule
+static bool CheckGlobalPrefetchAdmissibility(struct dml2_core_internal_scratch *scratch,
+ struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *p)
+{
+ struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals *s = &scratch->CheckGlobalPrefetchAdmissibility_locals;
+ unsigned int i, k;
+
+ memset(s, 0, sizeof(struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals));
+
+ *p->recalc_prefetch_schedule = 0;
+ s->prefetch_global_check_passed = 1;
+ // worst case: the rob and cdb are fully hogged
+ s->max_Trpd_dcfclk_cycles = (unsigned int) math_ceil2((p->rob_buffer_size_kbytes*1024 + p->compressed_buffer_size_kbytes*DML_MAX_COMPRESSION_RATIO*1024)/64.0, 1.0);
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: num_active_planes = %d\n", __func__, p->num_active_planes);
+ dml2_printf("DML::%s: rob_buffer_size_kbytes = %d\n", __func__, p->rob_buffer_size_kbytes);
+ dml2_printf("DML::%s: compressed_buffer_size_kbytes = %d\n", __func__, p->compressed_buffer_size_kbytes);
+ dml2_printf("DML::%s: estimated_urg_bandwidth_required_mbps = %f\n", __func__, p->estimated_urg_bandwidth_required_mbps);
+ dml2_printf("DML::%s: estimated_dcfclk_mhz = %f\n", __func__, p->estimated_dcfclk_mhz);
+ dml2_printf("DML::%s: max_Trpd_dcfclk_cycles = %u\n", __func__, s->max_Trpd_dcfclk_cycles);
+#endif
+
+ // calculate the return impact from each plane, request is 256B per dcfclk
+ for (i = 0; i < p->num_active_planes; i++) {
+ s->src_detile_buf_size_bytes_l[i] = p->detile_buffer_size_bytes_l[i];
+ s->src_detile_buf_size_bytes_c[i] = p->detile_buffer_size_bytes_c[i];
+ s->src_swath_bytes_l[i] = p->full_swath_bytes_l[i];
+ s->src_swath_bytes_c[i] = p->full_swath_bytes_c[i];
+
+ if (p->pixel_format[i] == dml2_420_10) {
+ s->src_detile_buf_size_bytes_l[i] = (unsigned int) (s->src_detile_buf_size_bytes_l[i] * 1.5);
+ s->src_detile_buf_size_bytes_c[i] = (unsigned int) (s->src_detile_buf_size_bytes_c[i] * 1.5);
+ s->src_swath_bytes_l[i] = (unsigned int) (s->src_swath_bytes_l[i] * 1.5);
+ s->src_swath_bytes_c[i] = (unsigned int) (s->src_swath_bytes_c[i] * 1.5);
+ }
+
+ s->burst_bytes_to_fill_det = (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_l[i] / p->chunk_bytes_l, 1) * p->chunk_bytes_l);
+ s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_l[i] / p->swath_height_l[i], 1) * s->src_swath_bytes_l[i]);
+
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: i=%u pixel_format = %d\n", __func__, i, p->pixel_format[i]);
+ dml2_printf("DML::%s: i=%u chunk_bytes_l = %d\n", __func__, i, p->chunk_bytes_l);
+ dml2_printf("DML::%s: i=%u lb_source_lines_l = %d\n", __func__, i, p->lb_source_lines_l[i]);
+ dml2_printf("DML::%s: i=%u src_detile_buf_size_bytes_l=%d\n", __func__, i, s->src_detile_buf_size_bytes_l[i]);
+ dml2_printf("DML::%s: i=%u src_swath_bytes_l=%d\n", __func__, i, s->src_swath_bytes_l[i]);
+ dml2_printf("DML::%s: i=%u burst_bytes_to_fill_det=%d (luma)\n", __func__, i, s->burst_bytes_to_fill_det);
+#endif
+
+ if (s->src_swath_bytes_c[i] > 0) { // dual_plane
+ s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_c[i] / p->chunk_bytes_c, 1) * p->chunk_bytes_c);
+
+ if (p->pixel_format[i] == dml2_422_planar_8 || p->pixel_format[i] == dml2_422_planar_10 || p->pixel_format[i] == dml2_422_planar_12) {
+ s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_c[i] / p->swath_height_c[i], 1) * s->src_swath_bytes_c[i]);
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: i=%u chunk_bytes_c = %d\n", __func__, i, p->chunk_bytes_c);
+ dml2_printf("DML::%s: i=%u lb_source_lines_c = %d\n", __func__, i, p->lb_source_lines_c[i]);
+ dml2_printf("DML::%s: i=%u src_detile_buf_size_bytes_c=%d\n", __func__, i, s->src_detile_buf_size_bytes_c[i]);
+ dml2_printf("DML::%s: i=%u src_swath_bytes_c=%d\n", __func__, i, s->src_swath_bytes_c[i]);
+#endif
+ }
+
+ s->time_to_fill_det_us = (double) s->burst_bytes_to_fill_det / (256 * p->estimated_dcfclk_mhz); // fill time assuming a full burst at the request rate
+ s->accumulated_return_path_dcfclk_cycles[i] = (unsigned int) math_ceil2(((DML_MAX_COMPRESSION_RATIO-1) * 64 * p->estimated_dcfclk_mhz) * s->time_to_fill_det_us / 64.0, 1.0); //for 64B per DCFClk
+
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: i=%u burst_bytes_to_fill_det=%d\n", __func__, i, s->burst_bytes_to_fill_det);
+ dml2_printf("DML::%s: i=%u time_to_fill_det_us=%f\n", __func__, i, s->time_to_fill_det_us);
+ dml2_printf("DML::%s: i=%u accumulated_return_path_dcfclk_cycles=%u\n", __func__, i, s->accumulated_return_path_dcfclk_cycles[i]);
+#endif
+ // clamp to the worst-case delay, i.e. the one that occupies the full rob+cdb
+ if (s->accumulated_return_path_dcfclk_cycles[i] > s->max_Trpd_dcfclk_cycles)
+ s->accumulated_return_path_dcfclk_cycles[i] = s->max_Trpd_dcfclk_cycles;
+ }
+
+ // Figure out the impacted prefetch time for each plane
+ // if impacted_Tpre is > equ bw Tpre, we need to fail the prefetch schedule as we need a higher state to support the bw
+ for (i = 0; i < p->num_active_planes; i++) {
+ k = find_max_impact_plane(i, p->num_active_planes, s->accumulated_return_path_dcfclk_cycles); // plane k causes most impact to plane i
+ // the rest of the planes (except for k) compete for bw
+ p->impacted_dst_y_pre[i] = s->accumulated_return_path_dcfclk_cycles[k]/p->estimated_dcfclk_mhz;
+ p->impacted_dst_y_pre[i] += calculate_impacted_Tsw(k, p->num_active_planes, p->prefetch_sw_bytes, p->estimated_urg_bandwidth_required_mbps);
+ p->impacted_dst_y_pre[i] = math_ceil2(p->impacted_dst_y_pre[i] / p->line_time[i], 0.25);
+
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: i=%u impacted_Tpre=%f (k=%u)\n", __func__, i, p->impacted_dst_y_pre[i], k);
+#endif
+ }
+
+ if (p->Tpre_rounded != NULL && p->Tpre_oto != NULL) {
+ for (i = 0; i < p->num_active_planes; i++) {
+ if (p->impacted_dst_y_pre[i] > p->dst_y_prefetch[i]) {
+ s->prefetch_global_check_passed = 0;
+ *p->recalc_prefetch_schedule = 1;
+ }
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: i=%u Tpre_rounded=%f\n", __func__, i, p->Tpre_rounded[i]);
+ dml2_printf("DML::%s: i=%u Tpre_oto=%f\n", __func__, i, p->Tpre_oto[i]);
+#endif
+ }
+ } else {
+ // likely a mode programming call; assume support and no recalc - not used anyway
+ s->prefetch_global_check_passed = 1;
+ *p->recalc_prefetch_schedule = 0;
+ }
+
+#ifdef __DML_VBA_DEBUG__
+ dml2_printf("DML::%s: prefetch_global_check_passed=%u\n", __func__, s->prefetch_global_check_passed);
+ dml2_printf("DML::%s: recalc_prefetch_schedule=%u\n", __func__, *p->recalc_prefetch_schedule);
+#endif
+
+ return s->prefetch_global_check_passed;
+}
+
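// Illustration - a standalone sketch (not part of this patch) of the admissibility decision at
// the end of CheckGlobalPrefetchAdmissibility(): once each plane's impacted prefetch time has
// been converted to lines, the schedule passes only if no plane needs more lines than its
// already-computed dst_y_prefetch; otherwise a recalculation using impacted_dst_y_pre as a floor
// is requested. The arrays below are illustrative.
#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	double impacted_dst_y_pre[3] = { 20.25, 31.50, 18.00 };	/* lines, from return-path impact */
	double dst_y_prefetch[3]     = { 24.00, 30.00, 22.50 };	/* lines, from per-plane schedules */
	bool check_passed = true, recalc = false;

	for (int i = 0; i < 3; i++) {
		if (impacted_dst_y_pre[i] > dst_y_prefetch[i]) {
			check_passed = false;	/* plane i cannot absorb the other planes' traffic */
			recalc = true;		/* redo the per-plane schedules with the impacted values */
		}
	}

	printf("global prefetch check passed = %d, recalc = %d\n", check_passed, recalc);	/* 0, 1 */
	return 0;
}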
static void calculate_peak_bandwidth_required(
struct dml2_core_internal_scratch *s,
struct dml2_core_calcs_calculate_peak_bandwidth_required_params *p)
@@ -6046,7 +6331,7 @@ static void check_urgent_bandwidth_support(
double *frac_urg_bandwidth_nom,
double *frac_urg_bandwidth_mall,
bool *vactive_bandwidth_support_ok, // vactive ok
- bool *bandwidth_support_ok, // max of vm, prefetch, vactive all ok
+ bool *bandwidth_support_ok,// max of vm, prefetch, vactive all ok
unsigned int mall_allocated_for_dcn_mbytes,
double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
@@ -6116,7 +6401,6 @@ static void check_urgent_bandwidth_support(
}
}
#endif
-
}
static double get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state,
@@ -6438,7 +6722,7 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
p->Watermark->Z8StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
p->Watermark->Z8StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
}
- p->Watermark->g6_temp_read_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark;
+ p->Watermark->temp_read_or_ppt_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark;
#ifdef __DML_VBA_DEBUG__
dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency);
@@ -6454,12 +6738,12 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
dml2_printf("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark);
dml2_printf("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark);
dml2_printf("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark);
- dml2_printf("DML::%s: g6_temp_read_watermark_us = %f\n", __func__, p->Watermark->g6_temp_read_watermark_us);
+ dml2_printf("DML::%s: temp_read_or_ppt_watermark_us = %f\n", __func__, p->Watermark->temp_read_or_ppt_watermark_us);
#endif
s->TotalActiveWriteback = 0;
for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
- if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) {
+ if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
s->TotalActiveWriteback = s->TotalActiveWriteback + 1;
}
}
@@ -6522,7 +6806,7 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
s->LBLatencyHidingSourceLinesC[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthC[k] / math_max2(h_ratio_c, 1.0)), 1)) - (v_taps_c - 1));
#ifdef __DML_VBA_DEBUG__
- dml2_printf("DML::%s: k=%u, MaxLineBufferLines= %u\n", __func__, k, p->MaxLineBufferLines);
+ dml2_printf("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines);
dml2_printf("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize);
dml2_printf("DML::%s: k=%u, LBBitPerPixel = %u\n", __func__, k, LBBitPerPixel);
dml2_printf("DML::%s: k=%u, HRatio = %f\n", __func__, k, h_ratio);
@@ -6563,7 +6847,7 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->DRAMClockChangeWatermark;
s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->FCLKChangeWatermark;
s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark;
- s->g6_temp_read_latency_margin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->g6_temp_read_watermark_us;
+ s->g6_temp_read_latency_margin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->temp_read_or_ppt_watermark_us;
if (p->VActiveLatencyHidingMargin)
p->VActiveLatencyHidingMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k];
@@ -6571,9 +6855,12 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
if (p->VActiveLatencyHidingUs)
p->VActiveLatencyHidingUs[k] = s->ActiveClockChangeLatencyHiding;
- if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.enable) {
- s->WritebackLatencyHiding = (double)p->WritebackInterfaceBufferSize * 1024.0 / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height * (double)h_total / pixel_clock_mhz) * 4.0);
- if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format == dml2_444_64) {
+ if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+ s->WritebackLatencyHiding = (double)p->WritebackInterfaceBufferSize * 1024.0
+ / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
+ * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
+ / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height * (double)h_total / pixel_clock_mhz) * 4.0);
+ if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) {
s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2;
}
s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark;
@@ -6588,36 +6875,36 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
uclk_pstate_change_strategy = p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy;
reserved_vblank_time_us = (double)p->display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns / 1000;
- p->FCLKChangeSupport[k] = dml2_fclock_change_unsupported;
+ p->FCLKChangeSupport[k] = dml2_pstate_change_unsupported;
if (s->ActiveFCLKChangeLatencyMargin[k] > 0)
- p->FCLKChangeSupport[k] = dml2_fclock_change_vactive;
+ p->FCLKChangeSupport[k] = dml2_pstate_change_vactive;
else if (reserved_vblank_time_us >= p->mmSOCParameters.FCLKChangeLatency)
- p->FCLKChangeSupport[k] = dml2_fclock_change_vblank;
+ p->FCLKChangeSupport[k] = dml2_pstate_change_vblank;
- if (p->FCLKChangeSupport[k] == dml2_fclock_change_unsupported)
+ if (p->FCLKChangeSupport[k] == dml2_pstate_change_unsupported)
*p->global_fclk_change_supported = false;
- p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_unsupported;
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_unsupported;
if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto) {
if (p->display_cfg->overrides.all_streams_blanked ||
(s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency))
- p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank_and_vactive;
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank_and_vactive;
else if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0)
- p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vactive;
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive;
else if (reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)
- p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank;
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank;
} else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vactive && s->ActiveDRAMClockChangeLatencyMargin[k] > 0)
- p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vactive;
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive;
else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vblank && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)
- p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank;
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank;
else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_drr)
- p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_drr;
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_drr;
else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_svp)
- p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_mall_svp;
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_svp;
else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)
- p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_mall_full_frame;
+ p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_full_frame;
- if (p->DRAMClockChangeSupport[k] == dml2_dram_clock_change_unsupported)
+ if (p->DRAMClockChangeSupport[k] == dml2_pstate_change_unsupported)
*p->global_dram_clock_change_supported = false;
s->dst_y_pstate = (unsigned int)(math_ceil2((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (h_total / pixel_clock_mhz), 1));
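The renamed dml2_pstate_change_* values follow the same decision ladder for both the FCLK and DRAM clock change paths; below is a minimal sketch of the FCLK branch, using a local stand-in enum rather than the real dml2 types (the DRAM branch layers the uclk_pstate_change_strategy overrides shown above on top of the same ladder).

enum pstate_method {
	PSTATE_UNSUPPORTED,
	PSTATE_VACTIVE,
	PSTATE_VBLANK,
};

/* Illustrative only: vactive wins if the active latency margin is positive,
 * otherwise vblank if the reserved vblank time covers the change latency. */
static enum pstate_method classify_fclk_pstate_change(double active_margin_us,
						       double reserved_vblank_us,
						       double change_latency_us)
{
	if (active_margin_us > 0)
		return PSTATE_VACTIVE;
	if (reserved_vblank_us >= change_latency_us)
		return PSTATE_VBLANK;
	return PSTATE_UNSUPPORTED;
}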
@@ -6915,8 +7202,7 @@ struct dml2_core_internal_g6_temp_read_blackouts_table {
} entries[DML_MAX_CLK_TABLE_SIZE];
};
-static const struct dml2_core_internal_g6_temp_read_blackouts_table
- core_dcn4_g6_temp_read_blackout_table = {
+struct dml2_core_internal_g6_temp_read_blackouts_table core_dcn4_g6_temp_read_blackout_table = {
.entries = {
{
.uclk_khz = 96000,
@@ -7036,6 +7322,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
+#ifdef DML_GLOBAL_PREFETCH_CHECK
+ struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params;
+#endif
struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params;
struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params;
struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params;
@@ -7083,12 +7372,6 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
for (k = 0; k < mode_lib->ms.num_active_planes; k++)
dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
-
- // dml2_printf_dml_policy(&mode_lib->ms.policy);
- // dml2_printf_dml_display_cfg_timing(&display_cfg->timing, mode_lib->ms.num_active_planes);
- // dml2_printf_dml_display_cfg_plane(&display_cfg->plane, mode_lib->ms.num_active_planes);
- // dml2_printf_dml_display_cfg_surface(&display_cfg->surface, mode_lib->ms.num_active_planes);
- // dml2_printf_dml_display_cfg_output(&display_cfg->output, mode_lib->ms.num_active_planes);
#endif
CalculateMaxDETAndMinCompressedBufferSize(
@@ -7183,8 +7466,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
}
for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
- mode_lib->ms.SurfaceReadBandwidthLuma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
- mode_lib->ms.SurfaceReadBandwidthChroma[k] = mode_lib->ms.SwathWidthCSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+ mode_lib->ms.vactive_sw_bw_l[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+ mode_lib->ms.vactive_sw_bw_c[k] = mode_lib->ms.SwathWidthCSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
mode_lib->ms.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width *
display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000));
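A flattened sketch of the per-plane vactive bandwidth term that is being renamed here (SurfaceReadBandwidthLuma/Chroma to vactive_sw_bw_l/c), in bytes per microsecond. Bytes-per-pixel is rounded up to a granularity of 1.0 for luma and 2.0 for chroma, assuming math_ceil2() rounds up to the given multiple; parameter names below are illustrative, not the mode_lib fields.

#include <math.h>

static double ceil_to_multiple(double value, double granularity)
{
	return ceil(value / granularity) * granularity;
}

/* Illustrative only: swath width times (rounded) bytes per pixel, divided by
 * the line time, scaled by the plane's vertical scaling ratio. */
static double vactive_sw_bw(double swath_width_px, double bytes_per_pixel,
			    double bpp_granularity, double h_total,
			    double pixel_clock_khz, double v_ratio)
{
	double line_time_us = h_total / (pixel_clock_khz / 1000.0);

	return swath_width_px * ceil_to_multiple(bytes_per_pixel, bpp_granularity) /
	       line_time_us * v_ratio;
}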
@@ -7194,35 +7477,35 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
old_ReadBandwidthChroma = mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0;
dml2_printf("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, old_ReadBandwidthLuma);
dml2_printf("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, old_ReadBandwidthChroma);
- dml2_printf("DML::%s: k=%u, ReadBandwidthLuma = %f\n", __func__, k, mode_lib->ms.SurfaceReadBandwidthLuma[k]);
- dml2_printf("DML::%s: k=%u, ReadBandwidthChroma = %f\n", __func__, k, mode_lib->ms.SurfaceReadBandwidthChroma[k]);
+ dml2_printf("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_l[k]);
+ dml2_printf("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_c[k]);
#endif
}
// Writeback bandwidth
for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
- if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format == dml2_444_64) {
- mode_lib->ms.WriteBandwidth[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height
- * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width
- / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) {
+ mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
+ * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
+ / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height
* display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total
/ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8.0;
- } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) {
- mode_lib->ms.WriteBandwidth[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height
- * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width
- / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height
+ } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+ mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
+ * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
+ / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height
* display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total
/ ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4.0;
} else {
- mode_lib->ms.WriteBandwidth[k] = 0.0;
+ mode_lib->ms.WriteBandwidth[k][0] = 0.0;
}
}
/*Writeback Latency support check*/
mode_lib->ms.support.WritebackLatencySupport = true;
for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
- if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true &&
- (mode_lib->ms.WriteBandwidth[k] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / ((double)mode_lib->soc.qos_parameters.writeback.base_latency_us))) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 &&
+ (mode_lib->ms.WriteBandwidth[k][0] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / ((double)mode_lib->soc.qos_parameters.writeback.base_latency_us))) {
mode_lib->ms.support.WritebackLatencySupport = false;
}
}
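The two hunks above switch the writeback bandwidth and latency check from the single writeback.enable/scaling_info fields to active_writebacks_per_stream and writeback_stream[0]. A standalone sketch of the combined check, with flattened parameters (not the descriptor fields): the writeback is supportable only if its write bandwidth can be absorbed by the interface buffer within the base writeback latency.

#include <stdbool.h>

/* Illustrative only: bytes_per_pixel is 8.0 for dml2_444_64 and 4.0 otherwise,
 * matching the two branches of the WriteBandwidth computation above. */
static bool writeback_latency_supported(double output_width, double output_height,
					double input_height, double h_total,
					double pixel_clock_khz, double bytes_per_pixel,
					double wb_buffer_size_kbytes,
					double wb_base_latency_us)
{
	double input_line_time_us = h_total / (pixel_clock_khz / 1000.0);
	double write_bw = output_width * output_height /
			  (input_height * input_line_time_us) * bytes_per_pixel;

	return write_bw <= wb_buffer_size_kbytes * 1024.0 / wb_base_latency_us;
}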
@@ -7231,19 +7514,19 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
/* Writeback Scale Ratio and Taps Support Check */
mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true;
for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
- if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) {
- if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio > mode_lib->ip.writeback_max_hscl_ratio
- || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio > mode_lib->ip.writeback_max_vscl_ratio
- || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio < mode_lib->ip.writeback_min_hscl_ratio
- || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio < mode_lib->ip.writeback_min_vscl_ratio
- || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps > (unsigned int) mode_lib->ip.writeback_max_hscl_taps
- || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps > (unsigned int) mode_lib->ip.writeback_max_vscl_taps
- || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps
- || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps
- || (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps > 2.0 && ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps % 2) == 1))) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > mode_lib->ip.writeback_max_hscl_ratio
+ || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > mode_lib->ip.writeback_max_vscl_ratio
+ || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio < mode_lib->ip.writeback_min_hscl_ratio
+ || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio < mode_lib->ip.writeback_min_vscl_ratio
+ || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > (unsigned int) mode_lib->ip.writeback_max_hscl_taps
+ || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps > (unsigned int) mode_lib->ip.writeback_max_vscl_taps
+ || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps
+ || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps
+ || (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > 2.0 && ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps % 2) == 1))) {
mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
}
- if (2.0 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps - 1) * 57 > mode_lib->ip.writeback_line_buffer_buffer_size) {
+ if (2.0 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height * (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps - 1) * 57 > mode_lib->ip.writeback_line_buffer_buffer_size) {
mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
}
}
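A compact sketch of the scale ratio/taps limits being re-pointed at writeback_stream[0] above, with flattened arguments; the constant 57 in the line buffer term is carried over from the original expression as-is.

#include <stdbool.h>

/* Illustrative only: rejects out-of-range scaling ratios and tap counts,
 * odd horizontal tap counts above 2, and configurations whose vertical taps
 * exceed the writeback line buffer capacity. */
static bool writeback_scaler_supported(double h_ratio, double v_ratio,
				       unsigned int h_taps, unsigned int v_taps,
				       double max_hscl_ratio, double min_hscl_ratio,
				       double max_vscl_ratio, double min_vscl_ratio,
				       unsigned int max_hscl_taps, unsigned int max_vscl_taps,
				       double output_height, double line_buffer_size)
{
	if (h_ratio > max_hscl_ratio || h_ratio < min_hscl_ratio ||
	    v_ratio > max_vscl_ratio || v_ratio < min_vscl_ratio ||
	    h_taps > max_hscl_taps || v_taps > max_vscl_taps ||
	    h_ratio > h_taps || v_ratio > v_taps ||
	    (h_taps > 2 && (h_taps % 2) == 1))
		return false;

	if (2.0 * output_height * (v_taps - 1) * 57 > line_buffer_size)
		return false;

	return true;
}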
@@ -7423,8 +7706,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
- CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.SurfaceReadBandwidthLuma;
- CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.SurfaceReadBandwidthChroma;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.vactive_sw_bw_l;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.vactive_sw_bw_c;
CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
@@ -7671,16 +7954,16 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
//DISPCLK/DPPCLK
mode_lib->ms.WritebackRequiredDISPCLK = 0;
for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
- if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
mode_lib->ms.WritebackRequiredDISPCLK = math_max2(mode_lib->ms.WritebackRequiredDISPCLK,
- CalculateWriteBackDISPCLK(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format,
+ CalculateWriteBackDISPCLK(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
- display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio,
- display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio,
- display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps,
- display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps,
- display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_width,
- display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_width,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
mode_lib->ip.writeback_line_buffer_buffer_size));
}
@@ -7712,7 +7995,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
if (!s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) {
s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1;
- if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true)
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0)
s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1;
s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1;
@@ -8256,23 +8539,23 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
mode_lib->ms.PSCL_FACTOR,
mode_lib->ms.PSCL_FACTOR_CHROMA,
mode_lib->ms.RequiredDPPCLK,
- mode_lib->ms.SurfaceReadBandwidthLuma,
- mode_lib->ms.SurfaceReadBandwidthChroma,
+ mode_lib->ms.vactive_sw_bw_l,
+ mode_lib->ms.vactive_sw_bw_c,
mode_lib->soc.return_bus_width_bytes,
/* Output */
&mode_lib->ms.dcfclk_deepsleep);
for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
- if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
mode_lib->ms.WritebackDelayTime[k] = mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay(
- display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format,
- display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio,
- display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio,
- display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps,
- display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width,
- display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height,
- display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height,
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->ms.RequiredDISPCLK;
} else {
mode_lib->ms.WritebackDelayTime[k] = 0.0;
@@ -8349,7 +8632,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
dml2_printf("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK);
dml2_printf("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
dml2_printf("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz);
- dml2_printf("DML::%s: urgent latency tolerance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)));
+ dml2_printf("DML::%s: urgent latency tolarance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)));
#endif
mode_lib->ms.support.OutstandingRequestsSupport = true;
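The debug print in the hunk above reports an urgent latency tolerance derived purely from buffering and return bandwidth; a flattened sketch of that term (in microseconds), with illustrative parameter names:

/* Illustrative only: roughly, how long the ROB (less one pixel chunk) can
 * absorb return data arriving at the full DCFCLK return rate. */
static double urgent_latency_tolerance_us(double rob_buffer_size_kbytes,
					  double pixel_chunk_size_kbytes,
					  double dcfclk_mhz,
					  double return_bus_width_bytes)
{
	return (rob_buffer_size_kbytes - pixel_chunk_size_kbytes) * 1024.0 /
	       (dcfclk_mhz * return_bus_width_bytes);
}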
@@ -8367,6 +8650,13 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
+ mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
* (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
+ mode_lib->ms.support.max_non_urgent_latency_us
+ = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles
+ / mode_lib->ms.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0)
+ + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock
+ + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock
+ * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0);
+
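The new max_non_urgent_latency_us term added above combines the per-DPM UMC latency with the MALL overhead and the worst-case fabric round trip; a flattened sketch, with parameter names standing in for the qos_params.dcn4x fields:

/* Illustrative only: uclk-cycle and fclk-cycle terms converted to microseconds
 * (clocks in MHz), each padded by its latency margin percentage. */
static double max_non_urgent_latency_us(double max_non_urgent_uclk_cycles,
					double uclk_freq_mhz,
					double umc_max_latency_margin_pct,
					double mall_overhead_fclk_cycles,
					double max_round_trip_fclk_cycles,
					double fabric_clock_mhz,
					double fabric_max_latency_margin_pct)
{
	return max_non_urgent_uclk_cycles / uclk_freq_mhz *
		       (1.0 + umc_max_latency_margin_pct / 100.0) +
	       mall_overhead_fclk_cycles / fabric_clock_mhz +
	       max_round_trip_fclk_cycles / fabric_clock_mhz *
		       (1.0 + fabric_max_latency_margin_pct / 100.0);
}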
for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
@@ -8408,7 +8698,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
}
memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params));
- if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0 || mode_lib->ip.dcn_mrq_present) {
+ if (mode_lib->soc.mcache_size_bytes == 0 || mode_lib->ip.dcn_mrq_present) {
for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
mode_lib->ms.mall_prefetch_sdp_overhead_factor[k] = 1.0;
mode_lib->ms.mall_prefetch_dram_overhead_factor[k] = 1.0;
@@ -8515,8 +8805,11 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
display_cfg->hostvm_enable,
mode_lib->ms.MaxDCFCLK,
mode_lib->ms.MaxFabricClock,
+#ifdef DML_MODE_SUPPORT_USE_DPM_DRAM_BW
+ mode_lib->ms.dram_bw_mbps);
+#else
mode_lib->ms.max_dram_bw_mbps);
-
+#endif
// Average BW support check
calculate_avg_bandwidth_required(
@@ -8524,8 +8817,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
// input
display_cfg,
mode_lib->ms.num_active_planes,
- mode_lib->ms.SurfaceReadBandwidthLuma,
- mode_lib->ms.SurfaceReadBandwidthChroma,
+ mode_lib->ms.vactive_sw_bw_l,
+ mode_lib->ms.vactive_sw_bw_c,
mode_lib->ms.cursor_bw,
mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
@@ -8595,6 +8888,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
+ calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k];
calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
@@ -8638,9 +8932,32 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
&mode_lib->ms.ExtraLatency_sr,
&mode_lib->ms.ExtraLatencyPrefetch);
- {
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++)
+ s->impacted_dst_y_pre[k] = 0;
+
+ s->recalc_prefetch_schedule = 0;
+ s->recalc_prefetch_done = 0;
+ do {
mode_lib->ms.support.PrefetchSupported = true;
+
for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+ s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format;
+
+ s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
+ mode_lib->ms.NoOfDPP[k],
+ display_cfg->plane_descriptors[k].composition.viewport.plane0.width,
+ display_cfg->plane_descriptors[k].composition.viewport.plane0.height,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
+ display_cfg->plane_descriptors[k].composition.rotation_angle);
+
+ s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
+ mode_lib->ms.NoOfDPP[k],
+ display_cfg->plane_descriptors[k].composition.viewport.plane1.width,
+ display_cfg->plane_descriptors[k].composition.viewport.plane1.height,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
+ display_cfg->plane_descriptors[k].composition.rotation_angle);
+
struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe;
mode_lib->ms.TWait[k] = CalculateTWait(
@@ -8730,6 +9047,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present;
CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k];
CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k];
+ CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k];
+ CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->ms.vactive_sw_bw_l[k];
+ CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->ms.vactive_sw_bw_c[k];
// output
CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k];
@@ -8758,6 +9078,10 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1];
CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2];
CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k];
+ CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k];
+ CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k];
+ CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k];
+ CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->prefetch_swath_time_us[k];
mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
@@ -8766,6 +9090,27 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
dml2_printf("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank);
} // for k num_planes
+ CalculateDCFCLKDeepSleepTdlut(
+ display_cfg,
+ mode_lib->ms.num_active_planes,
+ mode_lib->ms.BytePerPixelY,
+ mode_lib->ms.BytePerPixelC,
+ mode_lib->ms.SwathWidthY,
+ mode_lib->ms.SwathWidthC,
+ mode_lib->ms.NoOfDPP,
+ mode_lib->ms.PSCL_FACTOR,
+ mode_lib->ms.PSCL_FACTOR_CHROMA,
+ mode_lib->ms.RequiredDPPCLK,
+ mode_lib->ms.vactive_sw_bw_l,
+ mode_lib->ms.vactive_sw_bw_c,
+ mode_lib->soc.return_bus_width_bytes,
+ mode_lib->ms.RequiredDISPCLK,
+ s->tdlut_bytes_to_deliver,
+ s->prefetch_swath_time_us,
+
+ /* Output */
+ &mode_lib->ms.dcfclk_deepsleep);
+
for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
if (mode_lib->ms.dst_y_prefetch[k] < 2.0
|| mode_lib->ms.LinesForVM[k] >= 32.0
@@ -8789,7 +9134,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
}
mode_lib->ms.support.VRatioInPrefetchSupported = true;
- for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ ||
mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) {
mode_lib->ms.support.VRatioInPrefetchSupported = false;
@@ -8799,10 +9144,14 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
}
}
+ mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.VRatioInPrefetchSupported;
+
+ // By default, do not recalc prefetch schedule
+ s->recalc_prefetch_schedule = 0;
+
// Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok
if (mode_lib->ms.support.PrefetchSupported) {
- for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
- double line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
// Calculate Urgent burst factor for prefetch
#ifdef __DML_VBA_DEBUG__
dml2_printf("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k);
@@ -8815,7 +9164,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
mode_lib->ms.swath_width_chroma_ub[k],
mode_lib->ms.SwathHeightY[k],
mode_lib->ms.SwathHeightC[k],
- line_time_us,
+ s->line_times[k],
mode_lib->ms.UrgLatency,
mode_lib->ms.VRatioPreY[k],
mode_lib->ms.VRatioPreC[k],
@@ -8852,8 +9201,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor;
calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor;
- calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.SurfaceReadBandwidthLuma;
- calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.SurfaceReadBandwidthChroma;
+ calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l;
+ calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c;
calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
@@ -8899,127 +9248,164 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
}
}
+#ifdef DML_GLOBAL_PREFETCH_CHECK
+ if (mode_lib->ms.support.PrefetchSupported && mode_lib->ms.num_active_planes > 1 && s->recalc_prefetch_done == 0) {
+ CheckGlobalPrefetchAdmissibility_params->num_active_planes = mode_lib->ms.num_active_planes;
+ CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format;
+ CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
+ CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
+ CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l;
+ CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c;
+ CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->ms.SwathHeightY;
+ CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->ms.SwathHeightC;
+ CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
+ CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->ms.CompressedBufferSizeInkByte;
+ CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->ms.DETBufferSizeY;
+ CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->ms.DETBufferSizeC;
+ CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l;
+ CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c;
+ CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes;
+ CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = s->Tpre_rounded;
+ CheckGlobalPrefetchAdmissibility_params->Tpre_oto = s->Tpre_oto;
+ CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->ms.support.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
+ CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times;
+ CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->ms.dst_y_prefetch;
+ if (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps < 10 * 1024)
+ CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = 10 * 1024;
+
+ CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps / (double) mode_lib->soc.return_bus_width_bytes) /
+ ((double)mode_lib->soc.qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0);
+
+ // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible
+ CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->recalc_prefetch_schedule;
+ CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre;
+ mode_lib->ms.support.PrefetchSupported = CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params);
+ s->recalc_prefetch_done = 1;
+ s->recalc_prefetch_schedule = 1;
+ }
+#endif
+ } // prefetch schedule ok, do urg bw and flip schedule
+ } while (s->recalc_prefetch_schedule);
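This restructuring wraps the per-plane prefetch scheduling in a do/while loop: when DML_GLOBAL_PREFETCH_CHECK is defined and CheckGlobalPrefetchAdmissibility() asks for a recalculation, the schedule is recomputed once with the impacted dst_y_pre values it produced. A flattened sketch of the bandwidth-to-DCFCLK estimate fed into that check (the 10 GB/s floor and the DCFCLK derate come from the lines above; names are illustrative):

/* Illustrative only: floor the estimated urgent SDP bandwidth at 10 * 1024
 * MB/s, then convert it to an estimated DCFCLK using the return bus width
 * and the system-active urgent DCFCLK derate percentage. */
static double estimate_dcfclk_mhz(double urg_bandwidth_required_mbps,
				  double return_bus_width_bytes,
				  double dcfclk_derate_percent)
{
	if (urg_bandwidth_required_mbps < 10.0 * 1024.0)
		urg_bandwidth_required_mbps = 10.0 * 1024.0;

	return (urg_bandwidth_required_mbps / return_bus_width_bytes) /
	       (dcfclk_derate_percent / 100.0);
}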
- // Both prefetch schedule and BW okay
- if (mode_lib->ms.support.PrefetchSupported == true && mode_lib->ms.support.VRatioInPrefetchSupported == true) {
- mode_lib->ms.BandwidthAvailableForImmediateFlip =
- get_bandwidth_available_for_immediate_flip(
- dml2_core_internal_soc_state_sys_active,
- mode_lib->ms.support.urg_bandwidth_required_qual, // no flip
- mode_lib->ms.support.urg_bandwidth_available);
-
- mode_lib->ms.TotImmediateFlipBytes = 0;
- for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
- if (display_cfg->plane_descriptors[k].immediate_flip) {
- s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(
- s->HostVMInefficiencyFactor,
- mode_lib->ms.vm_bytes[k],
- mode_lib->ms.DPTEBytesPerRow[k],
- mode_lib->ms.meta_row_bytes[k]);
- } else {
- s->per_pipe_flip_bytes[k] = 0;
- }
- mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k];
+ // Flip Schedule
+ // Both prefetch schedule and BW okay
+ if (mode_lib->ms.support.PrefetchSupported == true) {
+ mode_lib->ms.BandwidthAvailableForImmediateFlip =
+ get_bandwidth_available_for_immediate_flip(
+ dml2_core_internal_soc_state_sys_active,
+ mode_lib->ms.support.urg_bandwidth_required_qual, // no flip
+ mode_lib->ms.support.urg_bandwidth_available);
- }
+ mode_lib->ms.TotImmediateFlipBytes = 0;
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ if (display_cfg->plane_descriptors[k].immediate_flip) {
+ s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(
+ s->HostVMInefficiencyFactor,
+ mode_lib->ms.vm_bytes[k],
+ mode_lib->ms.DPTEBytesPerRow[k],
+ mode_lib->ms.meta_row_bytes[k]);
+ } else {
+ s->per_pipe_flip_bytes[k] = 0;
+ }
+ mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k];
- for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
- CalculateFlipSchedule(
- &mode_lib->scratch,
- display_cfg->plane_descriptors[k].immediate_flip,
- 1, // use_lb_flip_bw
- s->HostVMInefficiencyFactor,
- s->Tvm_trips_flip[k],
- s->Tr0_trips_flip[k],
- s->Tvm_trips_flip_rounded[k],
- s->Tr0_trips_flip_rounded[k],
- display_cfg->gpuvm_enable,
- mode_lib->ms.vm_bytes[k],
- mode_lib->ms.DPTEBytesPerRow[k],
- mode_lib->ms.BandwidthAvailableForImmediateFlip,
- mode_lib->ms.TotImmediateFlipBytes,
- display_cfg->plane_descriptors[k].pixel_format,
- (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)),
- display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
- display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
- mode_lib->ms.Tno_bw_flip[k],
- mode_lib->ms.dpte_row_height[k],
- mode_lib->ms.dpte_row_height_chroma[k],
- mode_lib->ms.use_one_row_for_frame_flip[k],
- mode_lib->ip.max_flip_time_us,
- mode_lib->ip.max_flip_time_lines,
- s->per_pipe_flip_bytes[k],
- mode_lib->ms.meta_row_bytes[k],
- s->meta_row_height_luma[k],
- s->meta_row_height_chroma[k],
- mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable,
-
- /* Output */
- &mode_lib->ms.dst_y_per_vm_flip[k],
- &mode_lib->ms.dst_y_per_row_flip[k],
- &mode_lib->ms.final_flip_bw[k],
- &mode_lib->ms.ImmediateFlipSupportedForPipe[k]);
- }
+ }
- calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw;
- calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip;
- calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw;
- calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip;
- calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
- calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw;
-
- calculate_peak_bandwidth_params->display_cfg = display_cfg;
- calculate_peak_bandwidth_params->inc_flip_bw = 1;
- calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes;
- calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP;
- calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0;
- calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1;
- calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0;
- calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1;
- calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor;
- calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor;
-
- calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.SurfaceReadBandwidthLuma;
- calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.SurfaceReadBandwidthChroma;
- calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
- calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
- calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
- calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
- calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
- calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
- calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw;
- calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw;
- calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw;
- calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw;
- calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma;
- calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma;
- calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor;
- calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre;
- calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre;
- calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre;
-
- calculate_peak_bandwidth_required(
- &mode_lib->scratch,
- calculate_peak_bandwidth_params);
-
- calculate_immediate_flip_bandwidth_support(
- &s->dummy_single[0], // double* frac_urg_bandwidth_flip
- &mode_lib->ms.support.ImmediateFlipSupport,
-
- dml2_core_internal_soc_state_sys_active,
- mode_lib->ms.support.urg_bandwidth_required_flip,
- mode_lib->ms.support.non_urg_bandwidth_required_flip,
- mode_lib->ms.support.urg_bandwidth_available);
-
- for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
- if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false)
- mode_lib->ms.support.ImmediateFlipSupport = false;
- }
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
+ CalculateFlipSchedule(
+ &mode_lib->scratch,
+ display_cfg->plane_descriptors[k].immediate_flip,
+ 1, // use_lb_flip_bw
+ s->HostVMInefficiencyFactor,
+ s->Tvm_trips_flip[k],
+ s->Tr0_trips_flip[k],
+ s->Tvm_trips_flip_rounded[k],
+ s->Tr0_trips_flip_rounded[k],
+ display_cfg->gpuvm_enable,
+ mode_lib->ms.vm_bytes[k],
+ mode_lib->ms.DPTEBytesPerRow[k],
+ mode_lib->ms.BandwidthAvailableForImmediateFlip,
+ mode_lib->ms.TotImmediateFlipBytes,
+ display_cfg->plane_descriptors[k].pixel_format,
+ (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)),
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
+ mode_lib->ms.Tno_bw_flip[k],
+ mode_lib->ms.dpte_row_height[k],
+ mode_lib->ms.dpte_row_height_chroma[k],
+ mode_lib->ms.use_one_row_for_frame_flip[k],
+ mode_lib->ip.max_flip_time_us,
+ mode_lib->ip.max_flip_time_lines,
+ s->per_pipe_flip_bytes[k],
+ mode_lib->ms.meta_row_bytes[k],
+ s->meta_row_height_luma[k],
+ s->meta_row_height_chroma[k],
+ mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable,
+
+ /* Output */
+ &mode_lib->ms.dst_y_per_vm_flip[k],
+ &mode_lib->ms.dst_y_per_row_flip[k],
+ &mode_lib->ms.final_flip_bw[k],
+ &mode_lib->ms.ImmediateFlipSupportedForPipe[k]);
+ }
+
+ calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw;
+ calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip;
+ calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw;
+ calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip;
+ calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
+ calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw;
+
+ calculate_peak_bandwidth_params->display_cfg = display_cfg;
+ calculate_peak_bandwidth_params->inc_flip_bw = 1;
+ calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes;
+ calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP;
+ calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0;
+ calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1;
+ calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0;
+ calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1;
+ calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor;
+ calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor;
+
+ calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l;
+ calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c;
+ calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
+ calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
+ calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
+ calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
+ calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
+ calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
+ calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw;
+ calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw;
+ calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw;
+ calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw;
+ calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma;
+ calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma;
+ calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor;
+ calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre;
+ calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre;
+ calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre;
+
+ calculate_peak_bandwidth_required(
+ &mode_lib->scratch,
+ calculate_peak_bandwidth_params);
- } else { // if prefetch not support, assume iflip is not supported too
+ calculate_immediate_flip_bandwidth_support(
+ &s->dummy_single[0], // double* frac_urg_bandwidth_flip
+ &mode_lib->ms.support.ImmediateFlipSupport,
+
+ dml2_core_internal_soc_state_sys_active,
+ mode_lib->ms.support.urg_bandwidth_required_flip,
+ mode_lib->ms.support.non_urg_bandwidth_required_flip,
+ mode_lib->ms.support.urg_bandwidth_available);
+
+ for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false)
mode_lib->ms.support.ImmediateFlipSupport = false;
- }
- } // prefetch schedule
+ }
+
+ } else { // if prefetch is not supported, assume iflip is not supported too
+ mode_lib->ms.support.ImmediateFlipSupport = false;
}
s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency;
@@ -9116,8 +9502,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
s->pstate_bytes_required_c,
mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
- mode_lib->ms.SurfaceReadBandwidthLuma,
- mode_lib->ms.SurfaceReadBandwidthChroma,
+ mode_lib->ms.vactive_sw_bw_l,
+ mode_lib->ms.vactive_sw_bw_c,
mode_lib->ms.surface_avg_vactive_required_bw,
mode_lib->ms.surface_peak_required_bw,
/* outputs */
@@ -9187,12 +9573,12 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out
dml2_printf("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport);
dml2_printf("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
- for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[k];
mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[k];
}
- for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
+ for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMMode[k];
mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k];
mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k];
@@ -9229,7 +9615,7 @@ unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support
dml2_printf("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index);
for (unsigned int k = 0; k < in_out_params->in_display_cfg->num_planes; k++)
- dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
+ dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
dml2_printf("DML::%s: ------------- DONE ----------\n", __func__);
@@ -9882,7 +10268,7 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc
if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
if (!l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index]) {
- if (p->display_cfg->stream_descriptors[k].writeback.enable)
+ if (p->display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0)
l->TotalActiveWriteback = l->TotalActiveWriteback + 1;
if (TotalNumberOfActiveOTG == 0) { // first otg
@@ -9984,6 +10370,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
struct dml2_core_calcs_CalculateStutterEfficiency_params *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params;
struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
+ struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params;
struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params;
struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params;
struct dml2_core_shared_CalculateMetaAndPTETimes_params *CalculateMetaAndPTETimes_params = &mode_lib->scratch.CalculateMetaAndPTETimes_params;
@@ -10075,12 +10462,6 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
dml2_assert(s->SOCCLK > 0);
#ifdef __DML_VBA_DEBUG__
- // dml2_printf_dml_display_cfg_timing(&display_cfg->timing, s->num_active_planes);
- // dml2_printf_dml_display_cfg_plane(&display_cfg->plane, s->num_active_planes);
- // dml2_printf_dml_display_cfg_surface(&display_cfg->surface, s->num_active_planes);
- // dml2_printf_dml_display_cfg_output(&display_cfg->output, s->num_active_planes);
- // dml2_printf_dml_display_cfg_hw_resource(&display_cfg->hw, s->num_active_planes);
-
dml2_printf("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes);
dml2_printf("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes);
dml2_printf("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk);
@@ -10198,10 +10579,10 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
for (k = 0; k < s->num_active_planes; ++k) {
mode_lib->mp.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 /
((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000));
- mode_lib->mp.SurfaceReadBandwidthLuma[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
- mode_lib->mp.SurfaceReadBandwidthChroma[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
- dml2_printf("DML::%s: ReadBandwidthSurfaceLuma[%i] = %fBps\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthLuma[k]);
- dml2_printf("DML::%s: ReadBandwidthSurfaceChroma[%i] = %fBps\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthChroma[k]);
+ mode_lib->mp.vactive_sw_bw_l[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
+ mode_lib->mp.vactive_sw_bw_c[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
+ dml2_printf("DML::%s: vactive_sw_bw_l[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]);
+ dml2_printf("DML::%s: vactive_sw_bw_c[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]);
}
CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg;
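The renamed vactive_sw_bw_l/vactive_sw_bw_c terms above carry the same quantity the old SurfaceReadBandwidth names did: per-surface read bandwidth during vertical active. A minimal standalone C sketch of that expression, with hypothetical scalar parameters standing in for the mode_lib/display_cfg fields:

/* Illustrative only: per-plane vactive read bandwidth as computed above.
 * line time = h_total / pixel clock (MHz); units follow the DML expression
 * this mirrors. */
static double vactive_sw_bw(double swath_width_single_dpp, double byte_per_pixel,
			    double h_total, double pixel_clock_khz, double v_ratio)
{
	double line_time = h_total / (pixel_clock_khz / 1000.0);

	return swath_width_single_dpp * byte_per_pixel / line_time * v_ratio;
}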
@@ -10217,8 +10598,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
CalculateSwathAndDETConfiguration_params->nomDETInKByte = s->NomDETInKByte;
CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
- CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->mp.SurfaceReadBandwidthLuma;
- CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->mp.SurfaceReadBandwidthChroma;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->mp.vactive_sw_bw_l;
+ CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->mp.vactive_sw_bw_c;
CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0];
CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1];
CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->mp.Read256BlockHeightY;
@@ -10539,8 +10920,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
+ calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k];
calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
-
calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
}
@@ -10583,17 +10964,17 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
mode_lib->mp.TCalc = 24.0 / mode_lib->mp.DCFCLKDeepSleep;
for (k = 0; k < s->num_active_planes; ++k) {
- if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
mode_lib->mp.WritebackDelay[k] =
mode_lib->soc.qos_parameters.writeback.base_latency_us
+ CalculateWriteBackDelay(
- display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format,
- display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio,
- display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio,
- display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps,
- display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width,
- display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height,
- display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height,
+ display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height,
display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->mp.Dispclk;
} else
mode_lib->mp.WritebackDelay[k] = 0;
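The guard above switches from the old boolean writeback.enable to a count of active writebacks, and the scaling fields move into a per-writeback array indexed here as writeback_stream[0]. A rough, hypothetical sketch of the shape this implies (field types are assumptions, not the real dml2 definitions):

/* Hypothetical mirror of the reworked writeback descriptor; only the fields
 * referenced above are shown. */
struct wb_stream_info {
	int pixel_format;
	double h_ratio, v_ratio;
	unsigned int v_taps;
	unsigned int output_width, output_height, input_height;
};

struct wb_descriptor {
	unsigned int active_writebacks_per_stream; /* replaces the old bool enable */
	struct wb_stream_info writeback_stream[1]; /* sized DML2_MAX_WRITEBACK in the real struct */
};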
@@ -10679,10 +11060,25 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
for (k = 0; k < s->num_active_planes; ++k) {
bool cursor_not_enough_urgent_latency_hiding = 0;
- double line_time_us = 0.0;
-
- line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
+ s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
+
+ s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format;
+
+ s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
+ mode_lib->mp.NoOfDPP[k],
+ display_cfg->plane_descriptors[k].composition.viewport.plane0.width,
+ display_cfg->plane_descriptors[k].composition.viewport.plane0.height,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
+ display_cfg->plane_descriptors[k].composition.rotation_angle);
+
+ s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
+ mode_lib->mp.NoOfDPP[k],
+ display_cfg->plane_descriptors[k].composition.viewport.plane1.width,
+ display_cfg->plane_descriptors[k].composition.viewport.plane1.height,
+ display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
+ display_cfg->plane_descriptors[k].composition.rotation_angle);
+
if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
calculate_cursor_req_attributes(
display_cfg->plane_descriptors[k].cursor.cursor_width,
@@ -10699,7 +11095,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
display_cfg->plane_descriptors[k].cursor.cursor_width,
s->cursor_bytes_per_chunk[k],
s->cursor_lines_per_chunk[k],
- line_time_us,
+ s->line_times[k],
mode_lib->mp.UrgentLatency,
// output
@@ -10714,7 +11110,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
mode_lib->mp.swath_width_chroma_ub[k],
mode_lib->mp.SwathHeightY[k],
mode_lib->mp.SwathHeightC[k],
- line_time_us,
+ s->line_times[k],
mode_lib->mp.UrgentLatency,
display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
@@ -10752,6 +11148,35 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
dml2_printf("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required);
#endif
+ if (s->num_active_planes > 1) {
+ CheckGlobalPrefetchAdmissibility_params->num_active_planes = s->num_active_planes;
+ CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format;
+ CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
+ CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
+ CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l;
+ CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c;
+ CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->mp.SwathHeightY;
+ CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->mp.SwathHeightC;
+ CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
+ CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->mp.CompressedBufferSizeInkByte;
+ CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->mp.DETBufferSizeY;
+ CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->mp.DETBufferSizeC;
+ CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l;
+ CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c;
+ CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes;
+ CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = 0; // don't care
+ CheckGlobalPrefetchAdmissibility_params->Tpre_oto = 0; // don't care
+ CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
+ CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = mode_lib->mp.Dcfclk;
+ CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times;
+ CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->mp.dst_y_prefetch;
+
+ // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre; prefetch should then be possible
+ CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->dummy_boolean[0];
+ CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre;
+ CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); // don't care about the check output for mode programming
+ }
+
{
s->DestinationLineTimesForPrefetchLessThan2 = false;
s->VRatioPrefetchMoreThanMax = false;
@@ -10763,11 +11188,11 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
dml2_printf("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
mode_lib->mp.TWait[k] = CalculateTWait(
- display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns,
- mode_lib->mp.UrgentLatency,
- mode_lib->mp.TripToMemory,
- !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ?
- get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0);
+ display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns,
+ mode_lib->mp.UrgentLatency,
+ mode_lib->mp.TripToMemory,
+ !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ?
+ get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0);
myPipe->Dppclk = mode_lib->mp.Dppclk[k];
myPipe->Dispclk = mode_lib->mp.Dispclk;
@@ -10848,6 +11273,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present;
CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->mp.meta_row_bytes[k];
CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor[k];
+ CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k];
+ CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->mp.vactive_sw_bw_l[k];
+ CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->mp.vactive_sw_bw_c[k];
// output
CalculatePrefetchSchedule_params->DSTXAfterScaler = &mode_lib->mp.DSTXAfterScaler[k];
@@ -10876,9 +11304,18 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
CalculatePrefetchSchedule_params->VUpdateWidthPix = &mode_lib->mp.VUpdateWidthPix[k];
CalculatePrefetchSchedule_params->VReadyOffsetPix = &mode_lib->mp.VReadyOffsetPix[k];
CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->mp.prefetch_cursor_bw[k];
+ CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k];
+ CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k];
+ CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k];
+ CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->dummy_single[0];
mode_lib->mp.NoTimeToPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
+ if (s->impacted_dst_y_pre[k] > 0)
+ mode_lib->mp.impacted_prefetch_margin_us[k] = (mode_lib->mp.dst_y_prefetch[k] - s->impacted_dst_y_pre[k]) * s->line_times[k];
+ else
+ mode_lib->mp.impacted_prefetch_margin_us[k] = 0;
+
#ifdef __DML_VBA_DEBUG__
dml2_printf("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]);
#endif
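The margin computed right after CalculatePrefetchSchedule() above reduces to a one-liner: the prefetch lines left over once the impacted requirement is met, converted to microseconds via the per-plane line time. A standalone sketch with hypothetical parameters:

/* Illustrative only: impacted prefetch margin in microseconds, zero when the
 * plane has no impacted requirement. */
static double impacted_prefetch_margin_us(double dst_y_prefetch,
					  double impacted_dst_y_pre,
					  double line_time_us)
{
	if (impacted_dst_y_pre <= 0.0)
		return 0.0;

	return (dst_y_prefetch - impacted_dst_y_pre) * line_time_us;
}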
@@ -10956,8 +11393,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
dml2_printf("DML::%s: k=%0u VRatioY=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
dml2_printf("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]);
- dml2_printf("DML::%s: k=%0u ReadBandwidthSurfaceLuma=%f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthLuma[k]);
- dml2_printf("DML::%s: k=%0u ReadBandwidthSurfaceChroma=%f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthChroma[k]);
+ dml2_printf("DML::%s: k=%0u vactive_sw_bw_l=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]);
+ dml2_printf("DML::%s: k=%0u vactive_sw_bw_c=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]);
dml2_printf("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]);
dml2_printf("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]);
dml2_printf("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]);
@@ -10988,8 +11425,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor;
calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor;
- calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.SurfaceReadBandwidthLuma;
- calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.SurfaceReadBandwidthChroma;
+ calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l;
+ calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c;
calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma;
calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma;
calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l;
@@ -11120,8 +11557,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor;
calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor;
- calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.SurfaceReadBandwidthLuma;
- calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.SurfaceReadBandwidthChroma;
+ calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l;
+ calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c;
calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma;
calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma;
calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l;
@@ -11238,8 +11675,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
s->mmSOCParameters.USRRetrainingLatency = 0;
s->mmSOCParameters.SMNLatency = 0;
s->mmSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index);
- s->mmSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, in_out_params->min_clk_index);
- s->mmSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock;
+ s->mmSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->mp.uclk_freq_mhz, mode_lib->mp.FabricClock, in_out_params->min_clk_index);
+ s->mmSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->mp.FabricClock;
s->mmSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type;
CalculateWatermarks_params->display_cfg = display_cfg;
@@ -11289,7 +11726,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params);
for (k = 0; k < s->num_active_planes; ++k) {
- if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) {
+ if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
@@ -11475,25 +11912,25 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
//Maximum Bandwidth Used
s->TotalWRBandwidth = 0;
- s->WRBandwidth = 0;
- for (k = 0; k < s->num_active_planes; ++k) {
- if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format == dml2_444_32) {
- s->WRBandwidth = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width /
- (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4;
- } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) {
- s->WRBandwidth = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width /
- (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8;
+ for (k = 0; k < display_cfg->num_streams; ++k) {
+ s->WRBandwidth = 0;
+ if (display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0) {
+ s->WRBandwidth = display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_height
+ * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_width /
+ (display_cfg->stream_descriptors[k].timing.h_total * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].input_height
+ / ((double)display_cfg->stream_descriptors[k].timing.pixel_clock_khz / 1000))
+ * (display_cfg->stream_descriptors[k].writeback.writeback_stream[0].pixel_format == dml2_444_32 ? 4.0 : 8.0);
+ s->TotalWRBandwidth = s->TotalWRBandwidth + s->WRBandwidth;
}
- s->TotalWRBandwidth = s->TotalWRBandwidth + s->WRBandwidth;
}
mode_lib->mp.TotalDataReadBandwidth = 0;
for (k = 0; k < s->num_active_planes; ++k) {
- mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.SurfaceReadBandwidthLuma[k] + mode_lib->mp.SurfaceReadBandwidthChroma[k];
+ mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.vactive_sw_bw_l[k] + mode_lib->mp.vactive_sw_bw_c[k];
#ifdef __DML_VBA_DEBUG__
dml2_printf("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth);
- dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthLuma[k]);
- dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthChroma[k]);
+ dml2_printf("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]);
+ dml2_printf("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]);
#endif
}
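The reworked loop above iterates streams rather than planes and folds the two pixel-format branches into one expression: output pixels divided by the time to scan the writeback input, times 4 bytes/pixel for dml2_444_32 and 8 otherwise. As a standalone sketch (hypothetical parameters):

/* Illustrative only: writeback bandwidth for one stream, mirroring the
 * expression above. */
static double wr_bandwidth(double output_width, double output_height,
			   double input_height, double h_total,
			   double pixel_clock_khz, int is_444_32)
{
	double input_scan_time = h_total * input_height / (pixel_clock_khz / 1000.0);

	return output_height * output_width / input_scan_time * (is_444_32 ? 4.0 : 8.0);
}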
@@ -11530,8 +11967,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex
CalculateStutterEfficiency_params->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC;
CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = mode_lib->mp.DCCYMaxUncompressedBlock;
CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = mode_lib->mp.DCCCMaxUncompressedBlock;
- CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = mode_lib->mp.SurfaceReadBandwidthLuma;
- CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = mode_lib->mp.SurfaceReadBandwidthChroma;
+ CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = mode_lib->mp.vactive_sw_bw_l;
+ CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = mode_lib->mp.vactive_sw_bw_c;
CalculateStutterEfficiency_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
CalculateStutterEfficiency_params->meta_row_bw = mode_lib->mp.meta_row_bw;
CalculateStutterEfficiency_params->rob_alloc_compressed = mode_lib->ip.dcn_mrq_present;
@@ -11742,7 +12179,7 @@ static void rq_dlg_get_wm_regs(const struct dml2_display_cfg *display_cfg, const
wm_regs->fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz);
wm_regs->sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz);
wm_regs->sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz);
- wm_regs->temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.g6_temp_read_watermark_us * refclk_freq_in_mhz);
+ wm_regs->temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz);
wm_regs->uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz);
wm_regs->urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz);
wm_regs->usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz);
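Each watermark register above is the corresponding latency in microseconds scaled to reference-clock ticks and truncated; the only change in this hunk is which watermark feeds temp_read_or_ppt. The conversion itself, as a sketch:

/* Illustrative only: microsecond watermark to refclk-tick register value. */
static unsigned int wm_us_to_refclk_ticks(double watermark_us, double refclk_freq_in_mhz)
{
	return (unsigned int)(watermark_us * refclk_freq_in_mhz);
}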
@@ -12321,14 +12758,18 @@ void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_interna
void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib,
const struct display_configuation_with_meta *display_cfg,
- struct dmub_fams2_stream_static_state *fams2_programming,
- enum dml2_uclk_pstate_support_method pstate_method,
+ union dmub_cmd_fams2_config *fams2_base_programming,
+ union dmub_cmd_fams2_config *fams2_sub_programming,
+ enum dml2_pstate_method pstate_method,
int plane_index)
{
const struct dml2_plane_parameters *plane_descriptor = &display_cfg->display_config.plane_descriptors[plane_index];
const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[plane_descriptor->stream_index];
const struct dml2_fams2_meta *stream_fams2_meta = &display_cfg->stage3.stream_fams2_meta[plane_descriptor->stream_index];
+ struct dmub_fams2_cmd_stream_static_base_state *base_programming = &fams2_base_programming->stream_v1.base;
+ union dmub_fams2_cmd_stream_static_sub_state *sub_programming = &fams2_sub_programming->stream_v1.sub_state;
+
unsigned int i;
if (display_cfg->display_config.overrides.all_streams_blanked) {
@@ -12337,110 +12778,110 @@ void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_interna
}
/* from display configuration */
- fams2_programming->htotal = (uint16_t)stream_descriptor->timing.h_total;
- fams2_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total;
- fams2_programming->vblank_start = (uint16_t)(stream_fams2_meta->nom_vtotal -
+ base_programming->htotal = (uint16_t)stream_descriptor->timing.h_total;
+ base_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total;
+ base_programming->vblank_start = (uint16_t)(stream_fams2_meta->nom_vtotal -
stream_descriptor->timing.v_front_porch);
- fams2_programming->vblank_end = (uint16_t)(stream_fams2_meta->nom_vtotal -
+ base_programming->vblank_end = (uint16_t)(stream_fams2_meta->nom_vtotal -
stream_descriptor->timing.v_front_porch -
stream_descriptor->timing.v_active);
- fams2_programming->config.bits.is_drr = stream_descriptor->timing.drr_config.enabled;
+ base_programming->config.bits.is_drr = stream_descriptor->timing.drr_config.enabled;
/* from meta */
- fams2_programming->otg_vline_time_ns =
+ base_programming->otg_vline_time_ns =
(unsigned int)(stream_fams2_meta->otg_vline_time_us * 1000.0);
- fams2_programming->scheduling_delay_otg_vlines = (uint8_t)stream_fams2_meta->scheduling_delay_otg_vlines;
- fams2_programming->contention_delay_otg_vlines = (uint8_t)stream_fams2_meta->contention_delay_otg_vlines;
- fams2_programming->vline_int_ack_delay_otg_vlines = (uint8_t)stream_fams2_meta->vertical_interrupt_ack_delay_otg_vlines;
- fams2_programming->drr_keepout_otg_vline = (uint16_t)(stream_fams2_meta->nom_vtotal -
+ base_programming->scheduling_delay_otg_vlines = (uint8_t)stream_fams2_meta->scheduling_delay_otg_vlines;
+ base_programming->contention_delay_otg_vlines = (uint8_t)stream_fams2_meta->contention_delay_otg_vlines;
+ base_programming->vline_int_ack_delay_otg_vlines = (uint8_t)stream_fams2_meta->vertical_interrupt_ack_delay_otg_vlines;
+ base_programming->drr_keepout_otg_vline = (uint16_t)(stream_fams2_meta->nom_vtotal -
stream_descriptor->timing.v_front_porch -
stream_fams2_meta->method_drr.programming_delay_otg_vlines);
- fams2_programming->allow_to_target_delay_otg_vlines = (uint8_t)stream_fams2_meta->allow_to_target_delay_otg_vlines;
- fams2_programming->max_vtotal = (uint16_t)stream_fams2_meta->max_vtotal;
+ base_programming->allow_to_target_delay_otg_vlines = (uint8_t)stream_fams2_meta->allow_to_target_delay_otg_vlines;
+ base_programming->max_vtotal = (uint16_t)stream_fams2_meta->max_vtotal;
/* from core */
- fams2_programming->config.bits.min_ttu_vblank_usable = true;
+ base_programming->config.bits.min_ttu_vblank_usable = true;
for (i = 0; i < display_cfg->display_config.num_planes; i++) {
/* check if all planes support p-state in blank */
if (display_cfg->display_config.plane_descriptors[i].stream_index == plane_descriptor->stream_index &&
mode_lib->mp.MinTTUVBlank[i] <= mode_lib->mp.Watermark.DRAMClockChangeWatermark) {
- fams2_programming->config.bits.min_ttu_vblank_usable = false;
+ base_programming->config.bits.min_ttu_vblank_usable = false;
break;
}
}
switch (pstate_method) {
- case dml2_uclk_pstate_support_method_vactive:
- case dml2_uclk_pstate_support_method_fw_vactive_drr:
+ case dml2_pstate_method_vactive:
+ case dml2_pstate_method_fw_vactive_drr:
/* legacy vactive */
- fams2_programming->type = FAMS2_STREAM_TYPE_VACTIVE;
- fams2_programming->sub_state.legacy.vactive_det_fill_delay_otg_vlines =
- (uint8_t)stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines;
- fams2_programming->allow_start_otg_vline =
- (uint16_t)stream_fams2_meta->method_vactive.common.allow_start_otg_vline;
- fams2_programming->allow_end_otg_vline =
- (uint16_t)stream_fams2_meta->method_vactive.common.allow_end_otg_vline;
- fams2_programming->config.bits.clamp_vtotal_min = true;
+ base_programming->type = FAMS2_STREAM_TYPE_VACTIVE;
+ sub_programming->legacy.vactive_det_fill_delay_otg_vlines =
+ (uint8_t)stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines;
+ base_programming->allow_start_otg_vline =
+ (uint16_t)stream_fams2_meta->method_vactive.common.allow_start_otg_vline;
+ base_programming->allow_end_otg_vline =
+ (uint16_t)stream_fams2_meta->method_vactive.common.allow_end_otg_vline;
+ base_programming->config.bits.clamp_vtotal_min = true;
break;
- case dml2_uclk_pstate_support_method_vblank:
- case dml2_uclk_pstate_support_method_fw_vblank_drr:
+ case dml2_pstate_method_vblank:
+ case dml2_pstate_method_fw_vblank_drr:
/* legacy vblank */
- fams2_programming->type = FAMS2_STREAM_TYPE_VBLANK;
- fams2_programming->allow_start_otg_vline =
- (uint16_t)stream_fams2_meta->method_vblank.common.allow_start_otg_vline;
- fams2_programming->allow_end_otg_vline =
- (uint16_t)stream_fams2_meta->method_vblank.common.allow_end_otg_vline;
- fams2_programming->config.bits.clamp_vtotal_min = true;
+ base_programming->type = FAMS2_STREAM_TYPE_VBLANK;
+ base_programming->allow_start_otg_vline =
+ (uint16_t)stream_fams2_meta->method_vblank.common.allow_start_otg_vline;
+ base_programming->allow_end_otg_vline =
+ (uint16_t)stream_fams2_meta->method_vblank.common.allow_end_otg_vline;
+ base_programming->config.bits.clamp_vtotal_min = true;
break;
- case dml2_uclk_pstate_support_method_fw_drr:
+ case dml2_pstate_method_fw_drr:
/* drr */
- fams2_programming->type = FAMS2_STREAM_TYPE_DRR;
- fams2_programming->sub_state.drr.programming_delay_otg_vlines =
- (uint8_t)stream_fams2_meta->method_drr.programming_delay_otg_vlines;
- fams2_programming->sub_state.drr.nom_stretched_vtotal =
- (uint16_t)stream_fams2_meta->method_drr.stretched_vtotal;
- fams2_programming->allow_start_otg_vline =
- (uint16_t)stream_fams2_meta->method_drr.common.allow_start_otg_vline;
- fams2_programming->allow_end_otg_vline =
- (uint16_t)stream_fams2_meta->method_drr.common.allow_end_otg_vline;
+ base_programming->type = FAMS2_STREAM_TYPE_DRR;
+ sub_programming->drr.programming_delay_otg_vlines =
+ (uint8_t)stream_fams2_meta->method_drr.programming_delay_otg_vlines;
+ sub_programming->drr.nom_stretched_vtotal =
+ (uint16_t)stream_fams2_meta->method_drr.stretched_vtotal;
+ base_programming->allow_start_otg_vline =
+ (uint16_t)stream_fams2_meta->method_drr.common.allow_start_otg_vline;
+ base_programming->allow_end_otg_vline =
+ (uint16_t)stream_fams2_meta->method_drr.common.allow_end_otg_vline;
/* drr only clamps to vtotal min for single display */
- fams2_programming->config.bits.clamp_vtotal_min = display_cfg->display_config.num_streams == 1;
- fams2_programming->sub_state.drr.only_stretch_if_required = true;
+ base_programming->config.bits.clamp_vtotal_min = display_cfg->display_config.num_streams == 1;
+ sub_programming->drr.only_stretch_if_required = true;
break;
- case dml2_uclk_pstate_support_method_fw_subvp_phantom:
- case dml2_uclk_pstate_support_method_fw_subvp_phantom_drr:
+ case dml2_pstate_method_fw_svp:
+ case dml2_pstate_method_fw_svp_drr:
/* subvp */
- fams2_programming->type = FAMS2_STREAM_TYPE_SUBVP;
- fams2_programming->sub_state.subvp.vratio_numerator =
- (uint16_t)(plane_descriptor->composition.scaler_info.plane0.v_ratio * 1000.0);
- fams2_programming->sub_state.subvp.vratio_denominator = 1000;
- fams2_programming->sub_state.subvp.programming_delay_otg_vlines =
- (uint8_t)stream_fams2_meta->method_subvp.programming_delay_otg_vlines;
- fams2_programming->sub_state.subvp.prefetch_to_mall_otg_vlines =
- (uint8_t)stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines;
- fams2_programming->sub_state.subvp.phantom_vtotal =
- (uint16_t)stream_fams2_meta->method_subvp.phantom_vtotal;
- fams2_programming->sub_state.subvp.phantom_vactive =
- (uint16_t)stream_fams2_meta->method_subvp.phantom_vactive;
- fams2_programming->sub_state.subvp.config.bits.is_multi_planar =
- plane_descriptor->surface.plane1.height > 0;
- fams2_programming->sub_state.subvp.config.bits.is_yuv420 =
- plane_descriptor->pixel_format == dml2_420_8 ||
- plane_descriptor->pixel_format == dml2_420_10 ||
- plane_descriptor->pixel_format == dml2_420_12;
-
- fams2_programming->allow_start_otg_vline =
- (uint16_t)stream_fams2_meta->method_subvp.common.allow_start_otg_vline;
- fams2_programming->allow_end_otg_vline =
- (uint16_t)stream_fams2_meta->method_subvp.common.allow_end_otg_vline;
- fams2_programming->config.bits.clamp_vtotal_min = true;
+ base_programming->type = FAMS2_STREAM_TYPE_SUBVP;
+ sub_programming->subvp.vratio_numerator =
+ (uint16_t)(plane_descriptor->composition.scaler_info.plane0.v_ratio * 1000.0);
+ sub_programming->subvp.vratio_denominator = 1000;
+ sub_programming->subvp.programming_delay_otg_vlines =
+ (uint8_t)stream_fams2_meta->method_subvp.programming_delay_otg_vlines;
+ sub_programming->subvp.prefetch_to_mall_otg_vlines =
+ (uint8_t)stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines;
+ sub_programming->subvp.phantom_vtotal =
+ (uint16_t)stream_fams2_meta->method_subvp.phantom_vtotal;
+ sub_programming->subvp.phantom_vactive =
+ (uint16_t)stream_fams2_meta->method_subvp.phantom_vactive;
+ sub_programming->subvp.config.bits.is_multi_planar =
+ plane_descriptor->surface.plane1.height > 0;
+ sub_programming->subvp.config.bits.is_yuv420 =
+ plane_descriptor->pixel_format == dml2_420_8 ||
+ plane_descriptor->pixel_format == dml2_420_10 ||
+ plane_descriptor->pixel_format == dml2_420_12;
+
+ base_programming->allow_start_otg_vline =
+ (uint16_t)stream_fams2_meta->method_subvp.common.allow_start_otg_vline;
+ base_programming->allow_end_otg_vline =
+ (uint16_t)stream_fams2_meta->method_subvp.common.allow_end_otg_vline;
+ base_programming->config.bits.clamp_vtotal_min = true;
break;
- case dml2_uclk_pstate_support_method_reserved_hw:
- case dml2_uclk_pstate_support_method_reserved_fw:
- case dml2_uclk_pstate_support_method_reserved_fw_drr_fixed:
- case dml2_uclk_pstate_support_method_reserved_fw_drr_var:
- case dml2_uclk_pstate_support_method_not_supported:
- case dml2_uclk_pstate_support_method_count:
+ case dml2_pstate_method_reserved_hw:
+ case dml2_pstate_method_reserved_fw:
+ case dml2_pstate_method_reserved_fw_drr_clamped:
+ case dml2_pstate_method_reserved_fw_drr_var:
+ case dml2_pstate_method_na:
+ case dml2_pstate_method_count:
default:
/* this should never happen */
break;
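With the split interface above, callers now supply two dmub_cmd_fams2_config unions (base and sub-state) instead of a single dmub_fams2_stream_static_state. A hypothetical call-site fragment, assuming mode_lib, display_cfg and plane_index are already in scope:

union dmub_cmd_fams2_config base_cfg, sub_cfg;

dml2_core_calcs_get_stream_fams2_programming(mode_lib, display_cfg,
					     &base_cfg, &sub_cfg,
					     dml2_pstate_method_vactive, /* any dml2_pstate_method value */
					     plane_index);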
@@ -12569,6 +13010,8 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod
out->informative.mode_support_info.InvalidCombinationOfMALLUseForPState = mode_lib->ms.support.InvalidCombinationOfMALLUseForPState;
out->informative.mode_support_info.ExceededMALLSize = mode_lib->ms.support.ExceededMALLSize;
out->informative.mode_support_info.EnoughWritebackUnits = mode_lib->ms.support.EnoughWritebackUnits;
+ out->informative.mode_support_info.temp_read_or_ppt_support = mode_lib->ms.support.temp_read_or_ppt_support;
+ out->informative.mode_support_info.g6_temp_read_support = mode_lib->ms.support.g6_temp_read_support;
out->informative.mode_support_info.ExceededMultistreamSlots = mode_lib->ms.support.ExceededMultistreamSlots;
out->informative.mode_support_info.NotEnoughDSCUnits = mode_lib->ms.support.NotEnoughDSCUnits;
@@ -12662,7 +13105,7 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod
out->informative.watermarks.pstate_change_us = dml_get_wm_dram_clock_change(mode_lib);
out->informative.watermarks.fclk_pstate_change_us = dml_get_wm_fclk_change(mode_lib);
out->informative.watermarks.usr_retraining_us = dml_get_wm_usr_retraining(mode_lib);
- out->informative.watermarks.g6_temp_read_watermark_us = dml_get_wm_g6_temp_read(mode_lib);
+ out->informative.watermarks.temp_read_or_ppt_watermark_us = dml_get_wm_temp_read_or_ppt(mode_lib);
out->informative.mall.total_surface_size_in_mall_bytes = 0;
for (k = 0; k < out->display_config.num_planes; ++k)
@@ -12745,6 +13188,8 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod
out->informative.qos.max_active_fclk_change_latency_supported = dml_get_fclk_change_latency(mode_lib);
+ out->informative.misc.LowestPrefetchMargin = 10 * 1000 * 1000;
+
for (k = 0; k < out->display_config.num_planes; k++) {
if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us)
@@ -12824,6 +13269,7 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod
out->informative.misc.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[k];
out->informative.misc.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[k];
+ out->informative.misc.WritebackRequiredBandwidth = mode_lib->scratch.dml_core_mode_programming_locals.TotalWRBandwidth / 1000.0;
out->informative.misc.WritebackAllowDRAMClockChangeEndPosition[k] = mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k];
out->informative.misc.WritebackAllowFCLKChangeEndPosition[k] = mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k];
out->informative.misc.DSCCLK_calculated[k] = mode_lib->mp.DSCCLK[k];
@@ -12831,6 +13277,9 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod
out->informative.misc.PTE_BUFFER_MODE[k] = mode_lib->mp.PTE_BUFFER_MODE[k];
out->informative.misc.DSCDelay[k] = mode_lib->mp.DSCDelay[k];
out->informative.misc.MaxActiveDRAMClockChangeLatencySupported[k] = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported[k];
+
+ if (mode_lib->mp.impacted_prefetch_margin_us[k] < out->informative.misc.LowestPrefetchMargin)
+ out->informative.misc.LowestPrefetchMargin = mode_lib->mp.impacted_prefetch_margin_us[k];
}
// For this DV informative layer, all pipes in the same planes will just use the same id
@@ -12853,16 +13302,11 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod
out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane1[n] = k;
}
}
-
- out->informative.qos.max_non_urgent_latency_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles
- / mode_lib->mp.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0)
- + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->mp.FabricClock
- + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->mp.FabricClock
- * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0);
+ out->informative.qos.max_non_urgent_latency_us = dml_get_max_non_urgent_latency_us(mode_lib);
if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024
- / mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) {
+ / mode_lib->ms.support.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) {
out->informative.misc.ROBUrgencyAvoidance = true;
} else {
out->informative.misc.ROBUrgencyAvoidance = false;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h
index df2d1550a14b..27ef0e096b25 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h
@@ -28,7 +28,7 @@ void dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg *displ
void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out);
void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index);
void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index);
-void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_fams2_stream_static_state *fams2_programming, enum dml2_uclk_pstate_support_method pstate_method, int plane_index);
+void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, union dmub_cmd_fams2_config *fams2_base_programming, union dmub_cmd_fams2_config *fams2_sub_programming, enum dml2_pstate_method pstate_method, int plane_index);
void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_cmd_fams2_global_config *fams2_global_config);
void dml2_core_calcs_get_dpte_row_height(unsigned int *dpte_row_height, struct dml2_core_internal_display_mode_lib *mode_lib, bool is_plane1, enum dml2_source_format_class SourcePixelFormat, enum dml2_swizzle_mode SurfaceTiling, enum dml2_rotation_angle ScanDirection, unsigned int pitch, unsigned int GPUVMMinPageSizeKBytes);
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h
index cbdfbd5a0bde..23c0fca5515f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h
@@ -201,7 +201,7 @@ struct dml2_core_internal_watermarks {
double Z8StutterExitWatermark;
double Z8StutterEnterPlusExitWatermark;
double USRRetrainingWatermark;
- double g6_temp_read_watermark_us;
+ double temp_read_or_ppt_watermark_us;
};
struct dml2_core_internal_mode_support_info {
@@ -252,8 +252,8 @@ struct dml2_core_internal_mode_support_info {
bool PTEBufferSizeNotExceeded;
bool DCCMetaBufferSizeNotExceeded;
- enum dml2_dram_clock_change_support DRAMClockChangeSupport[DML2_MAX_PLANES];
- enum dml2_fclock_change_support FCLKChangeSupport[DML2_MAX_PLANES];
+ enum dml2_pstate_change_support DRAMClockChangeSupport[DML2_MAX_PLANES];
+ enum dml2_pstate_change_support FCLKChangeSupport[DML2_MAX_PLANES];
bool global_dram_clock_change_supported;
bool global_fclk_change_supported;
bool USRRetrainingSupport;
@@ -318,12 +318,15 @@ struct dml2_core_internal_mode_support_info {
bool avg_bandwidth_support_ok[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max];
double max_urgent_latency_us;
+ double max_non_urgent_latency_us;
double avg_non_urgent_latency_us;
double avg_urgent_latency_us;
+ double df_response_time_us;
bool incorrect_imall_usage;
bool g6_temp_read_support;
+ bool temp_read_or_ppt_support;
struct dml2_core_internal_watermarks watermarks;
};
@@ -378,8 +381,8 @@ struct dml2_core_internal_mode_support {
unsigned int DETBufferSizeC[DML2_MAX_PLANES];
unsigned int SwathHeightY[DML2_MAX_PLANES];
unsigned int SwathHeightC[DML2_MAX_PLANES];
- unsigned int SwathWidthY[DML2_MAX_PLANES];
- unsigned int SwathWidthC[DML2_MAX_PLANES];
+ unsigned int SwathWidthY[DML2_MAX_PLANES]; // per-pipe
+ unsigned int SwathWidthC[DML2_MAX_PLANES]; // per-pipe
// ----------------------------------
// Intermediates/Informational
@@ -476,9 +479,9 @@ struct dml2_core_internal_mode_support {
// Bandwidth Related Info
double BandwidthAvailableForImmediateFlip;
- double SurfaceReadBandwidthLuma[DML2_MAX_PLANES]; // no dcc overhead, for the plane
- double SurfaceReadBandwidthChroma[DML2_MAX_PLANES];
- double WriteBandwidth[DML2_MAX_PLANES];
+ double vactive_sw_bw_l[DML2_MAX_PLANES]; // no dcc overhead, for the plane
+ double vactive_sw_bw_c[DML2_MAX_PLANES];
+ double WriteBandwidth[DML2_MAX_PLANES][DML2_MAX_WRITEBACK];
double RequiredPrefetchPixelDataBWLuma[DML2_MAX_PLANES];
double RequiredPrefetchPixelDataBWChroma[DML2_MAX_PLANES];
double cursor_bw[DML2_MAX_PLANES];
@@ -539,7 +542,7 @@ struct dml2_core_internal_mode_program {
unsigned int qos_param_index; // to access the uclk dependent dpm table
unsigned int active_min_uclk_dpm_index; // to access the min_clk table
double FabricClock; /// <brief Basically just the clock freq at the min (or given) state
- double DCFCLK; /// <brief Basically just the clock freq at the min (or given) state and max combine setting
+ //double DCFCLK; /// <brief Basically just the clock freq at the min (or given) state and max combine setting
double dram_bw_mbps;
double uclk_freq_mhz;
unsigned int NoOfDPP[DML2_MAX_PLANES];
@@ -562,14 +565,14 @@ struct dml2_core_internal_mode_program {
double BytePerPixelInDETC[DML2_MAX_PLANES];
unsigned int BytePerPixelY[DML2_MAX_PLANES];
unsigned int BytePerPixelC[DML2_MAX_PLANES];
- unsigned int SwathWidthY[DML2_MAX_PLANES];
- unsigned int SwathWidthC[DML2_MAX_PLANES];
+ unsigned int SwathWidthY[DML2_MAX_PLANES]; // per-pipe
+ unsigned int SwathWidthC[DML2_MAX_PLANES]; // per-pipe
unsigned int req_per_swath_ub_l[DML2_MAX_PLANES];
unsigned int req_per_swath_ub_c[DML2_MAX_PLANES];
unsigned int SwathWidthSingleDPPY[DML2_MAX_PLANES];
unsigned int SwathWidthSingleDPPC[DML2_MAX_PLANES];
- double SurfaceReadBandwidthLuma[DML2_MAX_PLANES];
- double SurfaceReadBandwidthChroma[DML2_MAX_PLANES];
+ double vactive_sw_bw_l[DML2_MAX_PLANES];
+ double vactive_sw_bw_c[DML2_MAX_PLANES];
double excess_vactive_fill_bw_l[DML2_MAX_PLANES];
double excess_vactive_fill_bw_c[DML2_MAX_PLANES];
@@ -797,8 +800,9 @@ struct dml2_core_internal_mode_program {
double MaxActiveFCLKChangeLatencySupported;
bool USRRetrainingSupport;
bool g6_temp_read_support;
- enum dml2_fclock_change_support FCLKChangeSupport[DML2_MAX_PLANES];
- enum dml2_dram_clock_change_support DRAMClockChangeSupport[DML2_MAX_PLANES];
+ bool temp_read_or_ppt_support;
+ enum dml2_pstate_change_support FCLKChangeSupport[DML2_MAX_PLANES];
+ enum dml2_pstate_change_support DRAMClockChangeSupport[DML2_MAX_PLANES];
bool global_dram_clock_change_supported;
bool global_fclk_change_supported;
double MaxActiveDRAMClockChangeLatencySupported[DML2_MAX_PLANES];
@@ -846,6 +850,8 @@ struct dml2_core_internal_mode_program {
bool mall_comb_mcache_l[DML2_MAX_PLANES];
bool mall_comb_mcache_c[DML2_MAX_PLANES];
bool lc_comb_mcache[DML2_MAX_PLANES];
+
+ double impacted_prefetch_margin_us[DML2_MAX_PLANES];
};
struct dml2_core_internal_SOCParametersList {
@@ -862,6 +868,7 @@ struct dml2_core_internal_SOCParametersList {
double USRRetrainingLatency;
double SMNLatency;
double g6_temp_read_blackout_us;
+ double temp_read_or_ppt_blackout_us;
double max_urgent_latency_us;
double df_response_time_us;
enum dml2_qos_param_type qos_type;
@@ -951,6 +958,7 @@ struct dml2_core_calcs_mode_support_locals {
unsigned int tdlut_groups_per_2row_ub[DML2_MAX_PLANES];
double tdlut_opt_time[DML2_MAX_PLANES];
double tdlut_drain_time[DML2_MAX_PLANES];
+ unsigned int tdlut_bytes_to_deliver[DML2_MAX_PLANES];
unsigned int tdlut_bytes_per_group[DML2_MAX_PLANES];
unsigned int cursor_bytes_per_chunk[DML2_MAX_PLANES];
@@ -961,6 +969,18 @@ struct dml2_core_calcs_mode_support_locals {
unsigned int pstate_bytes_required_l[DML2_MAX_PLANES];
unsigned int pstate_bytes_required_c[DML2_MAX_PLANES];
+
+ double prefetch_sw_bytes[DML2_MAX_PLANES];
+ double Tpre_rounded[DML2_MAX_PLANES];
+ double Tpre_oto[DML2_MAX_PLANES];
+ bool recalc_prefetch_schedule;
+ bool recalc_prefetch_done;
+ double impacted_dst_y_pre[DML2_MAX_PLANES];
+ double line_times[DML2_MAX_PLANES];
+ enum dml2_source_format_class pixel_format[DML2_MAX_PLANES];
+ unsigned int lb_source_lines_l[DML2_MAX_PLANES];
+ unsigned int lb_source_lines_c[DML2_MAX_PLANES];
+ double prefetch_swath_time_us[DML2_MAX_PLANES];
};
struct dml2_core_calcs_mode_programming_locals {
@@ -1024,6 +1044,7 @@ struct dml2_core_calcs_mode_programming_locals {
unsigned int tdlut_groups_per_2row_ub[DML2_MAX_PLANES];
double tdlut_opt_time[DML2_MAX_PLANES];
double tdlut_drain_time[DML2_MAX_PLANES];
+ unsigned int tdlut_bytes_to_deliver[DML2_MAX_PLANES];
unsigned int tdlut_bytes_per_group[DML2_MAX_PLANES];
unsigned int cursor_bytes_per_chunk[DML2_MAX_PLANES];
@@ -1041,6 +1062,16 @@ struct dml2_core_calcs_mode_programming_locals {
unsigned int pstate_bytes_required_l[DML2_MAX_PLANES];
unsigned int pstate_bytes_required_c[DML2_MAX_PLANES];
+
+ double prefetch_sw_bytes[DML2_MAX_PLANES];
+ double Tpre_rounded[DML2_MAX_PLANES];
+ double Tpre_oto[DML2_MAX_PLANES];
+ bool recalc_prefetch_schedule;
+ double impacted_dst_y_pre[DML2_MAX_PLANES];
+ double line_times[DML2_MAX_PLANES];
+ enum dml2_source_format_class pixel_format[DML2_MAX_PLANES];
+ unsigned int lb_source_lines_l[DML2_MAX_PLANES];
+ unsigned int lb_source_lines_c[DML2_MAX_PLANES];
};
struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals {
@@ -1048,6 +1079,7 @@ struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_local
double ActiveFCLKChangeLatencyMargin[DML2_MAX_PLANES];
double USRRetrainingLatencyMargin[DML2_MAX_PLANES];
double g6_temp_read_latency_margin[DML2_MAX_PLANES];
+ double temp_read_or_ppt_latency_margin[DML2_MAX_PLANES];
double EffectiveLBLatencyHidingY;
double EffectiveLBLatencyHidingC;
@@ -1185,17 +1217,14 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_locals {
double LineTime;
double dst_y_prefetch_equ;
double prefetch_bw_oto;
+ double per_pipe_vactive_sw_bw;
double Tvm_oto;
double Tr0_oto;
- double Tvm_no_trip_oto;
- double Tr0_no_trip_oto;
double Tvm_oto_lines;
double Tr0_oto_lines;
double dst_y_prefetch_oto;
double TimeForFetchingVM;
double TimeForFetchingRowInVBlank;
- double dst_y_per_vm_no_trip_vblank;
- double dst_y_per_row_no_trip_vblank;
double LinesToRequestPrefetchPixelData;
unsigned int HostVMDynamicLevelsTrips;
double trip_to_mem;
@@ -1203,15 +1232,12 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_locals {
double Tr0_trips_rounded;
double max_Tsw;
double Lsw_oto;
- double Lsw_equ;
- double Tpre_rounded;
double prefetch_bw_equ;
double Tvm_equ;
double Tr0_equ;
double Tdmbf;
double Tdmec;
double Tdmsks;
- double prefetch_sw_bytes;
double total_row_bytes;
double prefetch_bw_pr;
double bytes_pp;
@@ -1225,6 +1251,7 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_locals {
double prefetch_bw2;
double prefetch_bw3;
double prefetch_bw4;
+ double dst_y_prefetch_equ_impacted;
double TWait_p;
unsigned int cursor_prefetch_bytes;
@@ -1545,17 +1572,18 @@ struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_param
// Output
struct dml2_core_internal_watermarks *Watermark;
- enum dml2_dram_clock_change_support *DRAMClockChangeSupport;
+ enum dml2_pstate_change_support *DRAMClockChangeSupport;
bool *global_dram_clock_change_supported;
double *MaxActiveDRAMClockChangeLatencySupported;
unsigned int *SubViewportLinesNeededInMALL;
- enum dml2_fclock_change_support *FCLKChangeSupport;
+ enum dml2_pstate_change_support *FCLKChangeSupport;
bool *global_fclk_change_supported;
double *MaxActiveFCLKChangeLatencySupported;
bool *USRRetrainingSupport;
double *VActiveLatencyHidingMargin;
double *VActiveLatencyHidingUs;
bool *g6_temp_read_support;
+ bool *temp_read_or_ppt_support;
};
@@ -1727,8 +1755,8 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_params {
double PrefetchSourceLinesC;
unsigned int VInitPreFillC;
unsigned int MaxNumSwathC;
- unsigned int swath_width_luma_ub;
- unsigned int swath_width_chroma_ub;
+ unsigned int swath_width_luma_ub; // per-pipe
+ unsigned int swath_width_chroma_ub; // per-pipe
unsigned int SwathHeightY;
unsigned int SwathHeightC;
double TWait;
@@ -1750,6 +1778,10 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_params {
unsigned int meta_row_bytes;
double mall_prefetch_sdp_overhead_factor;
+ double impacted_dst_y_pre;
+ double vactive_sw_bw_l; // per surface bw
+ double vactive_sw_bw_c; // per surface bw
+
// output
unsigned int *DSTXAfterScaler;
unsigned int *DSTYAfterScaler;
@@ -1767,6 +1799,8 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_params {
double *Tdmdl_vm;
double *Tdmdl;
double *TSetup;
+ double *Tpre_rounded;
+ double *Tpre_oto;
double *Tvm_trips;
double *Tr0_trips;
double *Tvm_trips_flip;
@@ -1777,6 +1811,48 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_params {
unsigned int *VUpdateWidthPix;
unsigned int *VReadyOffsetPix;
double *prefetch_cursor_bw;
+ double *prefetch_sw_bytes;
+ double *prefetch_swath_time_us;
+};
+
+struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params {
+ unsigned int num_active_planes;
+ enum dml2_source_format_class *pixel_format;
+ unsigned int rob_buffer_size_kbytes;
+ unsigned int compressed_buffer_size_kbytes;
+ unsigned int chunk_bytes_l; // same for all planes
+ unsigned int chunk_bytes_c;
+ unsigned int *detile_buffer_size_bytes_l;
+ unsigned int *detile_buffer_size_bytes_c;
+ unsigned int *full_swath_bytes_l;
+ unsigned int *full_swath_bytes_c;
+ unsigned int *lb_source_lines_l;
+ unsigned int *lb_source_lines_c;
+ unsigned int *swath_height_l;
+ unsigned int *swath_height_c;
+ double *prefetch_sw_bytes;
+ double *Tpre_rounded;
+ double *Tpre_oto;
+ double estimated_dcfclk_mhz;
+ double estimated_urg_bandwidth_required_mbps;
+ double *line_time;
+ double *dst_y_prefetch;
+
+ // output
+ bool *recalc_prefetch_schedule;
+ double *impacted_dst_y_pre;
+};
+
+struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals {
+ unsigned int max_Trpd_dcfclk_cycles;
+ unsigned int burst_bytes_to_fill_det;
+ double time_to_fill_det_us;
+ unsigned int accumulated_return_path_dcfclk_cycles[DML2_MAX_PLANES];
+ bool prefetch_global_check_passed;
+ unsigned int src_swath_bytes_l[DML2_MAX_PLANES];
+ unsigned int src_swath_bytes_c[DML2_MAX_PLANES];
+ unsigned int src_detile_buf_size_bytes_l[DML2_MAX_PLANES];
+ unsigned int src_detile_buf_size_bytes_c[DML2_MAX_PLANES];
};
struct dml2_core_calcs_calculate_mcache_row_bytes_params {
@@ -1921,6 +1997,7 @@ struct dml2_core_calcs_calculate_tdlut_setting_params {
unsigned int *tdlut_groups_per_2row_ub;
double *tdlut_opt_time;
double *tdlut_drain_time;
+ unsigned int *tdlut_bytes_to_deliver;
unsigned int *tdlut_bytes_per_group;
};
@@ -2004,6 +2081,7 @@ struct dml2_core_internal_scratch {
struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals;
struct dml2_core_calcs_CalculateVMRowAndSwath_locals CalculateVMRowAndSwath_locals;
struct dml2_core_calcs_CalculatePrefetchSchedule_locals CalculatePrefetchSchedule_locals;
+ struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals CheckGlobalPrefetchAdmissibility_locals;
struct dml2_core_shared_CalculateSwathAndDETConfiguration_locals CalculateSwathAndDETConfiguration_locals;
struct dml2_core_shared_TruncToValidBPP_locals TruncToValidBPP_locals;
struct dml2_core_shared_CalculateDETBufferSize_locals CalculateDETBufferSize_locals;
@@ -2019,6 +2097,7 @@ struct dml2_core_internal_scratch {
struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params CalculateSwathAndDETConfiguration_params;
struct dml2_core_calcs_CalculateStutterEfficiency_params CalculateStutterEfficiency_params;
struct dml2_core_calcs_CalculatePrefetchSchedule_params CalculatePrefetchSchedule_params;
+ struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params CheckGlobalPrefetchAdmissibility_params;
struct dml2_core_calcs_calculate_mcache_setting_params calculate_mcache_setting_params;
struct dml2_core_calcs_calculate_tdlut_setting_params calculate_tdlut_setting_params;
struct dml2_core_shared_calculate_vm_and_row_bytes_params calculate_vm_and_row_bytes_params;
@@ -2038,7 +2117,6 @@ struct dml2_core_internal_display_mode_lib {
// Used to hold input; intermediate and output of the calculations
struct dml2_core_internal_mode_support ms; // struct for mode support
struct dml2_core_internal_mode_program mp; // struct for mode programming
-
// Available overridable calculators for core_shared.
// if null, core_shared will use default calculators.
struct dml2_core_shared_calculation_funcs funcs;
@@ -2051,7 +2129,6 @@ struct dml2_core_calcs_mode_support_ex {
const struct dml2_display_cfg *in_display_cfg;
const struct dml2_mcg_min_clock_table *min_clk_table;
int min_clk_index;
-
//unsigned int in_state_index;
struct dml2_core_internal_mode_support_info *out_evaluation_info;
};
@@ -2064,9 +2141,7 @@ struct dml2_core_calcs_mode_programming_ex {
const struct dml2_mcg_min_clock_table *min_clk_table;
const struct core_display_cfg_support_info *cfg_support_info;
int min_clk_index;
-
struct dml2_display_cfg_programming *programming;
-
};
#endif
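
The new CheckGlobalPrefetchAdmissibility locals/params structs above follow the existing DML2 convention of parking large per-calculation state in the preallocated dml2_core_internal_scratch, which keeps it off the kernel stack. A minimal standalone sketch of that pattern, using a hypothetical calculation (my_calc) and a trimmed scratch type; DML2_MAX_PLANES is re-defined locally only so the snippet compiles on its own:

#define DML2_MAX_PLANES 8	/* stand-in; the real value comes from the dml2 headers */

struct my_calc_locals {		/* hypothetical calculation state */
	unsigned int swath_bytes[DML2_MAX_PLANES];
	double fill_time_us[DML2_MAX_PLANES];
};

struct my_scratch {		/* stands in for dml2_core_internal_scratch */
	struct my_calc_locals my_calc_locals;
};

static void my_calc(struct my_scratch *scratch, unsigned int num_planes)
{
	/* locals live in scratch, so the function keeps a small stack frame */
	struct my_calc_locals *l = &scratch->my_calc_locals;
	unsigned int i;

	for (i = 0; i < num_planes; i++) {
		l->swath_bytes[i] = 0;
		l->fill_time_us[i] = 0.0;
	}
}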
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c
index 714b5c39b7e6..456b3f8a6d38 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c
@@ -63,6 +63,150 @@ bool dml2_core_utils_is_420(enum dml2_source_format_class source_format)
case dml2_mono_16:
val = 0;
break;
+ case dml2_422_planar_8:
+ val = 0;
+ break;
+ case dml2_422_planar_10:
+ val = 0;
+ break;
+ case dml2_422_planar_12:
+ val = 0;
+ break;
+ case dml2_422_packed_8:
+ val = 0;
+ break;
+ case dml2_422_packed_10:
+ val = 0;
+ break;
+ case dml2_422_packed_12:
+ val = 0;
+ break;
+ default:
+ DML2_ASSERT(0);
+ break;
+ }
+ return val;
+}
+
+bool dml2_core_utils_is_422_planar(enum dml2_source_format_class source_format)
+{
+ bool val = false;
+
+ switch (source_format) {
+ case dml2_444_8:
+ val = 0;
+ break;
+ case dml2_444_16:
+ val = 0;
+ break;
+ case dml2_444_32:
+ val = 0;
+ break;
+ case dml2_444_64:
+ val = 0;
+ break;
+ case dml2_420_8:
+ val = 0;
+ break;
+ case dml2_420_10:
+ val = 0;
+ break;
+ case dml2_420_12:
+ val = 0;
+ break;
+ case dml2_rgbe_alpha:
+ val = 0;
+ break;
+ case dml2_rgbe:
+ val = 0;
+ break;
+ case dml2_mono_8:
+ val = 0;
+ break;
+ case dml2_mono_16:
+ val = 0;
+ break;
+ case dml2_422_planar_8:
+ val = 1;
+ break;
+ case dml2_422_planar_10:
+ val = 1;
+ break;
+ case dml2_422_planar_12:
+ val = 1;
+ break;
+ case dml2_422_packed_8:
+ val = 0;
+ break;
+ case dml2_422_packed_10:
+ val = 0;
+ break;
+ case dml2_422_packed_12:
+ val = 0;
+ break;
+ default:
+ DML2_ASSERT(0);
+ break;
+ }
+ return val;
+}
+
+bool dml2_core_utils_is_422_packed(enum dml2_source_format_class source_format)
+{
+ bool val = false;
+
+ switch (source_format) {
+ case dml2_444_8:
+ val = 0;
+ break;
+ case dml2_444_16:
+ val = 0;
+ break;
+ case dml2_444_32:
+ val = 0;
+ break;
+ case dml2_444_64:
+ val = 0;
+ break;
+ case dml2_420_8:
+ val = 0;
+ break;
+ case dml2_420_10:
+ val = 0;
+ break;
+ case dml2_420_12:
+ val = 0;
+ break;
+ case dml2_rgbe_alpha:
+ val = 0;
+ break;
+ case dml2_rgbe:
+ val = 0;
+ break;
+ case dml2_mono_8:
+ val = 0;
+ break;
+ case dml2_mono_16:
+ val = 0;
+ break;
+ case dml2_422_planar_8:
+ val = 0;
+ break;
+ case dml2_422_planar_10:
+ val = 0;
+ break;
+ case dml2_422_planar_12:
+ val = 0;
+ break;
+ case dml2_422_packed_8:
+ val = 1;
+ break;
+ case dml2_422_packed_10:
+ val = 1;
+ break;
+ case dml2_422_packed_12:
+ val = 1;
+ break;
default:
DML2_ASSERT(0);
break;
@@ -154,9 +298,9 @@ void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mod
dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported);
if (!fail_only || support->VRatioInPrefetchSupported == 0)
dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported);
- if (!fail_only || support->PTEBufferSizeNotExceeded == 1)
+ if (!fail_only || support->PTEBufferSizeNotExceeded == 0)
dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded);
- if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 1)
+ if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0)
dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded);
if (!fail_only || support->ExceededMALLSize == 1)
dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize);
@@ -280,39 +424,49 @@ bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_c
return is_phantom;
}
-unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode)
-{
- switch (sw_mode) {
- case (dml2_sw_linear):
- return 256; break;
- case (dml2_sw_256b_2d):
- return 256; break;
- case (dml2_sw_4kb_2d):
- return 4096; break;
- case (dml2_sw_64kb_2d):
- return 65536; break;
- case (dml2_sw_256kb_2d):
- return 262144; break;
- case (dml2_gfx11_sw_linear):
- return 256; break;
- case (dml2_gfx11_sw_64kb_d):
- return 65536; break;
- case (dml2_gfx11_sw_64kb_d_t):
- return 65536; break;
- case (dml2_gfx11_sw_64kb_d_x):
- return 65536; break;
- case (dml2_gfx11_sw_64kb_r_x):
- return 65536; break;
- case (dml2_gfx11_sw_256kb_d_x):
- return 262144; break;
- case (dml2_gfx11_sw_256kb_r_x):
- return 262144; break;
- default:
+unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel)
+{
+
+ if (sw_mode == dml2_sw_linear)
+ return 256;
+ else if (sw_mode == dml2_sw_256b_2d)
+ return 256;
+ else if (sw_mode == dml2_sw_4kb_2d)
+ return 4096;
+ else if (sw_mode == dml2_sw_64kb_2d)
+ return 65536;
+ else if (sw_mode == dml2_sw_256kb_2d)
+ return 262144;
+ else if (sw_mode == dml2_gfx11_sw_linear)
+ return 256;
+ else if (sw_mode == dml2_gfx11_sw_64kb_d)
+ return 65536;
+ else if (sw_mode == dml2_gfx11_sw_64kb_d_t)
+ return 65536;
+ else if (sw_mode == dml2_gfx11_sw_64kb_d_x)
+ return 65536;
+ else if (sw_mode == dml2_gfx11_sw_64kb_r_x)
+ return 65536;
+ else if (sw_mode == dml2_gfx11_sw_256kb_d_x)
+ return 262144;
+ else if (sw_mode == dml2_gfx11_sw_256kb_r_x)
+ return 262144;
+ else {
DML2_ASSERT(0);
return 256;
};
}
+bool dml2_core_utils_get_segment_horizontal_contiguous(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel)
+{
+ return (byte_per_pixel != 2);
+}
+
+bool dml2_core_utils_is_linear(enum dml2_swizzle_mode sw_mode)
+{
+ return (sw_mode == dml2_sw_linear || sw_mode == dml2_sw_linear_256b || sw_mode == dml2_linear_64elements);
+};
+
bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan)
{
@@ -325,7 +479,6 @@ bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan)
return is_vert;
}
-
int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode)
{
int unsigned version = 0;
@@ -334,17 +487,17 @@ int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode)
sw_mode == dml2_sw_256b_2d ||
sw_mode == dml2_sw_4kb_2d ||
sw_mode == dml2_sw_64kb_2d ||
- sw_mode == dml2_sw_256kb_2d) {
+ sw_mode == dml2_sw_256kb_2d)
version = 12;
- } else if (sw_mode == dml2_gfx11_sw_linear ||
+ else if (sw_mode == dml2_gfx11_sw_linear ||
sw_mode == dml2_gfx11_sw_64kb_d ||
sw_mode == dml2_gfx11_sw_64kb_d_t ||
sw_mode == dml2_gfx11_sw_64kb_d_x ||
sw_mode == dml2_gfx11_sw_64kb_r_x ||
sw_mode == dml2_gfx11_sw_256kb_d_x ||
- sw_mode == dml2_gfx11_sw_256kb_r_x) {
+ sw_mode == dml2_gfx11_sw_256kb_r_x)
version = 11;
- } else {
+ else {
dml2_printf("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode);
DML2_ASSERT(0);
}
@@ -403,7 +556,7 @@ bool dml2_core_utils_is_dual_plane(enum dml2_source_format_class source_format)
{
bool ret_val = 0;
- if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha))
+ if (dml2_core_utils_is_420(source_format) || dml2_core_utils_is_422_planar(source_format) || (source_format == dml2_rgbe_alpha))
ret_val = 1;
return ret_val;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h
index a5cc6a07167a..95f0d017add4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h
@@ -11,6 +11,8 @@
double dml2_core_utils_div_rem(double dividend, unsigned int divisor, unsigned int *remainder);
const char *dml2_core_utils_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type);
bool dml2_core_utils_is_420(enum dml2_source_format_class source_format);
+bool dml2_core_utils_is_422_planar(enum dml2_source_format_class source_format);
+bool dml2_core_utils_is_422_packed(enum dml2_source_format_class source_format);
void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only);
const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type);
void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg);
@@ -18,8 +20,10 @@ unsigned int dml2_core_utils_round_to_multiple(unsigned int num, unsigned int mu
unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info);
void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane);
bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg);
-unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode);
+unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel);
+bool dml2_core_utils_get_segment_horizontal_contiguous(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel);
bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan);
+bool dml2_core_utils_is_linear(enum dml2_swizzle_mode sw_mode);
int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode);
unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params);
unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table);
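
With dml2_core_utils_is_422_planar() added and dml2_core_utils_is_dual_plane() rewritten in terms of the format predicates, 4:2:2 planar surfaces are now treated as dual plane while 4:2:2 packed surfaces are not. A reduced standalone sketch of that check, using a trimmed stand-in enum (fmt/FMT_*) instead of the real dml2_source_format_class:

#include <stdbool.h>

enum fmt { FMT_444_8, FMT_420_8, FMT_422_PLANAR_8, FMT_422_PACKED_8, FMT_RGBE_ALPHA };

static bool is_420(enum fmt f)        { return f == FMT_420_8; }
static bool is_422_planar(enum fmt f) { return f == FMT_422_PLANAR_8; }

/* mirrors the updated dml2_core_utils_is_dual_plane(): 4:2:0, 4:2:2 planar and
 * RGBE+alpha carry a second (chroma/alpha) plane; 4:2:2 packed does not */
static bool is_dual_plane(enum fmt f)
{
	return is_420(f) || is_422_planar(f) || f == FMT_RGBE_ALPHA;
}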
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c
index 8869ea089312..fc77fb34a19a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c
@@ -96,6 +96,7 @@ static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm
double min_uclk_latency;
const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result;
+ /* assumes DF throttling is enabled */
min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config);
min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100);
@@ -125,6 +126,37 @@ static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm
in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency);
in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency);
in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency);
+
+ /* assumes DF throttling is disabled */
+ min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config);
+ min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100);
+
+ min_uclk_urgent = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.urgent_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config);
+ min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100);
+
+ min_uclk_bw = min_uclk_urgent > min_uclk_avg ? min_uclk_urgent : min_uclk_avg;
+
+ min_fclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes;
+ min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100);
+
+ min_fclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes;
+ min_fclk_urgent = (double)min_fclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100);
+
+ min_fclk_bw = min_fclk_urgent > min_fclk_avg ? min_fclk_urgent : min_fclk_avg;
+
+ min_dcfclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes;
+ min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100);
+
+ min_dcfclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes;
+ min_dcfclk_urgent = (double)min_dcfclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100);
+
+ min_dcfclk_bw = min_dcfclk_urgent > min_dcfclk_avg ? min_dcfclk_urgent : min_dcfclk_avg;
+
+ get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency);
+
+ in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency);
+ in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency);
+ in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency);
}
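
The no-throttle block added above repeats the same arithmetic as the throttled case, only with the system_active derates: convert bandwidth to a clock by dividing by the bus width, then inflate it by the derate percentage. A minimal sketch of that conversion with made-up numbers (the real code additionally folds in dram_bw_kbps_to_uclk_khz() and the latency minimums):

#include <stdio.h>

/* min clock (kHz) = (bandwidth kB/s / bus width bytes) / (derate% / 100) */
static double min_clk_khz(double bw_kbps, double bus_width_bytes, double derate_percent)
{
	return (bw_kbps / bus_width_bytes) / (derate_percent / 100.0);
}

int main(void)
{
	/* hypothetical: 12 GB/s urgent SDP traffic, 64-byte return bus, 80% derate */
	printf("min dcfclk ~ %.0f kHz\n", min_clk_khz(12000000.0, 64.0, 80.0)); /* 234375 */
	return 0;
}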
static void calculate_idle_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out)
@@ -272,6 +304,17 @@ static bool map_soc_min_clocks_to_dpm_fine_grained(struct dml2_display_cfg_progr
if (result)
result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.uclk_khz, &state_table->uclk);
+ /* these clocks are optional, so they can fail to map, in which case map all to 0 */
+ if (result) {
+ if (!round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz, &state_table->dcfclk) ||
+ !round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz, &state_table->fclk) ||
+ !round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz, &state_table->uclk)) {
+ display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz = 0;
+ display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz = 0;
+ display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz = 0;
+ }
+ }
+
return result;
}
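
The svp_prefetch_no_throttle clocks are the only ones allowed to miss the DPM table: instead of failing validation, all three are zeroed together. A small sketch of that pattern, assuming round_up_to_next_dpm() returns false when a request exceeds every table entry (the stub below models that behaviour against an ascending table):

#include <stdbool.h>

struct clk_set { unsigned long dcfclk_khz, fclk_khz, uclk_khz; };

/* stub: snap *clk_khz up to the next table entry; false if none is large enough */
static bool round_up_to_next_dpm(unsigned long *clk_khz, const unsigned long *table, int n)
{
	for (int i = 0; i < n; i++) {
		if (table[i] >= *clk_khz) {
			*clk_khz = table[i];
			return true;
		}
	}
	return false;
}

/* if any optional clock fails to map, zero the whole set rather than failing */
static void map_optional_clocks(struct clk_set *c,
				const unsigned long *dcf, const unsigned long *f,
				const unsigned long *u, int n)
{
	if (!round_up_to_next_dpm(&c->dcfclk_khz, dcf, n) ||
	    !round_up_to_next_dpm(&c->fclk_khz, f, n) ||
	    !round_up_to_next_dpm(&c->uclk_khz, u, n)) {
		c->dcfclk_khz = 0;
		c->fclk_khz = 0;
		c->uclk_khz = 0;
	}
}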
@@ -374,11 +417,11 @@ static bool map_min_clocks_to_dpm(const struct dml2_core_mode_support_result *mo
static bool are_timings_trivially_synchronizable(struct dml2_display_cfg *display_config, int mask)
{
- unsigned char i;
+ unsigned int i;
bool identical = true;
bool contains_drr = false;
- unsigned char remap_array[DML2_MAX_PLANES];
- unsigned char remap_array_size = 0;
+ unsigned int remap_array[DML2_MAX_PLANES];
+ unsigned int remap_array_size = 0;
// Create a remap array to enable simple iteration through only masked stream indicies
for (i = 0; i < display_config->num_streams; i++) {
@@ -413,10 +456,10 @@ static bool are_timings_trivially_synchronizable(struct dml2_display_cfg *displa
static int find_smallest_idle_time_in_vblank_us(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out, int mask)
{
- unsigned char i;
+ unsigned int i;
int min_idle_us = 0;
- unsigned char remap_array[DML2_MAX_PLANES];
- unsigned char remap_array_size = 0;
+ unsigned int remap_array[DML2_MAX_PLANES];
+ unsigned int remap_array_size = 0;
const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result;
// Create a remap array to enable simple iteration through only masked stream indicies
@@ -711,7 +754,7 @@ bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_
dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz);
dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz);
dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz);
- dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.g6_temp_read_watermark_us * refclk_freq_in_mhz);
+ dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz);
dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz);
dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz);
dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz);
@@ -725,7 +768,7 @@ bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_
dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz);
dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz);
dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz);
- dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.g6_temp_read_watermark_us * refclk_freq_in_mhz);
+ dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz);
dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz);
dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz);
dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz);
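
The rename from g6_temp_read_watermark_us to temp_read_or_ppt_watermark_us does not change the programming model: every watermark is a time in microseconds that is converted to refclk cycles before being written, since 1 MHz is one cycle per microsecond. A one-line sketch of that conversion:

/* cycles = time_us * freq_MHz (1 MHz == 1 cycle per microsecond) */
static unsigned int wm_us_to_refclk_cycles(double watermark_us, double refclk_freq_in_mhz)
{
	return (unsigned int)(watermark_us * refclk_freq_in_mhz);
}

/* e.g. a 9.0 us watermark at a 100 MHz refclk programs 900 cycles */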
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c
index a31db5742675..e763c8e45da8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn3.c
@@ -195,11 +195,11 @@ static int count_planes_with_stream_index(const struct dml2_display_cfg *display
static bool are_timings_trivially_synchronizable(struct display_configuation_with_meta *display_config, int mask)
{
- unsigned char i;
+ unsigned int i;
bool identical = true;
bool contains_drr = false;
- unsigned char remap_array[DML2_MAX_PLANES];
- unsigned char remap_array_size = 0;
+ unsigned int remap_array[DML2_MAX_PLANES];
+ unsigned int remap_array_size = 0;
// Create a remap array to enable simple iteration through only masked stream indicies
for (i = 0; i < display_config->display_config.num_streams; i++) {
@@ -347,8 +347,12 @@ static int find_highest_odm_load_stream_index(
int odm_load, highest_odm_load = -1, highest_odm_load_index = -1;
for (i = 0; i < display_config->num_streams; i++) {
- odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz
+ if (mode_support_result->cfg_support_info.stream_support_info[i].odms_used > 0)
+ odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz
/ mode_support_result->cfg_support_info.stream_support_info[i].odms_used;
+ else
+ odm_load = 0;
+
if (odm_load > highest_odm_load) {
highest_odm_load_index = i;
highest_odm_load = odm_load;
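
Both copies of find_highest_odm_load_stream_index() (here and in the dcn4_fams2 PMO below) gain the same guard: a stream reporting odms_used == 0 must not trigger a divide-by-zero, so its ODM load is simply treated as zero. Expressed as a small helper for clarity (hypothetical, not part of the patch):

/* per-ODM-segment pixel load for a stream; zero when no ODM segments are in use */
static int stream_odm_load_khz(int pixel_clock_khz, int odms_used)
{
	return odms_used > 0 ? pixel_clock_khz / odms_used : 0;
}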
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c
index 92269f0e50ed..a3324f7b9ba6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c
@@ -13,32 +13,32 @@ static const double MIN_BLANK_STUTTER_FACTOR = 3.0;
static const struct dml2_pmo_pstate_strategy base_strategy_list_1_display[] = {
// VActive Preferred
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na },
+ .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na },
.allow_state_increase = true,
},
// Then SVP
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na },
+ .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na },
.allow_state_increase = true,
},
// Then VBlank
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na },
+ .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na },
.allow_state_increase = false,
},
// Then DRR
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na },
+ .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na },
.allow_state_increase = true,
},
// Finally VBlank, but allow base clocks for latency to increase
/*
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na },
+ .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na },
.allow_state_increase = true,
},
*/
@@ -49,56 +49,56 @@ static const int base_strategy_list_1_display_size = sizeof(base_strategy_list_1
static const struct dml2_pmo_pstate_strategy base_strategy_list_2_display[] = {
// VActive only is preferred
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na },
+ .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_na, dml2_pstate_method_na },
.allow_state_increase = true,
},
// Then VActive + VBlank
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na },
+ .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na },
.allow_state_increase = false,
},
// Then VBlank only
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na },
+ .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na },
.allow_state_increase = false,
},
// Then SVP + VBlank
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na },
+ .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na },
.allow_state_increase = false,
},
// Then SVP + DRR
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na },
+ .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na },
.allow_state_increase = true,
},
// Then SVP + SVP
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na },
+ .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_fw_svp, dml2_pstate_method_na, dml2_pstate_method_na },
.allow_state_increase = true,
},
// Then DRR + VActive
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na },
+ .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na },
.allow_state_increase = true,
},
// Then DRR + DRR
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na },
+ .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na },
.allow_state_increase = true,
},
// Finally VBlank, but allow base clocks for latency to increase
/*
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na },
+ .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na },
.allow_state_increase = true,
},
*/
@@ -109,32 +109,32 @@ static const int base_strategy_list_2_display_size = sizeof(base_strategy_list_2
static const struct dml2_pmo_pstate_strategy base_strategy_list_3_display[] = {
// All VActive
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na },
+ .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_na },
.allow_state_increase = true,
},
// VActive + 1 VBlank
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na },
+ .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vblank, dml2_pstate_method_na },
.allow_state_increase = false,
},
// All VBlank
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na },
+ .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na },
.allow_state_increase = false,
},
// All DRR
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na },
+ .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_na },
.allow_state_increase = true,
},
// All VBlank, with state increase allowed
/*
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na },
+ .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na },
.allow_state_increase = true,
},
*/
@@ -145,32 +145,32 @@ static const int base_strategy_list_3_display_size = sizeof(base_strategy_list_3
static const struct dml2_pmo_pstate_strategy base_strategy_list_4_display[] = {
// All VActive
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive },
+ .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive },
.allow_state_increase = true,
},
// VActive + 1 VBlank
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank },
+ .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vblank },
.allow_state_increase = false,
},
// All Vblank
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank },
+ .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank },
.allow_state_increase = false,
},
// All DRR
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr },
+ .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr },
.allow_state_increase = true,
},
// All VBlank, with state increase allowed
/*
{
- .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank },
+ .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank },
.allow_state_increase = true,
},
*/
@@ -355,29 +355,30 @@ bool pmo_dcn4_fams2_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_o
return result;
}
-static enum dml2_pmo_pstate_method convert_strategy_to_drr_variant(const enum dml2_pmo_pstate_method base_strategy)
+static enum dml2_pstate_method convert_strategy_to_drr_variant(const enum dml2_pstate_method base_strategy)
{
- enum dml2_pmo_pstate_method variant_strategy = 0;
+ enum dml2_pstate_method variant_strategy = 0;
switch (base_strategy) {
- case dml2_pmo_pstate_strategy_vactive:
- variant_strategy = dml2_pmo_pstate_strategy_fw_vactive_drr;
+ case dml2_pstate_method_vactive:
+ variant_strategy = dml2_pstate_method_fw_vactive_drr;
break;
- case dml2_pmo_pstate_strategy_vblank:
- variant_strategy = dml2_pmo_pstate_strategy_fw_vblank_drr;
+ case dml2_pstate_method_vblank:
+ variant_strategy = dml2_pstate_method_fw_vblank_drr;
break;
- case dml2_pmo_pstate_strategy_fw_svp:
- variant_strategy = dml2_pmo_pstate_strategy_fw_svp_drr;
+ case dml2_pstate_method_fw_svp:
+ variant_strategy = dml2_pstate_method_fw_svp_drr;
break;
- case dml2_pmo_pstate_strategy_fw_vactive_drr:
- case dml2_pmo_pstate_strategy_fw_vblank_drr:
- case dml2_pmo_pstate_strategy_fw_svp_drr:
- case dml2_pmo_pstate_strategy_fw_drr:
- case dml2_pmo_pstate_strategy_reserved_hw:
- case dml2_pmo_pstate_strategy_reserved_fw:
- case dml2_pmo_pstate_strategy_reserved_fw_drr_clamped:
- case dml2_pmo_pstate_strategy_reserved_fw_drr_var:
- case dml2_pmo_pstate_strategy_na:
+ case dml2_pstate_method_fw_vactive_drr:
+ case dml2_pstate_method_fw_vblank_drr:
+ case dml2_pstate_method_fw_svp_drr:
+ case dml2_pstate_method_fw_drr:
+ case dml2_pstate_method_reserved_hw:
+ case dml2_pstate_method_reserved_fw:
+ case dml2_pstate_method_reserved_fw_drr_clamped:
+ case dml2_pstate_method_reserved_fw_drr_var:
+ case dml2_pstate_method_count:
+ case dml2_pstate_method_na:
default:
/* no variant for this mode */
variant_strategy = base_strategy;
@@ -419,23 +420,22 @@ static unsigned int get_num_expanded_strategies(
static void insert_strategy_into_expanded_list(
const struct dml2_pmo_pstate_strategy *per_stream_pstate_strategy,
- int stream_count,
- struct dml2_pmo_init_data *init_data)
+ const int stream_count,
+ struct dml2_pmo_pstate_strategy *expanded_strategy_list,
+ unsigned int *num_expanded_strategies)
{
- struct dml2_pmo_pstate_strategy *expanded_strategy_list = NULL;
-
- expanded_strategy_list = get_expanded_strategy_list(init_data, stream_count);
+ if (expanded_strategy_list && num_expanded_strategies) {
+ memcpy(&expanded_strategy_list[*num_expanded_strategies], per_stream_pstate_strategy, sizeof(struct dml2_pmo_pstate_strategy));
- if (expanded_strategy_list) {
- memcpy(&expanded_strategy_list[init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]], per_stream_pstate_strategy, sizeof(struct dml2_pmo_pstate_strategy));
-
- init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]++;
+ (*num_expanded_strategies)++;
}
}
-static void expand_base_strategy(struct dml2_pmo_instance *pmo,
+static void expand_base_strategy(
const struct dml2_pmo_pstate_strategy *base_strategy,
- unsigned int stream_count)
+ const unsigned int stream_count,
+ struct dml2_pmo_pstate_strategy *expanded_strategy_list,
+ unsigned int *num_expanded_strategies)
{
bool skip_to_next_stream;
bool expanded_strategy_added;
@@ -473,7 +473,7 @@ static void expand_base_strategy(struct dml2_pmo_instance *pmo,
if (i >= stream_count - 1) {
/* insert into strategy list */
- insert_strategy_into_expanded_list(&cur_strategy_list, stream_count, &pmo->init_data);
+ insert_strategy_into_expanded_list(&cur_strategy_list, stream_count, expanded_strategy_list, num_expanded_strategies);
expanded_strategy_added = true;
} else {
/* skip to next stream */
@@ -512,9 +512,9 @@ static void expand_base_strategy(struct dml2_pmo_instance *pmo,
static bool is_variant_method_valid(const struct dml2_pmo_pstate_strategy *base_strategy,
const struct dml2_pmo_pstate_strategy *variant_strategy,
- unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS],
- unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS],
- unsigned int stream_count)
+ const unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS],
+ const unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS],
+ const unsigned int stream_count)
{
bool valid = true;
unsigned int i;
@@ -522,7 +522,7 @@ static bool is_variant_method_valid(const struct dml2_pmo_pstate_strategy *base_
/* check all restrictions are met */
for (i = 0; i < stream_count; i++) {
/* vblank + vblank_drr variants are invalid */
- if (base_strategy->per_stream_pstate_method[i] == dml2_pmo_pstate_strategy_vblank &&
+ if (base_strategy->per_stream_pstate_method[i] == dml2_pstate_method_vblank &&
((num_streams_per_base_method[i] > 0 && num_streams_per_variant_method[i] > 0) ||
num_streams_per_variant_method[i] > 1)) {
valid = false;
@@ -533,9 +533,12 @@ static bool is_variant_method_valid(const struct dml2_pmo_pstate_strategy *base_
return valid;
}
-static void expand_variant_strategy(struct dml2_pmo_instance *pmo,
+static void expand_variant_strategy(
const struct dml2_pmo_pstate_strategy *base_strategy,
- unsigned int stream_count)
+ const unsigned int stream_count,
+ const bool should_permute,
+ struct dml2_pmo_pstate_strategy *expanded_strategy_list,
+ unsigned int *num_expanded_strategies)
{
bool variant_found;
unsigned int i, j;
@@ -544,7 +547,7 @@ static void expand_variant_strategy(struct dml2_pmo_instance *pmo,
unsigned int num_streams_per_method[PMO_DCN4_MAX_DISPLAYS] = { 0 };
unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS] = { 0 };
unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS] = { 0 };
- enum dml2_pmo_pstate_method per_stream_variant_method[DML2_MAX_PLANES];
+ enum dml2_pstate_method per_stream_variant_method[DML2_MAX_PLANES];
struct dml2_pmo_pstate_strategy variant_strategy = { 0 };
/* determine number of displays per method */
@@ -585,7 +588,13 @@ static void expand_variant_strategy(struct dml2_pmo_instance *pmo,
}
if (variant_found && is_variant_method_valid(base_strategy, &variant_strategy, num_streams_per_base_method, num_streams_per_variant_method, stream_count)) {
- expand_base_strategy(pmo, &variant_strategy, stream_count);
+ if (should_permute) {
+ /* permutations are permitted, proceed to expand */
+ expand_base_strategy(&variant_strategy, stream_count, expanded_strategy_list, num_expanded_strategies);
+ } else {
+ /* no permutations allowed, so add to list now */
+ insert_strategy_into_expanded_list(&variant_strategy, stream_count, expanded_strategy_list, num_expanded_strategies);
+ }
}
/* rollback to earliest method with bases remaining */
@@ -612,18 +621,19 @@ static void expand_variant_strategy(struct dml2_pmo_instance *pmo,
}
}
-static void expand_base_strategies(
- struct dml2_pmo_instance *pmo,
- const struct dml2_pmo_pstate_strategy *base_strategies_list,
- const unsigned int num_base_strategies,
- unsigned int stream_count)
+void pmo_dcn4_fams2_expand_base_pstate_strategies(
+ const struct dml2_pmo_pstate_strategy *base_strategies_list,
+ const unsigned int num_base_strategies,
+ const unsigned int stream_count,
+ struct dml2_pmo_pstate_strategy *expanded_strategy_list,
+ unsigned int *num_expanded_strategies)
{
unsigned int i;
/* expand every explicit base strategy (except all DRR) */
for (i = 0; i < num_base_strategies; i++) {
- expand_base_strategy(pmo, &base_strategies_list[i], stream_count);
- expand_variant_strategy(pmo, &base_strategies_list[i], stream_count);
+ expand_base_strategy(&base_strategies_list[i], stream_count, expanded_strategy_list, num_expanded_strategies);
+ expand_variant_strategy(&base_strategies_list[i], stream_count, true, expanded_strategy_list, num_expanded_strategies);
}
}
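
The expansion helpers no longer reach into the pmo instance: insert_strategy_into_expanded_list() and the now-exported pmo_dcn4_fams2_expand_base_pstate_strategies() write into a caller-owned array plus counter, which lets the predefined per-display lists and the new override list in scratch share one code path. A trimmed sketch of that append pattern (struct strategy and MAX_STRATEGIES are stand-ins):

#define MAX_STRATEGIES 32		/* hypothetical bound */

struct strategy { int methods[4]; };	/* trimmed stand-in for dml2_pmo_pstate_strategy */

/* append into a caller-owned list and bump the caller-owned counter */
static void append_strategy(const struct strategy *s,
			    struct strategy *list, unsigned int *count)
{
	if (list && count && *count < MAX_STRATEGIES)
		list[(*count)++] = *s;
}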
@@ -652,25 +662,45 @@ bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out)
DML2_ASSERT(base_strategy_list_1_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES);
/* populate list */
- expand_base_strategies(pmo, base_strategy_list_1_display, base_strategy_list_1_display_size, 1);
+ pmo_dcn4_fams2_expand_base_pstate_strategies(
+ base_strategy_list_1_display,
+ base_strategy_list_1_display_size,
+ i,
+ pmo->init_data.pmo_dcn4.expanded_strategy_list_1_display,
+ &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]);
break;
case 2:
DML2_ASSERT(base_strategy_list_2_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES);
/* populate list */
- expand_base_strategies(pmo, base_strategy_list_2_display, base_strategy_list_2_display_size, 2);
+ pmo_dcn4_fams2_expand_base_pstate_strategies(
+ base_strategy_list_2_display,
+ base_strategy_list_2_display_size,
+ i,
+ pmo->init_data.pmo_dcn4.expanded_strategy_list_2_display,
+ &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]);
break;
case 3:
DML2_ASSERT(base_strategy_list_3_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES);
/* populate list */
- expand_base_strategies(pmo, base_strategy_list_3_display, base_strategy_list_3_display_size, 3);
+ pmo_dcn4_fams2_expand_base_pstate_strategies(
+ base_strategy_list_3_display,
+ base_strategy_list_3_display_size,
+ i,
+ pmo->init_data.pmo_dcn4.expanded_strategy_list_3_display,
+ &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]);
break;
case 4:
DML2_ASSERT(base_strategy_list_4_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES);
/* populate list */
- expand_base_strategies(pmo, base_strategy_list_4_display, base_strategy_list_4_display_size, 4);
+ pmo_dcn4_fams2_expand_base_pstate_strategies(
+ base_strategy_list_4_display,
+ base_strategy_list_4_display_size,
+ i,
+ pmo->init_data.pmo_dcn4.expanded_strategy_list_4_display,
+ &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]);
break;
}
}
@@ -783,8 +813,12 @@ static int find_highest_odm_load_stream_index(
int odm_load, highest_odm_load = -1, highest_odm_load_index = -1;
for (i = 0; i < display_config->num_streams; i++) {
- odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz
+ if (mode_support_result->cfg_support_info.stream_support_info[i].odms_used > 0)
+ odm_load = display_config->stream_descriptors[i].timing.pixel_clock_khz
/ mode_support_result->cfg_support_info.stream_support_info[i].odms_used;
+ else
+ odm_load = 0;
+
if (odm_load > highest_odm_load) {
highest_odm_load_index = i;
highest_odm_load = odm_load;
@@ -941,11 +975,8 @@ static void build_synchronized_timing_groups(
/* find synchronizable timing groups */
for (j = i + 1; j < display_config->display_config.num_streams; j++) {
if (memcmp(master_timing,
- &display_config->display_config.stream_descriptors[j].timing,
- sizeof(struct dml2_timing_cfg)) == 0 &&
- display_config->display_config.stream_descriptors[i].output.output_encoder == display_config->display_config.stream_descriptors[j].output.output_encoder &&
- (display_config->display_config.stream_descriptors[i].output.output_encoder != dml2_hdmi || //hdmi requires formats match
- display_config->display_config.stream_descriptors[i].output.output_format == display_config->display_config.stream_descriptors[j].output.output_format)) {
+ &display_config->display_config.stream_descriptors[j].timing,
+ sizeof(struct dml2_timing_cfg)) == 0) {
set_bit_in_bitfield(&pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], j);
set_bit_in_bitfield(&stream_mapped_mask, j);
}
@@ -959,7 +990,7 @@ static bool all_timings_support_vactive(const struct dml2_pmo_instance *pmo,
const struct display_configuation_with_meta *display_config,
unsigned int mask)
{
- unsigned char i;
+ unsigned int i;
bool valid = true;
// Create a remap array to enable simple iteration through only masked stream indicies
@@ -1008,7 +1039,7 @@ static bool all_timings_support_drr(const struct dml2_pmo_instance *pmo,
const struct display_configuation_with_meta *display_config,
unsigned int mask)
{
- unsigned char i;
+ unsigned int i;
for (i = 0; i < DML2_MAX_PLANES; i++) {
const struct dml2_stream_parameters *stream_descriptor;
const struct dml2_fams2_meta *stream_fams2_meta;
@@ -1050,7 +1081,7 @@ static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo,
const struct dml2_plane_parameters *plane_descriptor;
const struct dml2_fams2_meta *stream_fams2_meta;
unsigned int microschedule_vlines;
- unsigned char i;
+ unsigned int i;
unsigned int num_planes_per_stream[DML2_MAX_PLANES] = { 0 };
@@ -1106,24 +1137,73 @@ static void insert_into_candidate_list(const struct dml2_pmo_pstate_strategy *ps
scratch->pmo_dcn4.num_pstate_candidates++;
}
-static bool all_planes_match_method(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pmo_pstate_method method)
+static enum dml2_pstate_method uclk_pstate_strategy_override_to_pstate_method(const enum dml2_uclk_pstate_change_strategy override_strategy)
{
- unsigned char i;
- enum dml2_uclk_pstate_change_strategy matching_strategy = (enum dml2_uclk_pstate_change_strategy) dml2_pmo_pstate_strategy_na;
+ enum dml2_pstate_method method = dml2_pstate_method_na;
- if (method == dml2_pmo_pstate_strategy_vactive || method == dml2_pmo_pstate_strategy_fw_vactive_drr)
- matching_strategy = dml2_uclk_pstate_change_strategy_force_vactive;
- else if (method == dml2_pmo_pstate_strategy_vblank || method == dml2_pmo_pstate_strategy_fw_vblank_drr)
- matching_strategy = dml2_uclk_pstate_change_strategy_force_vblank;
- else if (method == dml2_pmo_pstate_strategy_fw_svp)
- matching_strategy = dml2_uclk_pstate_change_strategy_force_mall_svp;
- else if (method == dml2_pmo_pstate_strategy_fw_drr)
- matching_strategy = dml2_uclk_pstate_change_strategy_force_drr;
+ switch (override_strategy) {
+ case dml2_uclk_pstate_change_strategy_force_vactive:
+ method = dml2_pstate_method_vactive;
+ break;
+ case dml2_uclk_pstate_change_strategy_force_vblank:
+ method = dml2_pstate_method_vblank;
+ break;
+ case dml2_uclk_pstate_change_strategy_force_drr:
+ method = dml2_pstate_method_fw_drr;
+ break;
+ case dml2_uclk_pstate_change_strategy_force_mall_svp:
+ method = dml2_pstate_method_fw_svp;
+ break;
+ case dml2_uclk_pstate_change_strategy_force_mall_full_frame:
+ case dml2_uclk_pstate_change_strategy_auto:
+ default:
+ method = dml2_pstate_method_na;
+ }
+
+ return method;
+}
+
+static enum dml2_uclk_pstate_change_strategy pstate_method_to_uclk_pstate_strategy_override(const enum dml2_pstate_method method)
+{
+ enum dml2_uclk_pstate_change_strategy override_strategy = dml2_uclk_pstate_change_strategy_auto;
+
+ switch (method) {
+ case dml2_pstate_method_vactive:
+ case dml2_pstate_method_fw_vactive_drr:
+ override_strategy = dml2_uclk_pstate_change_strategy_force_vactive;
+ break;
+ case dml2_pstate_method_vblank:
+ case dml2_pstate_method_fw_vblank_drr:
+ override_strategy = dml2_uclk_pstate_change_strategy_force_vblank;
+ break;
+ case dml2_pstate_method_fw_svp:
+ case dml2_pstate_method_fw_svp_drr:
+ override_strategy = dml2_uclk_pstate_change_strategy_force_mall_svp;
+ break;
+ case dml2_pstate_method_fw_drr:
+ override_strategy = dml2_uclk_pstate_change_strategy_force_drr;
+ break;
+ case dml2_pstate_method_reserved_hw:
+ case dml2_pstate_method_reserved_fw:
+ case dml2_pstate_method_reserved_fw_drr_clamped:
+ case dml2_pstate_method_reserved_fw_drr_var:
+ case dml2_pstate_method_count:
+ case dml2_pstate_method_na:
+ default:
+ override_strategy = dml2_uclk_pstate_change_strategy_auto;
+ }
+
+ return override_strategy;
+}
+
+static bool all_planes_match_method(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pstate_method method)
+{
+ unsigned int i;
for (i = 0; i < DML2_MAX_PLANES; i++) {
if (is_bit_set_in_bitfield(plane_mask, i)) {
if (display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != dml2_uclk_pstate_change_strategy_auto &&
- display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != matching_strategy)
+ display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != pstate_method_to_uclk_pstate_strategy_override(method))
return false;
}
}
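
Strategy overrides are now translated through a pair of explicit mappings: uclk_pstate_strategy_override_to_pstate_method() when building the override base strategy, and pstate_method_to_uclk_pstate_strategy_override() for the per-plane match check above. A reduced sketch of the two directions with trimmed enums (M_*/O_* stand in for the real dml2 enums):

enum method   { M_NA, M_VACTIVE, M_VBLANK, M_FW_SVP, M_FW_DRR };
enum override { O_AUTO, O_FORCE_VACTIVE, O_FORCE_VBLANK, O_FORCE_SVP, O_FORCE_DRR };

static enum method override_to_method(enum override o)
{
	switch (o) {
	case O_FORCE_VACTIVE: return M_VACTIVE;
	case O_FORCE_VBLANK:  return M_VBLANK;
	case O_FORCE_SVP:     return M_FW_SVP;
	case O_FORCE_DRR:     return M_FW_DRR;
	default:              return M_NA;	/* auto / full-frame map to no method */
	}
}

static enum override method_to_override(enum method m)
{
	switch (m) {
	case M_VACTIVE: return O_FORCE_VACTIVE;
	case M_VBLANK:  return O_FORCE_VBLANK;
	case M_FW_SVP:  return O_FORCE_SVP;
	case M_FW_DRR:  return O_FORCE_DRR;
	default:        return O_AUTO;
	}
}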
@@ -1149,32 +1229,33 @@ static void build_method_scheduling_params(
static struct dml2_fams2_per_method_common_meta *get_per_method_common_meta(
struct dml2_pmo_instance *pmo,
- enum dml2_pmo_pstate_method stream_pstate_method,
+ enum dml2_pstate_method stream_pstate_method,
int stream_idx)
{
struct dml2_fams2_per_method_common_meta *stream_method_fams2_meta = NULL;
switch (stream_pstate_method) {
- case dml2_pmo_pstate_strategy_vactive:
- case dml2_pmo_pstate_strategy_fw_vactive_drr:
+ case dml2_pstate_method_vactive:
+ case dml2_pstate_method_fw_vactive_drr:
stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_vactive.common;
break;
- case dml2_pmo_pstate_strategy_vblank:
- case dml2_pmo_pstate_strategy_fw_vblank_drr:
+ case dml2_pstate_method_vblank:
+ case dml2_pstate_method_fw_vblank_drr:
stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_vblank.common;
break;
- case dml2_pmo_pstate_strategy_fw_svp:
- case dml2_pmo_pstate_strategy_fw_svp_drr:
+ case dml2_pstate_method_fw_svp:
+ case dml2_pstate_method_fw_svp_drr:
stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_subvp.common;
break;
- case dml2_pmo_pstate_strategy_fw_drr:
+ case dml2_pstate_method_fw_drr:
stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_drr.common;
break;
- case dml2_pmo_pstate_strategy_reserved_hw:
- case dml2_pmo_pstate_strategy_reserved_fw:
- case dml2_pmo_pstate_strategy_reserved_fw_drr_clamped:
- case dml2_pmo_pstate_strategy_reserved_fw_drr_var:
- case dml2_pmo_pstate_strategy_na:
+ case dml2_pstate_method_reserved_hw:
+ case dml2_pstate_method_reserved_fw:
+ case dml2_pstate_method_reserved_fw_drr_clamped:
+ case dml2_pstate_method_reserved_fw_drr_var:
+ case dml2_pstate_method_count:
+ case dml2_pstate_method_na:
default:
stream_method_fams2_meta = NULL;
}
@@ -1215,7 +1296,7 @@ static bool is_timing_group_schedulable(
if (is_bit_set_in_bitfield(pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], i)) {
stream_method_fams2_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[i], i);
if (!stream_method_fams2_meta)
- return false;
+ continue;
if (group_fams2_meta->allow_start_otg_vline < stream_method_fams2_meta->allow_start_otg_vline) {
/* set group allow start to larger otg vline */
@@ -1295,7 +1376,7 @@ static bool is_config_schedulable(
if (j_disallow_us < jp1_disallow_us) {
/* swap as A < B */
swap(s->pmo_dcn4.sorted_group_gtl_disallow_index[j],
- s->pmo_dcn4.sorted_group_gtl_disallow_index[j+1]);
+ s->pmo_dcn4.sorted_group_gtl_disallow_index[j + 1]);
swapped = true;
}
}
@@ -1354,7 +1435,7 @@ static bool is_config_schedulable(
if (j_period_us < jp1_period_us) {
/* swap as A < B */
swap(s->pmo_dcn4.sorted_group_gtl_period_index[j],
- s->pmo_dcn4.sorted_group_gtl_period_index[j+1]);
+ s->pmo_dcn4.sorted_group_gtl_period_index[j + 1]);
swapped = true;
}
}
@@ -1413,7 +1494,7 @@ static bool is_config_schedulable(
static bool stream_matches_drr_policy(struct dml2_pmo_instance *pmo,
const struct display_configuation_with_meta *display_cfg,
- const enum dml2_pmo_pstate_method stream_pstate_method,
+ const enum dml2_pstate_method stream_pstate_method,
unsigned int stream_index)
{
const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[stream_index];
@@ -1468,7 +1549,7 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins
{
struct dml2_pmo_scratch *s = &pmo->scratch;
- unsigned char stream_index = 0;
+ unsigned int stream_index = 0;
unsigned int svp_count = 0;
unsigned int svp_stream_mask = 0;
@@ -1494,19 +1575,19 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins
strategy_matches_drr_requirements &=
stream_matches_drr_policy(pmo, display_cfg, pstate_strategy->per_stream_pstate_method[stream_index], stream_index);
- if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp ||
- pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) {
+ if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp ||
+ pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp_drr) {
svp_count++;
set_bit_in_bitfield(&svp_stream_mask, stream_index);
- } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_drr) {
+ } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_drr) {
drr_count++;
set_bit_in_bitfield(&drr_stream_mask, stream_index);
- } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vactive ||
- pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) {
+ } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_vactive ||
+ pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vactive_drr) {
vactive_count++;
set_bit_in_bitfield(&vactive_stream_mask, stream_index);
- } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vblank ||
- pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) {
+ } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_vblank ||
+ pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vblank_drr) {
vblank_count++;
set_bit_in_bitfield(&vblank_stream_mask, stream_index);
}
@@ -1532,7 +1613,7 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins
static int get_vactive_pstate_margin(const struct display_configuation_with_meta *display_cfg, int plane_mask)
{
- unsigned char i;
+ unsigned int i;
int min_vactive_margin_us = 0xFFFFFFF;
for (i = 0; i < DML2_MAX_PLANES; i++) {
@@ -1625,7 +1706,7 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo,
/* for single stream, guarantee at least an instant of allow */
stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines = (unsigned int)math_floor(
math_max2(0.0,
- timing->v_active - stream_fams2_meta->min_allow_width_otg_vlines - stream_fams2_meta->dram_clk_change_blackout_otg_vlines));
+ timing->v_active - math_max2(1.0, stream_fams2_meta->min_allow_width_otg_vlines) - stream_fams2_meta->dram_clk_change_blackout_otg_vlines));
} else {
/* for multi stream, bound to a max fill time defined by IP caps */
stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines =
@@ -1738,8 +1819,10 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp
struct display_configuation_with_meta *display_config;
const struct dml2_plane_parameters *plane_descriptor;
const struct dml2_pmo_pstate_strategy *strategy_list = NULL;
+ struct dml2_pmo_pstate_strategy override_base_strategy = { 0 };
unsigned int strategy_list_size = 0;
- unsigned char plane_index, stream_index, i;
+ unsigned int plane_index, stream_index, i;
+ bool build_override_strategy = true;
state->performed = true;
in_out->base_display_config->stage3.min_clk_index_for_latency = in_out->base_display_config->stage1.min_clk_index_for_latency;
@@ -1763,7 +1846,11 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp
set_bit_in_bitfield(&s->pmo_dcn4.stream_plane_mask[plane_descriptor->stream_index], plane_index);
- state->pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vactive;
+ state->pstate_switch_modes[plane_index] = dml2_pstate_method_vactive;
+
+ build_override_strategy &= plane_descriptor->overrides.uclk_pstate_change_strategy != dml2_uclk_pstate_change_strategy_auto;
+ override_base_strategy.per_stream_pstate_method[plane_descriptor->stream_index] =
+ uclk_pstate_strategy_override_to_pstate_method(plane_descriptor->overrides.uclk_pstate_change_strategy);
}
// Figure out which streams can do vactive, and also build up implicit SVP and FAMS2 meta
@@ -1781,13 +1868,30 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp
/* get synchronized timing groups */
build_synchronized_timing_groups(pmo, display_config);
- strategy_list = get_expanded_strategy_list(&pmo->init_data, display_config->display_config.num_streams);
- if (!strategy_list)
- return false;
-
- strategy_list_size = get_num_expanded_strategies(&pmo->init_data, display_config->display_config.num_streams);
+ if (build_override_strategy) {
+ /* build expanded override strategy list (no permutations) */
+ override_base_strategy.allow_state_increase = true;
+ s->pmo_dcn4.num_expanded_override_strategies = 0;
+ insert_strategy_into_expanded_list(&override_base_strategy,
+ display_config->display_config.num_streams,
+ s->pmo_dcn4.expanded_override_strategy_list,
+ &s->pmo_dcn4.num_expanded_override_strategies);
+ expand_variant_strategy(&override_base_strategy,
+ display_config->display_config.num_streams,
+ false,
+ s->pmo_dcn4.expanded_override_strategy_list,
+ &s->pmo_dcn4.num_expanded_override_strategies);
+
+ /* use override strategy list */
+ strategy_list = s->pmo_dcn4.expanded_override_strategy_list;
+ strategy_list_size = s->pmo_dcn4.num_expanded_override_strategies;
+ } else {
+ /* use predefined strategy list */
+ strategy_list = get_expanded_strategy_list(&pmo->init_data, display_config->display_config.num_streams);
+ strategy_list_size = get_num_expanded_strategies(&pmo->init_data, display_config->display_config.num_streams);
+ }
- if (strategy_list_size == 0)
+ if (!strategy_list || strategy_list_size == 0)
return false;
s->pmo_dcn4.num_pstate_candidates = 0;
@@ -1799,7 +1903,7 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp
}
if (s->pmo_dcn4.num_pstate_candidates > 0) {
- s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.num_pstate_candidates - 1].allow_state_increase = true;
+ s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.num_pstate_candidates-1].allow_state_increase = true;
s->pmo_dcn4.cur_pstate_candidate = -1;
return true;
} else {
@@ -1832,7 +1936,7 @@ static void reset_display_configuration(struct display_configuation_with_meta *d
// Reset strategy to auto
plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_auto;
- display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_not_supported;
+ display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_na;
}
}
@@ -1840,7 +1944,7 @@ static void setup_planes_for_drr_by_mask(struct display_configuation_with_meta *
struct dml2_pmo_instance *pmo,
int plane_mask)
{
- unsigned char plane_index;
+ unsigned int plane_index;
struct dml2_plane_parameters *plane;
for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) {
@@ -1849,7 +1953,7 @@ static void setup_planes_for_drr_by_mask(struct display_configuation_with_meta *
plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_force_drr;
- display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_drr;
+ display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_drr;
}
}
@@ -1861,13 +1965,13 @@ static void setup_planes_for_svp_by_mask(struct display_configuation_with_meta *
{
struct dml2_pmo_scratch *scratch = &pmo->scratch;
- unsigned char plane_index;
+ unsigned int plane_index;
int stream_index = -1;
for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) {
if (is_bit_set_in_bitfield(plane_mask, plane_index)) {
stream_index = (char)display_config->display_config.plane_descriptors[plane_index].stream_index;
- display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_subvp_phantom;
+ display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_svp;
}
}
@@ -1884,13 +1988,13 @@ static void setup_planes_for_svp_drr_by_mask(struct display_configuation_with_me
{
struct dml2_pmo_scratch *scratch = &pmo->scratch;
- unsigned char plane_index;
+ unsigned int plane_index;
int stream_index = -1;
for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) {
if (is_bit_set_in_bitfield(plane_mask, plane_index)) {
stream_index = (char)display_config->display_config.plane_descriptors[plane_index].stream_index;
- display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_subvp_phantom_drr;
+ display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_svp_drr;
}
}
@@ -1905,7 +2009,7 @@ static void setup_planes_for_vblank_by_mask(struct display_configuation_with_met
struct dml2_pmo_instance *pmo,
int plane_mask)
{
- unsigned char plane_index;
+ unsigned int plane_index;
struct dml2_plane_parameters *plane;
for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) {
@@ -1915,7 +2019,7 @@ static void setup_planes_for_vblank_by_mask(struct display_configuation_with_met
plane->overrides.reserved_vblank_time_ns = (long)math_max2(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000.0,
plane->overrides.reserved_vblank_time_ns);
- display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vblank;
+ display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_vblank;
}
}
@@ -1925,7 +2029,7 @@ static void setup_planes_for_vblank_drr_by_mask(struct display_configuation_with
struct dml2_pmo_instance *pmo,
int plane_mask)
{
- unsigned char plane_index;
+ unsigned int plane_index;
struct dml2_plane_parameters *plane;
for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) {
@@ -1933,7 +2037,7 @@ static void setup_planes_for_vblank_drr_by_mask(struct display_configuation_with
plane = &display_config->display_config.plane_descriptors[plane_index];
plane->overrides.reserved_vblank_time_ns = (long)(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000);
- display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_vblank_drr;
+ display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_vblank_drr;
}
}
}
@@ -1942,14 +2046,14 @@ static void setup_planes_for_vactive_by_mask(struct display_configuation_with_me
struct dml2_pmo_instance *pmo,
int plane_mask)
{
- unsigned char plane_index;
+ unsigned int plane_index;
unsigned int stream_index;
for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) {
if (is_bit_set_in_bitfield(plane_mask, plane_index)) {
stream_index = display_config->display_config.plane_descriptors[plane_index].stream_index;
- display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vactive;
+ display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_vactive;
if (!pmo->options->disable_vactive_det_fill_bw_pad) {
display_config->display_config.plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us =
@@ -1963,14 +2067,14 @@ static void setup_planes_for_vactive_drr_by_mask(struct display_configuation_wit
struct dml2_pmo_instance *pmo,
int plane_mask)
{
- unsigned char plane_index;
+ unsigned int plane_index;
unsigned int stream_index;
for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) {
if (is_bit_set_in_bitfield(plane_mask, plane_index)) {
stream_index = display_config->display_config.plane_descriptors[plane_index].stream_index;
- display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_vactive_drr;
+ display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_vactive_drr;
if (!pmo->options->disable_vactive_det_fill_bw_pad) {
display_config->display_config.plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us =
@@ -1992,26 +2096,26 @@ static bool setup_display_config(struct display_configuation_with_meta *display_
for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) {
- if (pmo->scratch.pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_na) {
+ if (pmo->scratch.pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_na) {
success = false;
break;
- } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vactive) {
+ } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_vactive) {
setup_planes_for_vactive_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]);
- } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vblank) {
+ } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_vblank) {
setup_planes_for_vblank_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]);
- } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp) {
+ } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp) {
fams2_required = true;
setup_planes_for_svp_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]);
- } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) {
+ } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vactive_drr) {
fams2_required = true;
setup_planes_for_vactive_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]);
- } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) {
+ } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vblank_drr) {
fams2_required = true;
setup_planes_for_vblank_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]);
- } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) {
+ } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp_drr) {
fams2_required = true;
setup_planes_for_svp_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]);
- } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_drr) {
+ } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_drr) {
fams2_required = true;
setup_planes_for_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]);
}
@@ -2031,7 +2135,7 @@ static bool setup_display_config(struct display_configuation_with_meta *display_
static int get_minimum_reserved_time_us_for_planes(struct display_configuation_with_meta *display_config, int plane_mask)
{
int min_time_us = 0xFFFFFF;
- unsigned char plane_index = 0;
+ unsigned int plane_index = 0;
for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) {
if (is_bit_set_in_bitfield(plane_mask, plane_index)) {
@@ -2066,34 +2170,34 @@ bool pmo_dcn4_fams2_test_for_pstate_support(struct dml2_pmo_test_for_pstate_supp
for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) {
struct dml2_fams2_meta *stream_fams2_meta = &s->pmo_dcn4.stream_fams2_meta[stream_index];
- if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vactive ||
- s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) {
+ if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_vactive ||
+ s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vactive_drr) {
if (get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < (MIN_VACTIVE_MARGIN_PCT * in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us) ||
get_vactive_det_fill_latency_delay_us(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) > stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_us) {
p_state_supported = false;
break;
}
- } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vblank ||
- s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) {
+ } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_vblank ||
+ s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vblank_drr) {
if (get_minimum_reserved_time_us_for_planes(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) <
REQUIRED_RESERVED_TIME ||
get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_VBLANK) {
p_state_supported = false;
break;
}
- } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp ||
- s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) {
+ } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp ||
+ s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp_drr) {
if (in_out->base_display_config->stage3.stream_svp_meta[stream_index].valid == false) {
p_state_supported = false;
break;
}
- } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_drr) {
- if (!all_planes_match_method(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pmo_pstate_strategy_fw_drr) ||
+ } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_drr) {
+ if (!all_planes_match_method(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pstate_method_fw_drr) ||
get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_DRR) {
p_state_supported = false;
break;
}
- } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_na) {
+ } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_na) {
p_state_supported = false;
break;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h
index 0c25bd3e9ac0..6baab7ad6ecc 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h
@@ -23,4 +23,11 @@ bool pmo_dcn4_fams2_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in
bool pmo_dcn4_fams2_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in_out);
bool pmo_dcn4_fams2_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in_out *in_out);
+void pmo_dcn4_fams2_expand_base_pstate_strategies(
+ const struct dml2_pmo_pstate_strategy *base_strategies_list,
+ const unsigned int num_base_strategies,
+ const unsigned int stream_count,
+ struct dml2_pmo_pstate_strategy *expanded_strategy_list,
+ unsigned int *num_expanded_strategies);
+
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c
index add51d41a515..7ed0242a4b33 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c
@@ -72,7 +72,6 @@ bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance *
out->init_for_stutter = pmo_dcn4_fams2_init_for_stutter;
out->test_for_stutter = pmo_dcn4_fams2_test_for_stutter;
out->optimize_for_stutter = pmo_dcn4_fams2_optimize_for_stutter;
-
result = true;
break;
case dml2_project_invalid:
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c
new file mode 100644
index 000000000000..f88931ccbc5e
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml_top.h"
+#include "dml2_internal_shared_types.h"
+#include "dml2_top_soc15.h"
+
+unsigned int dml2_get_instance_size_bytes(void)
+{
+ return sizeof(struct dml2_instance);
+}
+
+bool dml2_initialize_instance(struct dml2_initialize_instance_in_out *in_out)
+{
+ switch (in_out->options.project_id) {
+ case dml2_project_dcn4x_stage1:
+ case dml2_project_dcn4x_stage2:
+ case dml2_project_dcn4x_stage2_auto_drr_svp:
+ return dml2_top_soc15_initialize_instance(in_out);
+ case dml2_project_invalid:
+ default:
+ return false;
+ }
+}
+
+bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out)
+{
+ if (!in_out->dml2_instance->funcs.check_mode_supported)
+ return false;
+
+ return in_out->dml2_instance->funcs.check_mode_supported(in_out);
+}
+
+bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out)
+{
+ if (!in_out->dml2_instance->funcs.build_mode_programming)
+ return false;
+
+ return in_out->dml2_instance->funcs.build_mode_programming(in_out);
+}
+
+bool dml2_build_mcache_programming(struct dml2_build_mcache_programming_in_out *in_out)
+{
+ if (!in_out->dml2_instance->funcs.build_mcache_programming)
+ return false;
+
+ return in_out->dml2_instance->funcs.build_mcache_programming(in_out);
+}
+
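
The wrappers above only guard and forward: the instance carries a function-pointer table installed by the project-specific initializer, and each top-level entry point returns false if the corresponding hook was never set. A minimal, self-contained sketch of that guard-and-forward pattern (all names below are illustrative, not the driver's):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct instance {
	struct {
		bool (*check_mode_supported)(int mode);
	} funcs;
};

static bool check_mode_supported(struct instance *inst, int mode)
{
	if (!inst->funcs.check_mode_supported)
		return false;	/* hook never installed by the project init */
	return inst->funcs.check_mode_supported(mode);
}

static bool dcn4x_check_mode_supported(int mode)
{
	return mode < 8;	/* hypothetical per-project implementation */
}

int main(void)
{
	struct instance inst = { .funcs = { .check_mode_supported = NULL } };

	printf("%d\n", check_mode_supported(&inst, 3));	/* 0: hook missing */
	inst.funcs.check_mode_supported = dcn4x_check_mode_supported;
	printf("%d\n", check_mode_supported(&inst, 3));	/* 1 */
	return 0;
}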
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c
new file mode 100644
index 000000000000..5e14d85821e2
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml2_top_legacy.h"
+#include "dml2_top_soc15.h"
+#include "dml2_core_factory.h"
+#include "dml2_pmo_factory.h"
+#include "display_mode_core_structs.h"
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.h
new file mode 100644
index 000000000000..14d0ae03dce6
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.h
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#ifndef __DML2_TOP_LEGACY_H__
+#define __DML2_TOP_LEGACY_H__
+#include "dml2_internal_shared_types.h"
+bool dml2_top_legacy_initialize_instance(struct dml2_initialize_instance_in_out *in_out);
+#endif /* __DML2_TOP_LEGACY_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c
deleted file mode 100644
index d0e026d981b5..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c
+++ /dev/null
@@ -1,307 +0,0 @@
-// SPDX-License-Identifier: MIT
-//
-// Copyright 2024 Advanced Micro Devices, Inc.
-
-#include "dml2_top_optimization.h"
-#include "dml2_internal_shared_types.h"
-#include "dml_top_mcache.h"
-
-static void copy_display_configuration_with_meta(struct display_configuation_with_meta *dst, const struct display_configuation_with_meta *src)
-{
- memcpy(dst, src, sizeof(struct display_configuation_with_meta));
-}
-
-bool dml2_top_optimization_init_function_min_clk_for_latency(const struct optimization_init_function_params *params)
-{
- struct dml2_optimization_stage1_state *state = &params->display_config->stage1;
-
- state->performed = true;
-
- return true;
-}
-
-bool dml2_top_optimization_test_function_min_clk_for_latency(const struct optimization_test_function_params *params)
-{
- struct dml2_optimization_stage1_state *state = &params->display_config->stage1;
-
- return state->min_clk_index_for_latency == 0;
-}
-
-bool dml2_top_optimization_optimize_function_min_clk_for_latency(const struct optimization_optimize_function_params *params)
-{
- bool result = false;
-
- if (params->display_config->stage1.min_clk_index_for_latency > 0) {
- copy_display_configuration_with_meta(params->optimized_display_config, params->display_config);
- params->optimized_display_config->stage1.min_clk_index_for_latency--;
- result = true;
- }
-
- return result;
-}
-
-bool dml2_top_optimization_test_function_mcache(const struct optimization_test_function_params *params)
-{
- struct dml2_optimization_test_function_locals *l = params->locals;
- bool mcache_success = false;
- bool result = false;
-
- memset(l, 0, sizeof(struct dml2_optimization_test_function_locals));
-
- l->test_mcache.calc_mcache_count_params.dml2_instance = params->dml;
- l->test_mcache.calc_mcache_count_params.display_config = &params->display_config->display_config;
- l->test_mcache.calc_mcache_count_params.mcache_allocations = params->display_config->stage2.mcache_allocations;
-
- result = dml2_top_mcache_calc_mcache_count_and_offsets(&l->test_mcache.calc_mcache_count_params); // use core to get the basic mcache_allocations
-
- if (result) {
- l->test_mcache.assign_global_mcache_ids_params.allocations = params->display_config->stage2.mcache_allocations;
- l->test_mcache.assign_global_mcache_ids_params.num_allocations = params->display_config->display_config.num_planes;
-
- dml2_top_mcache_assign_global_mcache_ids(&l->test_mcache.assign_global_mcache_ids_params);
-
- l->test_mcache.validate_admissibility_params.dml2_instance = params->dml;
- l->test_mcache.validate_admissibility_params.display_cfg = &params->display_config->display_config;
- l->test_mcache.validate_admissibility_params.mcache_allocations = params->display_config->stage2.mcache_allocations;
- l->test_mcache.validate_admissibility_params.cfg_support_info = &params->display_config->mode_support_result.cfg_support_info;
-
- mcache_success = dml2_top_mcache_validate_admissability(&l->test_mcache.validate_admissibility_params); // also find the shift to make mcache allocation works
-
- memcpy(params->display_config->stage2.per_plane_mcache_support, l->test_mcache.validate_admissibility_params.per_plane_status, sizeof(bool) * DML2_MAX_PLANES);
- }
-
- return mcache_success;
-}
-
-bool dml2_top_optimization_optimize_function_mcache(const struct optimization_optimize_function_params *params)
-{
- struct dml2_optimization_optimize_function_locals *l = params->locals;
- bool optimize_success = false;
-
- if (params->last_candidate_supported == false)
- return false;
-
- copy_display_configuration_with_meta(params->optimized_display_config, params->display_config);
-
- l->optimize_mcache.optimize_mcache_params.instance = &params->dml->pmo_instance;
- l->optimize_mcache.optimize_mcache_params.dcc_mcache_supported = params->display_config->stage2.per_plane_mcache_support;
- l->optimize_mcache.optimize_mcache_params.display_config = &params->display_config->display_config;
- l->optimize_mcache.optimize_mcache_params.optimized_display_cfg = &params->optimized_display_config->display_config;
- l->optimize_mcache.optimize_mcache_params.cfg_support_info = &params->optimized_display_config->mode_support_result.cfg_support_info;
-
- optimize_success = params->dml->pmo_instance.optimize_dcc_mcache(&l->optimize_mcache.optimize_mcache_params);
-
- return optimize_success;
-}
-
-bool dml2_top_optimization_init_function_vmin(const struct optimization_init_function_params *params)
-{
- struct dml2_optimization_init_function_locals *l = params->locals;
-
- l->vmin.init_params.instance = &params->dml->pmo_instance;
- l->vmin.init_params.base_display_config = params->display_config;
- return params->dml->pmo_instance.init_for_vmin(&l->vmin.init_params);
-}
-
-bool dml2_top_optimization_test_function_vmin(const struct optimization_test_function_params *params)
-{
- struct dml2_optimization_test_function_locals *l = params->locals;
-
- l->test_vmin.pmo_test_vmin_params.instance = &params->dml->pmo_instance;
- l->test_vmin.pmo_test_vmin_params.display_config = params->display_config;
- l->test_vmin.pmo_test_vmin_params.vmin_limits = &params->dml->soc_bbox.vmin_limit;
- return params->dml->pmo_instance.test_for_vmin(&l->test_vmin.pmo_test_vmin_params);
-}
-
-bool dml2_top_optimization_optimize_function_vmin(const struct optimization_optimize_function_params *params)
-{
- struct dml2_optimization_optimize_function_locals *l = params->locals;
-
- if (params->last_candidate_supported == false)
- return false;
-
- l->optimize_vmin.pmo_optimize_vmin_params.instance = &params->dml->pmo_instance;
- l->optimize_vmin.pmo_optimize_vmin_params.base_display_config = params->display_config;
- l->optimize_vmin.pmo_optimize_vmin_params.optimized_display_config = params->optimized_display_config;
- return params->dml->pmo_instance.optimize_for_vmin(&l->optimize_vmin.pmo_optimize_vmin_params);
-}
-
-bool dml2_top_optimization_perform_optimization_phase(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params)
-{
- bool test_passed = false;
- bool optimize_succeeded = true;
- bool candidate_validation_passed = true;
- struct optimization_init_function_params init_params = { 0 };
- struct optimization_test_function_params test_params = { 0 };
- struct optimization_optimize_function_params optimize_params = { 0 };
-
- if (!params->dml ||
- !params->optimize_function ||
- !params->test_function ||
- !params->display_config ||
- !params->optimized_display_config)
- return false;
-
- copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config);
-
- init_params.locals = &l->init_function_locals;
- init_params.dml = params->dml;
- init_params.display_config = &l->cur_candidate_display_cfg;
-
- if (params->init_function && !params->init_function(&init_params))
- return false;
-
- test_params.locals = &l->test_function_locals;
- test_params.dml = params->dml;
- test_params.display_config = &l->cur_candidate_display_cfg;
-
- test_passed = params->test_function(&test_params);
-
- while (!test_passed && optimize_succeeded) {
- memset(&optimize_params, 0, sizeof(struct optimization_optimize_function_params));
-
- optimize_params.locals = &l->optimize_function_locals;
- optimize_params.dml = params->dml;
- optimize_params.display_config = &l->cur_candidate_display_cfg;
- optimize_params.optimized_display_config = &l->next_candidate_display_cfg;
- optimize_params.last_candidate_supported = candidate_validation_passed;
-
- optimize_succeeded = params->optimize_function(&optimize_params);
-
- if (optimize_succeeded) {
- l->mode_support_params.instance = &params->dml->core_instance;
- l->mode_support_params.display_cfg = &l->next_candidate_display_cfg;
- l->mode_support_params.min_clk_table = &params->dml->min_clk_table;
-
- if (l->next_candidate_display_cfg.stage3.performed)
- l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage3.min_clk_index_for_latency;
- else
- l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage1.min_clk_index_for_latency;
-
- candidate_validation_passed = params->dml->core_instance.mode_support(&l->mode_support_params);
-
- l->next_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result;
- }
-
- if (optimize_succeeded && candidate_validation_passed) {
- memset(&test_params, 0, sizeof(struct optimization_test_function_params));
- test_params.locals = &l->test_function_locals;
- test_params.dml = params->dml;
- test_params.display_config = &l->next_candidate_display_cfg;
- test_passed = params->test_function(&test_params);
-
- copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, &l->next_candidate_display_cfg);
-
- // If optimization is not all or nothing, then store partial progress in output
- if (!params->all_or_nothing)
- copy_display_configuration_with_meta(params->optimized_display_config, &l->next_candidate_display_cfg);
- }
- }
-
- if (test_passed)
- copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg);
-
- return test_passed;
-}
-
-bool dml2_top_optimization_perform_optimization_phase_1(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params)
-{
- int highest_state, lowest_state, cur_state;
- bool supported = false;
-
- if (!params->dml ||
- !params->optimize_function ||
- !params->test_function ||
- !params->display_config ||
- !params->optimized_display_config)
- return false;
-
- copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config);
- highest_state = l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency;
- lowest_state = 0;
-
- while (highest_state > lowest_state) {
- cur_state = (highest_state + lowest_state) / 2;
-
- l->mode_support_params.instance = &params->dml->core_instance;
- l->mode_support_params.display_cfg = &l->cur_candidate_display_cfg;
- l->mode_support_params.min_clk_table = &params->dml->min_clk_table;
- l->mode_support_params.min_clk_index = cur_state;
-
- supported = params->dml->core_instance.mode_support(&l->mode_support_params);
-
- if (supported) {
- l->cur_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result;
- highest_state = cur_state;
- } else {
- lowest_state = cur_state + 1;
- }
- }
- l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency = lowest_state;
-
- copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg);
-
- return true;
-}
-
-bool dml2_top_optimization_init_function_uclk_pstate(const struct optimization_init_function_params *params)
-{
- struct dml2_optimization_init_function_locals *l = params->locals;
-
- l->uclk_pstate.init_params.instance = &params->dml->pmo_instance;
- l->uclk_pstate.init_params.base_display_config = params->display_config;
-
- return params->dml->pmo_instance.init_for_uclk_pstate(&l->uclk_pstate.init_params);
-}
-
-bool dml2_top_optimization_test_function_uclk_pstate(const struct optimization_test_function_params *params)
-{
- struct dml2_optimization_test_function_locals *l = params->locals;
-
- l->uclk_pstate.test_params.instance = &params->dml->pmo_instance;
- l->uclk_pstate.test_params.base_display_config = params->display_config;
-
- return params->dml->pmo_instance.test_for_uclk_pstate(&l->uclk_pstate.test_params);
-}
-
-bool dml2_top_optimization_optimize_function_uclk_pstate(const struct optimization_optimize_function_params *params)
-{
- struct dml2_optimization_optimize_function_locals *l = params->locals;
-
- l->uclk_pstate.optimize_params.instance = &params->dml->pmo_instance;
- l->uclk_pstate.optimize_params.base_display_config = params->display_config;
- l->uclk_pstate.optimize_params.optimized_display_config = params->optimized_display_config;
- l->uclk_pstate.optimize_params.last_candidate_failed = !params->last_candidate_supported;
-
- return params->dml->pmo_instance.optimize_for_uclk_pstate(&l->uclk_pstate.optimize_params);
-}
-
-bool dml2_top_optimization_init_function_stutter(const struct optimization_init_function_params *params)
-{
- struct dml2_optimization_init_function_locals *l = params->locals;
-
- l->uclk_pstate.init_params.instance = &params->dml->pmo_instance;
- l->uclk_pstate.init_params.base_display_config = params->display_config;
-
- return params->dml->pmo_instance.init_for_stutter(&l->stutter.stutter_params);
-}
-
-bool dml2_top_optimization_test_function_stutter(const struct optimization_test_function_params *params)
-{
- struct dml2_optimization_test_function_locals *l = params->locals;
-
- l->stutter.stutter_params.instance = &params->dml->pmo_instance;
- l->stutter.stutter_params.base_display_config = params->display_config;
- return params->dml->pmo_instance.test_for_stutter(&l->stutter.stutter_params);
-}
-
-bool dml2_top_optimization_optimize_function_stutter(const struct optimization_optimize_function_params *params)
-{
- struct dml2_optimization_optimize_function_locals *l = params->locals;
-
- l->stutter.stutter_params.instance = &params->dml->pmo_instance;
- l->stutter.stutter_params.base_display_config = params->display_config;
- l->stutter.stutter_params.optimized_display_config = params->optimized_display_config;
- l->stutter.stutter_params.last_candidate_failed = !params->last_candidate_supported;
- return params->dml->pmo_instance.optimize_for_stutter(&l->stutter.stutter_params);
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h
deleted file mode 100644
index 9f22ab33eab1..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h
+++ /dev/null
@@ -1,33 +0,0 @@
-// SPDX-License-Identifier: MIT
-//
-// Copyright 2024 Advanced Micro Devices, Inc.
-
-#ifndef __DML2_TOP_OPTIMIZATION_H__
-#define __DML2_TOP_OPTIMIZATION_H__
-
-#include "dml2_external_lib_deps.h"
-#include "dml2_internal_shared_types.h"
-
-bool dml2_top_optimization_perform_optimization_phase(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params);
-bool dml2_top_optimization_perform_optimization_phase_1(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params);
-
-bool dml2_top_optimization_init_function_min_clk_for_latency(const struct optimization_init_function_params *params);
-bool dml2_top_optimization_test_function_min_clk_for_latency(const struct optimization_test_function_params *params);
-bool dml2_top_optimization_optimize_function_min_clk_for_latency(const struct optimization_optimize_function_params *params);
-
-bool dml2_top_optimization_test_function_mcache(const struct optimization_test_function_params *params);
-bool dml2_top_optimization_optimize_function_mcache(const struct optimization_optimize_function_params *params);
-
-bool dml2_top_optimization_init_function_uclk_pstate(const struct optimization_init_function_params *params);
-bool dml2_top_optimization_test_function_uclk_pstate(const struct optimization_test_function_params *params);
-bool dml2_top_optimization_optimize_function_uclk_pstate(const struct optimization_optimize_function_params *params);
-
-bool dml2_top_optimization_init_function_vmin(const struct optimization_init_function_params *params);
-bool dml2_top_optimization_test_function_vmin(const struct optimization_test_function_params *params);
-bool dml2_top_optimization_optimize_function_vmin(const struct optimization_optimize_function_params *params);
-
-bool dml2_top_optimization_init_function_stutter(const struct optimization_init_function_params *params);
-bool dml2_top_optimization_test_function_stutter(const struct optimization_test_function_params *params);
-bool dml2_top_optimization_optimize_function_stutter(const struct optimization_optimize_function_params *params);
-
-#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c
new file mode 100644
index 000000000000..a8f58f8448e4
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c
@@ -0,0 +1,1178 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright 2024 Advanced Micro Devices, Inc.
+
+#include "dml2_top_soc15.h"
+#include "dml2_mcg_factory.h"
+#include "dml2_dpmm_factory.h"
+#include "dml2_core_factory.h"
+#include "dml2_pmo_factory.h"
+#include "lib_float_math.h"
+#include "dml2_debug.h"
+static void setup_unoptimized_display_config_with_meta(const struct dml2_instance *dml, struct display_configuation_with_meta *out, const struct dml2_display_cfg *display_config)
+{
+ memcpy(&out->display_config, display_config, sizeof(struct dml2_display_cfg));
+ out->stage1.min_clk_index_for_latency = dml->min_clk_table.dram_bw_table.num_entries - 1; //dml->min_clk_table.clean_me_up.soc_bb.num_states - 1;
+}
+
+static void setup_speculative_display_config_with_meta(const struct dml2_instance *dml, struct display_configuation_with_meta *out, const struct dml2_display_cfg *display_config)
+{
+ memcpy(&out->display_config, display_config, sizeof(struct dml2_display_cfg));
+ out->stage1.min_clk_index_for_latency = 0;
+}
+
+static void copy_display_configuration_with_meta(struct display_configuation_with_meta *dst, const struct display_configuation_with_meta *src)
+{
+ memcpy(dst, src, sizeof(struct display_configuation_with_meta));
+}
+
+static bool dml2_top_optimization_init_function_min_clk_for_latency(const struct optimization_init_function_params *params)
+{
+ struct dml2_optimization_stage1_state *state = &params->display_config->stage1;
+
+ state->performed = true;
+
+ return true;
+}
+
+static bool dml2_top_optimization_test_function_min_clk_for_latency(const struct optimization_test_function_params *params)
+{
+ struct dml2_optimization_stage1_state *state = &params->display_config->stage1;
+
+ return state->min_clk_index_for_latency == 0;
+}
+
+static bool dml2_top_optimization_optimize_function_min_clk_for_latency(const struct optimization_optimize_function_params *params)
+{
+ bool result = false;
+
+ if (params->display_config->stage1.min_clk_index_for_latency > 0) {
+ copy_display_configuration_with_meta(params->optimized_display_config, params->display_config);
+ params->optimized_display_config->stage1.min_clk_index_for_latency--;
+ result = true;
+ }
+
+ return result;
+}
+
+static bool dml2_top_optimization_test_function_mcache(const struct optimization_test_function_params *params)
+{
+ struct dml2_optimization_test_function_locals *l = params->locals;
+ bool mcache_success = false;
+ bool result = false;
+
+ memset(l, 0, sizeof(struct dml2_optimization_test_function_locals));
+
+ l->test_mcache.calc_mcache_count_params.dml2_instance = params->dml;
+ l->test_mcache.calc_mcache_count_params.display_config = &params->display_config->display_config;
+ l->test_mcache.calc_mcache_count_params.mcache_allocations = params->display_config->stage2.mcache_allocations;
+
+ result = dml2_top_mcache_calc_mcache_count_and_offsets(&l->test_mcache.calc_mcache_count_params); // use core to get the basic mcache_allocations
+
+ if (result) {
+ l->test_mcache.assign_global_mcache_ids_params.allocations = params->display_config->stage2.mcache_allocations;
+ l->test_mcache.assign_global_mcache_ids_params.num_allocations = params->display_config->display_config.num_planes;
+
+ dml2_top_mcache_assign_global_mcache_ids(&l->test_mcache.assign_global_mcache_ids_params);
+
+ l->test_mcache.validate_admissibility_params.dml2_instance = params->dml;
+ l->test_mcache.validate_admissibility_params.display_cfg = &params->display_config->display_config;
+ l->test_mcache.validate_admissibility_params.mcache_allocations = params->display_config->stage2.mcache_allocations;
+ l->test_mcache.validate_admissibility_params.cfg_support_info = &params->display_config->mode_support_result.cfg_support_info;
+
+ mcache_success = dml2_top_mcache_validate_admissability(&l->test_mcache.validate_admissibility_params); // also find the shift to make mcache allocation work
+
+ memcpy(params->display_config->stage2.per_plane_mcache_support, l->test_mcache.validate_admissibility_params.per_plane_status, sizeof(bool) * DML2_MAX_PLANES);
+ }
+
+ return mcache_success;
+}
+
+static bool dml2_top_optimization_optimize_function_mcache(const struct optimization_optimize_function_params *params)
+{
+ struct dml2_optimization_optimize_function_locals *l = params->locals;
+ bool optimize_success = false;
+
+ if (params->last_candidate_supported == false)
+ return false;
+
+ copy_display_configuration_with_meta(params->optimized_display_config, params->display_config);
+
+ l->optimize_mcache.optimize_mcache_params.instance = &params->dml->pmo_instance;
+ l->optimize_mcache.optimize_mcache_params.dcc_mcache_supported = params->display_config->stage2.per_plane_mcache_support;
+ l->optimize_mcache.optimize_mcache_params.display_config = &params->display_config->display_config;
+ l->optimize_mcache.optimize_mcache_params.optimized_display_cfg = &params->optimized_display_config->display_config;
+ l->optimize_mcache.optimize_mcache_params.cfg_support_info = &params->optimized_display_config->mode_support_result.cfg_support_info;
+
+ optimize_success = params->dml->pmo_instance.optimize_dcc_mcache(&l->optimize_mcache.optimize_mcache_params);
+
+ return optimize_success;
+}
+
+static bool dml2_top_optimization_init_function_vmin(const struct optimization_init_function_params *params)
+{
+ struct dml2_optimization_init_function_locals *l = params->locals;
+
+ l->vmin.init_params.instance = &params->dml->pmo_instance;
+ l->vmin.init_params.base_display_config = params->display_config;
+ return params->dml->pmo_instance.init_for_vmin(&l->vmin.init_params);
+}
+
+static bool dml2_top_optimization_test_function_vmin(const struct optimization_test_function_params *params)
+{
+ struct dml2_optimization_test_function_locals *l = params->locals;
+
+ l->test_vmin.pmo_test_vmin_params.instance = &params->dml->pmo_instance;
+ l->test_vmin.pmo_test_vmin_params.display_config = params->display_config;
+ l->test_vmin.pmo_test_vmin_params.vmin_limits = &params->dml->soc_bbox.vmin_limit;
+ return params->dml->pmo_instance.test_for_vmin(&l->test_vmin.pmo_test_vmin_params);
+}
+
+static bool dml2_top_optimization_optimize_function_vmin(const struct optimization_optimize_function_params *params)
+{
+ struct dml2_optimization_optimize_function_locals *l = params->locals;
+
+ if (params->last_candidate_supported == false)
+ return false;
+
+ l->optimize_vmin.pmo_optimize_vmin_params.instance = &params->dml->pmo_instance;
+ l->optimize_vmin.pmo_optimize_vmin_params.base_display_config = params->display_config;
+ l->optimize_vmin.pmo_optimize_vmin_params.optimized_display_config = params->optimized_display_config;
+ return params->dml->pmo_instance.optimize_for_vmin(&l->optimize_vmin.pmo_optimize_vmin_params);
+}
+
+static bool dml2_top_optimization_init_function_uclk_pstate(const struct optimization_init_function_params *params)
+{
+ struct dml2_optimization_init_function_locals *l = params->locals;
+
+ l->uclk_pstate.init_params.instance = &params->dml->pmo_instance;
+ l->uclk_pstate.init_params.base_display_config = params->display_config;
+
+ return params->dml->pmo_instance.init_for_uclk_pstate(&l->uclk_pstate.init_params);
+}
+
+static bool dml2_top_optimization_test_function_uclk_pstate(const struct optimization_test_function_params *params)
+{
+ struct dml2_optimization_test_function_locals *l = params->locals;
+
+ l->uclk_pstate.test_params.instance = &params->dml->pmo_instance;
+ l->uclk_pstate.test_params.base_display_config = params->display_config;
+
+ return params->dml->pmo_instance.test_for_uclk_pstate(&l->uclk_pstate.test_params);
+}
+
+static bool dml2_top_optimization_optimize_function_uclk_pstate(const struct optimization_optimize_function_params *params)
+{
+ struct dml2_optimization_optimize_function_locals *l = params->locals;
+
+ l->uclk_pstate.optimize_params.instance = &params->dml->pmo_instance;
+ l->uclk_pstate.optimize_params.base_display_config = params->display_config;
+ l->uclk_pstate.optimize_params.optimized_display_config = params->optimized_display_config;
+ l->uclk_pstate.optimize_params.last_candidate_failed = !params->last_candidate_supported;
+
+ return params->dml->pmo_instance.optimize_for_uclk_pstate(&l->uclk_pstate.optimize_params);
+}
+
+static bool dml2_top_optimization_init_function_stutter(const struct optimization_init_function_params *params)
+{
+ struct dml2_optimization_init_function_locals *l = params->locals;
+
+ l->uclk_pstate.init_params.instance = &params->dml->pmo_instance;
+ l->uclk_pstate.init_params.base_display_config = params->display_config;
+
+ return params->dml->pmo_instance.init_for_stutter(&l->stutter.stutter_params);
+}
+
+static bool dml2_top_optimization_test_function_stutter(const struct optimization_test_function_params *params)
+{
+ struct dml2_optimization_test_function_locals *l = params->locals;
+
+ l->stutter.stutter_params.instance = &params->dml->pmo_instance;
+ l->stutter.stutter_params.base_display_config = params->display_config;
+ return params->dml->pmo_instance.test_for_stutter(&l->stutter.stutter_params);
+}
+
+static bool dml2_top_optimization_optimize_function_stutter(const struct optimization_optimize_function_params *params)
+{
+ struct dml2_optimization_optimize_function_locals *l = params->locals;
+
+ l->stutter.stutter_params.instance = &params->dml->pmo_instance;
+ l->stutter.stutter_params.base_display_config = params->display_config;
+ l->stutter.stutter_params.optimized_display_config = params->optimized_display_config;
+ l->stutter.stutter_params.last_candidate_failed = !params->last_candidate_supported;
+ return params->dml->pmo_instance.optimize_for_stutter(&l->stutter.stutter_params);
+}
+
+static bool dml2_top_optimization_perform_optimization_phase(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params)
+{
+ bool test_passed = false;
+ bool optimize_succeeded = true;
+ bool candidate_validation_passed = true;
+ struct optimization_init_function_params init_params = { 0 };
+ struct optimization_test_function_params test_params = { 0 };
+ struct optimization_optimize_function_params optimize_params = { 0 };
+
+ if (!params->dml ||
+ !params->optimize_function ||
+ !params->test_function ||
+ !params->display_config ||
+ !params->optimized_display_config)
+ return false;
+
+ copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config);
+
+ init_params.locals = &l->init_function_locals;
+ init_params.dml = params->dml;
+ init_params.display_config = &l->cur_candidate_display_cfg;
+
+ if (params->init_function && !params->init_function(&init_params))
+ return false;
+
+ test_params.locals = &l->test_function_locals;
+ test_params.dml = params->dml;
+ test_params.display_config = &l->cur_candidate_display_cfg;
+
+ test_passed = params->test_function(&test_params);
+
+ while (!test_passed && optimize_succeeded) {
+ memset(&optimize_params, 0, sizeof(struct optimization_optimize_function_params));
+
+ optimize_params.locals = &l->optimize_function_locals;
+ optimize_params.dml = params->dml;
+ optimize_params.display_config = &l->cur_candidate_display_cfg;
+ optimize_params.optimized_display_config = &l->next_candidate_display_cfg;
+ optimize_params.last_candidate_supported = candidate_validation_passed;
+
+ optimize_succeeded = params->optimize_function(&optimize_params);
+
+ if (optimize_succeeded) {
+ l->mode_support_params.instance = &params->dml->core_instance;
+ l->mode_support_params.display_cfg = &l->next_candidate_display_cfg;
+ l->mode_support_params.min_clk_table = &params->dml->min_clk_table;
+
+ if (l->next_candidate_display_cfg.stage3.performed)
+ l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage3.min_clk_index_for_latency;
+ else
+ l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage1.min_clk_index_for_latency;
+ candidate_validation_passed = params->dml->core_instance.mode_support(&l->mode_support_params);
+ l->next_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result;
+ }
+
+ if (optimize_succeeded && candidate_validation_passed) {
+ memset(&test_params, 0, sizeof(struct optimization_test_function_params));
+ test_params.locals = &l->test_function_locals;
+ test_params.dml = params->dml;
+ test_params.display_config = &l->next_candidate_display_cfg;
+ test_passed = params->test_function(&test_params);
+
+ copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, &l->next_candidate_display_cfg);
+
+ // If optimization is not all or nothing, then store partial progress in output
+ if (!params->all_or_nothing)
+ copy_display_configuration_with_meta(params->optimized_display_config, &l->next_candidate_display_cfg);
+ }
+ }
+
+ if (test_passed)
+ copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg);
+
+ return test_passed;
+}
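
The phase driver above follows a propose-validate-retest loop: start from the base configuration, and while the test function fails and the optimize function can still produce a new candidate, accept the candidate and re-run the test on it. A toy sketch of that control flow under hypothetical test/optimize callbacks (it omits the mode-support validation step the driver performs between the two):

#include <stdbool.h>
#include <stdio.h>

struct candidate {
	int cost;
};

static bool test_fn(const struct candidate *c)
{
	return c->cost <= 4;	/* hypothetical acceptance criterion */
}

static bool optimize_fn(const struct candidate *cur, struct candidate *next)
{
	if (cur->cost == 0)
		return false;	/* no further progress possible */
	next->cost = cur->cost - 1;
	return true;
}

int main(void)
{
	struct candidate cur = { .cost = 9 }, next;
	bool passed = test_fn(&cur);

	while (!passed && optimize_fn(&cur, &next)) {
		cur = next;		/* accept the new candidate */
		passed = test_fn(&cur);	/* and re-run the admission test */
	}
	printf("passed=%d cost=%d\n", passed, cur.cost);	/* passed=1 cost=4 */
	return 0;
}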
+
+static bool dml2_top_optimization_perform_optimization_phase_1(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params)
+{
+ int highest_state, lowest_state, cur_state;
+ bool supported = false;
+
+ if (!params->dml ||
+ !params->optimize_function ||
+ !params->test_function ||
+ !params->display_config ||
+ !params->optimized_display_config)
+ return false;
+
+ copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config);
+ highest_state = l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency;
+ lowest_state = 0;
+
+ while (highest_state > lowest_state) {
+ cur_state = (highest_state + lowest_state) / 2;
+
+ l->mode_support_params.instance = &params->dml->core_instance;
+ l->mode_support_params.display_cfg = &l->cur_candidate_display_cfg;
+ l->mode_support_params.min_clk_table = &params->dml->min_clk_table;
+ l->mode_support_params.min_clk_index = cur_state;
+ supported = params->dml->core_instance.mode_support(&l->mode_support_params);
+
+ if (supported) {
+ l->cur_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result;
+ highest_state = cur_state;
+ } else {
+ lowest_state = cur_state + 1;
+ }
+ }
+ l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency = lowest_state;
+
+ copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg);
+
+ return true;
+}
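
Phase 1 above is a binary search for the lowest clock-state index that still passes mode support, assuming support is monotonic in the state index. A minimal sketch of the same search with a stand-in predicate:

#include <stdbool.h>
#include <stdio.h>

static bool mode_ok(int state)
{
	return state >= 3;	/* hypothetical: states 3 and above are supported */
}

static int lowest_supported_state(int highest_state)
{
	int lowest = 0;

	while (highest_state > lowest) {
		int cur = (highest_state + lowest) / 2;

		if (mode_ok(cur))
			highest_state = cur;	/* supported: try lower */
		else
			lowest = cur + 1;	/* unsupported: go higher */
	}
	return lowest;
}

int main(void)
{
	printf("%d\n", lowest_supported_state(7));	/* prints 3 */
	return 0;
}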
+
+/*
+* Takes an input set of mcache boundaries and finds the appropriate setting of cache programming.
+* Returns true if a valid set of programming can be made, and false otherwise. "Valid" means
+* that the horizontal viewport does not span more than 2 cache slices.
+*
+* It can optionally also apply a constant shift to all the cache boundaries.
+*/
+static const uint32_t MCACHE_ID_UNASSIGNED = 0xF;
+static const uint32_t SPLIT_LOCATION_UNDEFINED = 0xFFFF;
+
+static bool calculate_first_second_splitting(const int *mcache_boundaries, int num_boundaries, int shift,
+ int pipe_h_vp_start, int pipe_h_vp_end, int *first_offset, int *second_offset)
+{
+ const int MAX_VP = 0xFFFFFF;
+ int left_cache_id;
+ int right_cache_id;
+ int range_start;
+ int range_end;
+ bool success = false;
+
+ if (num_boundaries <= 1) {
+ if (first_offset && second_offset) {
+ *first_offset = 0;
+ *second_offset = -1;
+ }
+ success = true;
+ return success;
+ } else {
+ range_start = 0;
+ for (left_cache_id = 0; left_cache_id < num_boundaries; left_cache_id++) {
+ range_end = mcache_boundaries[left_cache_id] - shift - 1;
+
+ if (range_start <= pipe_h_vp_start && pipe_h_vp_start <= range_end)
+ break;
+
+ range_start = range_end + 1;
+ }
+
+ range_end = MAX_VP;
+ for (right_cache_id = num_boundaries - 1; right_cache_id >= -1; right_cache_id--) {
+ if (right_cache_id >= 0)
+ range_start = mcache_boundaries[right_cache_id] - shift;
+ else
+ range_start = 0;
+
+ if (range_start <= pipe_h_vp_end && pipe_h_vp_end <= range_end) {
+ break;
+ }
+ range_end = range_start - 1;
+ }
+ right_cache_id = (right_cache_id + 1) % num_boundaries;
+
+ if (right_cache_id == left_cache_id) {
+ if (first_offset && second_offset) {
+ *first_offset = left_cache_id;
+ *second_offset = -1;
+ }
+ success = true;
+ } else if (right_cache_id == (left_cache_id + 1) % num_boundaries) {
+ if (first_offset && second_offset) {
+ *first_offset = left_cache_id;
+ *second_offset = right_cache_id;
+ }
+ success = true;
+ }
+ }
+
+ return success;
+}
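
The helper above implements the rule described in the comment block: locate the cache slice containing the viewport's left edge and the slice containing its right edge, and accept the split only if they are the same slice or adjacent ones. A simplified, self-contained sketch of that rule (boundary semantics reduced to sorted right edges; not the driver's exact indexing):

#include <stdbool.h>
#include <stdio.h>

static int slice_of(const int *boundaries, int n, int x)
{
	int i;

	/* slice i covers [boundaries[i-1], boundaries[i] - 1], slice 0 starts at 0 */
	for (i = 0; i < n; i++)
		if (x < boundaries[i])
			return i;
	return n - 1;	/* clamp to the last slice */
}

static bool spans_at_most_two_adjacent_slices(const int *boundaries, int n,
					      int vp_start, int vp_end)
{
	int first = slice_of(boundaries, n, vp_start);
	int second = slice_of(boundaries, n, vp_end);

	return second == first || second == first + 1;
}

int main(void)
{
	const int boundaries[] = { 256, 512, 768 };	/* three slices */

	/* fits entirely in slice 0 */
	printf("%d\n", spans_at_most_two_adjacent_slices(boundaries, 3, 0, 200));
	/* straddles slices 0 and 1: still admissible */
	printf("%d\n", spans_at_most_two_adjacent_slices(boundaries, 3, 200, 400));
	/* spans three slices: not admissible */
	printf("%d\n", spans_at_most_two_adjacent_slices(boundaries, 3, 100, 700));
	return 0;
}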
+
+/*
+* For a given set of pipe start/end x positions, checks to see if it can support the input mcache splitting.
+* It also attempts to "optimize" by finding a shift if the default 0 shift does not work.
+*/
+static bool find_shift_for_valid_cache_id_assignment(int *mcache_boundaries, unsigned int num_boundaries,
+ int *pipe_vp_startx, int *pipe_vp_endx, unsigned int pipe_count, int shift_granularity, int *shift)
+{
+ int max_shift = 0xFFFF;
+ unsigned int pipe_index;
+ unsigned int i, slice_width;
+ bool success = false;
+
+ for (i = 0; i < num_boundaries; i++) {
+ if (i == 0)
+ slice_width = mcache_boundaries[i];
+ else
+ slice_width = mcache_boundaries[i] - mcache_boundaries[i - 1];
+
+ if (max_shift > (int)slice_width) {
+ max_shift = slice_width;
+ }
+ }
+
+ for (*shift = 0; *shift <= max_shift; *shift += shift_granularity) {
+ success = true;
+ for (pipe_index = 0; pipe_index < pipe_count; pipe_index++) {
+ if (!calculate_first_second_splitting(mcache_boundaries, num_boundaries, *shift,
+ pipe_vp_startx[pipe_index], pipe_vp_endx[pipe_index], 0, 0)) {
+ success = false;
+ break;
+ }
+ }
+ if (success)
+ break;
+ }
+
+ return success;
+}
+
+/*
+* Counts the number of elements inside the input array that fall within the given span length.
+* Formally: the size of the largest subset of the array whose largest and smallest elements
+* differ by no more than the span.
+*/
+static unsigned int count_elements_in_span(int *array, unsigned int array_size, unsigned int span)
+{
+ unsigned int i;
+ unsigned int span_start_value;
+ unsigned int span_start_index;
+ unsigned int greatest_element_count;
+
+ if (array_size == 0)
+ return 1;
+
+ if (span == 0)
+ return array_size > 0 ? 1 : 0;
+
+ span_start_value = 0;
+ span_start_index = 0;
+ greatest_element_count = 0;
+
+ while (span_start_index < array_size) {
+ for (i = span_start_index; i < array_size; i++) {
+ if (array[i] - span_start_value <= span) {
+ if (i - span_start_index + 1 > greatest_element_count) {
+ greatest_element_count = i - span_start_index + 1;
+ }
+ } else
+ break;
+ }
+
+ span_start_index++;
+
+ if (span_start_index < array_size) {
+ span_start_value = array[span_start_index - 1] + 1;
+ }
+ }
+
+ return greatest_element_count;
+}
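
Because the mcache x-offsets are sorted ascending, the count above is equivalent to the longest run of offsets whose spread stays within the span. A minimal two-pointer sketch of that equivalent formulation (illustrative only):

#include <stdio.h>

static unsigned int longest_span_run(const int *sorted, unsigned int n, int span)
{
	unsigned int lo = 0, hi, best = 0;

	for (hi = 0; hi < n; hi++) {
		while (sorted[hi] - sorted[lo] > span)
			lo++;	/* shrink the window until it fits the span */
		if (hi - lo + 1 > best)
			best = hi - lo + 1;
	}
	return best;
}

int main(void)
{
	const int offsets[] = { 0, 100, 180, 500, 560 };

	/* {0, 100, 180} fit in a span of 200, so the answer is 3 */
	printf("%u\n", longest_span_run(offsets, 5, 200));
	return 0;
}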
+
+static bool calculate_h_split_for_scaling_transform(int full_vp_width, int h_active, int num_pipes,
+ enum dml2_scaling_transform scaling_transform, int *pipe_vp_x_start, int *pipe_vp_x_end)
+{
+ int i, slice_width;
+ const char MAX_SCL_VP_OVERLAP = 3;
+ bool success = false;
+
+ switch (scaling_transform) {
+ case dml2_scaling_transform_centered:
+ case dml2_scaling_transform_aspect_ratio:
+ case dml2_scaling_transform_fullscreen:
+ slice_width = full_vp_width / num_pipes;
+ for (i = 0; i < num_pipes; i++) {
+ pipe_vp_x_start[i] = i * slice_width;
+ pipe_vp_x_end[i] = (i + 1) * slice_width - 1;
+
+ if (pipe_vp_x_start[i] < MAX_SCL_VP_OVERLAP)
+ pipe_vp_x_start[i] = 0;
+ else
+ pipe_vp_x_start[i] -= MAX_SCL_VP_OVERLAP;
+
+ if (pipe_vp_x_end[i] > full_vp_width - MAX_SCL_VP_OVERLAP - 1)
+ pipe_vp_x_end[i] = full_vp_width - 1;
+ else
+ pipe_vp_x_end[i] += MAX_SCL_VP_OVERLAP;
+ }
+ break;
+ case dml2_scaling_transform_explicit:
+ default:
+ success = false;
+ break;
+ }
+
+ return success;
+}
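
For the non-explicit scaling transforms, the split above is an equal-width slice per pipe padded by MAX_SCL_VP_OVERLAP pixels on each side and clamped at the plane edges. A small stand-alone example that reproduces the same arithmetic for hypothetical values (1920-wide viewport, two pipes):

#include <stdio.h>

int main(void)
{
	const int full_vp_width = 1920, num_pipes = 2, overlap = 3;
	int slice = full_vp_width / num_pipes;
	int i;

	for (i = 0; i < num_pipes; i++) {
		int start = i * slice - overlap;
		int end = (i + 1) * slice - 1 + overlap;

		if (start < 0)
			start = 0;			/* clamp at the left plane edge */
		if (end > full_vp_width - 1)
			end = full_vp_width - 1;	/* clamp at the right plane edge */
		printf("pipe %d: [%d, %d]\n", i, start, end);
	}
	/* prints: pipe 0: [0, 962]  pipe 1: [957, 1919] */
	return 0;
}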
+
+bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissability_in_out *params)
+{
+ struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance;
+ struct dml2_top_mcache_validate_admissability_locals *l = &dml->scratch.mcache_validate_admissability_locals;
+
+ const int MAX_PIXEL_OVERLAP = 6;
+ int max_per_pipe_vp_p0 = 0;
+ int max_per_pipe_vp_p1 = 0;
+ int temp, p0shift, p1shift;
+ unsigned int plane_index = 0;
+ unsigned int i;
+ unsigned int odm_combine_factor;
+ unsigned int mpc_combine_factor;
+ unsigned int num_dpps;
+ unsigned int num_boundaries;
+ enum dml2_scaling_transform scaling_transform;
+ const struct dml2_plane_parameters *plane;
+ const struct dml2_stream_parameters *stream;
+
+ bool p0pass = false;
+ bool p1pass = false;
+ bool all_pass = true;
+
+ for (plane_index = 0; plane_index < params->display_cfg->num_planes; plane_index++) {
+ if (!params->display_cfg->plane_descriptors[plane_index].surface.dcc.enable)
+ continue;
+
+ plane = &params->display_cfg->plane_descriptors[plane_index];
+ stream = &params->display_cfg->stream_descriptors[plane->stream_index];
+
+ num_dpps = odm_combine_factor = params->cfg_support_info->stream_support_info[plane->stream_index].odms_used;
+
+ if (odm_combine_factor == 1)
+ num_dpps = mpc_combine_factor = (unsigned int)params->cfg_support_info->plane_support_info[plane_index].dpps_used;
+ else
+ mpc_combine_factor = 1;
+
+ if (odm_combine_factor > 1) {
+ max_per_pipe_vp_p0 = plane->surface.plane0.width;
+ temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane0.h_ratio * stream->timing.h_active / odm_combine_factor);
+
+ if (temp < max_per_pipe_vp_p0)
+ max_per_pipe_vp_p0 = temp;
+
+ max_per_pipe_vp_p1 = plane->surface.plane1.width;
+ temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane1.h_ratio * stream->timing.h_active / odm_combine_factor);
+
+ if (temp < max_per_pipe_vp_p1)
+ max_per_pipe_vp_p1 = temp;
+ } else {
+ max_per_pipe_vp_p0 = plane->surface.plane0.width / mpc_combine_factor;
+ max_per_pipe_vp_p1 = plane->surface.plane1.width / mpc_combine_factor;
+ }
+
+ max_per_pipe_vp_p0 += 2 * MAX_PIXEL_OVERLAP;
+ max_per_pipe_vp_p1 += MAX_PIXEL_OVERLAP;
+
+ p0shift = 0;
+ p1shift = 0;
+
+ // The last element in the unshifted boundary array will always be the first pixel outside the
+ // plane, which means there's no mcache associated with it, so -1
+ num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane0 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane0 - 1;
+ if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane0,
+ num_boundaries, max_per_pipe_vp_p0) <= 1) && (num_boundaries <= num_dpps)) {
+ p0pass = true;
+ }
+ num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane1 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane1 - 1;
+ if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane1,
+ num_boundaries, max_per_pipe_vp_p1) <= 1) && (num_boundaries <= num_dpps)) {
+ p1pass = true;
+ }
+
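+ // If the unshifted boundaries fail, try to find a boundary shift that yields a valid
+ // cache ID assignment (stationary viewports only)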
+ if (!p0pass || !p1pass) {
+ if (odm_combine_factor > 1) {
+ num_dpps = odm_combine_factor;
+ scaling_transform = plane->composition.scaling_transform;
+ } else {
+ num_dpps = mpc_combine_factor;
+ scaling_transform = dml2_scaling_transform_fullscreen;
+ }
+
+ if (!p0pass) {
+ if (plane->composition.viewport.stationary) {
+ calculate_h_split_for_scaling_transform(plane->surface.plane0.width,
+ stream->timing.h_active, num_dpps, scaling_transform,
+ &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index]);
+ p0pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane0,
+ params->mcache_allocations[plane_index].num_mcaches_plane0,
+ &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index], num_dpps,
+ params->mcache_allocations[plane_index].shift_granularity.p0, &p0shift);
+ }
+ }
+ if (!p1pass) {
+ if (plane->composition.viewport.stationary) {
+ calculate_h_split_for_scaling_transform(plane->surface.plane1.width,
+ stream->timing.h_active, num_dpps, scaling_transform,
+ &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index]);
+ p1pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane1,
+ params->mcache_allocations[plane_index].num_mcaches_plane1,
+ &l->plane1.pipe_vp_startx[plane_index], &l->plane1.pipe_vp_endx[plane_index], num_dpps,
+ params->mcache_allocations[plane_index].shift_granularity.p1, &p1shift);
+ }
+ }
+ }
+
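+ // Apply any boundary shift found above to the mcache x offsets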
+ if (p0pass && p1pass) {
+ for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane0; i++) {
+ params->mcache_allocations[plane_index].mcache_x_offsets_plane0[i] -= p0shift;
+ }
+ for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane1; i++) {
+ params->mcache_allocations[plane_index].mcache_x_offsets_plane1[i] -= p1shift;
+ }
+ }
+
+ params->per_plane_status[plane_index] = p0pass && p1pass;
+ all_pass &= p0pass && p1pass;
+ }
+
+ return all_pass;
+}
+
+static void reset_mcache_allocations(struct dml2_hubp_pipe_mcache_regs *per_plane_pipe_mcache_regs)
+{
+ // Initialize all entries to special valid MCache ID and special valid split coordinate
+ per_plane_pipe_mcache_regs->main.p0.mcache_id_first = MCACHE_ID_UNASSIGNED;
+ per_plane_pipe_mcache_regs->main.p0.mcache_id_second = MCACHE_ID_UNASSIGNED;
+ per_plane_pipe_mcache_regs->main.p0.split_location = SPLIT_LOCATION_UNDEFINED;
+
+ per_plane_pipe_mcache_regs->mall.p0.mcache_id_first = MCACHE_ID_UNASSIGNED;
+ per_plane_pipe_mcache_regs->mall.p0.mcache_id_second = MCACHE_ID_UNASSIGNED;
+ per_plane_pipe_mcache_regs->mall.p0.split_location = SPLIT_LOCATION_UNDEFINED;
+
+ per_plane_pipe_mcache_regs->main.p1.mcache_id_first = MCACHE_ID_UNASSIGNED;
+ per_plane_pipe_mcache_regs->main.p1.mcache_id_second = MCACHE_ID_UNASSIGNED;
+ per_plane_pipe_mcache_regs->main.p1.split_location = SPLIT_LOCATION_UNDEFINED;
+
+ per_plane_pipe_mcache_regs->mall.p1.mcache_id_first = MCACHE_ID_UNASSIGNED;
+ per_plane_pipe_mcache_regs->mall.p1.mcache_id_second = MCACHE_ID_UNASSIGNED;
+ per_plane_pipe_mcache_regs->mall.p1.split_location = SPLIT_LOCATION_UNDEFINED;
+}
+
+void dml2_top_mcache_assign_global_mcache_ids(struct top_mcache_assign_global_mcache_ids_in_out *params)
+{
+ int i;
+ unsigned int j;
+ int next_unused_cache_id = 0;
+
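+ // Global mcache IDs are handed out sequentially across all valid planes so each slice gets a unique ID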
+ for (i = 0; i < params->num_allocations; i++) {
+ if (!params->allocations[i].valid)
+ continue;
+
+ for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) {
+ params->allocations[i].global_mcache_ids_plane0[j] = next_unused_cache_id++;
+ }
+ for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) {
+ params->allocations[i].global_mcache_ids_plane1[j] = next_unused_cache_id++;
+ }
+
+ // The "psuedo-last" slice is always wrapped around
+ params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0] =
+ params->allocations[i].global_mcache_ids_plane0[0];
+ params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1] =
+ params->allocations[i].global_mcache_ids_plane1[0];
+
+ // If we need dedicated caches for mall requesting, then we assign them here.
+ if (params->allocations[i].requires_dedicated_mall_mcache) {
+ for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) {
+ params->allocations[i].global_mcache_ids_mall_plane0[j] = next_unused_cache_id++;
+ }
+ for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) {
+ params->allocations[i].global_mcache_ids_mall_plane1[j] = next_unused_cache_id++;
+ }
+
+ // The "psuedo-last" slice is always wrapped around
+ params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0] =
+ params->allocations[i].global_mcache_ids_mall_plane0[0];
+ params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1] =
+ params->allocations[i].global_mcache_ids_mall_plane1[0];
+ }
+
+ // If P0 and P1 share their last cache slice, the largest mcache IDs for P0 and P1 can be the same.
+ // Since mcache IDs are always assigned in ascending order, the largest mcache ID of P1 should be
+ // the largest mcache ID of P0
+ if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 &&
+ params->allocations[i].last_slice_sharing.plane0_plane1) {
+ params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1] =
+ params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1];
+ }
+
+ // If we need dedicated caches, handle last slice sharing
+ if (params->allocations[i].requires_dedicated_mall_mcache) {
+ if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 &&
+ params->allocations[i].last_slice_sharing.plane0_plane1) {
+ params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] =
+ params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1];
+ }
+ // If mall_comb_mcache_p0 is set, the largest mcache ID for MALL p0 can be the same as regular read p0
+ if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p0) {
+ params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1] =
+ params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1];
+ }
+ // If mall_comb_mcache_p1 is set, the largest mcache ID for MALL p1 can be the same as regular
+ // read p1 (which can be the same as regular read p0 if plane0_plane1 is also set)
+ if (params->allocations[i].num_mcaches_plane1 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p1) {
+ params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] =
+ params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1];
+ }
+ }
+
+ // If dedicated mall mcaches are not needed, the mall mcache assignments are identical to the normal requesting ones
+ if (!params->allocations[i].requires_dedicated_mall_mcache) {
+ memcpy(params->allocations[i].global_mcache_ids_mall_plane0, params->allocations[i].global_mcache_ids_plane0,
+ sizeof(params->allocations[i].global_mcache_ids_mall_plane0));
+ memcpy(params->allocations[i].global_mcache_ids_mall_plane1, params->allocations[i].global_mcache_ids_plane1,
+ sizeof(params->allocations[i].global_mcache_ids_mall_plane1));
+ }
+ }
+}
+
+bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache_count_and_offsets_in_out *params)
+{
+ struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance;
+ struct dml2_top_mcache_verify_mcache_size_locals *l = &dml->scratch.mcache_verify_mcache_size_locals;
+
+ unsigned int total_mcaches_required;
+ unsigned int i;
+ bool result = false;
+
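+ // Nothing to calculate if the SoC exposes no DCC mcaches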
+ if (dml->soc_bbox.num_dcc_mcaches == 0) {
+ return true;
+ }
+
+ total_mcaches_required = 0;
+ l->calc_mcache_params.instance = &dml->core_instance;
+ for (i = 0; i < params->display_config->num_planes; i++) {
+ if (!params->display_config->plane_descriptors[i].surface.dcc.enable) {
+ memset(&params->mcache_allocations[i], 0, sizeof(struct dml2_mcache_surface_allocation));
+ continue;
+ }
+
+ l->calc_mcache_params.plane_descriptor = &params->display_config->plane_descriptors[i];
+ l->calc_mcache_params.mcache_allocation = &params->mcache_allocations[i];
+ l->calc_mcache_params.plane_index = i;
+
+ if (!dml->core_instance.calculate_mcache_allocation(&l->calc_mcache_params)) {
+ result = false;
+ break;
+ }
+
+ if (params->mcache_allocations[i].valid) {
+ total_mcaches_required += params->mcache_allocations[i].num_mcaches_plane0 + params->mcache_allocations[i].num_mcaches_plane1;
+ if (params->mcache_allocations[i].last_slice_sharing.plane0_plane1)
+ total_mcaches_required--;
+ }
+ }
+ dml2_printf("DML_CORE_DCN3::%s: plane_%d, total_mcaches_required=%d\n", __func__, i, total_mcaches_required);
+
+ if (total_mcaches_required > dml->soc_bbox.num_dcc_mcaches) {
+ result = false;
+ } else {
+ result = true;
+ }
+
+ return result;
+}
+
+static bool dml2_top_soc15_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out)
+{
+ struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance;
+ struct dml2_check_mode_supported_locals *l = &dml->scratch.check_mode_supported_locals;
+ struct dml2_display_cfg_programming *dpmm_programming = &dml->dpmm_instance.dpmm_scratch.programming;
+
+ bool result = false;
+ bool mcache_success = false;
+ memset(dpmm_programming, 0, sizeof(struct dml2_display_cfg_programming));
+
+ setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config);
+
+ l->mode_support_params.instance = &dml->core_instance;
+ l->mode_support_params.display_cfg = &l->base_display_config_with_meta;
+ l->mode_support_params.min_clk_table = &dml->min_clk_table;
+ l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency;
+ result = dml->core_instance.mode_support(&l->mode_support_params);
+ l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result;
+
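+ // Only run the mcache optimization phase when the base mode is supported; it confirms the DCC mcache allocations fit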
+ if (result) {
+ struct optimization_phase_params mcache_phase = {
+ .dml = dml,
+ .display_config = &l->base_display_config_with_meta,
+ .test_function = dml2_top_optimization_test_function_mcache,
+ .optimize_function = dml2_top_optimization_optimize_function_mcache,
+ .optimized_display_config = &l->optimized_display_config_with_meta,
+ .all_or_nothing = false,
+ };
+ mcache_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &mcache_phase);
+ }
+
+ /*
+ * Call DPMM to map all requirements to minimum clock state
+ */
+ if (result) {
+ l->dppm_map_mode_params.min_clk_table = &dml->min_clk_table;
+ l->dppm_map_mode_params.display_cfg = &l->base_display_config_with_meta;
+ l->dppm_map_mode_params.programming = dpmm_programming;
+ l->dppm_map_mode_params.soc_bb = &dml->soc_bbox;
+ l->dppm_map_mode_params.ip = &dml->core_instance.clean_me_up.mode_lib.ip;
+ result = dml->dpmm_instance.map_mode_to_soc_dpm(&l->dppm_map_mode_params);
+ }
+
+ in_out->is_supported = mcache_success;
+ result = result && in_out->is_supported;
+
+ return result;
+}
+
+static bool dml2_top_soc15_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out)
+{
+ struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance;
+ struct dml2_build_mode_programming_locals *l = &dml->scratch.build_mode_programming_locals;
+
+ bool result = false;
+ bool mcache_success = false;
+ bool uclk_pstate_success = false;
+ bool vmin_success = false;
+ bool stutter_success = false;
+ unsigned int i;
+
+ memset(l, 0, sizeof(struct dml2_build_mode_programming_locals));
+ memset(in_out->programming, 0, sizeof(struct dml2_display_cfg_programming));
+
+ memcpy(&in_out->programming->display_config, in_out->display_config, sizeof(struct dml2_display_cfg));
+
+ setup_speculative_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config);
+
+ l->mode_support_params.instance = &dml->core_instance;
+ l->mode_support_params.display_cfg = &l->base_display_config_with_meta;
+ l->mode_support_params.min_clk_table = &dml->min_clk_table;
+ l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency;
+ result = dml->core_instance.mode_support(&l->mode_support_params);
+
+ l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result;
+
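+ // If the speculative (latency-optimized) config fails mode support, fall back to the unoptimized config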
+ if (!result) {
+ setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config);
+
+ l->mode_support_params.instance = &dml->core_instance;
+ l->mode_support_params.display_cfg = &l->base_display_config_with_meta;
+ l->mode_support_params.min_clk_table = &dml->min_clk_table;
+ l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency;
+ result = dml->core_instance.mode_support(&l->mode_support_params);
+ l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result;
+
+ if (!result) {
+ l->informative_params.instance = &dml->core_instance;
+ l->informative_params.programming = in_out->programming;
+ l->informative_params.mode_is_supported = false;
+ dml->core_instance.populate_informative(&l->informative_params);
+
+ return false;
+ }
+
+ /*
+ * Phase 1: Determine minimum clocks to satisfy latency requirements for this mode
+ */
+ memset(&l->min_clock_for_latency_phase, 0, sizeof(struct optimization_phase_params));
+ l->min_clock_for_latency_phase.dml = dml;
+ l->min_clock_for_latency_phase.display_config = &l->base_display_config_with_meta;
+ l->min_clock_for_latency_phase.init_function = dml2_top_optimization_init_function_min_clk_for_latency;
+ l->min_clock_for_latency_phase.test_function = dml2_top_optimization_test_function_min_clk_for_latency;
+ l->min_clock_for_latency_phase.optimize_function = dml2_top_optimization_optimize_function_min_clk_for_latency;
+ l->min_clock_for_latency_phase.optimized_display_config = &l->optimized_display_config_with_meta;
+ l->min_clock_for_latency_phase.all_or_nothing = false;
+
+ dml2_top_optimization_perform_optimization_phase_1(&l->optimization_phase_locals, &l->min_clock_for_latency_phase);
+
+ memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta));
+ }
+
+ /*
+ * Phase 2: Satisfy DCC mcache requirements
+ */
+ memset(&l->mcache_phase, 0, sizeof(struct optimization_phase_params));
+ l->mcache_phase.dml = dml;
+ l->mcache_phase.display_config = &l->base_display_config_with_meta;
+ l->mcache_phase.test_function = dml2_top_optimization_test_function_mcache;
+ l->mcache_phase.optimize_function = dml2_top_optimization_optimize_function_mcache;
+ l->mcache_phase.optimized_display_config = &l->optimized_display_config_with_meta;
+ l->mcache_phase.all_or_nothing = true;
+
+ mcache_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->mcache_phase);
+
+ if (!mcache_success) {
+ l->informative_params.instance = &dml->core_instance;
+ l->informative_params.programming = in_out->programming;
+ l->informative_params.mode_is_supported = false;
+
+ dml->core_instance.populate_informative(&l->informative_params);
+
+ in_out->programming->informative.failed_mcache_validation = true;
+ return false;
+ }
+
+ memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta));
+
+ /*
+ * Phase 3: Optimize for Pstate
+ */
+ memset(&l->uclk_pstate_phase, 0, sizeof(struct optimization_phase_params));
+ l->uclk_pstate_phase.dml = dml;
+ l->uclk_pstate_phase.display_config = &l->base_display_config_with_meta;
+ l->uclk_pstate_phase.init_function = dml2_top_optimization_init_function_uclk_pstate;
+ l->uclk_pstate_phase.test_function = dml2_top_optimization_test_function_uclk_pstate;
+ l->uclk_pstate_phase.optimize_function = dml2_top_optimization_optimize_function_uclk_pstate;
+ l->uclk_pstate_phase.optimized_display_config = &l->optimized_display_config_with_meta;
+ l->uclk_pstate_phase.all_or_nothing = true;
+
+ uclk_pstate_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->uclk_pstate_phase);
+
+ if (uclk_pstate_success) {
+ memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta));
+ l->base_display_config_with_meta.stage3.success = true;
+ }
+
+ /*
+ * Phase 4: Optimize for Vmin
+ */
+ memset(&l->vmin_phase, 0, sizeof(struct optimization_phase_params));
+ l->vmin_phase.dml = dml;
+ l->vmin_phase.display_config = &l->base_display_config_with_meta;
+ l->vmin_phase.init_function = dml2_top_optimization_init_function_vmin;
+ l->vmin_phase.test_function = dml2_top_optimization_test_function_vmin;
+ l->vmin_phase.optimize_function = dml2_top_optimization_optimize_function_vmin;
+ l->vmin_phase.optimized_display_config = &l->optimized_display_config_with_meta;
+ l->vmin_phase.all_or_nothing = false;
+
+ vmin_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->vmin_phase);
+
+ if (l->optimized_display_config_with_meta.stage4.performed) {
+ /*
+ * When performed is true, the optimization has been applied to
+ * optimized_display_config_with_meta and it has passed mode
+ * support. However, it may or may not pass the test function needed to
+ * reach the actual Vmin. As long as the voltage is optimized, there is
+ * still a power benefit even if it doesn't reach the Vmin level, so in
+ * this case we still copy this optimization into the base
+ * display config.
+ */
+ memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta));
+ l->base_display_config_with_meta.stage4.success = vmin_success;
+ }
+
+ /*
+ * Phase 5: Optimize for Stutter
+ */
+ memset(&l->stutter_phase, 0, sizeof(struct optimization_phase_params));
+ l->stutter_phase.dml = dml;
+ l->stutter_phase.display_config = &l->base_display_config_with_meta;
+ l->stutter_phase.init_function = dml2_top_optimization_init_function_stutter;
+ l->stutter_phase.test_function = dml2_top_optimization_test_function_stutter;
+ l->stutter_phase.optimize_function = dml2_top_optimization_optimize_function_stutter;
+ l->stutter_phase.optimized_display_config = &l->optimized_display_config_with_meta;
+ l->stutter_phase.all_or_nothing = true;
+
+ stutter_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->stutter_phase);
+
+ if (stutter_success) {
+ memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta));
+ l->base_display_config_with_meta.stage5.success = true;
+ }
+
+ /*
+ * Populate mcache programming
+ */
+ for (i = 0; i < in_out->display_config->num_planes; i++) {
+ in_out->programming->plane_programming[i].mcache_allocation = l->base_display_config_with_meta.stage2.mcache_allocations[i];
+ }
+
+ /*
+ * Call DPMM to map all requirements to minimum clock state
+ */
+ if (result) {
+ l->dppm_map_mode_params.min_clk_table = &dml->min_clk_table;
+ l->dppm_map_mode_params.display_cfg = &l->base_display_config_with_meta;
+ l->dppm_map_mode_params.programming = in_out->programming;
+ l->dppm_map_mode_params.soc_bb = &dml->soc_bbox;
+ l->dppm_map_mode_params.ip = &dml->core_instance.clean_me_up.mode_lib.ip;
+ result = dml->dpmm_instance.map_mode_to_soc_dpm(&l->dppm_map_mode_params);
+ if (!result)
+ in_out->programming->informative.failed_dpmm = true;
+ }
+
+ if (result) {
+ l->mode_programming_params.instance = &dml->core_instance;
+ l->mode_programming_params.display_cfg = &l->base_display_config_with_meta;
+ l->mode_programming_params.cfg_support_info = &l->base_display_config_with_meta.mode_support_result.cfg_support_info;
+ l->mode_programming_params.programming = in_out->programming;
+ result = dml->core_instance.mode_programming(&l->mode_programming_params);
+ if (!result)
+ in_out->programming->informative.failed_mode_programming = true;
+ }
+
+ if (result) {
+ l->dppm_map_watermarks_params.core = &dml->core_instance;
+ l->dppm_map_watermarks_params.display_cfg = &l->base_display_config_with_meta;
+ l->dppm_map_watermarks_params.programming = in_out->programming;
+ result = dml->dpmm_instance.map_watermarks(&l->dppm_map_watermarks_params);
+ }
+
+ l->informative_params.instance = &dml->core_instance;
+ l->informative_params.programming = in_out->programming;
+ l->informative_params.mode_is_supported = result;
+
+ dml->core_instance.populate_informative(&l->informative_params);
+
+ return result;
+}
+
+bool dml2_top_soc15_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params)
+{
+ bool success = true;
+ int config_index, pipe_index;
+ int first_offset, second_offset;
+ int free_per_plane_reg_index = 0;
+
+ memset(params->per_plane_pipe_mcache_regs, 0, DML2_MAX_PLANES * DML2_MAX_DCN_PIPES * sizeof(struct dml2_hubp_pipe_mcache_regs *));
+
+ for (config_index = 0; config_index < params->num_configurations; config_index++) {
+ for (pipe_index = 0; pipe_index < params->mcache_configurations[config_index].num_pipes; pipe_index++) {
+ // Allocate storage for the mcache regs
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index] = &params->mcache_regs_set[free_per_plane_reg_index++];
+
+ reset_mcache_allocations(params->per_plane_pipe_mcache_regs[config_index][pipe_index]);
+
+ if (params->mcache_configurations[config_index].plane_descriptor->surface.dcc.enable) {
+ // P0 always enabled
+ if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0,
+ params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane0,
+ 0,
+ params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start,
+ params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start +
+ params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_width - 1,
+ &first_offset, &second_offset)) {
+ success = false;
+ break;
+ }
+
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_first =
+ params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[first_offset];
+
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_first =
+ params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[first_offset];
+
+ if (second_offset >= 0) {
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_second =
+ params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[second_offset];
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.split_location =
+ params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1;
+
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_second =
+ params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[second_offset];
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.split_location =
+ params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1;
+ }
+
+ // Populate P1 if enabled
+ if (params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1_enabled) {
+ if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1,
+ params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane1,
+ 0,
+ params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start,
+ params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start +
+ params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_width - 1,
+ &first_offset, &second_offset)) {
+ success = false;
+ break;
+ }
+
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_first =
+ params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[first_offset];
+
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_first =
+ params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[first_offset];
+
+ if (second_offset >= 0) {
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_second =
+ params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[second_offset];
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.split_location =
+ params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1;
+
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_second =
+ params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[second_offset];
+ params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.split_location =
+ params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1;
+ }
+ }
+ }
+ }
+ }
+
+ return success;
+}
+
+static const struct dml2_top_funcs soc15_funcs = {
+ .check_mode_supported = dml2_top_soc15_check_mode_supported,
+ .build_mode_programming = dml2_top_soc15_build_mode_programming,
+ .build_mcache_programming = dml2_top_soc15_build_mcache_programming,
+};
+
+bool dml2_top_soc15_initialize_instance(struct dml2_initialize_instance_in_out *in_out)
+{
+ struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance;
+ struct dml2_initialize_instance_locals *l = &dml->scratch.initialize_instance_locals;
+ struct dml2_core_initialize_in_out core_init_params = { 0 };
+ struct dml2_mcg_build_min_clock_table_params_in_out mcg_build_min_clk_params = { 0 };
+ struct dml2_pmo_initialize_in_out pmo_init_params = { 0 };
+ bool result = false;
+
+ memset(l, 0, sizeof(struct dml2_initialize_instance_locals));
+ memset(dml, 0, sizeof(struct dml2_instance));
+
+ memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities));
+ memcpy(&dml->soc_bbox, &in_out->soc_bb, sizeof(struct dml2_soc_bb));
+
+ dml->project_id = in_out->options.project_id;
+ dml->pmo_options = in_out->options.pmo_options;
+
+ // Initialize All Components
+ result = dml2_mcg_create(in_out->options.project_id, &dml->mcg_instance);
+
+ if (result)
+ result = dml2_dpmm_create(in_out->options.project_id, &dml->dpmm_instance);
+
+ if (result)
+ result = dml2_core_create(in_out->options.project_id, &dml->core_instance);
+
+ if (result) {
+ mcg_build_min_clk_params.soc_bb = &in_out->soc_bb;
+ mcg_build_min_clk_params.min_clk_table = &dml->min_clk_table;
+ result = dml->mcg_instance.build_min_clock_table(&mcg_build_min_clk_params);
+ }
+
+ if (result) {
+ core_init_params.project_id = in_out->options.project_id;
+ core_init_params.instance = &dml->core_instance;
+ core_init_params.minimum_clock_table = &dml->min_clk_table;
+ core_init_params.explicit_ip_bb = in_out->overrides.explicit_ip_bb;
+ core_init_params.explicit_ip_bb_size = in_out->overrides.explicit_ip_bb_size;
+ core_init_params.ip_caps = &in_out->ip_caps;
+ core_init_params.soc_bb = &in_out->soc_bb;
+ result = dml->core_instance.initialize(&core_init_params);
+
+ if (core_init_params.explicit_ip_bb && core_init_params.explicit_ip_bb_size > 0) {
+ memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities));
+ }
+ }
+
+ if (result)
+ result = dml2_pmo_create(in_out->options.project_id, &dml->pmo_instance);
+
+ if (result) {
+ pmo_init_params.instance = &dml->pmo_instance;
+ pmo_init_params.soc_bb = &dml->soc_bbox;
+ pmo_init_params.ip_caps = &dml->ip_caps;
+ pmo_init_params.mcg_clock_table_size = dml->min_clk_table.dram_bw_table.num_entries;
+ pmo_init_params.options = &dml->pmo_options;
+ dml->pmo_instance.initialize(&pmo_init_params);
+ }
+ dml->funcs = soc15_funcs;
+ return result;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.h
index 7b1f6f7143d0..53bd8602f9ef 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.h
@@ -2,22 +2,13 @@
//
// Copyright 2024 Advanced Micro Devices, Inc.
-#ifndef __DML_TOP_MCACHE_H__
-#define __DML_TOP_MCACHE_H__
-
-#include "dml2_external_lib_deps.h"
-#include "dml_top_display_cfg_types.h"
-#include "dml_top_types.h"
+#ifndef __DML2_TOP_SOC15_H__
+#define __DML2_TOP_SOC15_H__
#include "dml2_internal_shared_types.h"
+bool dml2_top_soc15_initialize_instance(struct dml2_initialize_instance_in_out *in_out);
bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache_count_and_offsets_in_out *params);
-
void dml2_top_mcache_assign_global_mcache_ids(struct top_mcache_assign_global_mcache_ids_in_out *params);
-
bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissability_in_out *params);
-
-bool dml2_top_mcache_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params);
-
-bool dml2_top_mcache_unit_test(void);
-
-#endif
+bool dml2_top_soc15_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params);
+#endif /* __DML2_TOP_SOC15_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c
deleted file mode 100644
index a342ebfbe4e7..000000000000
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c
+++ /dev/null
@@ -1,549 +0,0 @@
-// SPDX-License-Identifier: MIT
-//
-// Copyright 2024 Advanced Micro Devices, Inc.
-
-#include "dml2_debug.h"
-
-#include "dml_top_mcache.h"
-#include "lib_float_math.h"
-
-#include "dml2_internal_shared_types.h"
-
-/*
-* Takes an input set of mcache boundaries and finds the appropriate setting of cache programming.
-* Returns true if a valid set of programming can be made, and false otherwise. "Valid" means
-* that the horizontal viewport does not span more than 2 cache slices.
-*
-* It optionally also can apply a constant shift to all the cache boundaries.
-*/
-static const uint32_t MCACHE_ID_UNASSIGNED = 0xF;
-static const uint32_t SPLIT_LOCATION_UNDEFINED = 0xFFFF;
-
-static bool calculate_first_second_splitting(const int *mcache_boundaries, int num_boundaries, int shift,
- int pipe_h_vp_start, int pipe_h_vp_end, int *first_offset, int *second_offset)
-{
- const int MAX_VP = 0xFFFFFF;
- int left_cache_id;
- int right_cache_id;
- int range_start;
- int range_end;
- bool success = false;
-
- if (num_boundaries <= 1) {
- if (first_offset && second_offset) {
- *first_offset = 0;
- *second_offset = -1;
- }
- success = true;
- return success;
- } else {
- range_start = 0;
- for (left_cache_id = 0; left_cache_id < num_boundaries; left_cache_id++) {
- range_end = mcache_boundaries[left_cache_id] - shift - 1;
-
- if (range_start <= pipe_h_vp_start && pipe_h_vp_start <= range_end)
- break;
-
- range_start = range_end + 1;
- }
-
- range_end = MAX_VP;
- for (right_cache_id = num_boundaries - 1; right_cache_id >= -1; right_cache_id--) {
- if (right_cache_id >= 0)
- range_start = mcache_boundaries[right_cache_id] - shift;
- else
- range_start = 0;
-
- if (range_start <= pipe_h_vp_end && pipe_h_vp_end <= range_end) {
- break;
- }
- range_end = range_start - 1;
- }
- right_cache_id = (right_cache_id + 1) % num_boundaries;
-
- if (right_cache_id == left_cache_id) {
- if (first_offset && second_offset) {
- *first_offset = left_cache_id;
- *second_offset = -1;
- }
- success = true;
- } else if (right_cache_id == (left_cache_id + 1) % num_boundaries) {
- if (first_offset && second_offset) {
- *first_offset = left_cache_id;
- *second_offset = right_cache_id;
- }
- success = true;
- }
- }
-
- return success;
-}
-
-/*
-* For a given set of pipe start/end x positions, checks to see it can support the input mcache splitting.
-* It also attempts to "optimize" by finding a shift if the default 0 shift does not work.
-*/
-static bool find_shift_for_valid_cache_id_assignment(int *mcache_boundaries, unsigned int num_boundaries,
- int *pipe_vp_startx, int *pipe_vp_endx, unsigned int pipe_count, int shift_granularity, int *shift)
-{
- int max_shift = 0xFFFF;
- unsigned int pipe_index;
- unsigned int i, slice_width;
- bool success = false;
-
- for (i = 0; i < num_boundaries; i++) {
- if (i == 0)
- slice_width = mcache_boundaries[i];
- else
- slice_width = mcache_boundaries[i] - mcache_boundaries[i - 1];
-
- if (max_shift > (int)slice_width) {
- max_shift = slice_width;
- }
- }
-
- for (*shift = 0; *shift <= max_shift; *shift += shift_granularity) {
- success = true;
- for (pipe_index = 0; pipe_index < pipe_count; pipe_index++) {
- if (!calculate_first_second_splitting(mcache_boundaries, num_boundaries, *shift,
- pipe_vp_startx[pipe_index], pipe_vp_endx[pipe_index], 0, 0)) {
- success = false;
- break;
- }
- }
- if (success)
- break;
- }
-
- return success;
-}
-
-/*
-* Counts the number of elements inside input array within the given span length.
-* Formally, what is the size of the largest subset of the array where the largest and smallest element
-* differ no more than the span.
-*/
-static unsigned int count_elements_in_span(int *array, unsigned int array_size, unsigned int span)
-{
- unsigned int i;
- unsigned int span_start_value;
- unsigned int span_start_index;
- unsigned int greatest_element_count;
-
- if (array_size == 0)
- return 1;
-
- if (span == 0)
- return array_size > 0 ? 1 : 0;
-
- span_start_value = 0;
- span_start_index = 0;
- greatest_element_count = 0;
-
- while (span_start_index < array_size) {
- for (i = span_start_index; i < array_size; i++) {
- if (array[i] - span_start_value <= span) {
- if (i - span_start_index + 1 > greatest_element_count) {
- greatest_element_count = i - span_start_index + 1;
- }
- } else
- break;
- }
-
- span_start_index++;
-
- if (span_start_index < array_size) {
- span_start_value = array[span_start_index - 1] + 1;
- }
- }
-
- return greatest_element_count;
-}
-
-static bool calculate_h_split_for_scaling_transform(int full_vp_width, int h_active, int num_pipes,
- enum dml2_scaling_transform scaling_transform, int *pipe_vp_x_start, int *pipe_vp_x_end)
-{
- int i, slice_width;
- const char MAX_SCL_VP_OVERLAP = 3;
- bool success = false;
-
- switch (scaling_transform) {
- case dml2_scaling_transform_centered:
- case dml2_scaling_transform_aspect_ratio:
- case dml2_scaling_transform_fullscreen:
- slice_width = full_vp_width / num_pipes;
- for (i = 0; i < num_pipes; i++) {
- pipe_vp_x_start[i] = i * slice_width;
- pipe_vp_x_end[i] = (i + 1) * slice_width - 1;
-
- if (pipe_vp_x_start[i] < MAX_SCL_VP_OVERLAP)
- pipe_vp_x_start[i] = 0;
- else
- pipe_vp_x_start[i] -= MAX_SCL_VP_OVERLAP;
-
- if (pipe_vp_x_end[i] > full_vp_width - MAX_SCL_VP_OVERLAP - 1)
- pipe_vp_x_end[i] = full_vp_width - 1;
- else
- pipe_vp_x_end[i] += MAX_SCL_VP_OVERLAP;
- }
- break;
- case dml2_scaling_transform_explicit:
- default:
- success = false;
- break;
- }
-
- return success;
-}
-
-bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissability_in_out *params)
-{
- struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance;
- struct dml2_top_mcache_validate_admissability_locals *l = &dml->scratch.mcache_validate_admissability_locals;
-
- const int MAX_PIXEL_OVERLAP = 6;
- int max_per_pipe_vp_p0 = 0;
- int max_per_pipe_vp_p1 = 0;
- int temp, p0shift, p1shift;
- unsigned int plane_index = 0;
- unsigned int i;
- unsigned int odm_combine_factor;
- unsigned int mpc_combine_factor;
- unsigned int num_dpps;
- unsigned int num_boundaries;
- enum dml2_scaling_transform scaling_transform;
- const struct dml2_plane_parameters *plane;
- const struct dml2_stream_parameters *stream;
-
- bool p0pass = false;
- bool p1pass = false;
- bool all_pass = true;
-
- for (plane_index = 0; plane_index < params->display_cfg->num_planes; plane_index++) {
- if (!params->display_cfg->plane_descriptors[plane_index].surface.dcc.enable)
- continue;
-
- plane = &params->display_cfg->plane_descriptors[plane_index];
- stream = &params->display_cfg->stream_descriptors[plane->stream_index];
-
- num_dpps = odm_combine_factor = params->cfg_support_info->stream_support_info[plane->stream_index].odms_used;
-
- if (odm_combine_factor == 1)
- num_dpps = mpc_combine_factor = (unsigned int)params->cfg_support_info->plane_support_info[plane_index].dpps_used;
- else
- mpc_combine_factor = 1;
-
- if (odm_combine_factor > 1) {
- max_per_pipe_vp_p0 = plane->surface.plane0.width;
- temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane0.h_ratio * stream->timing.h_active / odm_combine_factor);
-
- if (temp < max_per_pipe_vp_p0)
- max_per_pipe_vp_p0 = temp;
-
- max_per_pipe_vp_p1 = plane->surface.plane1.width;
- temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane1.h_ratio * stream->timing.h_active / odm_combine_factor);
-
- if (temp < max_per_pipe_vp_p1)
- max_per_pipe_vp_p1 = temp;
- } else {
- max_per_pipe_vp_p0 = plane->surface.plane0.width / mpc_combine_factor;
- max_per_pipe_vp_p1 = plane->surface.plane1.width / mpc_combine_factor;
- }
-
- max_per_pipe_vp_p0 += 2 * MAX_PIXEL_OVERLAP;
- max_per_pipe_vp_p1 += MAX_PIXEL_OVERLAP;
-
- p0shift = 0;
- p1shift = 0;
-
- // The last element in the unshifted boundary array will always be the first pixel outside the
- // plane, which means theres no mcache associated with it, so -1
- num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane0 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane0 - 1;
- if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane0,
- num_boundaries, max_per_pipe_vp_p0) <= 1) && (num_boundaries <= num_dpps)) {
- p0pass = true;
- }
- num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane1 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane1 - 1;
- if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane1,
- num_boundaries, max_per_pipe_vp_p1) <= 1) && (num_boundaries <= num_dpps)) {
- p1pass = true;
- }
-
- if (!p0pass || !p1pass) {
- if (odm_combine_factor > 1) {
- num_dpps = odm_combine_factor;
- scaling_transform = plane->composition.scaling_transform;
- } else {
- num_dpps = mpc_combine_factor;
- scaling_transform = dml2_scaling_transform_fullscreen;
- }
-
- if (!p0pass) {
- if (plane->composition.viewport.stationary) {
- calculate_h_split_for_scaling_transform(plane->surface.plane0.width,
- stream->timing.h_active, num_dpps, scaling_transform,
- &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index]);
- p0pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane0,
- params->mcache_allocations[plane_index].num_mcaches_plane0,
- &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index], num_dpps,
- params->mcache_allocations[plane_index].shift_granularity.p0, &p0shift);
- }
- }
- if (!p1pass) {
- if (plane->composition.viewport.stationary) {
- calculate_h_split_for_scaling_transform(plane->surface.plane1.width,
- stream->timing.h_active, num_dpps, scaling_transform,
- &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index]);
- p1pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane1,
- params->mcache_allocations[plane_index].num_mcaches_plane1,
- &l->plane1.pipe_vp_startx[plane_index], &l->plane1.pipe_vp_endx[plane_index], num_dpps,
- params->mcache_allocations[plane_index].shift_granularity.p1, &p1shift);
- }
- }
- }
-
- if (p0pass && p1pass) {
- for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane0; i++) {
- params->mcache_allocations[plane_index].mcache_x_offsets_plane0[i] -= p0shift;
- }
- for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane1; i++) {
- params->mcache_allocations[plane_index].mcache_x_offsets_plane1[i] -= p1shift;
- }
- }
-
- params->per_plane_status[plane_index] = p0pass && p1pass;
- all_pass &= p0pass && p1pass;
- }
-
- return all_pass;
-}
-
-static void reset_mcache_allocations(struct dml2_hubp_pipe_mcache_regs *per_plane_pipe_mcache_regs)
-{
- // Initialize all entries to special valid MCache ID and special valid split coordinate
- per_plane_pipe_mcache_regs->main.p0.mcache_id_first = MCACHE_ID_UNASSIGNED;
- per_plane_pipe_mcache_regs->main.p0.mcache_id_second = MCACHE_ID_UNASSIGNED;
- per_plane_pipe_mcache_regs->main.p0.split_location = SPLIT_LOCATION_UNDEFINED;
-
- per_plane_pipe_mcache_regs->mall.p0.mcache_id_first = MCACHE_ID_UNASSIGNED;
- per_plane_pipe_mcache_regs->mall.p0.mcache_id_second = MCACHE_ID_UNASSIGNED;
- per_plane_pipe_mcache_regs->mall.p0.split_location = SPLIT_LOCATION_UNDEFINED;
-
- per_plane_pipe_mcache_regs->main.p1.mcache_id_first = MCACHE_ID_UNASSIGNED;
- per_plane_pipe_mcache_regs->main.p1.mcache_id_second = MCACHE_ID_UNASSIGNED;
- per_plane_pipe_mcache_regs->main.p1.split_location = SPLIT_LOCATION_UNDEFINED;
-
- per_plane_pipe_mcache_regs->mall.p1.mcache_id_first = MCACHE_ID_UNASSIGNED;
- per_plane_pipe_mcache_regs->mall.p1.mcache_id_second = MCACHE_ID_UNASSIGNED;
- per_plane_pipe_mcache_regs->mall.p1.split_location = SPLIT_LOCATION_UNDEFINED;
-}
-
-bool dml2_top_mcache_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params)
-{
- bool success = true;
- int config_index, pipe_index;
- int first_offset, second_offset;
- int free_per_plane_reg_index = 0;
-
- memset(params->per_plane_pipe_mcache_regs, 0, DML2_MAX_PLANES * DML2_MAX_DCN_PIPES * sizeof(struct dml2_hubp_pipe_mcache_regs *));
-
- for (config_index = 0; config_index < params->num_configurations; config_index++) {
- for (pipe_index = 0; pipe_index < params->mcache_configurations[config_index].num_pipes; pipe_index++) {
- // Allocate storage for the mcache regs
- params->per_plane_pipe_mcache_regs[config_index][pipe_index] = &params->mcache_regs_set[free_per_plane_reg_index++];
-
- reset_mcache_allocations(params->per_plane_pipe_mcache_regs[config_index][pipe_index]);
-
- if (params->mcache_configurations[config_index].plane_descriptor->surface.dcc.enable) {
- // P0 always enabled
- if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0,
- params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane0,
- 0,
- params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start,
- params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start +
- params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_width - 1,
- &first_offset, &second_offset)) {
- success = false;
- break;
- }
-
- params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_first =
- params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[first_offset];
-
- params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_first =
- params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[first_offset];
-
- if (second_offset >= 0) {
- params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_second =
- params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[second_offset];
- params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.split_location =
- params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1;
-
- params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_second =
- params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[second_offset];
- params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.split_location =
- params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1;
- }
-
- // Populate P1 if enabled
- if (params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1_enabled) {
- if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1,
- params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane1,
- 0,
- params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start,
- params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start +
- params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_width - 1,
- &first_offset, &second_offset)) {
- success = false;
- break;
- }
-
- params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_first =
- params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[first_offset];
-
- params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_first =
- params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[first_offset];
-
- if (second_offset >= 0) {
- params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_second =
- params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[second_offset];
- params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.split_location =
- params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1;
-
- params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_second =
- params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[second_offset];
- params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.split_location =
- params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1;
- }
- }
- }
- }
- }
-
- return success;
-}
-
-void dml2_top_mcache_assign_global_mcache_ids(struct top_mcache_assign_global_mcache_ids_in_out *params)
-{
- int i;
- unsigned int j;
- int next_unused_cache_id = 0;
-
- for (i = 0; i < params->num_allocations; i++) {
- if (!params->allocations[i].valid)
- continue;
-
- for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) {
- params->allocations[i].global_mcache_ids_plane0[j] = next_unused_cache_id++;
- }
- for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) {
- params->allocations[i].global_mcache_ids_plane1[j] = next_unused_cache_id++;
- }
-
- // The "psuedo-last" slice is always wrapped around
- params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0] =
- params->allocations[i].global_mcache_ids_plane0[0];
- params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1] =
- params->allocations[i].global_mcache_ids_plane1[0];
-
- // If we need dedicated caches for mall requesting, then we assign them here.
- if (params->allocations[i].requires_dedicated_mall_mcache) {
- for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) {
- params->allocations[i].global_mcache_ids_mall_plane0[j] = next_unused_cache_id++;
- }
- for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) {
- params->allocations[i].global_mcache_ids_mall_plane1[j] = next_unused_cache_id++;
- }
-
- // The "psuedo-last" slice is always wrapped around
- params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0] =
- params->allocations[i].global_mcache_ids_mall_plane0[0];
- params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1] =
- params->allocations[i].global_mcache_ids_mall_plane1[0];
- }
-
- // If P0 and P1 are sharing caches, then it means the largest mcache IDs for p0 and p1 can be the same
- // since mcache IDs are always ascending, then it means the largest mcacheID of p1 should be the
- // largest mcacheID of P0
- if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 &&
- params->allocations[i].last_slice_sharing.plane0_plane1) {
- params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1] =
- params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1];
- }
-
- // If we need dedicated caches handle last slice sharing
- if (params->allocations[i].requires_dedicated_mall_mcache) {
- if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 &&
- params->allocations[i].last_slice_sharing.plane0_plane1) {
- params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] =
- params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1];
- }
- // If mall_comb_mcache_l is set then it means that largest mcache ID for MALL p0 can be same as regular read p0
- if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p0) {
- params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1] =
- params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1];
- }
- // If mall_comb_mcache_c is set then it means that largest mcache ID for MALL p1 can be same as regular
- // read p1 (which can be same as regular read p0 if plane0_plane1 is also set)
- if (params->allocations[i].num_mcaches_plane1 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p1) {
- params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] =
- params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1];
- }
- }
-
- // If you don't need dedicated mall mcaches, the mall mcache assignments are identical to the normal requesting
- if (!params->allocations[i].requires_dedicated_mall_mcache) {
- memcpy(params->allocations[i].global_mcache_ids_mall_plane0, params->allocations[i].global_mcache_ids_plane0,
- sizeof(params->allocations[i].global_mcache_ids_mall_plane0));
- memcpy(params->allocations[i].global_mcache_ids_mall_plane1, params->allocations[i].global_mcache_ids_plane1,
- sizeof(params->allocations[i].global_mcache_ids_mall_plane1));
- }
- }
-}
-
-bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache_count_and_offsets_in_out *params)
-{
- struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance;
- struct dml2_top_mcache_verify_mcache_size_locals *l = &dml->scratch.mcache_verify_mcache_size_locals;
-
- unsigned int total_mcaches_required;
- unsigned int i;
- bool result = false;
-
- if (dml->soc_bbox.num_dcc_mcaches == 0) {
- return true;
- }
-
- total_mcaches_required = 0;
- l->calc_mcache_params.instance = &dml->core_instance;
- for (i = 0; i < params->display_config->num_planes; i++) {
- if (!params->display_config->plane_descriptors[i].surface.dcc.enable) {
- memset(&params->mcache_allocations[i], 0, sizeof(struct dml2_mcache_surface_allocation));
- continue;
- }
-
- l->calc_mcache_params.plane_descriptor = &params->display_config->plane_descriptors[i];
- l->calc_mcache_params.mcache_allocation = &params->mcache_allocations[i];
- l->calc_mcache_params.plane_index = i;
-
- if (!dml->core_instance.calculate_mcache_allocation(&l->calc_mcache_params)) {
- result = false;
- break;
- }
-
- if (params->mcache_allocations[i].valid) {
- total_mcaches_required += params->mcache_allocations[i].num_mcaches_plane0 + params->mcache_allocations[i].num_mcaches_plane1;
- if (params->mcache_allocations[i].last_slice_sharing.plane0_plane1)
- total_mcaches_required--;
- }
- }
- dml2_printf("DML_CORE_DCN3::%s: plane_%d, total_mcaches_required=%d\n", __func__, i, total_mcaches_required);
-
- if (total_mcaches_required > dml->soc_bbox.num_dcc_mcaches) {
- result = false;
- } else {
- result = true;
- }
-
- return result;
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c
index e9b8e10695ae..f95c7ff56f15 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c
@@ -4,6 +4,11 @@
#include "dml2_debug.h"
+int dml2_log_internal(const char *format, ...)
+{
+ return 0;
+}
+
int dml2_printf(const char *format, ...)
{
#ifdef _DEBUG
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h
index d51a1b6c62f2..a27792b56f7e 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h
@@ -8,9 +8,53 @@
#ifdef _DEBUG
#define DML2_ASSERT(condition) dml2_assert(condition)
#else
-#define DML2_ASSERT(condition)
+#define DML2_ASSERT(condition) ((void)0)
+#endif
+/*
+ * DML_LOG_FATAL - fatal errors for DML states that are unrecoverable until a restart.
+ * DML_LOG_ERROR - unexpected but recoverable failures inside DML
+ * DML_LOG_WARN - unexpected inputs or events to DML
+ * DML_LOG_INFO - high level tracing of DML interfaces
+ * DML_LOG_DEBUG - detailed tracing of DML internal components
+ * DML_LOG_VERBOSE - detailed tracing of DML calculation procedure
+ */
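+/*
+ * For example, building with DML_LOG_LEVEL set to 3 compiles in DML_LOG_FATAL,
+ * DML_LOG_ERROR, DML_LOG_WARN and DML_LOG_INFO, while DML_LOG_DEBUG and
+ * DML_LOG_VERBOSE compile to no-ops.
+ */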
+#if !defined(DML_LOG_LEVEL)
+#if defined(_DEBUG) && defined(_DEBUG_PRINTS)
+/* for backward compatibility with old macros */
+#define DML_LOG_LEVEL 5
+#else
+#define DML_LOG_LEVEL 0
+#endif
+#endif
+
+#define DML_LOG_FATAL(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__)
+#if DML_LOG_LEVEL >= 1
+#define DML_LOG_ERROR(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__)
+#else
+#define DML_LOG_ERROR(fmt, ...) ((void)0)
+#endif
+#if DML_LOG_LEVEL >= 2
+#define DML_LOG_WARN(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__)
+#else
+#define DML_LOG_WARN(fmt, ...) ((void)0)
+#endif
+#if DML_LOG_LEVEL >= 3
+#define DML_LOG_INFO(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__)
+#else
+#define DML_LOG_INFO(fmt, ...) ((void)0)
+#endif
+#if DML_LOG_LEVEL >= 4
+#define DML_LOG_DEBUG(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__)
+#else
+#define DML_LOG_DEBUG(fmt, ...) ((void)0)
+#endif
+#if DML_LOG_LEVEL >= 5
+#define DML_LOG_VERBOSE(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__)
+#else
+#define DML_LOG_VERBOSE(fmt, ...) ((void)0)
#endif
+int dml2_log_internal(const char *format, ...);
int dml2_printf(const char *format, ...);
void dml2_assert(int condition);
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h
index aeac9f159fa5..7fb6026bcb49 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h
@@ -8,7 +8,6 @@
#include "dml2_external_lib_deps.h"
#include "dml_top_types.h"
#include "dml2_core_shared_types.h"
-
/*
* DML2 MCG Types and Interfaces
*/
@@ -63,7 +62,6 @@ struct dml2_mcg_build_min_clock_table_params_in_out {
*/
struct dml2_mcg_min_clock_table *min_clk_table;
};
-
struct dml2_mcg_instance {
bool (*build_min_clock_table)(struct dml2_mcg_build_min_clock_table_params_in_out *in_out);
bool (*unit_test)(void);
@@ -81,7 +79,6 @@ struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out {
struct dml2_soc_bb *soc_bb;
struct dml2_mcg_min_clock_table *min_clk_table;
const struct display_configuation_with_meta *display_cfg;
-
struct {
bool perform_pseudo_map;
struct dml2_core_internal_soc_bb *soc_bb;
@@ -309,7 +306,7 @@ struct dml2_optimization_stage3_state {
// The pstate support mode for each plane
// The number of valid elements == display_cfg.num_planes
// The indexing of pstate_switch_modes matches plane_descriptors[]
- enum dml2_uclk_pstate_support_method pstate_switch_modes[DML2_MAX_PLANES];
+ enum dml2_pstate_method pstate_switch_modes[DML2_MAX_PLANES];
// Meta-data for implicit SVP generation, indexed by stream index
struct dml2_implicit_svp_meta stream_svp_meta[DML2_MAX_PLANES];
@@ -356,6 +353,10 @@ struct display_configuation_with_meta {
struct dml2_optimization_stage5_state stage5;
};
+struct dml2_pmo_pstate_strategy {
+ enum dml2_pstate_method per_stream_pstate_method[DML2_MAX_PLANES];
+ bool allow_state_increase;
+};
struct dml2_core_mode_support_in_out {
/*
* Inputs
@@ -365,7 +366,6 @@ struct dml2_core_mode_support_in_out {
struct dml2_mcg_min_clock_table *min_clk_table;
int min_clk_index;
-
/*
* Outputs
*/
@@ -395,7 +395,6 @@ struct dml2_core_mode_programming_in_out {
struct dml2_core_instance *instance;
const struct display_configuation_with_meta *display_cfg;
const struct core_display_cfg_support_info *cfg_support_info;
-
/*
* Outputs (also inputs: the clk freqs come from the programming struct)
*/
@@ -445,6 +444,7 @@ struct dml2_core_internal_state_intermediates {
struct dml2_core_mode_support_locals {
struct dml2_core_calcs_mode_support_ex mode_support_ex_params;
struct dml2_display_cfg svp_expanded_display_cfg;
+ struct dml2_calculate_mcache_allocation_in_out calc_mcache_allocation_params;
};
struct dml2_core_mode_programming_locals {
@@ -600,34 +600,11 @@ struct dml2_pmo_optimize_for_stutter_in_out {
struct display_configuation_with_meta *optimized_display_config;
};
-enum dml2_pmo_pstate_method {
- dml2_pmo_pstate_strategy_na = 0,
- /* hw exclusive modes */
- dml2_pmo_pstate_strategy_vactive = 1,
- dml2_pmo_pstate_strategy_vblank = 2,
- dml2_pmo_pstate_strategy_reserved_hw = 5,
- /* fw assisted exclusive modes */
- dml2_pmo_pstate_strategy_fw_svp = 6,
- dml2_pmo_pstate_strategy_reserved_fw = 10,
- /* fw assisted modes requiring drr modulation */
- dml2_pmo_pstate_strategy_fw_vactive_drr = 11,
- dml2_pmo_pstate_strategy_fw_vblank_drr = 12,
- dml2_pmo_pstate_strategy_fw_svp_drr = 13,
- dml2_pmo_pstate_strategy_reserved_fw_drr_clamped = 20,
- dml2_pmo_pstate_strategy_fw_drr = 21,
- dml2_pmo_pstate_strategy_reserved_fw_drr_var = 22,
-};
-
-struct dml2_pmo_pstate_strategy {
- enum dml2_pmo_pstate_method per_stream_pstate_method[DML2_MAX_PLANES];
- bool allow_state_increase;
-};
-
-#define PMO_NO_DRR_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw - dml2_pmo_pstate_strategy_na + 1)) - 1) << dml2_pmo_pstate_strategy_na)
-#define PMO_DRR_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_var - dml2_pmo_pstate_strategy_fw_vactive_drr + 1)) - 1) << dml2_pmo_pstate_strategy_fw_vactive_drr)
-#define PMO_DRR_CLAMPED_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_clamped - dml2_pmo_pstate_strategy_fw_vactive_drr + 1)) - 1) << dml2_pmo_pstate_strategy_fw_vactive_drr)
-#define PMO_DRR_VAR_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_var - dml2_pmo_pstate_strategy_fw_drr + 1)) - 1) << dml2_pmo_pstate_strategy_fw_drr)
-#define PMO_FW_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_var - dml2_pmo_pstate_strategy_fw_svp + 1)) - 1) << dml2_pmo_pstate_strategy_fw_svp)
+#define PMO_NO_DRR_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw - dml2_pstate_method_na + 1)) - 1) << dml2_pstate_method_na)
+#define PMO_DRR_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_var - dml2_pstate_method_fw_vactive_drr + 1)) - 1) << dml2_pstate_method_fw_vactive_drr)
+#define PMO_DRR_CLAMPED_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_clamped - dml2_pstate_method_fw_vactive_drr + 1)) - 1) << dml2_pstate_method_fw_vactive_drr)
+#define PMO_DRR_VAR_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_var - dml2_pstate_method_fw_drr + 1)) - 1) << dml2_pstate_method_fw_drr)
+#define PMO_FW_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_var - dml2_pstate_method_fw_svp + 1)) - 1) << dml2_pstate_method_fw_svp)
#define PMO_DCN4_MAX_DISPLAYS 4
#define PMO_DCN4_MAX_NUM_VARIANTS 2
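/*
 * Illustrative sketch, not part of this change: the PMO_*_STRATEGY_MASK
 * macros encode contiguous runs of the renamed dml2_pstate_method
 * enumerators as bit masks. Assuming the new names keep the previous
 * numeric values (na = 0, reserved_fw = 10, fw_vactive_drr = 11,
 * reserved_fw_drr_var = 22):
 *
 *   PMO_NO_DRR_STRATEGY_MASK = ((1 << (10 - 0 + 1)) - 1) << 0   = 0x7ff
 *   PMO_DRR_STRATEGY_MASK    = ((1 << (22 - 11 + 1)) - 1) << 11 = 0x7ff800
 *
 * so a hypothetical helper could classify a method with a single test:
 */
static inline bool pmo_method_uses_drr(enum dml2_pstate_method method)
{
	return ((1 << method) & PMO_DRR_STRATEGY_MASK) != 0;
}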
@@ -645,6 +622,8 @@ struct dml2_pmo_scratch {
int stream_mask;
} pmo_dcn3;
struct {
+ struct dml2_pmo_pstate_strategy expanded_override_strategy_list[2 * 2 * 2 * 2];
+ unsigned int num_expanded_override_strategies;
struct dml2_pmo_pstate_strategy pstate_strategy_candidates[DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE];
int num_pstate_candidates;
int cur_pstate_candidate;
@@ -706,7 +685,6 @@ struct dml2_pmo_instance {
int mpc_combine_limit;
int odm_combine_limit;
int mcg_clock_table_size;
-
union {
struct {
struct {
@@ -963,7 +941,13 @@ struct dml2_top_mcache_validate_admissability_locals {
struct dml2_top_display_cfg_support_info {
const struct dml2_display_cfg *display_config;
struct core_display_cfg_support_info core_info;
- enum dml2_pstate_support_method per_plane_pstate_method[DML2_MAX_PLANES];
+};
+
+struct dml2_top_funcs {
+ bool (*check_mode_supported)(struct dml2_check_mode_supported_in_out *in_out);
+ bool (*build_mode_programming)(struct dml2_build_mode_programming_in_out *in_out);
+ bool (*build_mcache_programming)(struct dml2_build_mcache_programming_in_out *in_out);
+ bool (*unit_test)(void);
};
struct dml2_instance {
@@ -978,8 +962,8 @@ struct dml2_instance {
struct dml2_ip_capabilities ip_caps;
struct dml2_mcg_min_clock_table min_clk_table;
-
struct dml2_pmo_options pmo_options;
+ struct dml2_top_funcs funcs;
struct {
struct dml2_initialize_instance_locals initialize_instance_locals;
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
index 3d29169dd6bb..6b3b8803e0ae 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_mall_phantom.c
@@ -813,7 +813,7 @@ static bool remove_all_phantom_planes_for_stream(struct dml2_context *ctx, struc
{
int i, old_plane_count;
struct dc_stream_status *stream_status = NULL;
- struct dc_plane_state *del_planes[MAX_SURFACE_NUM] = { 0 };
+ struct dc_plane_state *del_planes[MAX_SURFACES] = { 0 };
for (i = 0; i < context->stream_count; i++)
if (context->streams[i] == stream) {
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
index bde4250853b1..b416320873e1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -553,13 +553,53 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc,
}
}
- dml2_policy_build_synthetic_soc_states(s, p);
- if (dml2->v20.dml_core_ctx.project == dml_project_dcn35) {
- // Override last out_state with data from last in_state
- // This will ensure that out_state contains max fclk
- memcpy(&p->out_states->state_array[p->out_states->num_states - 1],
- &p->in_states->state_array[p->in_states->num_states - 1],
- sizeof(struct soc_state_bounding_box_st));
+ if (dml2->v20.dml_core_ctx.project == dml_project_dcn35 ||
+ dml2->v20.dml_core_ctx.project == dml_project_dcn351) {
+ int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0,
+ max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0, max_socclk_mhz = 0;
+
+ for (i = 0; i < p->in_states->num_states; i++) {
+ if (p->in_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz)
+ max_dcfclk_mhz = (int)p->in_states->state_array[i].dcfclk_mhz;
+ if (p->in_states->state_array[i].fabricclk_mhz > max_fclk_mhz)
+ max_fclk_mhz = (int)p->in_states->state_array[i].fabricclk_mhz;
+ if (p->in_states->state_array[i].socclk_mhz > max_socclk_mhz)
+ max_socclk_mhz = (int)p->in_states->state_array[i].socclk_mhz;
+ if (p->in_states->state_array[i].dram_speed_mts > max_uclk_mhz)
+ max_uclk_mhz = (int)p->in_states->state_array[i].dram_speed_mts;
+ if (p->in_states->state_array[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = (int)p->in_states->state_array[i].dispclk_mhz;
+ if (p->in_states->state_array[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = (int)p->in_states->state_array[i].dppclk_mhz;
+ if (p->in_states->state_array[i].phyclk_mhz > max_phyclk_mhz)
+ max_phyclk_mhz = (int)p->in_states->state_array[i].phyclk_mhz;
+ if (p->in_states->state_array[i].dtbclk_mhz > max_dtbclk_mhz)
+ max_dtbclk_mhz = (int)p->in_states->state_array[i].dtbclk_mhz;
+ }
+
+ for (i = 0; i < p->in_states->num_states; i++) {
+ /* Independent states - including base (unlisted) parameters from state 0. */
+ p->out_states->state_array[i] = p->in_states->state_array[0];
+
+ p->out_states->state_array[i].dispclk_mhz = max_dispclk_mhz;
+ p->out_states->state_array[i].dppclk_mhz = max_dppclk_mhz;
+ p->out_states->state_array[i].dtbclk_mhz = max_dtbclk_mhz;
+ p->out_states->state_array[i].phyclk_mhz = max_phyclk_mhz;
+
+ p->out_states->state_array[i].dscclk_mhz = max_dispclk_mhz / 3.0;
+ p->out_states->state_array[i].phyclk_mhz = max_phyclk_mhz;
+ p->out_states->state_array[i].dtbclk_mhz = max_dtbclk_mhz;
+
+ /* Dependent states. */
+ p->out_states->state_array[i].dram_speed_mts = p->in_states->state_array[i].dram_speed_mts;
+ p->out_states->state_array[i].fabricclk_mhz = p->in_states->state_array[i].fabricclk_mhz;
+ p->out_states->state_array[i].socclk_mhz = p->in_states->state_array[i].socclk_mhz;
+ p->out_states->state_array[i].dcfclk_mhz = p->in_states->state_array[i].dcfclk_mhz;
+ }
+
+ p->out_states->num_states = p->in_states->num_states;
+ } else {
+ dml2_policy_build_synthetic_soc_states(s, p);
}
}
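/*
 * Illustrative sketch, not part of this change: for DCN35/DCN351 the
 * synthetic-state policy is bypassed and out_states is rebuilt directly.
 * With two made-up input states
 *   in[0] = { dcfclk 400, fclk 800,  uclk 1600, dispclk 600,  dppclk 600  }
 *   in[1] = { dcfclk 600, fclk 1200, uclk 2400, dispclk 1200, dppclk 1200 }
 * the loop above would produce
 *   out[0] = { dcfclk 400, fclk 800,  uclk 1600, dispclk 1200, dppclk 1200 }
 *   out[1] = { dcfclk 600, fclk 1200, uclk 2400, dispclk 1200, dppclk 1200 }
 * i.e. the display-path clocks (dispclk, dppclk, dtbclk, phyclk, and dscclk
 * as dispclk / 3) are pinned to their maxima, while dcfclk, fclk, socclk
 * and uclk keep their per-state values.
 */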
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
index 9190c1328d5b..68b882d28195 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c
@@ -531,14 +531,21 @@ static bool optimize_pstate_with_svp_and_drr(struct dml2_context *dml2, struct d
static bool call_dml_mode_support_and_programming(struct dc_state *context)
{
unsigned int result = 0;
- unsigned int min_state;
+ unsigned int min_state = 0;
int min_state_for_g6_temp_read = 0;
+
+
+ if (!context)
+ return false;
+
struct dml2_context *dml2 = context->bw_ctx.dml2;
struct dml2_wrapper_scratch *s = &dml2->v20.scratch;
- min_state_for_g6_temp_read = calculate_lowest_supported_state_for_temp_read(dml2, context);
+ if (!context->streams[0]->sink->link->dc->caps.is_apu) {
+ min_state_for_g6_temp_read = calculate_lowest_supported_state_for_temp_read(dml2, context);
- ASSERT(min_state_for_g6_temp_read >= 0);
+ ASSERT(min_state_for_g6_temp_read >= 0);
+ }
if (!dml2->config.use_native_pstate_optimization) {
result = optimize_pstate_with_svp_and_drr(dml2, context);
@@ -549,14 +556,20 @@ static bool call_dml_mode_support_and_programming(struct dc_state *context)
/* Upon trying to set certain frequencies in FRL, min_state_for_g6_temp_read is reported as -1. This leads to an invalid value of min_state, causing crashes later on.
* Use the default logic for min_state only when min_state_for_g6_temp_read is a valid value. In other cases, use the value calculated by the DML directly.
*/
- if (min_state_for_g6_temp_read >= 0)
- min_state = min_state_for_g6_temp_read > s->mode_support_params.out_lowest_state_idx ? min_state_for_g6_temp_read : s->mode_support_params.out_lowest_state_idx;
- else
- min_state = s->mode_support_params.out_lowest_state_idx;
-
- if (result)
- result = dml_mode_programming(&dml2->v20.dml_core_ctx, min_state, &s->cur_display_config, true);
+ if (!context->streams[0]->sink->link->dc->caps.is_apu) {
+ if (min_state_for_g6_temp_read >= 0)
+ min_state = min_state_for_g6_temp_read > s->mode_support_params.out_lowest_state_idx ? min_state_for_g6_temp_read : s->mode_support_params.out_lowest_state_idx;
+ else
+ min_state = s->mode_support_params.out_lowest_state_idx;
+ }
+ if (result) {
+ if (!context->streams[0]->sink->link->dc->caps.is_apu) {
+ result = dml_mode_programming(&dml2->v20.dml_core_ctx, min_state, &s->cur_display_config, true);
+ } else {
+ result = dml_mode_programming(&dml2->v20.dml_core_ctx, s->mode_support_params.out_lowest_state_idx, &s->cur_display_config, true);
+ }
+ }
return result;
}
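/*
 * Illustrative sketch, not part of this change: on non-APU parts min_state
 * is the larger of the G6-temp-read floor and the lowest state reported by
 * mode support, e.g. min_state_for_g6_temp_read = 2 with
 * out_lowest_state_idx = 1 programs state 2; on APU parts (or when the
 * temp-read value is negative) out_lowest_state_idx is used directly.
 */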
@@ -685,6 +698,8 @@ static bool dml2_validate_only(struct dc_state *context)
build_unoptimized_policy_settings(dml2->v20.dml_core_ctx.project, &dml2->v20.dml_core_ctx.policy);
map_dc_state_into_dml_display_cfg(dml2, context, &dml2->v20.scratch.cur_display_config);
+ if (!dml2->config.skip_hw_state_mapping)
+ dml2_apply_det_buffer_allocation_policy(dml2, &dml2->v20.scratch.cur_display_config);
result = pack_and_call_dml_mode_support_ex(dml2,
&dml2->v20.scratch.cur_display_config,
@@ -732,11 +747,10 @@ static inline struct dml2_context *dml2_allocate_memory(void)
static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2)
{
- // TODO : Temporarily add DCN_VERSION_3_2 for N-1 validation. Remove DCN_VERSION_3_2 after N-1 validation phase is complete.
- if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01 || in_dc->ctx->dce_version == DCN_VERSION_3_2)) {
- dml21_reinit(in_dc, dml2, config);
+ if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01)) {
+ dml21_reinit(in_dc, dml2, config);
return;
- }
+ }
// Store config options
(*dml2)->config = *config;
@@ -771,10 +785,8 @@ static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_op
bool dml2_create(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2)
{
- // TODO : Temporarily add DCN_VERSION_3_2 for N-1 validation. Remove DCN_VERSION_3_2 after N-1 validation phase is complete.
- if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01 || in_dc->ctx->dce_version == DCN_VERSION_3_2)) {
+ if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01))
return dml21_create(in_dc, dml2, config);
- }
// Allocate Mode Lib Ctx
*dml2 = dml2_allocate_memory();
@@ -842,8 +854,7 @@ void dml2_reinit(const struct dc *in_dc,
const struct dml2_configuration_options *config,
struct dml2_context **dml2)
{
- // TODO : Temporarily add DCN_VERSION_3_2 for N-1 validation. Remove DCN_VERSION_3_2 after N-1 validation phase is complete.
- if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01 || in_dc->ctx->dce_version == DCN_VERSION_3_2)) {
+ if ((in_dc->debug.using_dml21) && (in_dc->ctx->dce_version == DCN_VERSION_4_01)) {
dml21_reinit(in_dc, dml2, config);
return;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.c
index 377ef6d01ae5..00d22e542469 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.c
@@ -427,18 +427,6 @@ void dml_rq_dlg_get_dlg_reg(dml_display_dlg_regs_st *disp_dlg_regs,
dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip);
dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip);
- // hack for FPGA
- /* NOTE: We dont have getenv defined in driver and it does not make any sense in the driver */
- /*char* fpga_env = getenv("FPGA_FPDIV");
- if(fpga_env !=NULL)
- {
- if(disp_dlg_regs->vratio_prefetch >= (dml_uint_t)dml_pow(2, 22))
- {
- disp_dlg_regs->vratio_prefetch = (dml_uint_t)dml_pow(2, 22)-1;
- dml_print("FPGA msg: vratio_prefetch exceed the max value, the register field is [21:0]\n");
- }
- }*/
-
disp_dlg_regs->refcyc_per_vm_group_vblank = (dml_uint_t)(dml_get_refcyc_per_vm_group_vblank_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz);
disp_dlg_regs->refcyc_per_vm_group_flip = (dml_uint_t)(dml_get_refcyc_per_vm_group_flip_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz);
disp_dlg_regs->refcyc_per_vm_req_vblank = (dml_uint_t)(dml_get_refcyc_per_vm_req_vblank_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10));
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
index d9aaebfa3a0a..11535922b5ff 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c
@@ -30,6 +30,9 @@
#include "rc_calc.h"
#include "fixed31_32.h"
+#define DC_LOGGER \
+ dsc->ctx->logger
+
/* This module's internal functions */
/* default DSC policy target bitrate limit is 16bpp */
@@ -480,6 +483,48 @@ bool dc_dsc_compute_bandwidth_range(
return is_dsc_possible;
}
+void dc_dsc_dump_encoder_caps(const struct display_stream_compressor *dsc,
+ const struct dc_crtc_timing *timing)
+{
+ struct dsc_enc_caps dsc_enc_caps;
+
+ get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz);
+
+ DC_LOG_DSC("dsc encoder caps:");
+ DC_LOG_DSC("\tdsc_version 0x%x", dsc_enc_caps.dsc_version);
+ DC_LOG_DSC("\tslice_caps 0x%x", dsc_enc_caps.slice_caps.raw);
+ DC_LOG_DSC("\tlb_bit_depth %d", dsc_enc_caps.lb_bit_depth);
+ DC_LOG_DSC("\tis_block_pred_supported %d", dsc_enc_caps.is_block_pred_supported);
+ DC_LOG_DSC("\tcolor_formats 0x%x", dsc_enc_caps.color_formats.raw);
+ DC_LOG_DSC("\tcolor_depth 0x%x", dsc_enc_caps.color_depth.raw);
+ DC_LOG_DSC("\tmax_total_throughput_mps %d", dsc_enc_caps.max_total_throughput_mps);
+ DC_LOG_DSC("\tmax_slice_width %d", dsc_enc_caps.max_slice_width);
+ DC_LOG_DSC("\tbpp_increment_div %d", dsc_enc_caps.bpp_increment_div);
+}
+
+void dc_dsc_dump_decoder_caps(const struct display_stream_compressor *dsc,
+ const struct dsc_dec_dpcd_caps *dsc_sink_caps)
+{
+ DC_LOG_DSC("dsc decoder caps:");
+ DC_LOG_DSC("\tis_dsc_supported %d", dsc_sink_caps->is_dsc_supported);
+ DC_LOG_DSC("\tdsc_version 0x%x", dsc_sink_caps->dsc_version);
+ DC_LOG_DSC("\trc_buffer_size %d", dsc_sink_caps->rc_buffer_size);
+ DC_LOG_DSC("\tslice_caps1 0x%x", dsc_sink_caps->slice_caps1.raw);
+ DC_LOG_DSC("\tslice_caps2 0x%x", dsc_sink_caps->slice_caps2.raw);
+ DC_LOG_DSC("\tlb_bit_depth %d", dsc_sink_caps->lb_bit_depth);
+ DC_LOG_DSC("\tis_block_pred_supported %d", dsc_sink_caps->is_block_pred_supported);
+ DC_LOG_DSC("\tedp_max_bits_per_pixel %d", dsc_sink_caps->edp_max_bits_per_pixel);
+ DC_LOG_DSC("\tcolor_formats 0x%x", dsc_sink_caps->color_formats.raw);
+ DC_LOG_DSC("\tthroughput_mode_0_mps %d", dsc_sink_caps->throughput_mode_0_mps);
+ DC_LOG_DSC("\tthroughput_mode_1_mps %d", dsc_sink_caps->throughput_mode_1_mps);
+ DC_LOG_DSC("\tmax_slice_width %d", dsc_sink_caps->max_slice_width);
+ DC_LOG_DSC("\tbpp_increment_div %d", dsc_sink_caps->bpp_increment_div);
+ DC_LOG_DSC("\tbranch_overall_throughput_0_mps %d", dsc_sink_caps->branch_overall_throughput_0_mps);
+ DC_LOG_DSC("\tbranch_overall_throughput_1_mps %d", dsc_sink_caps->branch_overall_throughput_1_mps);
+ DC_LOG_DSC("\tbranch_max_line_width %d", dsc_sink_caps->branch_max_line_width);
+ DC_LOG_DSC("\tis_dp %d", dsc_sink_caps->is_dp);
+}
+
static void get_dsc_enc_caps(
const struct display_stream_compressor *dsc,
struct dsc_enc_caps *dsc_enc_caps,
diff --git a/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c
index fae98cf52020..bc058f682438 100644
--- a/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c
+++ b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c
@@ -270,16 +270,3 @@ void dcn30_dwbc_construct(struct dcn30_dwbc *dwbc30,
dwbc30->dwbc_shift = dwbc_shift;
dwbc30->dwbc_mask = dwbc_mask;
}
-
-void dwb3_set_host_read_rate_control(struct dwbc *dwbc, bool host_read_delay)
-{
- struct dcn30_dwbc *dwbc30 = TO_DCN30_DWBC(dwbc);
-
- /*
- * Set maximum delay of host read access to DWBSCL LUT or OGAM LUT if there are no
- * idle cycles in HW pipeline (in number of clock cycles times 4)
- */
- REG_UPDATE(DWB_HOST_READ_CONTROL, DWB_HOST_READ_RATE_CONTROL, host_read_delay);
-
- DC_LOG_DWB("%s dwb3_rate_control at inst = %d", __func__, dwbc->inst);
-}
diff --git a/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h
index 0f3f7c5fbaec..7f053f49ec6a 100644
--- a/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h
+++ b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h
@@ -914,7 +914,6 @@ bool dwb3_ogam_set_input_transfer_func(
struct dwbc *dwbc,
const struct dc_transfer_func *in_transfer_func_dwb_ogam);
-void dwb3_set_host_read_rate_control(struct dwbc *dwbc, bool host_read_delay);
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c
index 22ac2b7e49ae..8364c9f9231a 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c
@@ -140,7 +140,7 @@ void hubp1_vready_workaround(struct hubp *hubp,
void hubp1_program_tiling(
struct hubp *hubp,
- const union dc_tiling_info *info,
+ const struct dc_tiling_info *info,
const enum surface_pixel_format pixel_format)
{
struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
@@ -518,6 +518,20 @@ bool hubp1_program_surface_flip_and_addr(
return true;
}
+void hubp1_clear_tiling(struct hubp *hubp)
+{
+ struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
+
+ REG_UPDATE(DCHUBP_REQ_SIZE_CONFIG, SWATH_HEIGHT, 0);
+ REG_UPDATE(DCSURF_TILING_CONFIG, SW_MODE, DC_SW_LINEAR);
+
+ REG_UPDATE_4(DCSURF_SURFACE_CONTROL,
+ PRIMARY_SURFACE_DCC_EN, 0,
+ PRIMARY_SURFACE_DCC_IND_64B_BLK, 0,
+ SECONDARY_SURFACE_DCC_EN, 0,
+ SECONDARY_SURFACE_DCC_IND_64B_BLK, 0);
+}
+
void hubp1_dcc_control(struct hubp *hubp, bool enable,
enum hubp_ind_block_size independent_64b_blks)
{
@@ -535,7 +549,7 @@ void hubp1_dcc_control(struct hubp *hubp, bool enable,
void hubp1_program_surface_config(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -1363,6 +1377,7 @@ static const struct hubp_funcs dcn10_hubp_funcs = {
.hubp_disable_control = hubp1_disable_control,
.hubp_get_underflow_status = hubp1_get_underflow_status,
.hubp_init = hubp1_init,
+ .hubp_clear_tiling = hubp1_clear_tiling,
.dmdata_set_attributes = NULL,
.dmdata_load = NULL,
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h
index 69119b2fdce2..a85dc3be786f 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h
@@ -706,7 +706,7 @@ struct dcn10_hubp {
void hubp1_program_surface_config(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -739,7 +739,7 @@ void hubp1_program_rotation(
void hubp1_program_tiling(
struct hubp *hubp,
- const union dc_tiling_info *info,
+ const struct dc_tiling_info *info,
const enum surface_pixel_format pixel_format);
void hubp1_dcc_control(struct hubp *hubp,
@@ -794,4 +794,6 @@ void hubp1_soft_reset(struct hubp *hubp, bool reset);
void hubp1_set_flip_int(struct hubp *hubp);
+void hubp1_clear_tiling(struct hubp *hubp);
+
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c
index 0637e4c552d8..c74f6a3313a2 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c
@@ -310,7 +310,7 @@ void hubp2_setup_interdependent(
*/
static void hubp2_program_tiling(
struct dcn20_hubp *hubp2,
- const union dc_tiling_info *info,
+ const struct dc_tiling_info *info,
const enum surface_pixel_format pixel_format)
{
REG_UPDATE_3(DCSURF_ADDR_CONFIG,
@@ -406,6 +406,20 @@ void hubp2_program_rotation(
H_MIRROR_EN, mirror);
}
+void hubp2_clear_tiling(struct hubp *hubp)
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ REG_UPDATE(DCHUBP_REQ_SIZE_CONFIG, SWATH_HEIGHT, 0);
+ REG_UPDATE(DCSURF_TILING_CONFIG, SW_MODE, DC_SW_LINEAR);
+
+ REG_UPDATE_4(DCSURF_SURFACE_CONTROL,
+ PRIMARY_SURFACE_DCC_EN, 0,
+ PRIMARY_SURFACE_DCC_IND_64B_BLK, 0,
+ SECONDARY_SURFACE_DCC_EN, 0,
+ SECONDARY_SURFACE_DCC_IND_64B_BLK, 0);
+}
+
void hubp2_dcc_control(struct hubp *hubp, bool enable,
enum hubp_ind_block_size independent_64b_blks)
{
@@ -536,7 +550,7 @@ void hubp2_program_pixel_format(
void hubp2_program_surface_config(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -1676,6 +1690,7 @@ static struct hubp_funcs dcn20_hubp_funcs = {
.hubp_in_blank = hubp1_in_blank,
.hubp_soft_reset = hubp1_soft_reset,
.hubp_set_flip_int = hubp1_set_flip_int,
+ .hubp_clear_tiling = hubp2_clear_tiling,
};
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h
index 18e194507e36..6968087a3605 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h
@@ -382,7 +382,7 @@ void hubp2_program_pixel_format(
void hubp2_program_surface_config(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -409,6 +409,8 @@ void hubp2_read_state_common(struct hubp *hubp);
void hubp2_read_state(struct hubp *hubp);
+void hubp2_clear_tiling(struct hubp *hubp);
+
#endif /* __DC_MEM_INPUT_DCN20_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c
index cd2bfcc51276..65c628078ca2 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c
@@ -42,7 +42,7 @@
static void hubp201_program_surface_config(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -131,6 +131,7 @@ static struct hubp_funcs dcn201_hubp_funcs = {
.hubp_clear_underflow = hubp1_clear_underflow,
.hubp_set_flip_control_surface_gsl = hubp2_set_flip_control_surface_gsl,
.hubp_init = hubp1_init,
+ .hubp_clear_tiling = hubp1_clear_tiling,
};
bool dcn201_hubp_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c
index e13d69a22c1c..edbdb8c88d5c 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c
@@ -837,6 +837,7 @@ static struct hubp_funcs dcn21_hubp_funcs = {
.hubp_init = hubp21_init,
.validate_dml_output = hubp21_validate_dml_output,
.hubp_set_flip_int = hubp1_set_flip_int,
+ .hubp_clear_tiling = hubp1_clear_tiling,
};
bool hubp21_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c
index 60a64d290352..12b282ed7067 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c
@@ -318,7 +318,7 @@ bool hubp3_program_surface_flip_and_addr(
void hubp3_program_tiling(
struct dcn20_hubp *hubp2,
- const union dc_tiling_info *info,
+ const struct dc_tiling_info *info,
const enum surface_pixel_format pixel_format)
{
REG_UPDATE_4(DCSURF_ADDR_CONFIG,
@@ -334,6 +334,22 @@ void hubp3_program_tiling(
}
+void hubp3_clear_tiling(struct hubp *hubp)
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ REG_UPDATE(DCHUBP_REQ_SIZE_CONFIG, SWATH_HEIGHT, 0);
+ REG_UPDATE(DCSURF_TILING_CONFIG, SW_MODE, DC_SW_LINEAR);
+
+ REG_UPDATE_6(DCSURF_SURFACE_CONTROL,
+ PRIMARY_SURFACE_DCC_EN, 0,
+ PRIMARY_SURFACE_DCC_IND_BLK, 0,
+ PRIMARY_SURFACE_DCC_IND_BLK_C, 0,
+ SECONDARY_SURFACE_DCC_EN, 0,
+ SECONDARY_SURFACE_DCC_IND_BLK, 0,
+ SECONDARY_SURFACE_DCC_IND_BLK_C, 0);
+}
+
void hubp3_dcc_control(struct hubp *hubp, bool enable,
enum hubp_ind_block_size blk_size)
{
@@ -395,7 +411,7 @@ void hubp3_dmdata_set_attributes(
void hubp3_program_surface_config(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -512,6 +528,7 @@ static struct hubp_funcs dcn30_hubp_funcs = {
.hubp_in_blank = hubp1_in_blank,
.hubp_soft_reset = hubp1_soft_reset,
.hubp_set_flip_int = hubp1_set_flip_int,
+ .hubp_clear_tiling = hubp3_clear_tiling,
};
bool hubp3_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h
index b010531a7fe8..b7d7adf0b58c 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h
@@ -264,7 +264,7 @@ bool hubp3_program_surface_flip_and_addr(
void hubp3_program_surface_config(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -280,7 +280,7 @@ void hubp3_setup(
void hubp3_program_tiling(
struct dcn20_hubp *hubp2,
- const union dc_tiling_info *info,
+ const struct dc_tiling_info *info,
const enum surface_pixel_format pixel_format);
void hubp3_dcc_control(struct hubp *hubp, bool enable,
@@ -297,6 +297,8 @@ void hubp3_read_state(struct hubp *hubp);
void hubp3_init(struct hubp *hubp);
+void hubp3_clear_tiling(struct hubp *hubp);
+
#endif /* __DC_HUBP_DCN30_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c
index 8394e8c06919..46b804ed05fb 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c
@@ -96,6 +96,7 @@ static struct hubp_funcs dcn31_hubp_funcs = {
.hubp_set_flip_int = hubp1_set_flip_int,
.hubp_in_blank = hubp1_in_blank,
.program_extended_blank = hubp31_program_extended_blank,
+ .hubp_clear_tiling = hubp3_clear_tiling,
};
bool hubp31_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c
index ca5b4b28a664..8b5bd73b8094 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c
@@ -201,7 +201,8 @@ static struct hubp_funcs dcn32_hubp_funcs = {
.hubp_update_force_cursor_pstate_disallow = hubp32_update_force_cursor_pstate_disallow,
.phantom_hubp_post_enable = hubp32_phantom_hubp_post_enable,
.hubp_update_mall_sel = hubp32_update_mall_sel,
- .hubp_prepare_subvp_buffering = hubp32_prepare_subvp_buffering
+ .hubp_prepare_subvp_buffering = hubp32_prepare_subvp_buffering,
+ .hubp_clear_tiling = hubp3_clear_tiling,
};
bool hubp32_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c
index d1f05b82b3dd..faf37febc6fb 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c
@@ -172,7 +172,7 @@ void hubp35_program_pixel_format(
void hubp35_program_surface_config(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -216,6 +216,7 @@ static struct hubp_funcs dcn35_hubp_funcs = {
.hubp_set_flip_int = hubp1_set_flip_int,
.hubp_in_blank = hubp1_in_blank,
.program_extended_blank = hubp31_program_extended_blank_value,
+ .hubp_clear_tiling = hubp3_clear_tiling,
};
bool hubp35_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.h
index 586b43aa5834..d913f80b3130 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.h
@@ -65,7 +65,7 @@ void hubp35_program_pixel_format(
void hubp35_program_surface_config(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c
index b1ebf5053b4f..28ceceaf9e31 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c
@@ -40,7 +40,7 @@
#define FN(reg_name, field_name) \
hubp2->hubp_shift->field_name, hubp2->hubp_mask->field_name
-static void hubp401_program_3dlut_fl_addr(struct hubp *hubp,
+void hubp401_program_3dlut_fl_addr(struct hubp *hubp,
const struct dc_plane_address address)
{
struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
@@ -49,14 +49,14 @@ static void hubp401_program_3dlut_fl_addr(struct hubp *hubp,
REG_WRITE(HUBP_3DLUT_ADDRESS_LOW, address.lut3d.addr.low_part);
}
-static void hubp401_program_3dlut_fl_dlg_param(struct hubp *hubp, int refcyc_per_3dlut_group)
+void hubp401_program_3dlut_fl_dlg_param(struct hubp *hubp, int refcyc_per_3dlut_group)
{
struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
REG_UPDATE(HUBP_3DLUT_DLG_PARAM, REFCYC_PER_3DLUT_GROUP, refcyc_per_3dlut_group);
}
-static void hubp401_enable_3dlut_fl(struct hubp *hubp, bool enable)
+void hubp401_enable_3dlut_fl(struct hubp *hubp, bool enable)
{
struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
@@ -72,28 +72,28 @@ int hubp401_get_3dlut_fl_done(struct hubp *hubp)
return ret;
}
-static void hubp401_program_3dlut_fl_addressing_mode(struct hubp *hubp, enum hubp_3dlut_fl_addressing_mode addr_mode)
+void hubp401_program_3dlut_fl_addressing_mode(struct hubp *hubp, enum hubp_3dlut_fl_addressing_mode addr_mode)
{
struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_ADDRESSING_MODE, addr_mode);
}
-static void hubp401_program_3dlut_fl_width(struct hubp *hubp, enum hubp_3dlut_fl_width width)
+void hubp401_program_3dlut_fl_width(struct hubp *hubp, enum hubp_3dlut_fl_width width)
{
struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_WIDTH, width);
}
-static void hubp401_program_3dlut_fl_tmz_protected(struct hubp *hubp, bool protection_enabled)
+void hubp401_program_3dlut_fl_tmz_protected(struct hubp *hubp, bool protection_enabled)
{
struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_TMZ, protection_enabled ? 1 : 0);
}
-static void hubp401_program_3dlut_fl_crossbar(struct hubp *hubp,
+void hubp401_program_3dlut_fl_crossbar(struct hubp *hubp,
enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_y_g,
enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cb_b,
enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cr_r)
@@ -106,21 +106,21 @@ static void hubp401_program_3dlut_fl_crossbar(struct hubp *hubp,
HUBP_3DLUT_CROSSBAR_SELECT_CR_R, bit_slice_cr_r);
}
-static void hubp401_update_3dlut_fl_bias_scale(struct hubp *hubp, uint16_t bias, uint16_t scale)
+void hubp401_update_3dlut_fl_bias_scale(struct hubp *hubp, uint16_t bias, uint16_t scale)
{
struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
REG_UPDATE_2(_3DLUT_FL_BIAS_SCALE, HUBP0_3DLUT_FL_BIAS, bias, HUBP0_3DLUT_FL_SCALE, scale);
}
-static void hubp401_program_3dlut_fl_mode(struct hubp *hubp, enum hubp_3dlut_fl_mode mode)
+void hubp401_program_3dlut_fl_mode(struct hubp *hubp, enum hubp_3dlut_fl_mode mode)
{
struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
REG_UPDATE(_3DLUT_FL_CONFIG, HUBP0_3DLUT_FL_MODE, mode);
}
-static void hubp401_program_3dlut_fl_format(struct hubp *hubp, enum hubp_3dlut_fl_format format)
+void hubp401_program_3dlut_fl_format(struct hubp *hubp, enum hubp_3dlut_fl_format format)
{
struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
@@ -145,30 +145,44 @@ void hubp401_init(struct hubp *hubp)
}
void hubp401_vready_at_or_After_vsync(struct hubp *hubp,
- struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest)
+ union dml2_global_sync_programming *pipe_global_sync,
+ struct dc_crtc_timing *timing)
{
- uint32_t value = 0;
+ unsigned int vstartup_lines = pipe_global_sync->dcn4x.vstartup_lines;
+ unsigned int vupdate_offset_pixels = pipe_global_sync->dcn4x.vupdate_offset_pixels;
+ unsigned int vupdate_width_pixels = pipe_global_sync->dcn4x.vupdate_vupdate_width_pixels;
+ unsigned int vready_offset_pixels = pipe_global_sync->dcn4x.vready_offset_pixels;
+ unsigned int htotal = timing->h_total;
+ unsigned int vblank_start = 0;
+ unsigned int vblank_end = 0;
+ unsigned int pixel_width = 0;
+ uint32_t reg_value = 0;
+ bool is_vready_at_or_after_vsync = false;
struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
/*
* if (VSTARTUP_START - (VREADY_OFFSET+VUPDATE_WIDTH+VUPDATE_OFFSET)/htotal) <= OTG_V_BLANK_END
* Set HUBP_VREADY_AT_OR_AFTER_VSYNC = 1
* else
* Set HUBP_VREADY_AT_OR_AFTER_VSYNC = 0
*/
- if (pipe_dest->htotal != 0) {
- if ((pipe_dest->vstartup_start - (pipe_dest->vready_offset+pipe_dest->vupdate_width
- + pipe_dest->vupdate_offset) / pipe_dest->htotal) <= pipe_dest->vblank_end) {
- value = 1;
- } else
- value = 0;
+ if (htotal != 0) {
+ vblank_start = timing->v_total - timing->v_front_porch;
+ vblank_end = vblank_start - timing->v_addressable - timing->v_border_top - timing->v_border_bottom;
+ pixel_width = vready_offset_pixels + vupdate_width_pixels + vupdate_offset_pixels;
+
+ is_vready_at_or_after_vsync = (vstartup_lines - pixel_width / htotal) <= vblank_end;
+
+ if (is_vready_at_or_after_vsync)
+ reg_value = 1;
}
- REG_UPDATE(DCHUBP_CNTL, HUBP_VREADY_AT_OR_AFTER_VSYNC, value);
+ REG_UPDATE(DCHUBP_CNTL, HUBP_VREADY_AT_OR_AFTER_VSYNC, reg_value);
}
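/*
 * Illustrative sketch, not part of this change: the check above with plain,
 * hypothetical numbers for a 1080p-like timing: v_total = 1125,
 * v_front_porch = 4, v_addressable = 1080, no borders, h_total = 2200,
 * vstartup_lines = 83, and vready + vupdate offsets/width summing to
 * 2200 pixels (one line):
 *
 *   vblank_start = 1125 - 4            = 1121
 *   vblank_end   = 1121 - 1080 - 0 - 0 = 41
 *   83 - 2200 / 2200 = 82, which is > 41, so
 *   HUBP_VREADY_AT_OR_AFTER_VSYNC stays 0 for this timing.
 */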
void hubp401_program_requestor(
struct hubp *hubp,
- struct _vcs_dpi_display_rq_regs_st *rq_regs)
+ struct dml2_display_rq_regs *rq_regs)
{
struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
@@ -196,8 +210,8 @@ void hubp401_program_requestor(
void hubp401_program_deadline(
struct hubp *hubp,
- struct _vcs_dpi_display_dlg_regs_st *dlg_attr,
- struct _vcs_dpi_display_ttu_regs_st *ttu_attr)
+ struct dml2_display_dlg_regs *dlg_attr,
+ struct dml2_display_ttu_regs *ttu_attr)
{
struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
@@ -294,66 +308,64 @@ void hubp401_program_deadline(
void hubp401_setup(
struct hubp *hubp,
- struct _vcs_dpi_display_dlg_regs_st *dlg_attr,
- struct _vcs_dpi_display_ttu_regs_st *ttu_attr,
- struct _vcs_dpi_display_rq_regs_st *rq_regs,
- struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest)
+ struct dml2_dchub_per_pipe_register_set *pipe_regs,
+ union dml2_global_sync_programming *pipe_global_sync,
+ struct dc_crtc_timing *timing)
{
/* otg is locked when this func is called. Registers are double buffered,
* so disabling the requestors is not needed
*/
- hubp401_vready_at_or_After_vsync(hubp, pipe_dest);
- hubp401_program_requestor(hubp, rq_regs);
- hubp401_program_deadline(hubp, dlg_attr, ttu_attr);
+ hubp401_vready_at_or_After_vsync(hubp, pipe_global_sync, timing);
+ hubp401_program_requestor(hubp, &pipe_regs->rq_regs);
+ hubp401_program_deadline(hubp, &pipe_regs->dlg_regs, &pipe_regs->ttu_regs);
}
void hubp401_setup_interdependent(
struct hubp *hubp,
- struct _vcs_dpi_display_dlg_regs_st *dlg_attr,
- struct _vcs_dpi_display_ttu_regs_st *ttu_attr)
+ struct dml2_dchub_per_pipe_register_set *pipe_regs)
{
struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
REG_SET_2(PREFETCH_SETTINGS, 0,
- DST_Y_PREFETCH, dlg_attr->dst_y_prefetch,
- VRATIO_PREFETCH, dlg_attr->vratio_prefetch);
+ DST_Y_PREFETCH, pipe_regs->dlg_regs.dst_y_prefetch,
+ VRATIO_PREFETCH, pipe_regs->dlg_regs.vratio_prefetch);
REG_SET(PREFETCH_SETTINGS_C, 0,
- VRATIO_PREFETCH_C, dlg_attr->vratio_prefetch_c);
+ VRATIO_PREFETCH_C, pipe_regs->dlg_regs.vratio_prefetch_c);
REG_SET_2(VBLANK_PARAMETERS_0, 0,
- DST_Y_PER_VM_VBLANK, dlg_attr->dst_y_per_vm_vblank,
- DST_Y_PER_ROW_VBLANK, dlg_attr->dst_y_per_row_vblank);
+ DST_Y_PER_VM_VBLANK, pipe_regs->dlg_regs.dst_y_per_vm_vblank,
+ DST_Y_PER_ROW_VBLANK, pipe_regs->dlg_regs.dst_y_per_row_vblank);
REG_SET_2(FLIP_PARAMETERS_0, 0,
- DST_Y_PER_VM_FLIP, dlg_attr->dst_y_per_vm_flip,
- DST_Y_PER_ROW_FLIP, dlg_attr->dst_y_per_row_flip);
+ DST_Y_PER_VM_FLIP, pipe_regs->dlg_regs.dst_y_per_vm_flip,
+ DST_Y_PER_ROW_FLIP, pipe_regs->dlg_regs.dst_y_per_row_flip);
REG_SET(VBLANK_PARAMETERS_3, 0,
- REFCYC_PER_META_CHUNK_VBLANK_L, dlg_attr->refcyc_per_meta_chunk_vblank_l);
+ REFCYC_PER_META_CHUNK_VBLANK_L, pipe_regs->dlg_regs.refcyc_per_meta_chunk_vblank_l);
REG_SET(VBLANK_PARAMETERS_4, 0,
- REFCYC_PER_META_CHUNK_VBLANK_C, dlg_attr->refcyc_per_meta_chunk_vblank_c);
+ REFCYC_PER_META_CHUNK_VBLANK_C, pipe_regs->dlg_regs.refcyc_per_meta_chunk_vblank_c);
REG_SET(FLIP_PARAMETERS_2, 0,
- REFCYC_PER_META_CHUNK_FLIP_L, dlg_attr->refcyc_per_meta_chunk_flip_l);
+ REFCYC_PER_META_CHUNK_FLIP_L, pipe_regs->dlg_regs.refcyc_per_meta_chunk_flip_l);
REG_SET_2(PER_LINE_DELIVERY_PRE, 0,
- REFCYC_PER_LINE_DELIVERY_PRE_L, dlg_attr->refcyc_per_line_delivery_pre_l,
- REFCYC_PER_LINE_DELIVERY_PRE_C, dlg_attr->refcyc_per_line_delivery_pre_c);
+ REFCYC_PER_LINE_DELIVERY_PRE_L, pipe_regs->dlg_regs.refcyc_per_line_delivery_pre_l,
+ REFCYC_PER_LINE_DELIVERY_PRE_C, pipe_regs->dlg_regs.refcyc_per_line_delivery_pre_c);
REG_SET(DCN_SURF0_TTU_CNTL1, 0,
REFCYC_PER_REQ_DELIVERY_PRE,
- ttu_attr->refcyc_per_req_delivery_pre_l);
+ pipe_regs->ttu_regs.refcyc_per_req_delivery_pre_l);
REG_SET(DCN_SURF1_TTU_CNTL1, 0,
REFCYC_PER_REQ_DELIVERY_PRE,
- ttu_attr->refcyc_per_req_delivery_pre_c);
+ pipe_regs->ttu_regs.refcyc_per_req_delivery_pre_c);
REG_SET(DCN_CUR0_TTU_CNTL1, 0,
- REFCYC_PER_REQ_DELIVERY_PRE, ttu_attr->refcyc_per_req_delivery_pre_cur0);
+ REFCYC_PER_REQ_DELIVERY_PRE, pipe_regs->ttu_regs.refcyc_per_req_delivery_pre_cur0);
REG_SET_2(DCN_GLOBAL_TTU_CNTL, 0,
- MIN_TTU_VBLANK, ttu_attr->min_ttu_vblank,
- QoS_LEVEL_FLIP, ttu_attr->qos_level_flip);
+ MIN_TTU_VBLANK, pipe_regs->ttu_regs.min_ttu_vblank,
+ QoS_LEVEL_FLIP, pipe_regs->ttu_regs.qos_level_flip);
}
@@ -508,6 +520,18 @@ bool hubp401_program_surface_flip_and_addr(
return true;
}
+void hubp401_clear_tiling(struct hubp *hubp)
+{
+ struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
+
+ REG_UPDATE(DCHUBP_REQ_SIZE_CONFIG, SWATH_HEIGHT, 0);
+ REG_UPDATE(DCSURF_TILING_CONFIG, SW_MODE, DC_SW_LINEAR);
+
+ REG_UPDATE_2(DCSURF_SURFACE_CONTROL,
+ PRIMARY_SURFACE_DCC_EN, 0,
+ SECONDARY_SURFACE_DCC_EN, 0);
+}
+
void hubp401_dcc_control(struct hubp *hubp,
struct dc_plane_dcc_param *dcc)
{
@@ -520,7 +544,7 @@ void hubp401_dcc_control(struct hubp *hubp,
void hubp401_program_tiling(
struct dcn20_hubp *hubp2,
- const union dc_tiling_info *info,
+ const struct dc_tiling_info *info,
const enum surface_pixel_format pixel_format)
{
/* DCSURF_ADDR_CONFIG still shows up in reg spec, but does not need to be programmed for DCN4x
@@ -568,7 +592,7 @@ void hubp401_program_size(
void hubp401_program_surface_config(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -969,8 +993,8 @@ static struct hubp_funcs dcn401_hubp_funcs = {
.hubp_program_surface_flip_and_addr = hubp401_program_surface_flip_and_addr,
.hubp_program_surface_config = hubp401_program_surface_config,
.hubp_is_flip_pending = hubp2_is_flip_pending,
- .hubp_setup = hubp401_setup,
- .hubp_setup_interdependent = hubp401_setup_interdependent,
+ .hubp_setup2 = hubp401_setup,
+ .hubp_setup_interdependent2 = hubp401_setup_interdependent,
.hubp_set_vm_system_aperture_settings = hubp3_set_vm_system_aperture_settings,
.set_blank = hubp2_set_blank,
.set_blank_regs = hubp2_set_blank_regs,
@@ -1004,7 +1028,8 @@ static struct hubp_funcs dcn401_hubp_funcs = {
.hubp_program_3dlut_fl_width = hubp401_program_3dlut_fl_width,
.hubp_program_3dlut_fl_tmz_protected = hubp401_program_3dlut_fl_tmz_protected,
.hubp_program_3dlut_fl_crossbar = hubp401_program_3dlut_fl_crossbar,
- .hubp_get_3dlut_fl_done = hubp401_get_3dlut_fl_done
+ .hubp_get_3dlut_fl_done = hubp401_get_3dlut_fl_done,
+ .hubp_clear_tiling = hubp2_clear_tiling,
};
bool hubp401_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h
index e52fdb5b0cd0..6e1d4c90ddd4 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h
@@ -256,29 +256,15 @@
void hubp401_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor);
-void hubp401_vready_at_or_After_vsync(struct hubp *hubp,
- struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest);
-
-void hubp401_program_requestor(
- struct hubp *hubp,
- struct _vcs_dpi_display_rq_regs_st *rq_regs);
-
-void hubp401_program_deadline(
- struct hubp *hubp,
- struct _vcs_dpi_display_dlg_regs_st *dlg_attr,
- struct _vcs_dpi_display_ttu_regs_st *ttu_attr);
-
void hubp401_setup(
struct hubp *hubp,
- struct _vcs_dpi_display_dlg_regs_st *dlg_attr,
- struct _vcs_dpi_display_ttu_regs_st *ttu_attr,
- struct _vcs_dpi_display_rq_regs_st *rq_regs,
- struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest);
+ struct dml2_dchub_per_pipe_register_set *pipe_regs,
+ union dml2_global_sync_programming *pipe_global_sync,
+ struct dc_crtc_timing *timing);
void hubp401_setup_interdependent(
struct hubp *hubp,
- struct _vcs_dpi_display_dlg_regs_st *dlg_attr,
- struct _vcs_dpi_display_ttu_regs_st *ttu_attr);
+ struct dml2_dchub_per_pipe_register_set *pipe_regs);
bool hubp401_program_surface_flip_and_addr(
struct hubp *hubp,
@@ -290,7 +276,7 @@ void hubp401_dcc_control(struct hubp *hubp,
void hubp401_program_tiling(
struct dcn20_hubp *hubp2,
- const union dc_tiling_info *info,
+ const struct dc_tiling_info *info,
const enum surface_pixel_format pixel_format);
void hubp401_program_size(
@@ -302,7 +288,7 @@ void hubp401_program_size(
void hubp401_program_surface_config(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -340,4 +326,42 @@ int hubp401_get_3dlut_fl_done(struct hubp *hubp);
void hubp401_set_unbounded_requesting(struct hubp *hubp, bool enable);
+void hubp401_update_3dlut_fl_bias_scale(struct hubp *hubp, uint16_t bias, uint16_t scale);
+
+void hubp401_program_3dlut_fl_crossbar(struct hubp *hubp,
+ enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_y_g,
+ enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cb_b,
+ enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cr_r);
+
+void hubp401_program_3dlut_fl_tmz_protected(struct hubp *hubp, bool protection_enabled);
+
+void hubp401_program_3dlut_fl_width(struct hubp *hubp, enum hubp_3dlut_fl_width width);
+
+void hubp401_program_3dlut_fl_addressing_mode(struct hubp *hubp, enum hubp_3dlut_fl_addressing_mode addr_mode);
+
+void hubp401_enable_3dlut_fl(struct hubp *hubp, bool enable);
+
+void hubp401_program_3dlut_fl_dlg_param(struct hubp *hubp, int refcyc_per_3dlut_group);
+
+void hubp401_program_3dlut_fl_addr(struct hubp *hubp, const struct dc_plane_address address);
+
+void hubp401_program_3dlut_fl_format(struct hubp *hubp, enum hubp_3dlut_fl_format format);
+
+void hubp401_program_3dlut_fl_mode(struct hubp *hubp, enum hubp_3dlut_fl_mode mode);
+
+void hubp401_clear_tiling(struct hubp *hubp);
+
+void hubp401_vready_at_or_After_vsync(struct hubp *hubp,
+ union dml2_global_sync_programming *pipe_global_sync,
+ struct dc_crtc_timing *timing);
+
+void hubp401_program_requestor(
+ struct hubp *hubp,
+ struct dml2_display_rq_regs *rq_regs);
+
+void hubp401_program_deadline(
+ struct hubp *hubp,
+ struct dml2_display_dlg_regs *dlg_attr,
+ struct dml2_display_ttu_regs *ttu_attr);
+
#endif /* __DC_HUBP_DCN401_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
index b029ec1b26d3..a5e18ab72394 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c
@@ -1288,7 +1288,7 @@ static void dcn20_power_on_plane_resources(
}
}
-static void dcn20_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx,
+void dcn20_enable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx,
struct dc_state *context)
{
//if (dc->debug.sanity_checks) {
@@ -1467,7 +1467,7 @@ void dcn20_pipe_control_lock(
}
}
-static void dcn20_detect_pipe_changes(struct dc_state *old_state,
+void dcn20_detect_pipe_changes(struct dc_state *old_state,
struct dc_state *new_state,
struct pipe_ctx *old_pipe,
struct pipe_ctx *new_pipe)
@@ -1655,7 +1655,7 @@ static void dcn20_detect_pipe_changes(struct dc_state *old_state,
}
}
-static void dcn20_update_dchubp_dpp(
+void dcn20_update_dchubp_dpp(
struct dc *dc,
struct pipe_ctx *pipe_ctx,
struct dc_state *context)
@@ -1678,25 +1678,41 @@ static void dcn20_update_dchubp_dpp(
* VTG is within DCHUBBUB, which is a common block shared by each pipe HUBP.
* VTG has a 1:1 mapping with OTG. Each pipe HUBP will select which VTG
*/
+
if (pipe_ctx->update_flags.bits.hubp_rq_dlg_ttu) {
hubp->funcs->hubp_vtg_sel(hubp, pipe_ctx->stream_res.tg->inst);
- hubp->funcs->hubp_setup(
- hubp,
- &pipe_ctx->dlg_regs,
- &pipe_ctx->ttu_regs,
- &pipe_ctx->rq_regs,
- &pipe_ctx->pipe_dlg_param);
+ if (hubp->funcs->hubp_setup2) {
+ hubp->funcs->hubp_setup2(
+ hubp,
+ &pipe_ctx->hubp_regs,
+ &pipe_ctx->global_sync,
+ &pipe_ctx->stream->timing);
+ } else {
+ hubp->funcs->hubp_setup(
+ hubp,
+ &pipe_ctx->dlg_regs,
+ &pipe_ctx->ttu_regs,
+ &pipe_ctx->rq_regs,
+ &pipe_ctx->pipe_dlg_param);
+ }
}
if (pipe_ctx->update_flags.bits.unbounded_req && hubp->funcs->set_unbounded_requesting)
hubp->funcs->set_unbounded_requesting(hubp, pipe_ctx->unbounded_req);
- if (pipe_ctx->update_flags.bits.hubp_interdependent)
- hubp->funcs->hubp_setup_interdependent(
- hubp,
- &pipe_ctx->dlg_regs,
- &pipe_ctx->ttu_regs);
+ if (pipe_ctx->update_flags.bits.hubp_interdependent) {
+ if (hubp->funcs->hubp_setup_interdependent2) {
+ hubp->funcs->hubp_setup_interdependent2(
+ hubp,
+ &pipe_ctx->hubp_regs);
+ } else {
+ hubp->funcs->hubp_setup_interdependent(
+ hubp,
+ &pipe_ctx->dlg_regs,
+ &pipe_ctx->ttu_regs);
+ }
+ }
if (pipe_ctx->update_flags.bits.enable ||
pipe_ctx->update_flags.bits.plane_changed ||
@@ -1756,10 +1772,9 @@ static void dcn20_update_dchubp_dpp(
&pipe_ctx->plane_res.scl_data.viewport_c);
viewport_changed = true;
}
- if (hubp->funcs->hubp_program_mcache_id_and_split_coordinate)
- hubp->funcs->hubp_program_mcache_id_and_split_coordinate(
- hubp,
- &pipe_ctx->mcache_regs);
+
+ if (hubp->funcs->hubp_program_mcache_id_and_split_coordinate)
+ hubp->funcs->hubp_program_mcache_id_and_split_coordinate(hubp, &pipe_ctx->mcache_regs);
/* Any updates are handled in dc interface, just need to apply existing for plane enable */
if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed ||
@@ -1838,7 +1853,7 @@ static void dcn20_update_dchubp_dpp(
hubp->funcs->phantom_hubp_post_enable(hubp);
}
-static int calculate_vready_offset_for_group(struct pipe_ctx *pipe)
+static int dcn20_calculate_vready_offset_for_group(struct pipe_ctx *pipe)
{
struct pipe_ctx *other_pipe;
int vready_offset = pipe->pipe_dlg_param.vready_offset;
@@ -1864,6 +1879,30 @@ static int calculate_vready_offset_for_group(struct pipe_ctx *pipe)
return vready_offset;
}
+static void dcn20_program_tg(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context,
+ struct dce_hwseq *hws)
+{
+ pipe_ctx->stream_res.tg->funcs->program_global_sync(
+ pipe_ctx->stream_res.tg,
+ dcn20_calculate_vready_offset_for_group(pipe_ctx),
+ pipe_ctx->pipe_dlg_param.vstartup_start,
+ pipe_ctx->pipe_dlg_param.vupdate_offset,
+ pipe_ctx->pipe_dlg_param.vupdate_width,
+ pipe_ctx->pipe_dlg_param.pstate_keepout);
+
+ if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM)
+ pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE);
+
+ pipe_ctx->stream_res.tg->funcs->set_vtg_params(
+ pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true);
+
+ if (hws->funcs.setup_vupdate_interrupt)
+ hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx);
+}
+
static void dcn20_program_pipe(
struct dc *dc,
struct pipe_ctx *pipe_ctx,
@@ -1874,33 +1913,17 @@ static void dcn20_program_pipe(
/* Only need to unblank on top pipe */
if (resource_is_pipe_type(pipe_ctx, OTG_MASTER)) {
if (pipe_ctx->update_flags.bits.enable ||
- pipe_ctx->update_flags.bits.odm ||
- pipe_ctx->stream->update_flags.bits.abm_level)
+ pipe_ctx->update_flags.bits.odm ||
+ pipe_ctx->stream->update_flags.bits.abm_level)
hws->funcs.blank_pixel_data(dc, pipe_ctx,
- !pipe_ctx->plane_state ||
- !pipe_ctx->plane_state->visible);
+ !pipe_ctx->plane_state ||
+ !pipe_ctx->plane_state->visible);
}
/* Only update TG on top pipe */
if (pipe_ctx->update_flags.bits.global_sync && !pipe_ctx->top_pipe
- && !pipe_ctx->prev_odm_pipe) {
- pipe_ctx->stream_res.tg->funcs->program_global_sync(
- pipe_ctx->stream_res.tg,
- calculate_vready_offset_for_group(pipe_ctx),
- pipe_ctx->pipe_dlg_param.vstartup_start,
- pipe_ctx->pipe_dlg_param.vupdate_offset,
- pipe_ctx->pipe_dlg_param.vupdate_width,
- pipe_ctx->pipe_dlg_param.pstate_keepout);
-
- if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM)
- pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE);
-
- pipe_ctx->stream_res.tg->funcs->set_vtg_params(
- pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true);
-
- if (hws->funcs.setup_vupdate_interrupt)
- hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx);
- }
+ && !pipe_ctx->prev_odm_pipe)
+ dcn20_program_tg(dc, pipe_ctx, context, hws);
if (pipe_ctx->update_flags.bits.odm)
hws->funcs.update_odm(dc, context, pipe_ctx);
@@ -1931,22 +1954,22 @@ static void dcn20_program_pipe(
dcn20_update_dchubp_dpp(dc, pipe_ctx, context);
if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable ||
- pipe_ctx->plane_state->update_flags.bits.hdr_mult))
+ pipe_ctx->plane_state->update_flags.bits.hdr_mult))
hws->funcs.set_hdr_multiplier(pipe_ctx);
if (hws->funcs.populate_mcm_luts) {
if (pipe_ctx->plane_state) {
hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts,
- pipe_ctx->plane_state->lut_bank_a);
+ pipe_ctx->plane_state->lut_bank_a);
pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a;
}
}
if (pipe_ctx->plane_state &&
- (pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change ||
- pipe_ctx->plane_state->update_flags.bits.gamma_change ||
- pipe_ctx->plane_state->update_flags.bits.lut_3d ||
- pipe_ctx->update_flags.bits.enable))
+ (pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change ||
+ pipe_ctx->plane_state->update_flags.bits.gamma_change ||
+ pipe_ctx->plane_state->update_flags.bits.lut_3d ||
+ pipe_ctx->update_flags.bits.enable))
hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state);
/* dcn10_translate_regamma_to_hw_format takes 750us to finish
@@ -1954,10 +1977,10 @@ static void dcn20_program_pipe(
* updating on slave planes
*/
if (pipe_ctx->update_flags.bits.enable ||
- pipe_ctx->update_flags.bits.plane_changed ||
- pipe_ctx->stream->update_flags.bits.out_tf ||
- (pipe_ctx->plane_state &&
- pipe_ctx->plane_state->update_flags.bits.output_tf_change))
+ pipe_ctx->update_flags.bits.plane_changed ||
+ pipe_ctx->stream->update_flags.bits.out_tf ||
+ (pipe_ctx->plane_state &&
+ pipe_ctx->plane_state->update_flags.bits.output_tf_change))
hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream);
/* If the pipe has been enabled or has a different opp, we
@@ -1966,7 +1989,7 @@ static void dcn20_program_pipe(
* causes a different pipe to be chosen to odm combine with.
*/
if (pipe_ctx->update_flags.bits.enable
- || pipe_ctx->update_flags.bits.opp_changed) {
+ || pipe_ctx->update_flags.bits.opp_changed) {
pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion(
pipe_ctx->stream_res.opp,
@@ -1996,14 +2019,14 @@ static void dcn20_program_pipe(
memset(&params, 0, sizeof(params));
odm_opp->funcs->opp_program_bit_depth_reduction(odm_opp, &params);
dc->hwss.set_disp_pattern_generator(dc,
- pipe_ctx,
- pipe_ctx->stream_res.test_pattern_params.test_pattern,
- pipe_ctx->stream_res.test_pattern_params.color_space,
- pipe_ctx->stream_res.test_pattern_params.color_depth,
- NULL,
- pipe_ctx->stream_res.test_pattern_params.width,
- pipe_ctx->stream_res.test_pattern_params.height,
- pipe_ctx->stream_res.test_pattern_params.offset);
+ pipe_ctx,
+ pipe_ctx->stream_res.test_pattern_params.test_pattern,
+ pipe_ctx->stream_res.test_pattern_params.color_space,
+ pipe_ctx->stream_res.test_pattern_params.color_depth,
+ NULL,
+ pipe_ctx->stream_res.test_pattern_params.width,
+ pipe_ctx->stream_res.test_pattern_params.height,
+ pipe_ctx->stream_res.test_pattern_params.offset);
}
}
@@ -2012,11 +2035,12 @@ void dcn20_program_front_end_for_ctx(
struct dc_state *context)
{
int i;
- struct dce_hwseq *hws = dc->hwseq;
- DC_LOGGER_INIT(dc->ctx->logger);
unsigned int prev_hubp_count = 0;
unsigned int hubp_count = 0;
- struct pipe_ctx *pipe;
+ struct dce_hwseq *hws = dc->hwseq;
+ struct pipe_ctx *pipe = NULL;
+
+ DC_LOGGER_INIT(dc->ctx->logger);
if (resource_is_pipe_topology_changed(dc->current_state, context))
resource_log_pipe_topology_update(dc, context);
@@ -2029,7 +2053,7 @@ void dcn20_program_front_end_for_ctx(
ASSERT(!pipe->plane_state->triplebuffer_flips);
/*turn off triple buffer for full update*/
dc->hwss.program_triplebuffer(
- dc, pipe, pipe->plane_state->triplebuffer_flips);
+ dc, pipe, pipe->plane_state->triplebuffer_flips);
}
}
}
@@ -2044,30 +2068,31 @@ void dcn20_program_front_end_for_ctx(
if (prev_hubp_count == 0 && hubp_count > 0) {
if (dc->res_pool->hubbub->funcs->force_pstate_change_control)
dc->res_pool->hubbub->funcs->force_pstate_change_control(
- dc->res_pool->hubbub, true, false);
+ dc->res_pool->hubbub, true, false);
udelay(500);
}
/* Set pipe update flags and lock pipes */
for (i = 0; i < dc->res_pool->pipe_count; i++)
dcn20_detect_pipe_changes(dc->current_state, context, &dc->current_state->res_ctx.pipe_ctx[i],
- &context->res_ctx.pipe_ctx[i]);
+ &context->res_ctx.pipe_ctx[i]);
/* When disabling phantom pipes, turn on phantom OTG first (so we can get double
* buffer updates properly)
*/
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct dc_stream_state *stream = dc->current_state->res_ctx.pipe_ctx[i].stream;
+
pipe = &dc->current_state->res_ctx.pipe_ctx[i];
if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable && stream &&
- dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) {
+ dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) {
struct timing_generator *tg = dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg;
if (tg->funcs->enable_crtc) {
- if (dc->hwseq->funcs.blank_pixel_data) {
+ if (dc->hwseq->funcs.blank_pixel_data)
dc->hwseq->funcs.blank_pixel_data(dc, pipe, true);
- }
+
tg->funcs->enable_crtc(tg);
}
}
@@ -2075,15 +2100,15 @@ void dcn20_program_front_end_for_ctx(
/* OTG blank before disabling all front ends */
for (i = 0; i < dc->res_pool->pipe_count; i++)
if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable
- && !context->res_ctx.pipe_ctx[i].top_pipe
- && !context->res_ctx.pipe_ctx[i].prev_odm_pipe
- && context->res_ctx.pipe_ctx[i].stream)
+ && !context->res_ctx.pipe_ctx[i].top_pipe
+ && !context->res_ctx.pipe_ctx[i].prev_odm_pipe
+ && context->res_ctx.pipe_ctx[i].stream)
hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true);
/* Disconnect mpcc */
for (i = 0; i < dc->res_pool->pipe_count; i++)
if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable
- || context->res_ctx.pipe_ctx[i].update_flags.bits.opp_changed) {
+ || context->res_ctx.pipe_ctx[i].update_flags.bits.opp_changed) {
struct hubbub *hubbub = dc->res_pool->hubbub;
/* Phantom pipe DET should be 0, but if a pipe in use is being transitioned to phantom
@@ -2093,13 +2118,18 @@ void dcn20_program_front_end_for_ctx(
* DET allocation.
*/
if ((context->res_ctx.pipe_ctx[i].update_flags.bits.disable ||
- (context->res_ctx.pipe_ctx[i].plane_state && dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) == SUBVP_PHANTOM))) {
+ (context->res_ctx.pipe_ctx[i].plane_state &&
+ dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i])
+ == SUBVP_PHANTOM))) {
if (hubbub->funcs->program_det_size)
- hubbub->funcs->program_det_size(hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0);
+ hubbub->funcs->program_det_size(hubbub,
+ dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0);
if (dc->res_pool->hubbub->funcs->program_det_segments)
- dc->res_pool->hubbub->funcs->program_det_segments(hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0);
+ dc->res_pool->hubbub->funcs->program_det_segments(
+ hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0);
}
- hws->funcs.plane_atomic_disconnect(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]);
+ hws->funcs.plane_atomic_disconnect(dc, dc->current_state,
+ &dc->current_state->res_ctx.pipe_ctx[i]);
DC_LOG_DC("Reset mpcc for pipe %d\n", dc->current_state->res_ctx.pipe_ctx[i].pipe_idx);
}
@@ -2107,9 +2137,9 @@ void dcn20_program_front_end_for_ctx(
for (i = 0; i < dc->res_pool->pipe_count; i++) {
pipe = &context->res_ctx.pipe_ctx[i];
if (resource_is_pipe_type(pipe, OTG_MASTER) &&
- !resource_is_pipe_type(pipe, DPP_PIPE) &&
- pipe->update_flags.bits.odm &&
- hws->funcs.update_odm)
+ !resource_is_pipe_type(pipe, DPP_PIPE) &&
+ pipe->update_flags.bits.odm &&
+ hws->funcs.update_odm)
hws->funcs.update_odm(dc, context, pipe);
}
@@ -2127,25 +2157,28 @@ void dcn20_program_front_end_for_ctx(
else {
/* Don't program phantom pipes in the regular front end programming sequence.
* There is an MPO transition case where a pipe being used by a video plane is
- * transitioned directly to be a phantom pipe when closing the MPO video. However
- * the phantom pipe will program a new HUBP_VTG_SEL (update takes place right away),
- * but the MPO still exists until the double buffered update of the main pipe so we
- * will get a frame of underflow if the phantom pipe is programmed here.
+ * transitioned directly to be a phantom pipe when closing the MPO video.
+	 * However, the phantom pipe will program a new HUBP_VTG_SEL (update takes place
+ * right away) but the MPO still exists until the double buffered update of the
+ * main pipe so we will get a frame of underflow if the phantom pipe is
+ * programmed here.
*/
- if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM)
+ if (pipe->stream &&
+ dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM)
dcn20_program_pipe(dc, pipe, context);
}
pipe = pipe->bottom_pipe;
}
}
+
/* Program secondary blending tree and writeback pipes */
pipe = &context->res_ctx.pipe_ctx[i];
if (!pipe->top_pipe && !pipe->prev_odm_pipe
- && pipe->stream && pipe->stream->num_wb_info > 0
- && (pipe->update_flags.raw || (pipe->plane_state && pipe->plane_state->update_flags.raw)
- || pipe->stream->update_flags.raw)
- && hws->funcs.program_all_writeback_pipes_in_tree)
+ && pipe->stream && pipe->stream->num_wb_info > 0
+ && (pipe->update_flags.raw || (pipe->plane_state && pipe->plane_state->update_flags.raw)
+ || pipe->stream->update_flags.raw)
+ && hws->funcs.program_all_writeback_pipes_in_tree)
hws->funcs.program_all_writeback_pipes_in_tree(dc, pipe->stream, context);
/* Avoid underflow by check of pipe line read when adding 2nd plane. */
@@ -2164,7 +2197,7 @@ void dcn20_program_front_end_for_ctx(
 * buffered pending status clear and reset opp head pipe's non-double-buffered
* registers to their initial state.
*/
-static void post_unlock_reset_opp(struct dc *dc,
+void dcn20_post_unlock_reset_opp(struct dc *dc,
struct pipe_ctx *opp_head)
{
struct display_stream_compressor *dsc = opp_head->stream_res.dsc;
@@ -2201,16 +2234,17 @@ void dcn20_post_unlock_program_front_end(
struct dc *dc,
struct dc_state *context)
{
- int i;
- const unsigned int TIMEOUT_FOR_PIPE_ENABLE_US = 100000;
+ // Timeout for pipe enable
+ unsigned int timeout_us = 100000;
unsigned int polling_interval_us = 1;
struct dce_hwseq *hwseq = dc->hwseq;
+ int i;
for (i = 0; i < dc->res_pool->pipe_count; i++)
if (resource_is_pipe_type(&dc->current_state->res_ctx.pipe_ctx[i], OPP_HEAD) &&
- !resource_is_pipe_type(&context->res_ctx.pipe_ctx[i], OPP_HEAD))
- post_unlock_reset_opp(dc,
- &dc->current_state->res_ctx.pipe_ctx[i]);
+ !resource_is_pipe_type(&context->res_ctx.pipe_ctx[i], OPP_HEAD))
+ dcn20_post_unlock_reset_opp(dc,
+ &dc->current_state->res_ctx.pipe_ctx[i]);
for (i = 0; i < dc->res_pool->pipe_count; i++)
if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable)
@@ -2226,11 +2260,12 @@ void dcn20_post_unlock_program_front_end(
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
// Don't check flip pending on phantom pipes
if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable &&
- dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) {
+ dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) {
struct hubp *hubp = pipe->plane_res.hubp;
int j = 0;
- for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_US / polling_interval_us
- && hubp->funcs->hubp_is_flip_pending(hubp); j++)
+
+ for (j = 0; j < timeout_us / polling_interval_us
+ && hubp->funcs->hubp_is_flip_pending(hubp); j++)
udelay(polling_interval_us);
}
}
@@ -2244,15 +2279,14 @@ void dcn20_post_unlock_program_front_end(
* before we've transitioned to 2:1 or 4:1
*/
if (resource_is_pipe_type(old_pipe, OTG_MASTER) && resource_is_pipe_type(pipe, OTG_MASTER) &&
- resource_get_odm_slice_count(old_pipe) < resource_get_odm_slice_count(pipe) &&
- dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) {
+ resource_get_odm_slice_count(old_pipe) < resource_get_odm_slice_count(pipe) &&
+ dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) {
int j = 0;
struct timing_generator *tg = pipe->stream_res.tg;
-
if (tg->funcs->get_optc_double_buffer_pending) {
- for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_US / polling_interval_us
- && tg->funcs->get_optc_double_buffer_pending(tg); j++)
+ for (j = 0; j < timeout_us / polling_interval_us
+ && tg->funcs->get_optc_double_buffer_pending(tg); j++)
udelay(polling_interval_us);
}
}
@@ -2260,7 +2294,7 @@ void dcn20_post_unlock_program_front_end(
if (dc->res_pool->hubbub->funcs->force_pstate_change_control)
dc->res_pool->hubbub->funcs->force_pstate_change_control(
- dc->res_pool->hubbub, false, false);
+ dc->res_pool->hubbub, false, false);
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
@@ -2291,11 +2325,11 @@ void dcn20_post_unlock_program_front_end(
return;
/* P-State support transitions:
- * Natural -> FPO: P-State disabled in prepare, force disallow anytime is safe
- * FPO -> Natural: Unforce anytime after FW disable is safe (P-State will assert naturally)
- * Unsupported -> FPO: P-State enabled in optimize, force disallow anytime is safe
- * FPO -> Unsupported: P-State disabled in prepare, unforce disallow anytime is safe
- * FPO <-> SubVP: Force disallow is maintained on the FPO / SubVP pipes
+ * Natural -> FPO: P-State disabled in prepare, force disallow anytime is safe
+ * FPO -> Natural: Unforce anytime after FW disable is safe (P-State will assert naturally)
+ * Unsupported -> FPO: P-State enabled in optimize, force disallow anytime is safe
+ * FPO -> Unsupported: P-State disabled in prepare, unforce disallow anytime is safe
+ * FPO <-> SubVP: Force disallow is maintained on the FPO / SubVP pipes
*/
if (hwseq->funcs.update_force_pstate)
dc->hwseq->funcs.update_force_pstate(dc, context);
@@ -2310,12 +2344,11 @@ void dcn20_post_unlock_program_front_end(
if (hwseq->wa.DEGVIDCN21)
dc->res_pool->hubbub->funcs->apply_DEDCN21_147_wa(dc->res_pool->hubbub);
-
/* WA for stutter underflow during MPO transitions when adding 2nd plane */
if (hwseq->wa.disallow_self_refresh_during_multi_plane_transition) {
if (dc->current_state->stream_status[0].plane_count == 1 &&
- context->stream_status[0].plane_count > 1) {
+ context->stream_status[0].plane_count > 1) {
struct timing_generator *tg = dc->res_pool->timing_generators[0];
@@ -2463,7 +2496,7 @@ bool dcn20_update_bandwidth(
pipe_ctx->stream_res.tg->funcs->program_global_sync(
pipe_ctx->stream_res.tg,
- calculate_vready_offset_for_group(pipe_ctx),
+ dcn20_calculate_vready_offset_for_group(pipe_ctx),
pipe_ctx->pipe_dlg_param.vstartup_start,
pipe_ctx->pipe_dlg_param.vupdate_offset,
pipe_ctx->pipe_dlg_param.vupdate_width,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h
index 5c874f7b0683..9d1ad3b29ca5 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h
@@ -154,6 +154,21 @@ void dcn20_setup_gsl_group_as_lock(
const struct dc *dc,
struct pipe_ctx *pipe_ctx,
bool enable);
-
+void dcn20_detect_pipe_changes(
+ struct dc_state *old_state,
+ struct dc_state *new_state,
+ struct pipe_ctx *old_pipe,
+ struct pipe_ctx *new_pipe);
+void dcn20_enable_plane(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context);
+void dcn20_update_dchubp_dpp(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context);
+void dcn20_post_unlock_reset_opp(
+ struct dc *dc,
+ struct pipe_ctx *opp_head);
#endif /* __DC_HWSS_DCN20_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c
index 0e8d32e3dbae..c32764aef884 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c
@@ -86,7 +86,6 @@ static const struct hw_sequencer_funcs dcn30_funcs = {
.enable_writeback = dcn30_enable_writeback,
.disable_writeback = dcn30_disable_writeback,
.update_writeback = dcn30_update_writeback,
- .mmhubbub_warmup = dcn30_mmhubbub_warmup,
.dmdata_status_done = dcn20_dmdata_status_done,
.program_dmdata_engine = dcn30_program_dmdata_engine,
.set_dmdata_attributes = dcn20_set_dmdata_attributes,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c
index 780ce4c064aa..dcb27cdbce73 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c
@@ -86,7 +86,6 @@ static const struct hw_sequencer_funcs dcn301_funcs = {
.enable_writeback = dcn30_enable_writeback,
.disable_writeback = dcn30_disable_writeback,
.update_writeback = dcn30_update_writeback,
- .mmhubbub_warmup = dcn30_mmhubbub_warmup,
.dmdata_status_done = dcn20_dmdata_status_done,
.program_dmdata_engine = dcn30_program_dmdata_engine,
.set_dmdata_attributes = dcn20_set_dmdata_attributes,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c
index 5f8f45b48720..fb2ffb637931 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c
@@ -89,7 +89,6 @@ static const struct hw_sequencer_funcs dcn31_funcs = {
.enable_writeback = dcn30_enable_writeback,
.disable_writeback = dcn30_disable_writeback,
.update_writeback = dcn30_update_writeback,
- .mmhubbub_warmup = dcn30_mmhubbub_warmup,
.dmdata_status_done = dcn20_dmdata_status_done,
.program_dmdata_engine = dcn30_program_dmdata_engine,
.set_dmdata_attributes = dcn20_set_dmdata_attributes,
@@ -98,7 +97,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = {
.set_flip_control_gsl = dcn20_set_flip_control_gsl,
.get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
.calc_vupdate_position = dcn10_calc_vupdate_position,
- .set_backlight_level = dcn31_set_backlight_level,
+ .set_backlight_level = dcn21_set_backlight_level,
.set_abm_immediate_disable = dcn21_set_abm_immediate_disable,
.set_pipe = dcn21_set_pipe,
.enable_lvds_link_output = dce110_enable_lvds_link_output,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c
index 9b88eb72086d..be26c925fdfa 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c
@@ -162,6 +162,8 @@ void dcn314_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx
int opp_inst[MAX_PIPES] = {0};
int odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, false);
int last_odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, true);
+ struct mpc *mpc = dc->res_pool->mpc;
+ int i;
opp_cnt = get_odm_config(pipe_ctx, opp_inst);
@@ -174,6 +176,16 @@ void dcn314_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx
pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
+ if (mpc->funcs->set_out_rate_control) {
+ for (i = 0; i < opp_cnt; ++i) {
+ mpc->funcs->set_out_rate_control(
+ mpc, opp_inst[i],
+ false,
+ 0,
+ NULL);
+ }
+ }
+
for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control(
odm_pipe->stream_res.opp,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c
index 6bdfbf22ce87..21ef03a76229 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c
@@ -91,7 +91,6 @@ static const struct hw_sequencer_funcs dcn314_funcs = {
.enable_writeback = dcn30_enable_writeback,
.disable_writeback = dcn30_disable_writeback,
.update_writeback = dcn30_update_writeback,
- .mmhubbub_warmup = dcn30_mmhubbub_warmup,
.dmdata_status_done = dcn20_dmdata_status_done,
.program_dmdata_engine = dcn30_program_dmdata_engine,
.set_dmdata_attributes = dcn20_set_dmdata_attributes,
@@ -100,7 +99,7 @@ static const struct hw_sequencer_funcs dcn314_funcs = {
.set_flip_control_gsl = dcn20_set_flip_control_gsl,
.get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
.calc_vupdate_position = dcn10_calc_vupdate_position,
- .set_backlight_level = dcn31_set_backlight_level,
+ .set_backlight_level = dcn21_set_backlight_level,
.set_abm_immediate_disable = dcn21_set_abm_immediate_disable,
.set_pipe = dcn21_set_pipe,
.enable_lvds_link_output = dce110_enable_lvds_link_output,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
index d7f8b2dcaa6b..ee4de9ddfef4 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
@@ -985,6 +985,7 @@ void dcn32_init_hw(struct dc *dc)
dc->caps.dmub_caps.subvp_psr = dc->ctx->dmub_srv->dmub->feature_caps.subvp_psr_support;
dc->caps.dmub_caps.gecc_enable = dc->ctx->dmub_srv->dmub->feature_caps.gecc_enable;
dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver;
+ dc->caps.dmub_caps.aux_backlight_support = dc->ctx->dmub_srv->dmub->feature_caps.abm_aux_backlight_support;
/* for DCN401 testing only */
dc->caps.dmub_caps.fams_ver = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver;
@@ -1049,7 +1050,8 @@ void dcn32_update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
}
/* Enable DSC hw block */
- dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;
+ dsc_cfg.pic_width = (stream->timing.h_addressable + pipe_ctx->hblank_borrow +
+ stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;
dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom;
dsc_cfg.pixel_encoding = stream->timing.pixel_encoding;
dsc_cfg.color_depth = stream->timing.display_color_depth;
@@ -1397,12 +1399,12 @@ void dcn32_disable_link_output(struct dc_link *link,
link_hwss->disable_link_output(link, link_res, signal);
link->phy_state.symclk_state = SYMCLK_OFF_TX_OFF;
-
- if (signal == SIGNAL_TYPE_EDP &&
- link->dc->hwss.edp_power_control &&
- !link->skip_implict_edp_power_control)
- link->dc->hwss.edp_power_control(link, false);
- else if (dmcu != NULL && dmcu->funcs->unlock_phy)
+ /*
+ * Add the logic to extract BOTH power up and power down sequences
+ * from enable/disable link output and only call edp panel control
+ * in enable_link_dp and disable_link_dp once.
+ */
+ if (dmcu != NULL && dmcu->funcs->unlock_phy)
dmcu->funcs->unlock_phy(dmcu);
dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY);
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c
index 5ecee7e320da..e4d149eff10f 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c
@@ -87,7 +87,6 @@ static const struct hw_sequencer_funcs dcn32_funcs = {
.enable_writeback = dcn30_enable_writeback,
.disable_writeback = dcn30_disable_writeback,
.update_writeback = dcn30_update_writeback,
- .mmhubbub_warmup = dcn30_mmhubbub_warmup,
.dmdata_status_done = dcn20_dmdata_status_done,
.program_dmdata_engine = dcn30_program_dmdata_engine,
.set_dmdata_attributes = dcn20_set_dmdata_attributes,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index e599cdc465bf..59fc1c114fbe 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -426,6 +426,8 @@ void dcn35_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *
int opp_inst[MAX_PIPES] = {0};
int odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, false);
int last_odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, true);
+ struct mpc *mpc = dc->res_pool->mpc;
+ int i;
opp_cnt = get_odm_config(pipe_ctx, opp_inst);
@@ -438,6 +440,16 @@ void dcn35_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *
pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
+ if (mpc->funcs->set_out_rate_control) {
+ for (i = 0; i < opp_cnt; ++i) {
+ mpc->funcs->set_out_rate_control(
+ mpc, opp_inst[i],
+ false,
+ 0,
+ NULL);
+ }
+ }
+
for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control(
odm_pipe->stream_res.opp,
@@ -1020,8 +1032,13 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context,
if (pipe_ctx->plane_res.dpp || pipe_ctx->stream_res.opp)
update_state->pg_pipe_res_update[PG_MPCC][pipe_ctx->plane_res.mpcc_inst] = false;
- if (pipe_ctx->stream_res.dsc)
+ if (pipe_ctx->stream_res.dsc) {
update_state->pg_pipe_res_update[PG_DSC][pipe_ctx->stream_res.dsc->inst] = false;
+ if (dc->caps.sequential_ono) {
+ update_state->pg_pipe_res_update[PG_HUBP][pipe_ctx->stream_res.dsc->inst] = false;
+ update_state->pg_pipe_res_update[PG_DPP][pipe_ctx->stream_res.dsc->inst] = false;
+ }
+ }
if (pipe_ctx->stream_res.opp)
update_state->pg_pipe_res_update[PG_OPP][pipe_ctx->stream_res.opp->inst] = false;
@@ -1579,3 +1596,37 @@ bool dcn35_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx)
return false;
}
+
+/*
+ * Set powerup to true for every pipe to match pre-OS configuration.
+ */
+static void dcn35_calc_blocks_to_ungate_for_hw_release(struct dc *dc, struct pg_block_update *update_state)
+{
+ int i = 0, j = 0;
+
+ memset(update_state, 0, sizeof(struct pg_block_update));
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++)
+ for (j = 0; j < PG_HW_PIPE_RESOURCES_NUM_ELEMENT; j++)
+ update_state->pg_pipe_res_update[j][i] = true;
+
+ update_state->pg_res_update[PG_HPO] = true;
+ update_state->pg_res_update[PG_DWB] = true;
+}
+
+/*
+ * Power up all gated blocks so the hardware matches the pre-OS environment.
+ * Reuse the hwss functions and existing PG & RCG flags to decide the power-up sequence.
+ */
+void dcn35_hardware_release(struct dc *dc)
+{
+ struct pg_block_update pg_update_state;
+
+ dcn35_calc_blocks_to_ungate_for_hw_release(dc, &pg_update_state);
+
+ if (dc->hwss.root_clock_control)
+ dc->hwss.root_clock_control(dc, &pg_update_state, true);
+	/* power up required HW block */
+ if (dc->hwss.hw_block_power_up)
+ dc->hwss.hw_block_power_up(dc, &pg_update_state);
+}
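A minimal sketch of how the new dcn35_hardware_release hook might be exercised once it is registered in the hw_sequencer_funcs table below; the wrapper name and call site are assumptions, not part of this change:

	/* Hypothetical caller: power every gated block back up through the hwss
	 * hook during display hand-off so firmware finds the pre-OS state.
	 */
	static void example_release_display_hw(struct dc *dc)
	{
		if (dc->hwss.hardware_release)
			dc->hwss.hardware_release(dc);
	}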
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
index e27b3609020f..0b1d6f608edd 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.h
@@ -99,4 +99,6 @@ void dcn35_set_long_vblank(struct pipe_ctx **pipe_ctx,
bool dcn35_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx);
+void dcn35_hardware_release(struct dc *dc);
+
#endif /* __DC_HWSS_DCN35_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
index fd67779c27a9..c7acaf97974c 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
@@ -92,7 +92,6 @@ static const struct hw_sequencer_funcs dcn35_funcs = {
.enable_writeback = dcn30_enable_writeback,
.disable_writeback = dcn30_disable_writeback,
.update_writeback = dcn30_update_writeback,
- .mmhubbub_warmup = dcn30_mmhubbub_warmup,
.dmdata_status_done = dcn20_dmdata_status_done,
.program_dmdata_engine = dcn30_program_dmdata_engine,
.set_dmdata_attributes = dcn20_set_dmdata_attributes,
@@ -123,6 +122,11 @@ static const struct hw_sequencer_funcs dcn35_funcs = {
.root_clock_control = dcn35_root_clock_control,
.set_long_vtotal = dcn35_set_long_vblank,
.calculate_pix_rate_divider = dcn32_calculate_pix_rate_divider,
+ .hardware_release = dcn35_hardware_release,
+ .detect_pipe_changes = dcn20_detect_pipe_changes,
+ .enable_plane = dcn20_enable_plane,
+ .update_dchubp_dpp = dcn20_update_dchubp_dpp,
+ .post_unlock_reset_opp = dcn20_post_unlock_reset_opp,
};
static const struct hwseq_private_funcs dcn35_private_funcs = {
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c
index 3c275a1eff58..4f73e7f551ac 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c
@@ -91,7 +91,6 @@ static const struct hw_sequencer_funcs dcn351_funcs = {
.enable_writeback = dcn30_enable_writeback,
.disable_writeback = dcn30_disable_writeback,
.update_writeback = dcn30_update_writeback,
- .mmhubbub_warmup = dcn30_mmhubbub_warmup,
.dmdata_status_done = dcn20_dmdata_status_done,
.program_dmdata_engine = dcn30_program_dmdata_engine,
.set_dmdata_attributes = dcn20_set_dmdata_attributes,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
index 5de11e2837c0..555a9f590cd7 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
@@ -3,6 +3,7 @@
// Copyright 2024 Advanced Micro Devices, Inc.
#include "dm_services.h"
+#include "basics/dc_common.h"
#include "dm_helpers.h"
#include "core_types.h"
#include "resource.h"
@@ -126,91 +127,6 @@ void dcn401_program_gamut_remap(struct pipe_ctx *pipe_ctx)
mpc->funcs->set_gamut_remap(mpc, mpcc_id, &mpc_adjust);
}
-struct ips_ono_region_state dcn401_read_ono_state(struct dc *dc, uint8_t region)
-{
- struct dce_hwseq *hws = dc->hwseq;
- struct ips_ono_region_state state = {0, 0};
-
- switch (region) {
- case 0:
- /* dccg, dio, dcio */
- REG_GET_2(DOMAIN22_PG_STATUS,
- DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state,
- DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state);
- break;
- case 1:
- /* dchubbub, dchvm, dchubbubmem */
- REG_GET_2(DOMAIN23_PG_STATUS,
- DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state,
- DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state);
- break;
- case 2:
- /* mpc, opp, optc, dwb */
- REG_GET_2(DOMAIN24_PG_STATUS,
- DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state,
- DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state);
- break;
- case 3:
- /* hpo */
- REG_GET_2(DOMAIN25_PG_STATUS,
- DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state,
- DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state);
- break;
- case 4:
- /* dchubp0, dpp0 */
- REG_GET_2(DOMAIN0_PG_STATUS,
- DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state,
- DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state);
- break;
- case 5:
- /* dsc0 */
- REG_GET_2(DOMAIN16_PG_STATUS,
- DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state,
- DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state);
- break;
- case 6:
- /* dchubp1, dpp1 */
- REG_GET_2(DOMAIN1_PG_STATUS,
- DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state,
- DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state);
- break;
- case 7:
- /* dsc1 */
- REG_GET_2(DOMAIN17_PG_STATUS,
- DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state,
- DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state);
- break;
- case 8:
- /* dchubp2, dpp2 */
- REG_GET_2(DOMAIN2_PG_STATUS,
- DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state,
- DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state);
- break;
- case 9:
- /* dsc2 */
- REG_GET_2(DOMAIN18_PG_STATUS,
- DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state,
- DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state);
- break;
- case 10:
- /* dchubp3, dpp3 */
- REG_GET_2(DOMAIN3_PG_STATUS,
- DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state,
- DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state);
- break;
- case 11:
- /* dsc3 */
- REG_GET_2(DOMAIN19_PG_STATUS,
- DOMAIN_DESIRED_PWR_STATE, &state.desire_pwr_state,
- DOMAIN_PGFSM_PWR_STATUS, &state.current_pwr_state);
- break;
- default:
- break;
- }
-
- return state;
-}
-
void dcn401_init_hw(struct dc *dc)
{
struct abm **abms = dc->res_pool->multiple_abms;
@@ -435,7 +351,8 @@ void dcn401_init_hw(struct dc *dc)
dc->caps.dmub_caps.psr = dc->ctx->dmub_srv->dmub->feature_caps.psr;
dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver > 0;
dc->caps.dmub_caps.fams_ver = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver;
- dc->debug.fams2_config.bits.enable &= dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver == 2;
+ dc->debug.fams2_config.bits.enable &=
+ dc->caps.dmub_caps.fams_ver == dc->debug.fams_version.ver; // sw & fw fams versions must match for support
if ((!dc->debug.fams2_config.bits.enable && dc->res_pool->funcs->update_bw_bounding_box)
|| res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000 != current_dchub_ref_freq) {
/* update bounding box if FAMS2 disabled, or if dchub clk has changed */
@@ -820,7 +737,8 @@ enum dc_status dcn401_enable_stream_timing(
int opp_cnt = 1;
int opp_inst[MAX_PIPES] = {0};
struct pipe_ctx *opp_heads[MAX_PIPES] = {0};
- bool manual_mode;
+ struct dc_crtc_timing patched_crtc_timing = stream->timing;
+ bool manual_mode = false;
unsigned int tmds_div = PIXEL_RATE_DIV_NA;
unsigned int unused_div = PIXEL_RATE_DIV_NA;
int odm_slice_width;
@@ -874,16 +792,20 @@ enum dc_status dcn401_enable_stream_timing(
if (dc->hwseq->funcs.PLAT_58856_wa && (!dc_is_dp_signal(stream->signal)))
dc->hwseq->funcs.PLAT_58856_wa(context, pipe_ctx);
+ /* if we are borrowing from hblank, h_addressable needs to be adjusted */
+ if (dc->debug.enable_hblank_borrow)
+ patched_crtc_timing.h_addressable = patched_crtc_timing.h_addressable + pipe_ctx->hblank_borrow;
+
pipe_ctx->stream_res.tg->funcs->program_timing(
- pipe_ctx->stream_res.tg,
- &stream->timing,
- pipe_ctx->pipe_dlg_param.vready_offset,
- pipe_ctx->pipe_dlg_param.vstartup_start,
- pipe_ctx->pipe_dlg_param.vupdate_offset,
- pipe_ctx->pipe_dlg_param.vupdate_width,
- pipe_ctx->pipe_dlg_param.pstate_keepout,
- pipe_ctx->stream->signal,
- true);
+ pipe_ctx->stream_res.tg,
+ &patched_crtc_timing,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.vready_offset_pixels,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.vstartup_lines,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_offset_pixels,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_vupdate_width_pixels,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.pstate_keepout_start_lines,
+ pipe_ctx->stream->signal,
+ true);
for (i = 0; i < opp_cnt; i++) {
opp_heads[i]->stream_res.opp->funcs->opp_pipe_clock_control(
@@ -2007,3 +1929,730 @@ void dcn401_reset_hw_ctx_wrap(
}
}
}
+
+static unsigned int dcn401_calculate_vready_offset_for_group(struct pipe_ctx *pipe)
+{
+ struct pipe_ctx *other_pipe;
+ unsigned int vready_offset = pipe->global_sync.dcn4x.vready_offset_pixels;
+
+ /* Always use the largest vready_offset of all connected pipes */
+ for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) {
+ if (other_pipe->global_sync.dcn4x.vready_offset_pixels > vready_offset)
+ vready_offset = other_pipe->global_sync.dcn4x.vready_offset_pixels;
+ }
+ for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) {
+ if (other_pipe->global_sync.dcn4x.vready_offset_pixels > vready_offset)
+ vready_offset = other_pipe->global_sync.dcn4x.vready_offset_pixels;
+ }
+ for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) {
+ if (other_pipe->global_sync.dcn4x.vready_offset_pixels > vready_offset)
+ vready_offset = other_pipe->global_sync.dcn4x.vready_offset_pixels;
+ }
+ for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) {
+ if (other_pipe->global_sync.dcn4x.vready_offset_pixels > vready_offset)
+ vready_offset = other_pipe->global_sync.dcn4x.vready_offset_pixels;
+ }
+
+ return vready_offset;
+}
+
+static void dcn401_program_tg(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context,
+ struct dce_hwseq *hws)
+{
+ pipe_ctx->stream_res.tg->funcs->program_global_sync(
+ pipe_ctx->stream_res.tg,
+ dcn401_calculate_vready_offset_for_group(pipe_ctx),
+ (unsigned int)pipe_ctx->global_sync.dcn4x.vstartup_lines,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_offset_pixels,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_vupdate_width_pixels,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.pstate_keepout_start_lines);
+
+ if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM)
+ pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE);
+
+ pipe_ctx->stream_res.tg->funcs->set_vtg_params(
+ pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true);
+
+ if (hws->funcs.setup_vupdate_interrupt)
+ hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx);
+}
+
+static void dcn401_program_pipe(
+ struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context)
+{
+ struct dce_hwseq *hws = dc->hwseq;
+
+ /* Only need to unblank on top pipe */
+ if (resource_is_pipe_type(pipe_ctx, OTG_MASTER)) {
+ if (pipe_ctx->update_flags.bits.enable ||
+ pipe_ctx->update_flags.bits.odm ||
+ pipe_ctx->stream->update_flags.bits.abm_level)
+ hws->funcs.blank_pixel_data(dc, pipe_ctx,
+ !pipe_ctx->plane_state ||
+ !pipe_ctx->plane_state->visible);
+ }
+
+ /* Only update TG on top pipe */
+ if (pipe_ctx->update_flags.bits.global_sync && !pipe_ctx->top_pipe
+ && !pipe_ctx->prev_odm_pipe)
+ dcn401_program_tg(dc, pipe_ctx, context, hws);
+
+ if (pipe_ctx->update_flags.bits.odm)
+ hws->funcs.update_odm(dc, context, pipe_ctx);
+
+ if (pipe_ctx->update_flags.bits.enable) {
+ if (hws->funcs.enable_plane)
+ hws->funcs.enable_plane(dc, pipe_ctx, context);
+ else
+ dc->hwss.enable_plane(dc, pipe_ctx, context);
+
+ if (dc->res_pool->hubbub->funcs->force_wm_propagate_to_pipes)
+ dc->res_pool->hubbub->funcs->force_wm_propagate_to_pipes(dc->res_pool->hubbub);
+ }
+
+ if (pipe_ctx->update_flags.bits.det_size) {
+ if (dc->res_pool->hubbub->funcs->program_det_size)
+ dc->res_pool->hubbub->funcs->program_det_size(
+ dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->det_buffer_size_kb);
+ if (dc->res_pool->hubbub->funcs->program_det_segments)
+ dc->res_pool->hubbub->funcs->program_det_segments(
+ dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->hubp_regs.det_size);
+ }
+
+ if (pipe_ctx->update_flags.raw ||
+ (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.raw) ||
+ pipe_ctx->stream->update_flags.raw)
+ dc->hwss.update_dchubp_dpp(dc, pipe_ctx, context);
+
+ if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable ||
+ pipe_ctx->plane_state->update_flags.bits.hdr_mult))
+ hws->funcs.set_hdr_multiplier(pipe_ctx);
+
+ if (hws->funcs.populate_mcm_luts) {
+ if (pipe_ctx->plane_state) {
+ hws->funcs.populate_mcm_luts(dc, pipe_ctx, pipe_ctx->plane_state->mcm_luts,
+ pipe_ctx->plane_state->lut_bank_a);
+ pipe_ctx->plane_state->lut_bank_a = !pipe_ctx->plane_state->lut_bank_a;
+ }
+ }
+
+ if (pipe_ctx->plane_state &&
+ (pipe_ctx->plane_state->update_flags.bits.in_transfer_func_change ||
+ pipe_ctx->plane_state->update_flags.bits.gamma_change ||
+ pipe_ctx->plane_state->update_flags.bits.lut_3d ||
+ pipe_ctx->update_flags.bits.enable))
+ hws->funcs.set_input_transfer_func(dc, pipe_ctx, pipe_ctx->plane_state);
+
+ /* dcn10_translate_regamma_to_hw_format takes 750us to finish
+ * only do gamma programming for powering on, internal memcmp to avoid
+ * updating on slave planes
+ */
+ if (pipe_ctx->update_flags.bits.enable ||
+ pipe_ctx->update_flags.bits.plane_changed ||
+ pipe_ctx->stream->update_flags.bits.out_tf ||
+ (pipe_ctx->plane_state &&
+ pipe_ctx->plane_state->update_flags.bits.output_tf_change))
+ hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream);
+
+ /* If the pipe has been enabled or has a different opp, we
+ * should reprogram the fmt. This deals with cases where
+	 * interaction between mpc and odm combine on different streams
+ * causes a different pipe to be chosen to odm combine with.
+ */
+ if (pipe_ctx->update_flags.bits.enable
+ || pipe_ctx->update_flags.bits.opp_changed) {
+
+ pipe_ctx->stream_res.opp->funcs->opp_set_dyn_expansion(
+ pipe_ctx->stream_res.opp,
+ COLOR_SPACE_YCBCR601,
+ pipe_ctx->stream->timing.display_color_depth,
+ pipe_ctx->stream->signal);
+
+ pipe_ctx->stream_res.opp->funcs->opp_program_fmt(
+ pipe_ctx->stream_res.opp,
+ &pipe_ctx->stream->bit_depth_params,
+ &pipe_ctx->stream->clamping);
+ }
+
+ /* Set ABM pipe after other pipe configurations done */
+ if ((pipe_ctx->plane_state && pipe_ctx->plane_state->visible)) {
+ if (pipe_ctx->stream_res.abm) {
+ dc->hwss.set_pipe(pipe_ctx);
+ pipe_ctx->stream_res.abm->funcs->set_abm_level(pipe_ctx->stream_res.abm,
+ pipe_ctx->stream->abm_level);
+ }
+ }
+
+ if (pipe_ctx->update_flags.bits.test_pattern_changed) {
+ struct output_pixel_processor *odm_opp = pipe_ctx->stream_res.opp;
+ struct bit_depth_reduction_params params;
+
+ memset(&params, 0, sizeof(params));
+ odm_opp->funcs->opp_program_bit_depth_reduction(odm_opp, &params);
+ dc->hwss.set_disp_pattern_generator(dc,
+ pipe_ctx,
+ pipe_ctx->stream_res.test_pattern_params.test_pattern,
+ pipe_ctx->stream_res.test_pattern_params.color_space,
+ pipe_ctx->stream_res.test_pattern_params.color_depth,
+ NULL,
+ pipe_ctx->stream_res.test_pattern_params.width,
+ pipe_ctx->stream_res.test_pattern_params.height,
+ pipe_ctx->stream_res.test_pattern_params.offset);
+ }
+}
+
+void dcn401_program_front_end_for_ctx(
+ struct dc *dc,
+ struct dc_state *context)
+{
+ int i;
+ unsigned int prev_hubp_count = 0;
+ unsigned int hubp_count = 0;
+ struct dce_hwseq *hws = dc->hwseq;
+ struct pipe_ctx *pipe = NULL;
+
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ if (resource_is_pipe_topology_changed(dc->current_state, context))
+ resource_log_pipe_topology_update(dc, context);
+
+ if (dc->hwss.program_triplebuffer != NULL && dc->debug.enable_tri_buf) {
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (!pipe->top_pipe && !pipe->prev_odm_pipe && pipe->plane_state) {
+ if (pipe->plane_state->triplebuffer_flips)
+ BREAK_TO_DEBUGGER();
+
+ /*turn off triple buffer for full update*/
+ dc->hwss.program_triplebuffer(
+ dc, pipe, pipe->plane_state->triplebuffer_flips);
+ }
+ }
+ }
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ if (dc->current_state->res_ctx.pipe_ctx[i].plane_state)
+ prev_hubp_count++;
+ if (context->res_ctx.pipe_ctx[i].plane_state)
+ hubp_count++;
+ }
+
+ if (prev_hubp_count == 0 && hubp_count > 0) {
+ if (dc->res_pool->hubbub->funcs->force_pstate_change_control)
+ dc->res_pool->hubbub->funcs->force_pstate_change_control(
+ dc->res_pool->hubbub, true, false);
+ udelay(500);
+ }
+
+ /* Set pipe update flags and lock pipes */
+ for (i = 0; i < dc->res_pool->pipe_count; i++)
+ dc->hwss.detect_pipe_changes(dc->current_state, context, &dc->current_state->res_ctx.pipe_ctx[i],
+ &context->res_ctx.pipe_ctx[i]);
+
+ /* When disabling phantom pipes, turn on phantom OTG first (so we can get double
+ * buffer updates properly)
+ */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct dc_stream_state *stream = dc->current_state->res_ctx.pipe_ctx[i].stream;
+
+ pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable && stream &&
+ dc_state_get_pipe_subvp_type(dc->current_state, pipe) == SUBVP_PHANTOM) {
+ struct timing_generator *tg = dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg;
+
+ if (tg->funcs->enable_crtc) {
+ if (dc->hwseq->funcs.blank_pixel_data)
+ dc->hwseq->funcs.blank_pixel_data(dc, pipe, true);
+
+ tg->funcs->enable_crtc(tg);
+ }
+ }
+ }
+ /* OTG blank before disabling all front ends */
+ for (i = 0; i < dc->res_pool->pipe_count; i++)
+ if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable
+ && !context->res_ctx.pipe_ctx[i].top_pipe
+ && !context->res_ctx.pipe_ctx[i].prev_odm_pipe
+ && context->res_ctx.pipe_ctx[i].stream)
+ hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true);
+
+ /* Disconnect mpcc */
+ for (i = 0; i < dc->res_pool->pipe_count; i++)
+ if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable
+ || context->res_ctx.pipe_ctx[i].update_flags.bits.opp_changed) {
+ struct hubbub *hubbub = dc->res_pool->hubbub;
+
+ /* Phantom pipe DET should be 0, but if a pipe in use is being transitioned to phantom
+ * then we want to do the programming here (effectively it's being disabled). If we do
+ * the programming later the DET won't be updated until the OTG for the phantom pipe is
+ * turned on (i.e. in an MCLK switch) which can come in too late and cause issues with
+ * DET allocation.
+ */
+ if ((context->res_ctx.pipe_ctx[i].update_flags.bits.disable ||
+ (context->res_ctx.pipe_ctx[i].plane_state &&
+ dc_state_get_pipe_subvp_type(context, &context->res_ctx.pipe_ctx[i]) ==
+ SUBVP_PHANTOM))) {
+ if (hubbub->funcs->program_det_size)
+ hubbub->funcs->program_det_size(hubbub,
+ dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0);
+ if (dc->res_pool->hubbub->funcs->program_det_segments)
+ dc->res_pool->hubbub->funcs->program_det_segments(
+ hubbub, dc->current_state->res_ctx.pipe_ctx[i].plane_res.hubp->inst, 0);
+ }
+ hws->funcs.plane_atomic_disconnect(dc, dc->current_state,
+ &dc->current_state->res_ctx.pipe_ctx[i]);
+ DC_LOG_DC("Reset mpcc for pipe %d\n", dc->current_state->res_ctx.pipe_ctx[i].pipe_idx);
+ }
+
+ /* update ODM for blanked OTG master pipes */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ if (resource_is_pipe_type(pipe, OTG_MASTER) &&
+ !resource_is_pipe_type(pipe, DPP_PIPE) &&
+ pipe->update_flags.bits.odm &&
+ hws->funcs.update_odm)
+ hws->funcs.update_odm(dc, context, pipe);
+ }
+
+ /*
+ * Program all updated pipes, order matters for mpcc setup. Start with
+ * top pipe and program all pipes that follow in order
+ */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe->plane_state && !pipe->top_pipe) {
+ while (pipe) {
+ if (hws->funcs.program_pipe)
+ hws->funcs.program_pipe(dc, pipe, context);
+ else {
+ /* Don't program phantom pipes in the regular front end programming sequence.
+ * There is an MPO transition case where a pipe being used by a video plane is
+ * transitioned directly to be a phantom pipe when closing the MPO video.
+					 * However, the phantom pipe will program a new HUBP_VTG_SEL (update takes place
+ * right away) but the MPO still exists until the double buffered update of the
+ * main pipe so we will get a frame of underflow if the phantom pipe is
+ * programmed here.
+ */
+ if (pipe->stream &&
+ dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM)
+ dcn401_program_pipe(dc, pipe, context);
+ }
+
+ pipe = pipe->bottom_pipe;
+ }
+ }
+
+ /* Program secondary blending tree and writeback pipes */
+ pipe = &context->res_ctx.pipe_ctx[i];
+ if (!pipe->top_pipe && !pipe->prev_odm_pipe
+ && pipe->stream && pipe->stream->num_wb_info > 0
+ && (pipe->update_flags.raw || (pipe->plane_state && pipe->plane_state->update_flags.raw)
+ || pipe->stream->update_flags.raw)
+ && hws->funcs.program_all_writeback_pipes_in_tree)
+ hws->funcs.program_all_writeback_pipes_in_tree(dc, pipe->stream, context);
+
+ /* Avoid underflow by check of pipe line read when adding 2nd plane. */
+ if (hws->wa.wait_hubpret_read_start_during_mpo_transition &&
+ !pipe->top_pipe &&
+ pipe->stream &&
+ pipe->plane_res.hubp->funcs->hubp_wait_pipe_read_start &&
+ dc->current_state->stream_status[0].plane_count == 1 &&
+ context->stream_status[0].plane_count > 1) {
+ pipe->plane_res.hubp->funcs->hubp_wait_pipe_read_start(pipe->plane_res.hubp);
+ }
+ }
+}
+
+void dcn401_post_unlock_program_front_end(
+ struct dc *dc,
+ struct dc_state *context)
+{
+ // Timeout for pipe enable
+ unsigned int timeout_us = 100000;
+ unsigned int polling_interval_us = 1;
+ struct dce_hwseq *hwseq = dc->hwseq;
+ int i;
+
+ DC_LOGGER_INIT(dc->ctx->logger);
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++)
+ if (resource_is_pipe_type(&dc->current_state->res_ctx.pipe_ctx[i], OPP_HEAD) &&
+ !resource_is_pipe_type(&context->res_ctx.pipe_ctx[i], OPP_HEAD))
+ dc->hwss.post_unlock_reset_opp(dc,
+ &dc->current_state->res_ctx.pipe_ctx[i]);
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++)
+ if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable)
+ dc->hwss.disable_plane(dc, dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]);
+
+ /*
+ * If we are enabling a pipe, we need to wait for pending clear as this is a critical
+	 * part of the enable operation; otherwise, DM may request an immediate flip, which
+ * will cause HW to perform an "immediate enable" (as opposed to "vsync enable") which
+ * is unsupported on DCN.
+ */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ // Don't check flip pending on phantom pipes
+ if (pipe->plane_state && !pipe->top_pipe && pipe->update_flags.bits.enable &&
+ dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) {
+ struct hubp *hubp = pipe->plane_res.hubp;
+ int j = 0;
+
+ for (j = 0; j < timeout_us / polling_interval_us
+ && hubp->funcs->hubp_is_flip_pending(hubp); j++)
+ udelay(polling_interval_us);
+ }
+ }
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+		/* When going from a smaller ODM slice count to a larger one, we must ensure the double
+		 * buffer update completes before we return, so we don't reduce DISPCLK
+		 * before we've transitioned to 2:1 or 4:1
+ */
+ if (resource_is_pipe_type(old_pipe, OTG_MASTER) && resource_is_pipe_type(pipe, OTG_MASTER) &&
+ resource_get_odm_slice_count(old_pipe) < resource_get_odm_slice_count(pipe) &&
+ dc_state_get_pipe_subvp_type(context, pipe) != SUBVP_PHANTOM) {
+ int j = 0;
+ struct timing_generator *tg = pipe->stream_res.tg;
+
+ if (tg->funcs->get_optc_double_buffer_pending) {
+ for (j = 0; j < timeout_us / polling_interval_us
+ && tg->funcs->get_optc_double_buffer_pending(tg); j++)
+ udelay(polling_interval_us);
+ }
+ }
+ }
+
+ if (dc->res_pool->hubbub->funcs->force_pstate_change_control)
+ dc->res_pool->hubbub->funcs->force_pstate_change_control(
+ dc->res_pool->hubbub, false, false);
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe->plane_state && !pipe->top_pipe) {
+ /* Program phantom pipe here to prevent a frame of underflow in the MPO transition
+ * case (if a pipe being used for a video plane transitions to a phantom pipe, it
+ * can underflow due to HUBP_VTG_SEL programming if done in the regular front end
+ * programming sequence).
+ */
+ while (pipe) {
+ if (pipe->stream && dc_state_get_pipe_subvp_type(context, pipe) == SUBVP_PHANTOM) {
+ /* When turning on the phantom pipe we want to run through the
+ * entire enable sequence, so apply all the "enable" flags.
+ */
+ if (dc->hwss.apply_update_flags_for_phantom)
+ dc->hwss.apply_update_flags_for_phantom(pipe);
+ if (dc->hwss.update_phantom_vp_position)
+ dc->hwss.update_phantom_vp_position(dc, context, pipe);
+ dcn401_program_pipe(dc, pipe, context);
+ }
+ pipe = pipe->bottom_pipe;
+ }
+ }
+ }
+
+ if (!hwseq)
+ return;
+
+ /* P-State support transitions:
+ * Natural -> FPO: P-State disabled in prepare, force disallow anytime is safe
+ * FPO -> Natural: Unforce anytime after FW disable is safe (P-State will assert naturally)
+ * Unsupported -> FPO: P-State enabled in optimize, force disallow anytime is safe
+ * FPO -> Unsupported: P-State disabled in prepare, unforce disallow anytime is safe
+ * FPO <-> SubVP: Force disallow is maintained on the FPO / SubVP pipes
+ */
+ if (hwseq->funcs.update_force_pstate)
+ dc->hwseq->funcs.update_force_pstate(dc, context);
+
+ /* Only program the MALL registers after all the main and phantom pipes
+ * are done programming.
+ */
+ if (hwseq->funcs.program_mall_pipe_config)
+ hwseq->funcs.program_mall_pipe_config(dc, context);
+
+	/* WA to apply WM setting */
+ if (hwseq->wa.DEGVIDCN21)
+ dc->res_pool->hubbub->funcs->apply_DEDCN21_147_wa(dc->res_pool->hubbub);
+
+ /* WA for stutter underflow during MPO transitions when adding 2nd plane */
+ if (hwseq->wa.disallow_self_refresh_during_multi_plane_transition) {
+
+ if (dc->current_state->stream_status[0].plane_count == 1 &&
+ context->stream_status[0].plane_count > 1) {
+
+ struct timing_generator *tg = dc->res_pool->timing_generators[0];
+
+ dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, false);
+
+ hwseq->wa_state.disallow_self_refresh_during_multi_plane_transition_applied = true;
+ hwseq->wa_state.disallow_self_refresh_during_multi_plane_transition_applied_on_frame =
+ tg->funcs->get_frame_count(tg);
+ }
+ }
+}
+
+bool dcn401_update_bandwidth(
+ struct dc *dc,
+ struct dc_state *context)
+{
+ int i;
+ struct dce_hwseq *hws = dc->hwseq;
+
+ /* recalculate DML parameters */
+ if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false))
+ return false;
+
+ /* apply updated bandwidth parameters */
+ dc->hwss.prepare_bandwidth(dc, context);
+
+ /* update hubp configs for all pipes */
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe_ctx->plane_state == NULL)
+ continue;
+
+ if (pipe_ctx->top_pipe == NULL) {
+ bool blank = !is_pipe_tree_visible(pipe_ctx);
+
+ pipe_ctx->stream_res.tg->funcs->program_global_sync(
+ pipe_ctx->stream_res.tg,
+ dcn401_calculate_vready_offset_for_group(pipe_ctx),
+ (unsigned int)pipe_ctx->global_sync.dcn4x.vstartup_lines,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_offset_pixels,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.vupdate_vupdate_width_pixels,
+ (unsigned int)pipe_ctx->global_sync.dcn4x.pstate_keepout_start_lines);
+
+ pipe_ctx->stream_res.tg->funcs->set_vtg_params(
+ pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, false);
+
+ if (pipe_ctx->prev_odm_pipe == NULL)
+ hws->funcs.blank_pixel_data(dc, pipe_ctx, blank);
+
+ if (hws->funcs.setup_vupdate_interrupt)
+ hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx);
+ }
+
+ if (pipe_ctx->plane_res.hubp->funcs->hubp_setup2)
+ pipe_ctx->plane_res.hubp->funcs->hubp_setup2(
+ pipe_ctx->plane_res.hubp,
+ &pipe_ctx->hubp_regs,
+ &pipe_ctx->global_sync,
+ &pipe_ctx->stream->timing);
+ }
+
+ return true;
+}
+
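A hedged usage sketch for the bandwidth path added above; callers are expected to go through the hwss hook rather than the dcn401 symbol directly, and the helper name here is illustrative only:

	/* Hypothetical wrapper: drive the update through the hwss table so the
	 * registered ASIC-specific implementation (dcn401_update_bandwidth once
	 * wired up) revalidates DML parameters and reprograms global sync.
	 */
	static bool example_try_update_bandwidth(struct dc *dc, struct dc_state *context)
	{
		if (!dc->hwss.update_bandwidth)
			return false;

		return dc->hwss.update_bandwidth(dc, context);
	}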
+void dcn401_detect_pipe_changes(struct dc_state *old_state,
+ struct dc_state *new_state,
+ struct pipe_ctx *old_pipe,
+ struct pipe_ctx *new_pipe)
+{
+ bool old_is_phantom = dc_state_get_pipe_subvp_type(old_state, old_pipe) == SUBVP_PHANTOM;
+ bool new_is_phantom = dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM;
+
+ unsigned int old_pipe_vready_offset_pixels = old_pipe->global_sync.dcn4x.vready_offset_pixels;
+ unsigned int new_pipe_vready_offset_pixels = new_pipe->global_sync.dcn4x.vready_offset_pixels;
+ unsigned int old_pipe_vstartup_lines = old_pipe->global_sync.dcn4x.vstartup_lines;
+ unsigned int new_pipe_vstartup_lines = new_pipe->global_sync.dcn4x.vstartup_lines;
+ unsigned int old_pipe_vupdate_offset_pixels = old_pipe->global_sync.dcn4x.vupdate_offset_pixels;
+ unsigned int new_pipe_vupdate_offset_pixels = new_pipe->global_sync.dcn4x.vupdate_offset_pixels;
+ unsigned int old_pipe_vupdate_width_pixels = old_pipe->global_sync.dcn4x.vupdate_vupdate_width_pixels;
+ unsigned int new_pipe_vupdate_width_pixels = new_pipe->global_sync.dcn4x.vupdate_vupdate_width_pixels;
+
+ new_pipe->update_flags.raw = 0;
+
+	/* If a non-phantom pipe is being transitioned to a phantom pipe,
+ * set disable and return immediately. This is because the pipe
+ * that was previously in use must be fully disabled before we
+ * can "enable" it as a phantom pipe (since the OTG will certainly
+ * be different). The post_unlock sequence will set the correct
+ * update flags to enable the phantom pipe.
+ */
+ if (old_pipe->plane_state && !old_is_phantom &&
+ new_pipe->plane_state && new_is_phantom) {
+ new_pipe->update_flags.bits.disable = 1;
+ return;
+ }
+
+ if (resource_is_pipe_type(new_pipe, OTG_MASTER) &&
+ resource_is_odm_topology_changed(new_pipe, old_pipe))
+ /* Detect odm changes */
+ new_pipe->update_flags.bits.odm = 1;
+
+ /* Exit on unchanged, unused pipe */
+ if (!old_pipe->plane_state && !new_pipe->plane_state)
+ return;
+ /* Detect pipe enable/disable */
+ if (!old_pipe->plane_state && new_pipe->plane_state) {
+ new_pipe->update_flags.bits.enable = 1;
+ new_pipe->update_flags.bits.mpcc = 1;
+ new_pipe->update_flags.bits.dppclk = 1;
+ new_pipe->update_flags.bits.hubp_interdependent = 1;
+ new_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1;
+ new_pipe->update_flags.bits.unbounded_req = 1;
+ new_pipe->update_flags.bits.gamut_remap = 1;
+ new_pipe->update_flags.bits.scaler = 1;
+ new_pipe->update_flags.bits.viewport = 1;
+ new_pipe->update_flags.bits.det_size = 1;
+ if (new_pipe->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE &&
+ new_pipe->stream_res.test_pattern_params.width != 0 &&
+ new_pipe->stream_res.test_pattern_params.height != 0)
+ new_pipe->update_flags.bits.test_pattern_changed = 1;
+ if (!new_pipe->top_pipe && !new_pipe->prev_odm_pipe) {
+ new_pipe->update_flags.bits.odm = 1;
+ new_pipe->update_flags.bits.global_sync = 1;
+ }
+ return;
+ }
+
+ /* For SubVP we need to unconditionally enable because any phantom pipes are
+	 * always removed then newly added for every full update whenever SubVP is in use.
+ * The remove-add sequence of the phantom pipe always results in the pipe
+ * being blanked in enable_stream_timing (DPG).
+ */
+ if (new_pipe->stream && dc_state_get_pipe_subvp_type(new_state, new_pipe) == SUBVP_PHANTOM)
+ new_pipe->update_flags.bits.enable = 1;
+
+	/* Phantom pipes are effectively disabled; if the pipe was previously phantom,
+	 * we have to enable it
+ */
+ if (old_pipe->plane_state && old_is_phantom &&
+ new_pipe->plane_state && !new_is_phantom)
+ new_pipe->update_flags.bits.enable = 1;
+
+ if (old_pipe->plane_state && !new_pipe->plane_state) {
+ new_pipe->update_flags.bits.disable = 1;
+ return;
+ }
+
+ /* Detect plane change */
+ if (old_pipe->plane_state != new_pipe->plane_state)
+ new_pipe->update_flags.bits.plane_changed = true;
+
+ /* Detect top pipe only changes */
+ if (resource_is_pipe_type(new_pipe, OTG_MASTER)) {
+ /* Detect global sync changes */
+ if ((old_pipe_vready_offset_pixels != new_pipe_vready_offset_pixels)
+ || (old_pipe_vstartup_lines != new_pipe_vstartup_lines)
+ || (old_pipe_vupdate_offset_pixels != new_pipe_vupdate_offset_pixels)
+ || (old_pipe_vupdate_width_pixels != new_pipe_vupdate_width_pixels))
+ new_pipe->update_flags.bits.global_sync = 1;
+ }
+
+ if (old_pipe->det_buffer_size_kb != new_pipe->det_buffer_size_kb)
+ new_pipe->update_flags.bits.det_size = 1;
+
+ /*
+ * Detect opp / tg change, only set on change, not on enable
+	 * Assume mpcc inst = pipe index; if not, this code needs to be updated
+ * since mpcc is what is affected by these. In fact all of our sequence
+ * makes this assumption at the moment with how hubp reset is matched to
+ * same index mpcc reset.
+ */
+ if (old_pipe->stream_res.opp != new_pipe->stream_res.opp)
+ new_pipe->update_flags.bits.opp_changed = 1;
+ if (old_pipe->stream_res.tg != new_pipe->stream_res.tg)
+ new_pipe->update_flags.bits.tg_changed = 1;
+
+ /*
+ * Detect mpcc blending changes, only dpp inst and opp matter here,
+ * mpccs getting removed/inserted update connected ones during their own
+ * programming
+ */
+ if (old_pipe->plane_res.dpp != new_pipe->plane_res.dpp
+ || old_pipe->stream_res.opp != new_pipe->stream_res.opp)
+ new_pipe->update_flags.bits.mpcc = 1;
+
+ /* Detect dppclk change */
+ if (old_pipe->plane_res.bw.dppclk_khz != new_pipe->plane_res.bw.dppclk_khz)
+ new_pipe->update_flags.bits.dppclk = 1;
+
+ /* Check for scl update */
+ if (memcmp(&old_pipe->plane_res.scl_data, &new_pipe->plane_res.scl_data, sizeof(struct scaler_data)))
+ new_pipe->update_flags.bits.scaler = 1;
+ /* Check for vp update */
+ if (memcmp(&old_pipe->plane_res.scl_data.viewport, &new_pipe->plane_res.scl_data.viewport, sizeof(struct rect))
+ || memcmp(&old_pipe->plane_res.scl_data.viewport_c,
+ &new_pipe->plane_res.scl_data.viewport_c, sizeof(struct rect)))
+ new_pipe->update_flags.bits.viewport = 1;
+
+ /* Detect dlg/ttu/rq updates */
+ {
+ struct dml2_display_dlg_regs old_dlg_regs = old_pipe->hubp_regs.dlg_regs;
+ struct dml2_display_ttu_regs old_ttu_regs = old_pipe->hubp_regs.ttu_regs;
+ struct dml2_display_rq_regs old_rq_regs = old_pipe->hubp_regs.rq_regs;
+ struct dml2_display_dlg_regs *new_dlg_regs = &new_pipe->hubp_regs.dlg_regs;
+ struct dml2_display_ttu_regs *new_ttu_regs = &new_pipe->hubp_regs.ttu_regs;
+ struct dml2_display_rq_regs *new_rq_regs = &new_pipe->hubp_regs.rq_regs;
+
+ /* Detect pipe interdependent updates */
+ if ((old_dlg_regs.dst_y_prefetch != new_dlg_regs->dst_y_prefetch)
+ || (old_dlg_regs.vratio_prefetch != new_dlg_regs->vratio_prefetch)
+ || (old_dlg_regs.vratio_prefetch_c != new_dlg_regs->vratio_prefetch_c)
+ || (old_dlg_regs.dst_y_per_vm_vblank != new_dlg_regs->dst_y_per_vm_vblank)
+ || (old_dlg_regs.dst_y_per_row_vblank != new_dlg_regs->dst_y_per_row_vblank)
+ || (old_dlg_regs.dst_y_per_vm_flip != new_dlg_regs->dst_y_per_vm_flip)
+ || (old_dlg_regs.dst_y_per_row_flip != new_dlg_regs->dst_y_per_row_flip)
+ || (old_dlg_regs.refcyc_per_meta_chunk_vblank_l != new_dlg_regs->refcyc_per_meta_chunk_vblank_l)
+ || (old_dlg_regs.refcyc_per_meta_chunk_vblank_c != new_dlg_regs->refcyc_per_meta_chunk_vblank_c)
+ || (old_dlg_regs.refcyc_per_meta_chunk_flip_l != new_dlg_regs->refcyc_per_meta_chunk_flip_l)
+ || (old_dlg_regs.refcyc_per_line_delivery_pre_l != new_dlg_regs->refcyc_per_line_delivery_pre_l)
+ || (old_dlg_regs.refcyc_per_line_delivery_pre_c != new_dlg_regs->refcyc_per_line_delivery_pre_c)
+ || (old_ttu_regs.refcyc_per_req_delivery_pre_l != new_ttu_regs->refcyc_per_req_delivery_pre_l)
+ || (old_ttu_regs.refcyc_per_req_delivery_pre_c != new_ttu_regs->refcyc_per_req_delivery_pre_c)
+ || (old_ttu_regs.refcyc_per_req_delivery_pre_cur0 !=
+ new_ttu_regs->refcyc_per_req_delivery_pre_cur0)
+ || (old_ttu_regs.min_ttu_vblank != new_ttu_regs->min_ttu_vblank)
+ || (old_ttu_regs.qos_level_flip != new_ttu_regs->qos_level_flip)) {
+ old_dlg_regs.dst_y_prefetch = new_dlg_regs->dst_y_prefetch;
+ old_dlg_regs.vratio_prefetch = new_dlg_regs->vratio_prefetch;
+ old_dlg_regs.vratio_prefetch_c = new_dlg_regs->vratio_prefetch_c;
+ old_dlg_regs.dst_y_per_vm_vblank = new_dlg_regs->dst_y_per_vm_vblank;
+ old_dlg_regs.dst_y_per_row_vblank = new_dlg_regs->dst_y_per_row_vblank;
+ old_dlg_regs.dst_y_per_vm_flip = new_dlg_regs->dst_y_per_vm_flip;
+ old_dlg_regs.dst_y_per_row_flip = new_dlg_regs->dst_y_per_row_flip;
+ old_dlg_regs.refcyc_per_meta_chunk_vblank_l = new_dlg_regs->refcyc_per_meta_chunk_vblank_l;
+ old_dlg_regs.refcyc_per_meta_chunk_vblank_c = new_dlg_regs->refcyc_per_meta_chunk_vblank_c;
+ old_dlg_regs.refcyc_per_meta_chunk_flip_l = new_dlg_regs->refcyc_per_meta_chunk_flip_l;
+ old_dlg_regs.refcyc_per_line_delivery_pre_l = new_dlg_regs->refcyc_per_line_delivery_pre_l;
+ old_dlg_regs.refcyc_per_line_delivery_pre_c = new_dlg_regs->refcyc_per_line_delivery_pre_c;
+ old_ttu_regs.refcyc_per_req_delivery_pre_l = new_ttu_regs->refcyc_per_req_delivery_pre_l;
+ old_ttu_regs.refcyc_per_req_delivery_pre_c = new_ttu_regs->refcyc_per_req_delivery_pre_c;
+ old_ttu_regs.refcyc_per_req_delivery_pre_cur0 = new_ttu_regs->refcyc_per_req_delivery_pre_cur0;
+ old_ttu_regs.min_ttu_vblank = new_ttu_regs->min_ttu_vblank;
+ old_ttu_regs.qos_level_flip = new_ttu_regs->qos_level_flip;
+ new_pipe->update_flags.bits.hubp_interdependent = 1;
+ }
+ /* Detect any other updates to ttu/rq/dlg */
+ if (memcmp(&old_dlg_regs, new_dlg_regs, sizeof(old_dlg_regs)) ||
+ memcmp(&old_ttu_regs, new_ttu_regs, sizeof(old_ttu_regs)) ||
+ memcmp(&old_rq_regs, new_rq_regs, sizeof(old_rq_regs)))
+ new_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1;
+ }
+
+ if (old_pipe->unbounded_req != new_pipe->unbounded_req)
+ new_pipe->update_flags.bits.unbounded_req = 1;
+
+ if (memcmp(&old_pipe->stream_res.test_pattern_params,
+ &new_pipe->stream_res.test_pattern_params, sizeof(struct test_pattern_params))) {
+ new_pipe->update_flags.bits.test_pattern_changed = 1;
+ }
+}
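
The helper above only populates update flags. Below is an illustrative sketch (not part of the patch) of how a front-end programming sequence might drive it through the new detect_pipe_changes hw_sequencer hook, assuming the old and new contexts index pipe_ctx identically, as the existing dcn20 flow does:

static void example_detect_all_pipe_changes(struct dc *dc,
		struct dc_state *old_state, struct dc_state *new_state)
{
	int i;

	for (i = 0; i < dc->res_pool->pipe_count; i++) {
		struct pipe_ctx *old_pipe = &old_state->res_ctx.pipe_ctx[i];
		struct pipe_ctx *new_pipe = &new_state->res_ctx.pipe_ctx[i];

		/* Populate new_pipe->update_flags for this pipe pair */
		if (dc->hwss.detect_pipe_changes)
			dc->hwss.detect_pipe_changes(old_state, new_state,
						     old_pipe, new_pipe);
	}
}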
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h
index 28a513dfc005..17cea748789e 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h
@@ -63,8 +63,6 @@ void dcn401_set_cursor_position(struct pipe_ctx *pipe_ctx);
bool dcn401_apply_idle_power_optimizations(struct dc *dc, bool enable);
-struct ips_ono_region_state dcn401_read_ono_state(struct dc *dc,
- uint8_t region);
void dcn401_wait_for_dcc_meta_propagation(const struct dc *dc,
const struct pipe_ctx *top_pipe_to_program);
@@ -96,5 +94,12 @@ void dcn401_reset_hw_ctx_wrap(
struct dc *dc,
struct dc_state *context);
void dcn401_perform_3dlut_wa_unlock(struct pipe_ctx *pipe_ctx);
-
+void dcn401_program_front_end_for_ctx(struct dc *dc, struct dc_state *context);
+void dcn401_post_unlock_program_front_end(struct dc *dc, struct dc_state *context);
+bool dcn401_update_bandwidth(struct dc *dc, struct dc_state *context);
+void dcn401_detect_pipe_changes(
+ struct dc_state *old_state,
+ struct dc_state *new_state,
+ struct pipe_ctx *old_pipe,
+ struct pipe_ctx *new_pipe);
#endif /* __DC_HWSS_DCN401_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c
index 23e4f208152e..44cb376f97c1 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c
@@ -17,9 +17,9 @@ static const struct hw_sequencer_funcs dcn401_funcs = {
.init_hw = dcn401_init_hw,
.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
.apply_ctx_for_surface = NULL,
- .program_front_end_for_ctx = dcn20_program_front_end_for_ctx,
+ .program_front_end_for_ctx = dcn401_program_front_end_for_ctx,
.wait_for_pending_cleared = dcn10_wait_for_pending_cleared,
- .post_unlock_program_front_end = dcn20_post_unlock_program_front_end,
+ .post_unlock_program_front_end = dcn401_post_unlock_program_front_end,
.update_plane_addr = dcn20_update_plane_addr,
.update_dchub = dcn10_update_dchub,
.update_pending_status = dcn10_update_pending_status,
@@ -42,7 +42,7 @@ static const struct hw_sequencer_funcs dcn401_funcs = {
.cursor_lock = dcn10_cursor_lock,
.prepare_bandwidth = dcn401_prepare_bandwidth,
.optimize_bandwidth = dcn401_optimize_bandwidth,
- .update_bandwidth = dcn20_update_bandwidth,
+ .update_bandwidth = dcn401_update_bandwidth,
.set_drr = dcn10_set_drr,
.get_position = dcn10_get_position,
.set_static_screen_control = dcn31_set_static_screen_control,
@@ -66,7 +66,6 @@ static const struct hw_sequencer_funcs dcn401_funcs = {
.enable_writeback = dcn30_enable_writeback,
.disable_writeback = dcn30_disable_writeback,
.update_writeback = dcn30_update_writeback,
- .mmhubbub_warmup = dcn30_mmhubbub_warmup,
.dmdata_status_done = dcn20_dmdata_status_done,
.program_dmdata_engine = dcn30_program_dmdata_engine,
.set_dmdata_attributes = dcn20_set_dmdata_attributes,
@@ -100,6 +99,10 @@ static const struct hw_sequencer_funcs dcn401_funcs = {
.fams2_global_control_lock_fast = dcn401_fams2_global_control_lock_fast,
.program_outstanding_updates = dcn401_program_outstanding_updates,
.wait_for_all_pending_updates = dcn30_wait_for_all_pending_updates,
+ .detect_pipe_changes = dcn401_detect_pipe_changes,
+ .enable_plane = dcn20_enable_plane,
+ .update_dchubp_dpp = dcn20_update_dchubp_dpp,
+ .post_unlock_reset_opp = dcn20_post_unlock_reset_opp,
};
static const struct hwseq_private_funcs dcn401_private_funcs = {
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
index 66fdc5805d0a..a7d66cfd93c9 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h
@@ -194,7 +194,6 @@ enum block_sequence_func {
DMUB_SUBVP_SAVE_SURF_ADDR,
HUBP_WAIT_FOR_DCC_META_PROP,
DMUB_FAMS2_GLOBAL_CONTROL_LOCK_FAST,
-
};
struct block_sequence {
@@ -331,10 +330,6 @@ struct hw_sequencer_funcs {
void (*disable_writeback)(struct dc *dc,
unsigned int dwb_pipe_inst);
- bool (*mmhubbub_warmup)(struct dc *dc,
- unsigned int num_dwb,
- struct dc_writeback_info *wb_info);
-
/* Clock Related */
enum dc_status (*set_clock)(struct dc *dc,
enum dc_clock_type clock_type,
@@ -462,6 +457,18 @@ struct hw_sequencer_funcs {
struct dc_state *context);
void (*setup_hpo_hw_control)(const struct dce_hwseq *hws, bool enable);
void (*wait_for_all_pending_updates)(const struct pipe_ctx *pipe_ctx);
+ void (*detect_pipe_changes)(struct dc_state *old_state,
+ struct dc_state *new_state,
+ struct pipe_ctx *old_pipe,
+ struct pipe_ctx *new_pipe);
+ void (*enable_plane)(struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context);
+ void (*update_dchubp_dpp)(struct dc *dc,
+ struct pipe_ctx *pipe_ctx,
+ struct dc_state *context);
+ void (*post_unlock_reset_opp)(struct dc *dc,
+ struct pipe_ctx *opp_head);
};
void color_space_to_black_color(
@@ -489,11 +496,12 @@ void get_hdr_visual_confirm_color(
void get_mpctree_visual_confirm_color(
struct pipe_ctx *pipe_ctx,
struct tg_color *color);
-
+void get_vabc_visual_confirm_color(
+ struct pipe_ctx *pipe_ctx,
+ struct tg_color *color);
void get_subvp_visual_confirm_color(
struct pipe_ctx *pipe_ctx,
struct tg_color *color);
-
void get_fams2_visual_confirm_color(
struct dc *dc,
struct dc_state *context,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index 8597e866bfe6..d558efc6e12f 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -45,9 +45,6 @@
#define MAX_SVP_PHANTOM_STREAMS 2
#define MAX_SVP_PHANTOM_PLANES 2
-void enable_surface_flip_reporting(struct dc_plane_state *plane_state,
- uint32_t controller_id);
-
#include "grph_object_id.h"
#include "link_encoder.h"
#include "stream_encoder.h"
@@ -219,6 +216,8 @@ struct resource_funcs {
* Get indicator of power from a context that went through full validation
*/
int (*get_power_profile)(const struct dc_state *context);
+ unsigned int (*get_det_buffer_size)(const struct dc_state *context);
+ unsigned int (*get_vstartup_for_pipe)(struct pipe_ctx *pipe_ctx);
};
struct audio_support{
@@ -467,6 +466,7 @@ struct pipe_ctx {
unsigned int surface_size_in_mall_bytes;
struct dml2_dchub_per_pipe_register_set hubp_regs;
struct dml2_hubp_pipe_mcache_regs mcache_regs;
+ union dml2_global_sync_programming global_sync;
struct dwbc *dwbc;
struct mcif_wb *mcif_wb;
@@ -477,6 +477,8 @@ struct pipe_ctx {
/* subvp_index: only valid if the pipe is a SUBVP_MAIN*/
uint8_t subvp_index;
struct pixel_rate_divider pixel_rate_divider;
+ /* pixels borrowed from hblank to hactive */
+ uint8_t hblank_borrow;
};
/* Data used for dynamic link encoder assignment.
@@ -539,7 +541,8 @@ struct dcn_bw_output {
bool legacy_svp_drr_stream_index_valid;
struct dml2_mcache_surface_allocation mcache_allocations[DML2_MAX_PLANES];
struct dmub_cmd_fams2_global_config fams2_global_config;
- struct dmub_fams2_stream_static_state fams2_stream_params[DML2_MAX_PLANES];
+ union dmub_cmd_fams2_config fams2_stream_base_params[DML2_MAX_PLANES];
+ union dmub_cmd_fams2_config fams2_stream_sub_params[DML2_MAX_PLANES];
struct dml2_display_arb_regs arb_regs;
};
diff --git a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h
index 55529c5f471c..d19a595c2be4 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h
@@ -624,10 +624,6 @@ bool dcn_validate_bandwidth(
struct dc_state *context,
bool fast_validate);
-unsigned int dcn_find_dcfclk_suits_all(
- const struct dc *dc,
- struct dc_clocks *clocks);
-
void dcn_get_soc_clks(
struct dc *dc,
int *min_fclk_khz,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
index 2d06067ff36d..c14d64687a3d 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
@@ -306,6 +306,9 @@ struct clk_mgr_funcs {
*/
void (*set_hard_min_memclk)(struct clk_mgr *clk_mgr, bool current_mode);
+ int (*get_hard_min_memclk)(struct clk_mgr *clk_mgr);
+ int (*get_hard_min_fclk)(struct clk_mgr *clk_mgr);
+
/* Send message to PMFW to set hard max memclk frequency to highest DPM */
void (*set_hard_max_memclk)(struct clk_mgr *clk_mgr);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h
index c2dd061892f4..7a1ca1e98059 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h
@@ -166,6 +166,41 @@ enum dentist_divider_range {
CLK_SR_DCN32(CLK1_CLK4_CURRENT_CNT), \
CLK_SR_DCN32(CLK4_CLK0_CURRENT_CNT)
+#define CLK_REG_LIST_DCN35() \
+ CLK_SR_DCN35(CLK1_CLK_PLL_REQ), \
+ CLK_SR_DCN35(CLK1_CLK0_DFS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK1_DFS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK2_DFS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK3_DFS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK4_DFS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK5_DFS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK0_CURRENT_CNT), \
+ CLK_SR_DCN35(CLK1_CLK1_CURRENT_CNT), \
+ CLK_SR_DCN35(CLK1_CLK2_CURRENT_CNT), \
+ CLK_SR_DCN35(CLK1_CLK3_CURRENT_CNT), \
+ CLK_SR_DCN35(CLK1_CLK4_CURRENT_CNT), \
+ CLK_SR_DCN35(CLK1_CLK5_CURRENT_CNT), \
+ CLK_SR_DCN35(CLK1_CLK0_BYPASS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK1_BYPASS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK2_BYPASS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK3_BYPASS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK4_BYPASS_CNTL),\
+ CLK_SR_DCN35(CLK1_CLK5_BYPASS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK0_DS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK1_DS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK2_DS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK3_DS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK4_DS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK5_DS_CNTL), \
+ CLK_SR_DCN35(CLK1_CLK0_ALLOW_DS), \
+ CLK_SR_DCN35(CLK1_CLK1_ALLOW_DS), \
+ CLK_SR_DCN35(CLK1_CLK2_ALLOW_DS), \
+ CLK_SR_DCN35(CLK1_CLK3_ALLOW_DS), \
+ CLK_SR_DCN35(CLK1_CLK4_ALLOW_DS), \
+ CLK_SR_DCN35(CLK1_CLK5_ALLOW_DS), \
+ CLK_SR_DCN35(CLK5_spll_field_8), \
+ SR(DENTIST_DISPCLK_CNTL), \
+
#define CLK_COMMON_MASK_SH_LIST_DCN32(mask_sh) \
CLK_COMMON_MASK_SH_LIST_DCN20_BASE(mask_sh),\
CLK_SF(CLK1_CLK_PLL_REQ, FbMult_int, mask_sh),\
@@ -236,6 +271,7 @@ struct clk_mgr_registers {
uint32_t CLK1_CLK2_DFS_CNTL;
uint32_t CLK1_CLK3_DFS_CNTL;
uint32_t CLK1_CLK4_DFS_CNTL;
+ uint32_t CLK1_CLK5_DFS_CNTL;
uint32_t CLK2_CLK2_DFS_CNTL;
uint32_t CLK1_CLK0_CURRENT_CNT;
@@ -243,11 +279,34 @@ struct clk_mgr_registers {
uint32_t CLK1_CLK2_CURRENT_CNT;
uint32_t CLK1_CLK3_CURRENT_CNT;
uint32_t CLK1_CLK4_CURRENT_CNT;
+ uint32_t CLK1_CLK5_CURRENT_CNT;
uint32_t CLK0_CLK0_DFS_CNTL;
uint32_t CLK0_CLK1_DFS_CNTL;
uint32_t CLK0_CLK3_DFS_CNTL;
uint32_t CLK0_CLK4_DFS_CNTL;
+ uint32_t CLK1_CLK0_BYPASS_CNTL;
+ uint32_t CLK1_CLK1_BYPASS_CNTL;
+ uint32_t CLK1_CLK2_BYPASS_CNTL;
+ uint32_t CLK1_CLK3_BYPASS_CNTL;
+ uint32_t CLK1_CLK4_BYPASS_CNTL;
+ uint32_t CLK1_CLK5_BYPASS_CNTL;
+
+ uint32_t CLK1_CLK0_DS_CNTL;
+ uint32_t CLK1_CLK1_DS_CNTL;
+ uint32_t CLK1_CLK2_DS_CNTL;
+ uint32_t CLK1_CLK3_DS_CNTL;
+ uint32_t CLK1_CLK4_DS_CNTL;
+ uint32_t CLK1_CLK5_DS_CNTL;
+
+ uint32_t CLK1_CLK0_ALLOW_DS;
+ uint32_t CLK1_CLK1_ALLOW_DS;
+ uint32_t CLK1_CLK2_ALLOW_DS;
+ uint32_t CLK1_CLK3_ALLOW_DS;
+ uint32_t CLK1_CLK4_ALLOW_DS;
+ uint32_t CLK1_CLK5_ALLOW_DS;
+ uint32_t CLK5_spll_field_8;
+
};
struct clk_mgr_shift {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
index 16580d624278..2a530a4a39f7 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
@@ -42,6 +42,7 @@
#include "cursor_reg_cache.h"
#include "dml2/dml21/inc/dml_top_dchub_registers.h"
+#include "dml2/dml21/inc/dml_top_types.h"
#define OPP_ID_INVALID 0xf
#define MAX_TTU 0xffffff
@@ -144,11 +145,21 @@ struct hubp_funcs {
struct _vcs_dpi_display_rq_regs_st *rq_regs,
struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest);
+ void (*hubp_setup2)(
+ struct hubp *hubp,
+ struct dml2_dchub_per_pipe_register_set *pipe_regs,
+ union dml2_global_sync_programming *pipe_global_sync,
+ struct dc_crtc_timing *timing);
+
void (*hubp_setup_interdependent)(
struct hubp *hubp,
struct _vcs_dpi_display_dlg_regs_st *dlg_regs,
struct _vcs_dpi_display_ttu_regs_st *ttu_regs);
+ void (*hubp_setup_interdependent2)(
+ struct hubp *hubp,
+ struct dml2_dchub_per_pipe_register_set *pipe_regs);
+
void (*dcc_control)(struct hubp *hubp, bool enable,
enum hubp_ind_block_size blk_size);
@@ -165,7 +176,7 @@ struct hubp_funcs {
void (*hubp_program_pte_vm)(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
enum dc_rotation_angle rotation);
void (*hubp_set_vm_system_aperture_settings)(
@@ -179,7 +190,7 @@ struct hubp_funcs {
void (*hubp_program_surface_config)(
struct hubp *hubp,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -275,6 +286,7 @@ struct hubp_funcs {
enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cb_b,
enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cr_r);
int (*hubp_get_3dlut_fl_done)(struct hubp *hubp);
+ void (*hubp_clear_tiling)(struct hubp *hubp);
};
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
index af9183f5d69b..08c16ba52a51 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
@@ -168,6 +168,14 @@ struct link_encoder_funcs {
struct link_encoder *enc,
enum encoder_type_select sel,
uint32_t hpo_inst);
+ void (*enable_dpia_output)(struct link_encoder *enc,
+ const struct dc_link_settings *link_settings,
+ uint8_t dpia_id,
+ uint8_t digmode,
+ uint8_t fec_rdy);
+ void (*disable_dpia_output)(struct link_encoder *link_enc,
+ uint8_t dpia_id,
+ uint8_t digmode);
};
/*
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h
index a8b44f398ce6..42fbc70f7056 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h
@@ -150,7 +150,7 @@ struct mem_input_funcs {
void (*mem_input_program_pte_vm)(
struct mem_input *mem_input,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
enum dc_rotation_angle rotation);
void (*mem_input_set_vm_system_aperture_settings)(
@@ -164,7 +164,7 @@ struct mem_input_funcs {
void (*mem_input_program_surface_config)(
struct mem_input *mem_input,
enum surface_pixel_format format,
- union dc_tiling_info *tiling_info,
+ struct dc_tiling_info *tiling_info,
struct plane_size *plane_size,
enum dc_rotation_angle rotation,
struct dc_plane_dcc_param *dcc,
@@ -187,6 +187,8 @@ struct mem_input_funcs {
const struct dc_cursor_position *pos,
const struct dc_cursor_mi_param *param);
+ void (*mem_input_clear_tiling)(
+ struct mem_input *mem_input);
};
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h
index 03cbcbb36f1c..6fdc9809280c 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/optc.h
@@ -210,7 +210,7 @@ void optc1_enable_crtc_reset(struct timing_generator *optc,
bool optc1_configure_crc(struct timing_generator *optc, const struct crc_params *params);
-bool optc1_get_crc(struct timing_generator *optc,
+bool optc1_get_crc(struct timing_generator *optc, uint8_t idx,
uint32_t *r_cr,
uint32_t *g_y,
uint32_t *b_cb);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
index b74e18cc1e66..9885cb3c310f 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
@@ -141,6 +141,9 @@ struct crc_params {
bool continuous_mode;
bool enable;
+
+ uint8_t crc_eng_inst;
+ bool reset;
};
/**
@@ -291,7 +294,7 @@ struct timing_generator_funcs {
* @get_crc: Get CRCs for the given timing generator. Return false if
* CRCs are not enabled (via configure_crc).
*/
- bool (*get_crc)(struct timing_generator *tg,
+ bool (*get_crc)(struct timing_generator *tg, uint8_t idx,
uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb);
void (*program_manual_trigger)(struct timing_generator *optc);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link.h
index f04292086c08..fd1f9d3db039 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/link.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/link.h
@@ -148,6 +148,10 @@ struct link_service {
const struct dc_stream_state *stream,
const unsigned int num_streams);
+ uint32_t (*dp_required_hblank_size_bytes)(
+ const struct dc_link *link,
+ struct dp_audio_bandwidth_params *audio_params);
+
/*************************** DPMS *************************************/
void (*set_dpms_on)(struct dc_state *state, struct pipe_ctx *pipe_ctx);
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c
index 4fb9cd6708d5..1d61d475d36f 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c
@@ -30,8 +30,8 @@
#include "../dce110/irq_service_dce110.h"
#include "irq_service_dcn201.h"
-#include "dcn/dcn_2_0_3_offset.h"
-#include "dcn/dcn_2_0_3_sh_mask.h"
+#include "dcn/dcn_2_0_1_offset.h"
+#include "dcn/dcn_2_0_1_sh_mask.h"
#include "cyan_skillfish_ip_offset.h"
#include "soc15_hw_ip.h"
diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c
index ff8fe1a94965..96febabf464a 100644
--- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c
+++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c
@@ -251,7 +251,7 @@ static void dp_test_send_phy_test_pattern(struct dc_link *link)
link_training_settings.lttpr_mode = dp_decide_lttpr_mode(link, &link->cur_link_settings);
- if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
+ if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
link_training_settings.lttpr_mode == LTTPR_MODE_TRANSPARENT)
dp_fixed_vs_pe_read_lane_adjust(
link,
@@ -646,7 +646,7 @@ bool dp_set_test_pattern(
if (IS_DP_PHY_PATTERN(test_pattern)) {
/* Set DPCD Lane Settings before running test pattern */
if (p_link_settings != NULL) {
- if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
+ if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
p_link_settings->lttpr_mode == LTTPR_MODE_TRANSPARENT) {
dp_fixed_vs_pe_set_retimer_lane_settings(
link,
diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c
index 3e47a6735912..06faa461067b 100644
--- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c
+++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio.c
@@ -164,7 +164,9 @@ void disable_dio_link_output(struct dc_link *link,
{
struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link);
- link_enc->funcs->disable_output(link_enc, signal);
+ if (link_enc != NULL)
+ link_enc->funcs->disable_output(link_enc, signal);
+
link->dc->link_srv->dp_trace_source_sequence(link,
DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY);
}
diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.c
index 348ea4cb832d..a6d1d7641ab4 100644
--- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.c
+++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dio_fixed_vs_pe_retimer.c
@@ -187,7 +187,7 @@ static const struct link_hwss dio_fixed_vs_pe_retimer_link_hwss = {
bool requires_fixed_vs_pe_retimer_dio_link_hwss(const struct dc_link *link)
{
- return (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN);
+ return ((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN);
}
const struct link_hwss *get_dio_fixed_vs_pe_retimer_link_hwss(void)
diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c
index 6499807af72a..36adf95744fe 100644
--- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.c
@@ -77,17 +77,74 @@ static void set_dio_dpia_lane_settings(struct dc_link *link,
{
}
+static void enable_dpia_link_output(struct dc_link *link,
+ const struct link_resource *link_res,
+ enum signal_type signal,
+ enum clock_source_id clock_source,
+ const struct dc_link_settings *link_settings)
+{
+ struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link);
+
+ if (link_enc != NULL) {
+ if (link->dc->config.enable_dpia_pre_training && link_enc->funcs->enable_dpia_output) {
+ uint8_t fec_rdy = link->dc->link_srv->dp_should_enable_fec(link);
+ uint8_t digmode = dc_is_dp_sst_signal(signal) ? DIG_SST_MODE : DIG_MST_MODE;
+
+ link_enc->funcs->enable_dpia_output(
+ link_enc,
+ link_settings,
+ link->ddc_hw_inst,
+ digmode,
+ fec_rdy);
+ } else {
+ if (dc_is_dp_sst_signal(signal))
+ link_enc->funcs->enable_dp_output(
+ link_enc,
+ link_settings,
+ clock_source);
+ else
+ link_enc->funcs->enable_dp_mst_output(
+ link_enc,
+ link_settings,
+ clock_source);
+ }
+
+ }
+
+ link->dc->link_srv->dp_trace_source_sequence(link,
+ DPCD_SOURCE_SEQ_AFTER_ENABLE_LINK_PHY);
+}
+
+static void disable_dpia_link_output(struct dc_link *link,
+ const struct link_resource *link_res,
+ enum signal_type signal)
+{
+ struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link);
+
+ if (link_enc != NULL) {
+ if (link->dc->config.enable_dpia_pre_training && link_enc->funcs->disable_dpia_output) {
+ uint8_t digmode = dc_is_dp_sst_signal(signal) ? DIG_SST_MODE : DIG_MST_MODE;
+
+ link_enc->funcs->disable_dpia_output(link_enc, link->ddc_hw_inst, digmode);
+ } else
+ link_enc->funcs->disable_output(link_enc, signal);
+ }
+
+ link->dc->link_srv->dp_trace_source_sequence(link,
+ DPCD_SOURCE_SEQ_AFTER_DISABLE_LINK_PHY);
+}
+
static const struct link_hwss dpia_link_hwss = {
.setup_stream_encoder = setup_dio_stream_encoder,
.reset_stream_encoder = reset_dio_stream_encoder,
.setup_stream_attribute = setup_dio_stream_attribute,
- .disable_link_output = disable_dio_link_output,
+ .disable_link_output = disable_dpia_link_output,
.setup_audio_output = setup_dio_audio_output,
.enable_audio_packet = enable_dio_audio_packet,
.disable_audio_packet = disable_dio_audio_packet,
.ext = {
.set_throttled_vcp_size = set_dio_throttled_vcp_size,
- .enable_dp_link_output = enable_dio_dp_link_output,
+ .enable_dp_link_output = enable_dpia_link_output,
.set_dp_link_test_pattern = set_dio_dpia_link_test_pattern,
.set_dp_lane_settings = set_dio_dpia_lane_settings,
.update_stream_allocation_table = update_dpia_stream_allocation_table,
diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.h b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.h
index ad16ec5d9bb7..259e0f4775e1 100644
--- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.h
+++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_dpia.h
@@ -27,6 +27,9 @@
#include "link_hwss.h"
+#define DIG_SST_MODE 0
+#define DIG_MST_MODE 5
+
const struct link_hwss *get_dpia_link_hwss(void);
bool can_use_dpia_link_hwss(const struct dc_link *link,
const struct link_resource *link_res);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
index e026c728042a..550e1a098fa2 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
@@ -829,7 +829,8 @@ static bool should_verify_link_capability_destructively(struct dc_link *link,
if (link->dc->debug.skip_detection_link_training ||
dc_is_embedded_signal(link->local_sink->sink_signal) ||
- link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
+ (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
+ !link->dc->config.enable_dpia_pre_training)) {
destrictive = false;
} else if (link_dp_get_encoding_format(&max_link_cap) ==
DP_8b_10b_ENCODING) {
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
index 41cab9ad6885..ec7de9c01fab 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
@@ -772,6 +772,20 @@ static bool dp_set_dsc_on_rx(struct pipe_ctx *pipe_ctx, bool enable)
return result;
}
+static bool dp_set_hblank_reduction_on_rx(struct pipe_ctx *pipe_ctx)
+{
+ struct dc *dc = pipe_ctx->stream->ctx->dc;
+ struct dc_stream_state *stream = pipe_ctx->stream;
+ bool result = false;
+
+ if (dc_is_virtual_signal(stream->signal))
+ result = true;
+ else
+ result = dm_helpers_dp_write_hblank_reduction(dc->ctx, stream);
+ return result;
+}
+
+
/* The stream with these settings can be sent (unblanked) only after DSC was enabled on RX first,
* i.e. after dp_enable_dsc_on_rx() had been called
*/
@@ -808,7 +822,8 @@ void link_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
enum optc_dsc_mode optc_dsc_mode;
/* Enable DSC hw block */
- dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;
+ dsc_cfg.pic_width = (stream->timing.h_addressable + pipe_ctx->hblank_borrow +
+ stream->timing.h_border_left + stream->timing.h_border_right) / opp_cnt;
dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top + stream->timing.v_border_bottom;
dsc_cfg.pixel_encoding = stream->timing.pixel_encoding;
dsc_cfg.color_depth = stream->timing.display_color_depth;
@@ -1952,11 +1967,15 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx)
stream->phy_pix_clk = stream->timing.pix_clk_100hz / 10;
if (stream->phy_pix_clk > 340000)
is_over_340mhz = true;
+ if (dc_is_tmds_signal(stream->signal) && stream->phy_pix_clk > 6000000UL) {
+ ASSERT(false);
+ return;
+ }
if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) {
unsigned short masked_chip_caps = pipe_ctx->stream->link->chip_caps &
- EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK;
- if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) {
+ AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK;
+ if (masked_chip_caps == AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) {
/* DP159, Retimer settings */
eng_id = pipe_ctx->stream_res.stream_enc->id;
@@ -1967,7 +1986,7 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx)
write_i2c_default_retimer_setting(pipe_ctx,
is_vga_mode, is_over_340mhz);
}
- } else if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) {
+ } else if (masked_chip_caps == AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) {
/* PI3EQX1204, Redriver settings */
write_i2c_redriver_setting(pipe_ctx, is_over_340mhz);
}
@@ -2023,7 +2042,7 @@ static enum dc_status enable_link_dp(struct dc_state *state,
int lt_attempts = LINK_TRAINING_ATTEMPTS;
// Increase retry count if attempting DP1.x on FIXED_VS link
- if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
+ if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING)
lt_attempts = 10;
@@ -2038,7 +2057,8 @@ static enum dc_status enable_link_dp(struct dc_state *state,
/* Train with fallback when enabling DPIA link. Conventional links are
* trained with fallback during sink detection.
*/
- if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
+ !link->dc->config.enable_dpia_pre_training)
do_fallback = true;
/*
@@ -2374,13 +2394,13 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx)
enum engine_id eng_id = pipe_ctx->stream_res.stream_enc->id;
unsigned short masked_chip_caps = link->chip_caps &
- EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK;
+ AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK;
//Need to inform that sink is going to use legacy HDMI mode.
write_scdc_data(
link->ddc,
165000,//vbios only handles 165Mhz.
false);
- if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) {
+ if (masked_chip_caps == AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) {
/* DP159, Retimer settings */
if (get_ext_hdmi_settings(pipe_ctx, eng_id, &settings))
write_i2c_retimer_setting(pipe_ctx,
@@ -2388,7 +2408,7 @@ void link_set_dpms_off(struct pipe_ctx *pipe_ctx)
else
write_i2c_default_retimer_setting(pipe_ctx,
false, false);
- } else if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) {
+ } else if (masked_chip_caps == AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) {
/* PI3EQX1204, Redriver settings */
write_i2c_redriver_setting(pipe_ctx, false);
}
@@ -2528,6 +2548,15 @@ void link_set_dpms_on(
if (pipe_ctx->stream->dpms_off)
return;
+	/* For a DP tunneling link, a pending HPD means that we have a race condition between processing
+	 * the current link and processing the pending HPD. If we enable the link now, we may end up with a
+	 * link that is not actually connected to a sink. So we skip enabling the link in this case.
+	 */
+	if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && link->is_hpd_pending) {
+		DC_LOG_DEBUG("%s, Link%d HPD is pending, not enabling it.\n", __func__, link->link_index);
+ return;
+ }
+
/* Have to setup DSC before DIG FE and BE are connected (which happens before the
* link training). This is to make sure the bandwidth sent to DIG BE won't be
* bigger than what the link and/or DIG BE can handle. VBID[6]/CompressedStream_flag
@@ -2593,6 +2622,9 @@ void link_set_dpms_on(
}
}
+ if (dc_is_dp_signal(pipe_ctx->stream->signal))
+ dp_set_hblank_reduction_on_rx(pipe_ctx);
+
if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
allocate_usb4_bandwidth(pipe_ctx->stream);
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
index 5e1b5ab9fbc6..a7877d57a00f 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c
@@ -101,6 +101,7 @@ static void construct_link_service_validation(struct link_service *link_srv)
link_srv->validate_mode_timing = link_validate_mode_timing;
link_srv->dp_link_bandwidth_kbps = dp_link_bandwidth_kbps;
link_srv->validate_dpia_bandwidth = link_validate_dpia_bandwidth;
+ link_srv->dp_required_hblank_size_bytes = dp_required_hblank_size_bytes;
}
/* link dpms owns the programming sequence of stream's dpms state associated
@@ -698,7 +699,7 @@ static bool construct_phy(struct dc_link *link,
link->chip_caps);
}
- if (link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) {
+ if ((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) {
link->bios_forced_drive_settings.VOLTAGE_SWING =
(bios->integrated_info->ext_disp_conn_info.fixdpvoltageswing & 0x3);
link->bios_forced_drive_settings.PRE_EMPHASIS =
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
index 60f15a9ba7a5..29606fda029d 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c
@@ -409,3 +409,182 @@ bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const un
return dpia_validate_usb4_bw(dpia_link, bw_needed, num_dpias);
}
+
+struct dp_audio_layout_config {
+ uint8_t layouts_per_sample_denom;
+ uint8_t symbols_per_layout;
+ uint8_t max_layouts_per_audio_sdp;
+};
+
+static void get_audio_layout_config(
+ uint32_t channel_count,
+ enum dp_link_encoding encoding,
+ struct dp_audio_layout_config *output)
+{
+ memset(output, 0, sizeof(struct dp_audio_layout_config));
+
+ /* Assuming L-PCM audio. Current implementation uses max 1 layout per SDP,
+ * with each layout being the same size (8ch layout).
+ */
+ if (encoding == DP_8b_10b_ENCODING) {
+ if (channel_count == 2) {
+ output->layouts_per_sample_denom = 4;
+ output->symbols_per_layout = 40;
+ output->max_layouts_per_audio_sdp = 1;
+ } else if (channel_count == 8 || channel_count == 6) {
+ output->layouts_per_sample_denom = 1;
+ output->symbols_per_layout = 40;
+ output->max_layouts_per_audio_sdp = 1;
+ }
+ } else if (encoding == DP_128b_132b_ENCODING) {
+ if (channel_count == 2) {
+ output->layouts_per_sample_denom = 4;
+ output->symbols_per_layout = 10;
+ output->max_layouts_per_audio_sdp = 1;
+ } else if (channel_count == 8 || channel_count == 6) {
+ output->layouts_per_sample_denom = 1;
+ output->symbols_per_layout = 10;
+ output->max_layouts_per_audio_sdp = 1;
+ }
+ }
+}
+
+static uint32_t get_av_stream_map_lane_count(
+ enum dp_link_encoding encoding,
+ enum dc_lane_count lane_count,
+ bool is_mst)
+{
+ uint32_t av_stream_map_lane_count = 0;
+
+ if (encoding == DP_8b_10b_ENCODING) {
+ if (!is_mst)
+ av_stream_map_lane_count = lane_count;
+ else
+ av_stream_map_lane_count = 4;
+ } else if (encoding == DP_128b_132b_ENCODING) {
+ av_stream_map_lane_count = 4;
+ }
+
+ ASSERT(av_stream_map_lane_count != 0);
+
+ return av_stream_map_lane_count;
+}
+
+static uint32_t get_audio_sdp_overhead(
+ enum dp_link_encoding encoding,
+ enum dc_lane_count lane_count,
+ bool is_mst)
+{
+ uint32_t audio_sdp_overhead = 0;
+
+ if (encoding == DP_8b_10b_ENCODING) {
+ if (is_mst)
+ audio_sdp_overhead = 16; /* 4 * 2 + 8 */
+ else
+ audio_sdp_overhead = lane_count * 2 + 8;
+ } else if (encoding == DP_128b_132b_ENCODING) {
+ audio_sdp_overhead = 10; /* 4 x 2.5 */
+ }
+
+ ASSERT(audio_sdp_overhead != 0);
+
+ return audio_sdp_overhead;
+}
+
+/* Current calculation only applicable for 8b/10b MST and 128b/132b SST/MST.
+ */
+static uint32_t calculate_overhead_hblank_bw_in_symbols(
+ uint32_t max_slice_h)
+{
+ uint32_t overhead_hblank_bw = 0; /* in stream symbols */
+
+ overhead_hblank_bw += max_slice_h * 4; /* EOC overhead */
+ overhead_hblank_bw += 12; /* Main link overhead (VBID, BS/BE) */
+
+ return overhead_hblank_bw;
+}
+
+uint32_t dp_required_hblank_size_bytes(
+ const struct dc_link *link,
+ struct dp_audio_bandwidth_params *audio_params)
+{
+	/* The main logic from dce_audio is duplicated here, with the main
+	 * differences being:
+	 * - Pre-determined lane count of 4
+	 * - Assumed 16 DSC slices for the worst case
+	 * - Assumed SDP split disabled for the worst case
+	 * TODO: Unify with dce_audio to avoid the duplicated logic.
+	 */
+
+ const struct dc_crtc_timing *timing = audio_params->crtc_timing;
+ const uint32_t channel_count = audio_params->channel_count;
+ const uint32_t sample_rate_hz = audio_params->sample_rate_hz;
+ const enum dp_link_encoding link_encoding = audio_params->link_encoding;
+
+ // 8b/10b MST and 128b/132b are always 4 logical lanes.
+ const uint32_t lane_count = 4;
+ const bool is_mst = (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT);
+ // Maximum slice count is with ODM 4:1, 4 slices per DSC
+ const uint32_t max_slices_h = 16;
+
+ const uint32_t av_stream_map_lane_count = get_av_stream_map_lane_count(
+ link_encoding, lane_count, is_mst);
+ const uint32_t audio_sdp_overhead = get_audio_sdp_overhead(
+ link_encoding, lane_count, is_mst);
+ struct dp_audio_layout_config layout_config;
+
+ if (link_encoding == DP_8b_10b_ENCODING && link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT)
+ return 0;
+
+ get_audio_layout_config(
+ channel_count, link_encoding, &layout_config);
+
+	/* DP spec recommends a safety margin between 1.05 and 1.1 to prevent sample under-run */
+ struct fixed31_32 audio_sdp_margin = dc_fixpt_from_fraction(110, 100);
+ struct fixed31_32 horizontal_line_freq_khz = dc_fixpt_from_fraction(
+ timing->pix_clk_100hz, (long long)timing->h_total * 10);
+ struct fixed31_32 samples_per_line;
+ struct fixed31_32 layouts_per_line;
+ struct fixed31_32 symbols_per_sdp_max_layout;
+ struct fixed31_32 remainder;
+ uint32_t num_sdp_with_max_layouts;
+ uint32_t required_symbols_per_hblank;
+ uint32_t required_bytes_per_hblank = 0;
+
+ samples_per_line = dc_fixpt_from_fraction(sample_rate_hz, 1000);
+ samples_per_line = dc_fixpt_div(samples_per_line, horizontal_line_freq_khz);
+ layouts_per_line = dc_fixpt_div_int(samples_per_line, layout_config.layouts_per_sample_denom);
+ // HBlank expansion usage assumes SDP split disabled to allow for worst case.
+ layouts_per_line = dc_fixpt_from_int(dc_fixpt_ceil(layouts_per_line));
+
+ num_sdp_with_max_layouts = dc_fixpt_floor(
+ dc_fixpt_div_int(layouts_per_line, layout_config.max_layouts_per_audio_sdp));
+ symbols_per_sdp_max_layout = dc_fixpt_from_int(
+ layout_config.max_layouts_per_audio_sdp * layout_config.symbols_per_layout);
+ symbols_per_sdp_max_layout = dc_fixpt_add_int(symbols_per_sdp_max_layout, audio_sdp_overhead);
+ symbols_per_sdp_max_layout = dc_fixpt_mul(symbols_per_sdp_max_layout, audio_sdp_margin);
+ required_symbols_per_hblank = num_sdp_with_max_layouts;
+ required_symbols_per_hblank *= ((dc_fixpt_ceil(symbols_per_sdp_max_layout) + av_stream_map_lane_count) /
+ av_stream_map_lane_count) * av_stream_map_lane_count;
+
+ if (num_sdp_with_max_layouts != dc_fixpt_ceil(
+ dc_fixpt_div_int(layouts_per_line, layout_config.max_layouts_per_audio_sdp))) {
+ remainder = dc_fixpt_sub_int(layouts_per_line,
+ num_sdp_with_max_layouts * layout_config.max_layouts_per_audio_sdp);
+ remainder = dc_fixpt_mul_int(remainder, layout_config.symbols_per_layout);
+ remainder = dc_fixpt_add_int(remainder, audio_sdp_overhead);
+ remainder = dc_fixpt_mul(remainder, audio_sdp_margin);
+ required_symbols_per_hblank += ((dc_fixpt_ceil(remainder) + av_stream_map_lane_count) /
+ av_stream_map_lane_count) * av_stream_map_lane_count;
+ }
+
+ required_symbols_per_hblank += calculate_overhead_hblank_bw_in_symbols(max_slices_h);
+
+ if (link_encoding == DP_8b_10b_ENCODING)
+ required_bytes_per_hblank = required_symbols_per_hblank; // 8 bits per 8b/10b symbol
+ else if (link_encoding == DP_128b_132b_ENCODING)
+ required_bytes_per_hblank = required_symbols_per_hblank * 4; // 32 bits per 128b/132b symbol
+
+ return required_bytes_per_hblank;
+}
+
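To make the fixed-point arithmetic above concrete, here is a minimal standalone sketch (not part of the patch) of the same calculation using plain double/int math instead of dc_fixpt, worked for an assumed 2-channel 48 kHz L-PCM stream over 8b/10b MST on a 1080p60 timing (148.5 MHz pixel clock, h_total = 2200); a single audio SDP per line is assumed, so the remainder-SDP term drops out:

#include <math.h>
#include <stdio.h>

int main(void)
{
	const int lanes = 4;			/* logical lanes, 8b/10b MST */
	const int sdp_overhead = 16;		/* 4 * 2 + 8 symbols */
	const int symbols_per_layout = 40;	/* 8ch layout, 8b/10b */
	const int layouts_per_sample_denom = 4;	/* 2-channel audio */
	const int max_slices_h = 16;

	double line_freq_khz = 148500.0 / 2200.0;	/* ~67.5 kHz */
	double samples_per_line = 48.0 / line_freq_khz;	/* ~0.711 */
	int layouts_per_line =
		(int)ceil(samples_per_line / layouts_per_sample_denom);	/* 1 */

	/* One SDP carrying a full layout, padded by the 1.1 safety margin. */
	double symbols_per_sdp = (symbols_per_layout + sdp_overhead) * 1.1;
	int per_sdp = (((int)ceil(symbols_per_sdp) + lanes) / lanes) * lanes;	/* 64 */

	int required_symbols = layouts_per_line * per_sdp;

	/* EOC per DSC slice plus VBID/BS/BE main link overhead. */
	required_symbols += max_slices_h * 4 + 12;	/* +76 -> 140 */

	/* 8b/10b encoding: one byte per stream symbol. */
	printf("required hblank size: %d bytes\n", required_symbols);
	return 0;
}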
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.h b/drivers/gpu/drm/amd/display/dc/link/link_validation.h
index 595fb05946e9..bf398c49c3e8 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_validation.h
+++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.h
@@ -37,4 +37,9 @@ uint32_t dp_link_bandwidth_kbps(
const struct dc_link *link,
const struct dc_link_settings *link_settings);
+
+uint32_t dp_required_hblank_size_bytes(
+ const struct dc_link *link,
+ struct dp_audio_bandwidth_params *audio_params);
+
#endif /* __LINK_VALIDATION_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c
index d6d5bbf2108c..267180e7bc48 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_ddc.c
@@ -505,7 +505,7 @@ bool try_to_configure_aux_timeout(struct ddc_service *ddc,
bool result = false;
struct ddc *ddc_pin = ddc->ddc_pin;
- if ((ddc->link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
+ if (((ddc->link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
!ddc->link->dc->debug.disable_fixed_vs_aux_timeout_wa &&
ddc->ctx->dce_version == DCN_VERSION_3_1) {
/* Fixed VS workaround for AUX timeout */
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
index 9dabaf682171..44c3023a7731 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c
@@ -1554,7 +1554,7 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link)
/* If this chip cap is set, at least one retimer must exist in the chain
* Override count to 1 if we receive a known bad count (0 or an invalid value) */
- if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
+ if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
(dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) == 0)) {
/* If you see this message consistently, either the host platform has FIXED_VS flag
* incorrectly configured or the sink device is returning an invalid count.
@@ -1632,13 +1632,6 @@ static bool retrieve_link_cap(struct dc_link *link)
sizeof(link->dpcd_caps.lttpr_caps.phy_repeater_cnt));
}
- /* Read DP tunneling information. */
- if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
- status = dpcd_get_tunneling_device_data(link);
- if (status != DC_OK)
- dm_error("%s: Read tunneling device data failed.\n", __func__);
- }
-
dpcd_set_source_specific_data(link);
/* Sink may need to configure internals based on vendor, so allow some
* time before proceeding with possibly vendor specific transactions
@@ -1711,7 +1704,7 @@ static bool retrieve_link_cap(struct dc_link *link)
link->dpcd_caps.dprx_feature.raw = dpcd_dprx_data;
if (status != DC_OK)
- dm_error("%s: Read DPRX caps data failed.\n", __func__);
+ dm_error("%s: Read DPRX feature list failed.\n", __func__);
/* AdaptiveSyncCapability */
dpcd_dprx_data = 0;
@@ -1726,15 +1719,13 @@ static bool retrieve_link_cap(struct dc_link *link)
link->dpcd_caps.adaptive_sync_caps.dp_adap_sync_caps.raw = dpcd_dprx_data;
if (status != DC_OK)
- dm_error("%s: Read DPRX caps data failed. Addr:%#x\n",
+ dm_error("%s: Read DPRX feature list_1 failed. Addr:%#x\n",
__func__, DP_DPRX_FEATURE_ENUMERATION_LIST_CONT_1);
}
-
else {
link->dpcd_caps.dprx_feature.raw = 0;
}
-
/* Error condition checking...
* It is impossible for Sink to report Max Lane Count = 0.
* It is possible for Sink to report Max Link Rate = 0, if it is
@@ -1788,6 +1779,11 @@ static bool retrieve_link_cap(struct dc_link *link)
link->test_pattern_enabled = false;
link->compliance_test_state.raw = 0;
+ link->dpcd_caps.receive_port0_cap.raw[0] =
+ dpcd_data[DP_RECEIVE_PORT_0_CAP_0 - DP_DPCD_REV];
+ link->dpcd_caps.receive_port0_cap.raw[1] =
+ dpcd_data[DP_RECEIVE_PORT_0_BUFFER_SIZE - DP_DPCD_REV];
+
/* read sink count */
core_link_read_dpcd(link,
DP_SINK_COUNT,
@@ -1918,6 +1914,7 @@ static bool retrieve_link_cap(struct dc_link *link)
if (link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) {
DC_LOG_DP2("128b/132b encoding is supported at link %d", link->link_index);
+		/* Read 128b/132b supported link rates */
core_link_read_dpcd(link,
DP_128B132B_SUPPORTED_LINK_RATES,
&link->dpcd_caps.dp_128b_132b_supported_link_rates.raw,
@@ -1965,6 +1962,13 @@ static bool retrieve_link_cap(struct dc_link *link)
link->dpcd_caps.max_uncompressed_pixel_rate_cap.raw,
sizeof(link->dpcd_caps.max_uncompressed_pixel_rate_cap.raw));
+ /* Read DP tunneling information. */
+ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) {
+ status = dpcd_get_tunneling_device_data(link);
+ if (status != DC_OK)
+ dm_error("%s: Read DP tunneling device data failed.\n", __func__);
+ }
+
retrieve_cable_id(link);
dpcd_write_cable_id_to_dprx(link);
@@ -2308,6 +2312,14 @@ bool dp_verify_link_cap_with_retries(
} else {
link->verified_link_cap = last_verified_link_cap;
}
+
+		/* For a DP tunneling link, a pending HPD means that we have a race condition between processing
+		 * the current link and processing the pending HPD. Since the training has failed, we should just
+		 * break out of the loop so that we have a chance to process the pending HPD.
+		 */
+ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && link->is_hpd_pending)
+ break;
+
fsleep(10 * 1000);
}
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c
index 48abeaa88678..a08403c022ea 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_irq_handler.c
@@ -226,6 +226,8 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link)
replay_configuration.bits.STATE_TRANSITION_ERROR_STATUS) {
bool allow_active;
+ link->replay_settings.config.replay_error_status.raw |= replay_error_status.raw;
+
if (link->replay_settings.config.force_disable_desync_error_check)
return;
@@ -237,6 +239,9 @@ static void handle_hpd_irq_replay_sink(struct dc_link *link)
&replay_configuration.raw,
sizeof(replay_configuration.raw));
+ /* Update desync error counter */
+ link->replay_settings.replay_desync_error_fail_count++;
+
/* Acknowledge and clear error bits */
dm_helpers_dp_write_dpcd(
link->ctx,
@@ -408,7 +413,8 @@ bool dp_handle_hpd_rx_irq(struct dc_link *link,
if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.AUTOMATED_TEST) {
// Workaround for DP 1.4a LL Compliance CTS as USB4 has to share encoders unlike DP and USBC
- if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+ if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
+ !link->dc->config.enable_dpia_pre_training)
link->skip_fallback_on_link_loss = true;
device_service_clear.bits.AUTOMATED_TEST = 1;
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c
index bafa52a0165a..2c73ac87cd66 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_phy.c
@@ -104,7 +104,7 @@ void dp_set_hw_lane_settings(
// Don't return here if using FIXED_VS link HWSS and encoding is 128b/132b
if ((link_settings->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) &&
!is_immediate_downstream(link, offset) &&
- (!(link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) ||
+ (!((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) ||
link_dp_get_encoding_format(&link_settings->link_settings) == DP_8b_10b_ENCODING))
return;
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
index 754c895e1bfb..88d4288cde0f 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
@@ -739,7 +739,7 @@ void override_training_settings(
if (overrides->ffe_preset != NULL)
lt_settings->ffe_preset = overrides->ffe_preset;
/* Override HW lane settings with BIOS forced values if present */
- if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
+ if ((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) &&
lt_settings->lttpr_mode == LTTPR_MODE_TRANSPARENT) {
lt_settings->voltage_swing = &link->bios_forced_drive_settings.VOLTAGE_SWING;
lt_settings->pre_emphasis = &link->bios_forced_drive_settings.PRE_EMPHASIS;
@@ -1574,7 +1574,7 @@ enum link_training_result dp_perform_link_training(
* Per DP specs starting from here, DPTX device shall not issue
* Non-LT AUX transactions inside training mode.
*/
- if ((link->chip_caps & EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && encoding == DP_8b_10b_ENCODING)
+ if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && encoding == DP_8b_10b_ENCODING)
status = dp_perform_fixed_vs_pe_training_sequence(link, link_res, &lt_settings);
else if (encoding == DP_8b_10b_ENCODING)
status = dp_perform_8b_10b_link_training(link, link_res, &lt_settings);
diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c
index fe26fde12eeb..85298b8a1b5e 100644
--- a/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c
@@ -110,6 +110,23 @@ void mpc3_disable_dwb_mux(
MPC_DWB0_MUX, 0xf);
}
+void mpc3_set_out_rate_control(
+ struct mpc *mpc,
+ int opp_id,
+ bool enable,
+ bool rate_2x_mode,
+ struct mpc_dwb_flow_control *flow_control)
+{
+ struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
+
+ /* Always disable mpc out rate and flow control.
+ * MPC flow rate control is not needed for DCN30 and above.
+ */
+ REG_UPDATE_2(MUX[opp_id],
+ MPC_OUT_RATE_CONTROL_DISABLE, 1,
+ MPC_OUT_RATE_CONTROL, 0);
+}
+
enum dc_lut_mode mpc3_get_ogam_current(struct mpc *mpc, int mpcc_id)
{
/*Contrary to DCN2 and DCN1 wherein a single status register field holds this info;
@@ -1519,6 +1536,7 @@ static const struct mpc_funcs dcn30_mpc_funcs = {
.set_dwb_mux = mpc3_set_dwb_mux,
.disable_dwb_mux = mpc3_disable_dwb_mux,
.is_dwb_idle = mpc3_is_dwb_idle,
+ .set_out_rate_control = mpc3_set_out_rate_control,
.set_gamut_remap = mpc3_set_gamut_remap,
.program_shaper = mpc3_program_shaper,
.acquire_rmu = mpcc3_acquire_rmu,
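
A hedged usage sketch for the newly wired hook (not part of the patch), assuming a hardware sequencer that has already resolved the opp instance for a pipe; since DCN30 and above never need MPC output rate/flow control, the remaining arguments only exist to keep the common mpc_funcs signature and are ignored by mpc3_set_out_rate_control():

static void example_disable_mpc_out_rate_control(struct dc *dc, int opp_id)
{
	struct mpc *mpc = dc->res_pool->mpc;

	/* Unconditionally disable out rate and flow control on this opp */
	if (mpc->funcs->set_out_rate_control)
		mpc->funcs->set_out_rate_control(mpc, opp_id, false, false, NULL);
}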
diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h
index ce93003dae01..103f29900a2c 100644
--- a/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h
@@ -1085,6 +1085,13 @@ bool mpc3_is_dwb_idle(
struct mpc *mpc,
int dwb_id);
+void mpc3_set_out_rate_control(
+ struct mpc *mpc,
+ int opp_id,
+ bool enable,
+ bool rate_2x_mode,
+ struct mpc_dwb_flow_control *flow_control);
+
void mpc3_power_on_ogam_lut(
struct mpc *mpc, int mpcc_id,
bool power_on);
diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c
index 097d06023e64..19d5ebc6763c 100644
--- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.c
@@ -302,7 +302,6 @@ void optc1_program_timing(
/* Enable stereo - only when we need to pack 3D frame. Other types
* of stereo handled in explicit call
*/
-
if (optc->funcs->is_two_pixels_per_container(&patched_crtc_timing) || optc1->opp_count == 2)
h_div = H_TIMING_DIV_BY2;
@@ -1471,37 +1470,71 @@ bool optc1_configure_crc(struct timing_generator *optc,
if (!optc1_is_tg_enabled(optc))
return false;
- REG_WRITE(OTG_CRC_CNTL, 0);
+ if (!params->enable || params->reset)
+ REG_WRITE(OTG_CRC_CNTL, 0);
if (!params->enable)
return true;
/* Program frame boundaries */
- /* Window A x axis start and end. */
- REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL,
- OTG_CRC0_WINDOWA_X_START, params->windowa_x_start,
- OTG_CRC0_WINDOWA_X_END, params->windowa_x_end);
-
- /* Window A y axis start and end. */
- REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL,
- OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start,
- OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end);
-
- /* Window B x axis start and end. */
- REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL,
- OTG_CRC0_WINDOWB_X_START, params->windowb_x_start,
- OTG_CRC0_WINDOWB_X_END, params->windowb_x_end);
-
- /* Window B y axis start and end. */
- REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL,
- OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start,
- OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end);
-
- /* Set crc mode and selection, and enable. Only using CRC0*/
- REG_UPDATE_3(OTG_CRC_CNTL,
- OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
- OTG_CRC0_SELECT, params->selection,
- OTG_CRC_EN, 1);
+ switch (params->crc_eng_inst) {
+ case 0:
+ /* Window A x axis start and end. */
+ REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL,
+ OTG_CRC0_WINDOWA_X_START, params->windowa_x_start,
+ OTG_CRC0_WINDOWA_X_END, params->windowa_x_end);
+
+ /* Window A y axis start and end. */
+ REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL,
+ OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start,
+ OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end);
+
+ /* Window B x axis start and end. */
+ REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL,
+ OTG_CRC0_WINDOWB_X_START, params->windowb_x_start,
+ OTG_CRC0_WINDOWB_X_END, params->windowb_x_end);
+
+ /* Window B y axis start and end. */
+ REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL,
+ OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start,
+ OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end);
+
+		/* Set crc mode and selection, and enable. */
+ REG_UPDATE_3(OTG_CRC_CNTL,
+ OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
+ OTG_CRC0_SELECT, params->selection,
+ OTG_CRC_EN, 1);
+ break;
+ case 1:
+ /* Window A x axis start and end. */
+ REG_UPDATE_2(OTG_CRC1_WINDOWA_X_CONTROL,
+ OTG_CRC1_WINDOWA_X_START, params->windowa_x_start,
+ OTG_CRC1_WINDOWA_X_END, params->windowa_x_end);
+
+ /* Window A y axis start and end. */
+ REG_UPDATE_2(OTG_CRC1_WINDOWA_Y_CONTROL,
+ OTG_CRC1_WINDOWA_Y_START, params->windowa_y_start,
+ OTG_CRC1_WINDOWA_Y_END, params->windowa_y_end);
+
+ /* Window B x axis start and end. */
+ REG_UPDATE_2(OTG_CRC1_WINDOWB_X_CONTROL,
+ OTG_CRC1_WINDOWB_X_START, params->windowb_x_start,
+ OTG_CRC1_WINDOWB_X_END, params->windowb_x_end);
+
+ /* Window B y axis start and end. */
+ REG_UPDATE_2(OTG_CRC1_WINDOWB_Y_CONTROL,
+ OTG_CRC1_WINDOWB_Y_START, params->windowb_y_start,
+ OTG_CRC1_WINDOWB_Y_END, params->windowb_y_end);
+
+ /* Set crc mode and selection, and enable.*/
+ REG_UPDATE_3(OTG_CRC_CNTL,
+ OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
+ OTG_CRC1_SELECT, params->selection,
+ OTG_CRC_EN, 1);
+ break;
+ default:
+ return false;
+ }
return true;
}
@@ -1510,6 +1543,7 @@ bool optc1_configure_crc(struct timing_generator *optc,
* optc1_get_crc - Capture CRC result per component
*
* @optc: timing_generator instance.
+ * @idx: index of crc engine to get CRC from
* @r_cr: 16-bit primary CRC signature for red data.
* @g_y: 16-bit primary CRC signature for green data.
* @b_cb: 16-bit primary CRC signature for blue data.
@@ -1521,7 +1555,7 @@ bool optc1_configure_crc(struct timing_generator *optc,
* If CRC is disabled, return false; otherwise, return true, and the CRC
* results in the parameters.
*/
-bool optc1_get_crc(struct timing_generator *optc,
+bool optc1_get_crc(struct timing_generator *optc, uint8_t idx,
uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb)
{
uint32_t field = 0;
@@ -1533,14 +1567,30 @@ bool optc1_get_crc(struct timing_generator *optc,
if (!field)
return false;
- /* OTG_CRC0_DATA_RG has the CRC16 results for the red and green component */
- REG_GET_2(OTG_CRC0_DATA_RG,
- CRC0_R_CR, r_cr,
- CRC0_G_Y, g_y);
+ switch (idx) {
+ case 0:
+ /* OTG_CRC0_DATA_RG has the CRC16 results for the red and green component */
+ REG_GET_2(OTG_CRC0_DATA_RG,
+ CRC0_R_CR, r_cr,
+ CRC0_G_Y, g_y);
- /* OTG_CRC0_DATA_B has the CRC16 results for the blue component */
- REG_GET(OTG_CRC0_DATA_B,
- CRC0_B_CB, b_cb);
+ /* OTG_CRC0_DATA_B has the CRC16 results for the blue component */
+ REG_GET(OTG_CRC0_DATA_B,
+ CRC0_B_CB, b_cb);
+ break;
+ case 1:
+ /* OTG_CRC1_DATA_RG has the CRC16 results for the red and green component */
+ REG_GET_2(OTG_CRC1_DATA_RG,
+ CRC1_R_CR, r_cr,
+ CRC1_G_Y, g_y);
+
+ /* OTG_CRC1_DATA_B has the CRC16 results for the blue component */
+ REG_GET(OTG_CRC1_DATA_B,
+ CRC1_B_CB, b_cb);
+ break;
+ default:
+ return false;
+ }
return true;
}
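
Note: with this change each OTG exposes two CRC engines; crc_params->crc_eng_inst selects the CRC0 or CRC1 window/select registers, and optc1_get_crc() now takes an engine index for readback. Clearing OTG_CRC_CNTL also becomes conditional on disable/reset, presumably so programming one engine does not wipe the other's setup. A minimal caller sketch — the tg handle, the UNION_WINDOW_A_B selection value, and the assumption that the timing_generator get_crc hook mirrors the new optc1_get_crc() signature are illustrative, not part of this patch:

    struct crc_params params = { 0 };
    uint32_t r_cr, g_y, b_cb;

    params.enable = true;
    params.continuous_mode = true;
    params.selection = UNION_WINDOW_A_B;  /* assumed selection value */
    params.crc_eng_inst = 1;              /* use the second engine added here */

    if (tg->funcs->configure_crc(tg, &params) &&
        tg->funcs->get_crc(tg, params.crc_eng_inst, &r_cr, &g_y, &b_cb))
        ;  /* r_cr/g_y/b_cb hold the CRC16 signatures from engine 1 */
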
diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h
index 40757f20d73f..159172178d51 100644
--- a/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn10/dcn10_optc.h
@@ -86,6 +86,12 @@
SRI(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst),\
SRI(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst),\
SRI(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst),\
+ SRI(OTG_CRC1_DATA_RG, OTG, inst),\
+ SRI(OTG_CRC1_DATA_B, OTG, inst),\
+ SRI(OTG_CRC1_WINDOWA_X_CONTROL, OTG, inst),\
+ SRI(OTG_CRC1_WINDOWA_Y_CONTROL, OTG, inst),\
+ SRI(OTG_CRC1_WINDOWB_X_CONTROL, OTG, inst),\
+ SRI(OTG_CRC1_WINDOWB_Y_CONTROL, OTG, inst),\
SR(GSL_SOURCE_SELECT),\
SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\
SRI(OTG_TRIGA_MANUAL_TRIG, OTG, inst)
@@ -315,6 +321,7 @@ struct dcn_optc_registers {
SF(OTG0_OTG_GSL_CONTROL, OTG_GSL_CHECK_ALL_FIELDS, mask_sh),\
SF(OTG0_OTG_CRC_CNTL, OTG_CRC_CONT_EN, mask_sh),\
SF(OTG0_OTG_CRC_CNTL, OTG_CRC0_SELECT, mask_sh),\
+ SF(OTG0_OTG_CRC_CNTL, OTG_CRC1_SELECT, mask_sh),\
SF(OTG0_OTG_CRC_CNTL, OTG_CRC_EN, mask_sh),\
SF(OTG0_OTG_CRC0_DATA_RG, CRC0_R_CR, mask_sh),\
SF(OTG0_OTG_CRC0_DATA_RG, CRC0_G_Y, mask_sh),\
@@ -327,6 +334,17 @@ struct dcn_optc_registers {
SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_END, mask_sh),\
SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_START, mask_sh),\
SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_END, mask_sh),\
+ SF(OTG0_OTG_CRC1_DATA_RG, CRC1_R_CR, mask_sh),\
+ SF(OTG0_OTG_CRC1_DATA_RG, CRC1_G_Y, mask_sh),\
+ SF(OTG0_OTG_CRC1_DATA_B, CRC1_B_CB, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL, OTG_CRC1_WINDOWA_X_START, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWA_X_CONTROL, OTG_CRC1_WINDOWA_X_END, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL, OTG_CRC1_WINDOWA_Y_START, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWA_Y_CONTROL, OTG_CRC1_WINDOWA_Y_END, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL, OTG_CRC1_WINDOWB_X_START, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWB_X_CONTROL, OTG_CRC1_WINDOWB_X_END, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL, OTG_CRC1_WINDOWB_Y_START, mask_sh),\
+ SF(OTG0_OTG_CRC1_WINDOWB_Y_CONTROL, OTG_CRC1_WINDOWB_Y_END, mask_sh),\
SF(GSL_SOURCE_SELECT, GSL0_READY_SOURCE_SEL, mask_sh),\
SF(GSL_SOURCE_SELECT, GSL1_READY_SOURCE_SEL, mask_sh),\
SF(GSL_SOURCE_SELECT, GSL2_READY_SOURCE_SEL, mask_sh),\
@@ -482,6 +500,7 @@ struct dcn_optc_registers {
type OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN;\
type OTG_CRC_CONT_EN;\
type OTG_CRC0_SELECT;\
+ type OTG_CRC1_SELECT;\
type OTG_CRC_EN;\
type CRC0_R_CR;\
type CRC0_G_Y;\
diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
index dfa9364fe5a6..d21e82b927d0 100644
--- a/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn35/dcn35_optc.c
@@ -183,34 +183,87 @@ static bool optc35_configure_crc(struct timing_generator *optc,
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
+ /* Cannot configure crc on a CRTC that is disabled */
if (!optc1_is_tg_enabled(optc))
return false;
- REG_WRITE(OTG_CRC_CNTL, 0);
+
+ if (!params->enable || params->reset)
+ REG_WRITE(OTG_CRC_CNTL, 0);
+
if (!params->enable)
return true;
- REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL,
- OTG_CRC0_WINDOWA_X_START, params->windowa_x_start,
- OTG_CRC0_WINDOWA_X_END, params->windowa_x_end);
- REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL,
- OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start,
- OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end);
- REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL,
- OTG_CRC0_WINDOWB_X_START, params->windowb_x_start,
- OTG_CRC0_WINDOWB_X_END, params->windowb_x_end);
- REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL,
- OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start,
- OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end);
- if (optc1->base.ctx->dc->debug.otg_crc_db && optc1->tg_mask->OTG_CRC_WINDOW_DB_EN != 0) {
- REG_UPDATE_4(OTG_CRC_CNTL,
- OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
- OTG_CRC0_SELECT, params->selection,
- OTG_CRC_EN, 1,
- OTG_CRC_WINDOW_DB_EN, 1);
- } else
- REG_UPDATE_3(OTG_CRC_CNTL,
- OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
- OTG_CRC0_SELECT, params->selection,
- OTG_CRC_EN, 1);
+
+ /* Program frame boundaries */
+ switch (params->crc_eng_inst) {
+ case 0:
+ /* Window A x axis start and end. */
+ REG_UPDATE_2(OTG_CRC0_WINDOWA_X_CONTROL,
+ OTG_CRC0_WINDOWA_X_START, params->windowa_x_start,
+ OTG_CRC0_WINDOWA_X_END, params->windowa_x_end);
+
+ /* Window A y axis start and end. */
+ REG_UPDATE_2(OTG_CRC0_WINDOWA_Y_CONTROL,
+ OTG_CRC0_WINDOWA_Y_START, params->windowa_y_start,
+ OTG_CRC0_WINDOWA_Y_END, params->windowa_y_end);
+
+ /* Window B x axis start and end. */
+ REG_UPDATE_2(OTG_CRC0_WINDOWB_X_CONTROL,
+ OTG_CRC0_WINDOWB_X_START, params->windowb_x_start,
+ OTG_CRC0_WINDOWB_X_END, params->windowb_x_end);
+
+ /* Window B y axis start and end. */
+ REG_UPDATE_2(OTG_CRC0_WINDOWB_Y_CONTROL,
+ OTG_CRC0_WINDOWB_Y_START, params->windowb_y_start,
+ OTG_CRC0_WINDOWB_Y_END, params->windowb_y_end);
+
+ if (optc1->base.ctx->dc->debug.otg_crc_db && optc1->tg_mask->OTG_CRC_WINDOW_DB_EN != 0)
+ REG_UPDATE_4(OTG_CRC_CNTL,
+ OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
+ OTG_CRC0_SELECT, params->selection,
+ OTG_CRC_EN, 1,
+ OTG_CRC_WINDOW_DB_EN, 1);
+ else
+ REG_UPDATE_3(OTG_CRC_CNTL,
+ OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
+ OTG_CRC0_SELECT, params->selection,
+ OTG_CRC_EN, 1);
+ break;
+ case 1:
+ /* Window A x axis start and end. */
+ REG_UPDATE_2(OTG_CRC1_WINDOWA_X_CONTROL,
+ OTG_CRC1_WINDOWA_X_START, params->windowa_x_start,
+ OTG_CRC1_WINDOWA_X_END, params->windowa_x_end);
+
+ /* Window A y axis start and end. */
+ REG_UPDATE_2(OTG_CRC1_WINDOWA_Y_CONTROL,
+ OTG_CRC1_WINDOWA_Y_START, params->windowa_y_start,
+ OTG_CRC1_WINDOWA_Y_END, params->windowa_y_end);
+
+ /* Window B x axis start and end. */
+ REG_UPDATE_2(OTG_CRC1_WINDOWB_X_CONTROL,
+ OTG_CRC1_WINDOWB_X_START, params->windowb_x_start,
+ OTG_CRC1_WINDOWB_X_END, params->windowb_x_end);
+
+ /* Window B y axis start and end. */
+ REG_UPDATE_2(OTG_CRC1_WINDOWB_Y_CONTROL,
+ OTG_CRC1_WINDOWB_Y_START, params->windowb_y_start,
+ OTG_CRC1_WINDOWB_Y_END, params->windowb_y_end);
+
+ if (optc1->base.ctx->dc->debug.otg_crc_db && optc1->tg_mask->OTG_CRC_WINDOW_DB_EN != 0)
+ REG_UPDATE_4(OTG_CRC_CNTL,
+ OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
+ OTG_CRC1_SELECT, params->selection,
+ OTG_CRC_EN, 1,
+ OTG_CRC_WINDOW_DB_EN, 1);
+ else
+ REG_UPDATE_3(OTG_CRC_CNTL,
+ OTG_CRC_CONT_EN, params->continuous_mode ? 1 : 0,
+ OTG_CRC1_SELECT, params->selection,
+ OTG_CRC_EN, 1);
+ break;
+ default:
+ return false;
+ }
return true;
}
diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c
index 783ca9acc762..338a0cad23a5 100644
--- a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c
@@ -315,7 +315,7 @@ void optc401_set_drr(
struct drr_params amended_params = { 0 };
bool program_manual_trigger = false;
- if (dc->caps.dmub_caps.fams_ver >= 2 && dc->debug.fams2_config.bits.enable) {
+ if (dc->caps.dmub_caps.fams_ver == dc->debug.fams_version.ver && dc->debug.fams2_config.bits.enable) {
if (params != NULL &&
params->vertical_total_max > 0 &&
params->vertical_total_min > 0) {
@@ -380,7 +380,7 @@ void optc401_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, i
{
struct dc *dc = optc->ctx->dc;
- if (dc->caps.dmub_caps.fams_ver >= 2 && dc->debug.fams2_config.bits.enable) {
+ if (dc->caps.dmub_caps.fams_ver == dc->debug.fams_version.ver && dc->debug.fams2_config.bits.enable) {
/* FAMS2 */
dc_dmub_srv_fams2_drr_update(dc, optc->inst,
vtotal_min,
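
Note: the FAMS2 gate changes from a ">= 2" firmware-version check to an exact match against the driver's packed debug version. With the dcn401 default of major 2 / minor 1 (set in debug_defaults_drv later in this patch) and the fw_assisted_mclk_switch_version union added in dmub_cmd.h (minor in bits 0-4, major in bits 5-7), the comparison works out as below; the arithmetic is illustrative only:

    union fw_assisted_mclk_switch_version v = { .minor = 1, .major = 2 };
    /* v.ver == (2 << 5) | 1 == 0x41; the FAMS2 paths above now run only when
     * dc->caps.dmub_caps.fams_ver reports this same packed byte. */
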
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c
index 770a380cc03d..e92f14d50adb 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c
@@ -1258,6 +1258,11 @@ struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link(
return NULL;
}
+unsigned int dcn10_get_vstartup_for_pipe(struct pipe_ctx *pipe_ctx)
+{
+ return pipe_ctx->pipe_dlg_param.vstartup_start;
+}
+
static const struct dc_cap_funcs cap_funcs = {
.get_dcc_compression_cap = dcn10_get_dcc_compression_cap
};
@@ -1272,7 +1277,8 @@ static const struct resource_funcs dcn10_res_pool_funcs = {
.validate_global = dcn10_validate_global,
.add_stream_to_ctx = dcn10_add_stream_to_ctx,
.patch_unknown_plane_state = dcn10_patch_unknown_plane_state,
- .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link
+ .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
static uint32_t read_pipe_fuses(struct dc_context *ctx)
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h
index bf8e33cd8147..7bc1be53e800 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.h
@@ -51,6 +51,7 @@ struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link(
const struct resource_pool *pool,
struct dc_stream_state *stream);
+unsigned int dcn10_get_vstartup_for_pipe(struct pipe_ctx *pipe_ctx);
#endif /* __DC_RESOURCE_DCN10_H__ */
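
Note: get_vstartup_for_pipe is a new resource_funcs hook. The DCN1.x-3.x pools wire it to the DML pipe_dlg_param value above, while dcn401 (further down in this patch) returns global_sync.dcn4x.vstartup_lines instead. A hedged sketch of how a caller might consume it; the helper name and NULL-check fallback are assumptions, not taken from this patch:

    static unsigned int get_pipe_vstartup(struct dc *dc, struct pipe_ctx *pipe_ctx)
    {
            if (dc->res_pool->funcs->get_vstartup_for_pipe)
                    return dc->res_pool->funcs->get_vstartup_for_pipe(pipe_ctx);
            return pipe_ctx->pipe_dlg_param.vstartup_start; /* legacy path */
    }
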
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
index 189d0c85872e..5c6dc710e96c 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
@@ -1509,41 +1509,12 @@ bool dcn20_split_stream_for_odm(
next_odm_pipe->prev_odm_pipe = prev_odm_pipe;
if (prev_odm_pipe->plane_state) {
- struct scaler_data *sd = &prev_odm_pipe->plane_res.scl_data;
- int new_width;
-
- /* HACTIVE halved for odm combine */
- sd->h_active /= 2;
- /* Calculate new vp and recout for left pipe */
- /* Need at least 16 pixels width per side */
- if (sd->recout.x + 16 >= sd->h_active)
- return false;
- new_width = sd->h_active - sd->recout.x;
- sd->viewport.width -= dc_fixpt_floor(dc_fixpt_mul_int(
- sd->ratios.horz, sd->recout.width - new_width));
- sd->viewport_c.width -= dc_fixpt_floor(dc_fixpt_mul_int(
- sd->ratios.horz_c, sd->recout.width - new_width));
- sd->recout.width = new_width;
-
- /* Calculate new vp and recout for right pipe */
- sd = &next_odm_pipe->plane_res.scl_data;
- /* HACTIVE halved for odm combine */
- sd->h_active /= 2;
- /* Need at least 16 pixels width per side */
- if (new_width <= 16)
- return false;
- new_width = sd->recout.width + sd->recout.x - sd->h_active;
- sd->viewport.width -= dc_fixpt_floor(dc_fixpt_mul_int(
- sd->ratios.horz, sd->recout.width - new_width));
- sd->viewport_c.width -= dc_fixpt_floor(dc_fixpt_mul_int(
- sd->ratios.horz_c, sd->recout.width - new_width));
- sd->recout.width = new_width;
- sd->viewport.x += dc_fixpt_floor(dc_fixpt_mul_int(
- sd->ratios.horz, sd->h_active - sd->recout.x));
- sd->viewport_c.x += dc_fixpt_floor(dc_fixpt_mul_int(
- sd->ratios.horz_c, sd->h_active - sd->recout.x));
- sd->recout.x = 0;
+ if (!resource_build_scaling_params(prev_odm_pipe) ||
+ !resource_build_scaling_params(next_odm_pipe)) {
+ return false;
+ }
}
+
if (!next_odm_pipe->top_pipe)
next_odm_pipe->stream_res.opp = pool->opps[next_odm_pipe->pipe_idx];
else
@@ -2132,6 +2103,7 @@ bool dcn20_fast_validate_bw(
ASSERT(0);
}
}
+
/* Actual dsc count per stream dsc validation*/
if (!dcn20_validate_dsc(dc, context)) {
context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states] =
@@ -2257,7 +2229,8 @@ static const struct resource_funcs dcn20_res_pool_funcs = {
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.set_mcif_arb_params = dcn20_set_mcif_arb_params,
.populate_dml_pipes = dcn20_populate_dml_pipes_from_context,
- .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link
+ .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
bool dcn20_dwbc_create(struct dc_context *ctx, struct resource_pool *pool)
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c
index d3d67d366523..43fa2cb117f3 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c
@@ -59,8 +59,8 @@
#include "cyan_skillfish_ip_offset.h"
-#include "dcn/dcn_2_0_3_offset.h"
-#include "dcn/dcn_2_0_3_sh_mask.h"
+#include "dcn/dcn_2_0_1_offset.h"
+#include "dcn/dcn_2_0_1_sh_mask.h"
#include "dpcs/dpcs_2_0_3_offset.h"
#include "dpcs/dpcs_2_0_3_sh_mask.h"
@@ -1079,7 +1079,8 @@ static struct resource_funcs dcn201_res_pool_funcs = {
.populate_dml_writeback_from_context = dcn201_populate_dml_writeback_from_context,
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.set_mcif_arb_params = dcn20_set_mcif_arb_params,
- .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link
+ .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
static bool dcn201_resource_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c
index 021ba8ac5c8c..2615c36d5ffe 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c
@@ -1378,6 +1378,7 @@ static const struct resource_funcs dcn21_res_pool_funcs = {
.find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link,
.update_bw_bounding_box = dcn21_update_bw_bounding_box,
.get_panel_config_defaults = dcn21_get_panel_config_defaults,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
static bool dcn21_resource_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
index cd31e4f16c14..13202ce30d66 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
@@ -2250,6 +2250,7 @@ static const struct resource_funcs dcn30_res_pool_funcs = {
.update_bw_bounding_box = dcn30_update_bw_bounding_box,
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.get_panel_config_defaults = dcn30_get_panel_config_defaults,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
#define CTX ctx
@@ -2353,6 +2354,7 @@ static bool dcn30_resource_construct(
dc->caps.dp_hdmi21_pcon_support = true;
dc->caps.max_v_total = (1 << 15) - 1;
+ dc->caps.vtotal_limited_by_fp2 = true;
/* read VBIOS LTTPR caps */
{
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
index a9816affd312..121a86a59833 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c
@@ -671,9 +671,9 @@ static const struct dc_plane_cap plane_cap = {
/* 6:1 downscaling ratio: 1000/6 = 166.666 */
.max_downscale_factor = {
- .argb8888 = 167,
- .nv12 = 167,
- .fp16 = 167
+ .argb8888 = 358,
+ .nv12 = 358,
+ .fp16 = 358
},
64,
64
@@ -693,7 +693,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.disable_dcc = DCC_ENABLE,
.vsr_support = true,
.performance_trace = false,
- .max_downscale_src_width = 7680,/*upto 8K*/
+ .max_downscale_src_width = 4096,/*upto true 4k*/
.scl_reset_length10 = true,
.sanity_checks = false,
.underflow_assert_delay_us = 0xFFFFFFFF,
@@ -1400,7 +1400,8 @@ static struct resource_funcs dcn301_res_pool_funcs = {
.acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut,
.release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut,
.update_bw_bounding_box = dcn301_update_bw_bounding_box,
- .patch_unknown_plane_state = dcn20_patch_unknown_plane_state
+ .patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
static bool dcn301_resource_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
index 02af8b8f4d27..012c5fd52cb1 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
@@ -1151,6 +1151,7 @@ static struct resource_funcs dcn302_res_pool_funcs = {
.update_bw_bounding_box = dcn302_update_bw_bounding_box,
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.get_panel_config_defaults = dcn302_get_panel_config_defaults,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
static struct dc_cap_funcs cap_funcs = {
@@ -1233,6 +1234,7 @@ static bool dcn302_resource_construct(
dc->caps.extended_aux_timeout_support = true;
dc->caps.dmcub_support = true;
dc->caps.max_v_total = (1 << 15) - 1;
+ dc->caps.vtotal_limited_by_fp2 = true;
/* Color pipeline capabilities */
dc->caps.color.dpp.dcn_arch = 1;
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
index 7002a8dd358a..a8d0b4686f9a 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
@@ -1096,6 +1096,7 @@ static struct resource_funcs dcn303_res_pool_funcs = {
.update_bw_bounding_box = dcn303_update_bw_bounding_box,
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.get_panel_config_defaults = dcn303_get_panel_config_defaults,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
static struct dc_cap_funcs cap_funcs = {
@@ -1178,6 +1179,7 @@ static bool dcn303_resource_construct(
dc->caps.extended_aux_timeout_support = true;
dc->caps.dmcub_support = true;
dc->caps.max_v_total = (1 << 15) - 1;
+ dc->caps.vtotal_limited_by_fp2 = true;
/* Color pipeline capabilities */
dc->caps.color.dpp.dcn_arch = 1;
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
index c16cf1c8f7f9..911bd60d4fbc 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c
@@ -1720,6 +1720,12 @@ int dcn31_populate_dml_pipes_from_context(
return pipe_cnt;
}
+unsigned int dcn31_get_det_buffer_size(
+ const struct dc_state *context)
+{
+ return context->bw_ctx.dml.ip.det_buffer_size_kbytes;
+}
+
void dcn31_calculate_wm_and_dlg(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
@@ -1842,6 +1848,8 @@ static struct resource_funcs dcn31_res_pool_funcs = {
.update_bw_bounding_box = dcn31_update_bw_bounding_box,
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.get_panel_config_defaults = dcn31_get_panel_config_defaults,
+ .get_det_buffer_size = dcn31_get_det_buffer_size,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
static struct clock_source *dcn30_clock_source_create(
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h
index 901436591ed4..551ad912f7be 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.h
@@ -63,6 +63,9 @@ struct resource_pool *dcn31_create_resource_pool(
const struct dc_init_data *init_data,
struct dc *dc);
+unsigned int dcn31_get_det_buffer_size(
+ const struct dc_state *context);
+
/*temp: B0 specific before switch to dcn313 headers*/
#ifndef regPHYPLLF_PIXCLK_RESYNC_CNTL
#define regPHYPLLF_PIXCLK_RESYNC_CNTL 0x007e
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
index c0f48c78e968..e3ba105034f8 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c
@@ -1777,6 +1777,8 @@ static struct resource_funcs dcn314_res_pool_funcs = {
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.get_panel_config_defaults = dcn314_get_panel_config_defaults,
.get_preferred_eng_id_dpia = dcn314_get_preferred_eng_id_dpia,
+ .get_det_buffer_size = dcn31_get_det_buffer_size,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
static struct clock_source *dcn30_clock_source_create(
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
index 6c3295259a81..14acef036b5a 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c
@@ -1845,6 +1845,8 @@ static struct resource_funcs dcn315_res_pool_funcs = {
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.get_panel_config_defaults = dcn315_get_panel_config_defaults,
.get_power_profile = dcn315_get_power_profile,
+ .get_det_buffer_size = dcn31_get_det_buffer_size,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
static bool dcn315_resource_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c
index 6edaaadcb173..568094827212 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c
@@ -1719,6 +1719,8 @@ static struct resource_funcs dcn316_res_pool_funcs = {
.update_bw_bounding_box = dcn316_update_bw_bounding_box,
.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
.get_panel_config_defaults = dcn316_get_panel_config_defaults,
+ .get_det_buffer_size = dcn31_get_det_buffer_size,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
static bool dcn316_resource_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
index 01d1a11d5545..664302876019 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
@@ -2066,6 +2066,7 @@ static struct resource_funcs dcn32_res_pool_funcs = {
.add_phantom_pipes = dcn32_add_phantom_pipes,
.build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params,
.calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
static uint32_t read_pipe_fuses(struct dc_context *ctx)
@@ -2189,6 +2190,7 @@ static bool dcn32_resource_construct(
dc->caps.dmcub_support = true;
dc->caps.seamless_odm = true;
dc->caps.max_v_total = (1 << 15) - 1;
+ dc->caps.vtotal_limited_by_fp2 = true;
/* Color pipeline capabilities */
dc->caps.color.dpp.dcn_arch = 1;
@@ -2803,6 +2805,7 @@ struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_opp_head(
free_pipe->plane_res.xfm = pool->transforms[free_pipe_idx];
free_pipe->plane_res.dpp = pool->dpps[free_pipe_idx];
free_pipe->plane_res.mpcc_inst = pool->dpps[free_pipe_idx]->inst;
+ free_pipe->hblank_borrow = otg_master->hblank_borrow;
if (free_pipe->stream->timing.flags.DSC == 1) {
dcn20_acquire_dsc(free_pipe->stream->ctx->dc,
&new_ctx->res_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
index 5cb74fd9cb7d..38d76434683e 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
@@ -1624,6 +1624,7 @@ static struct resource_funcs dcn321_res_pool_funcs = {
.add_phantom_pipes = dcn32_add_phantom_pipes,
.build_pipe_pix_clk_params = dcn20_build_pipe_pix_clk_params,
.calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
static uint32_t read_pipe_fuses(struct dc_context *ctx)
@@ -1742,6 +1743,7 @@ static bool dcn321_resource_construct(
dc->caps.extended_aux_timeout_support = true;
dc->caps.dmcub_support = true;
dc->caps.max_v_total = (1 << 15) - 1;
+ dc->caps.vtotal_limited_by_fp2 = true;
/* Color pipeline capabilities */
dc->caps.color.dpp.dcn_arch = 1;
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
index 6cc2960b6104..8ee3d99ea2aa 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
@@ -1752,6 +1752,13 @@ static bool dcn35_validate_bandwidth(struct dc *dc,
return out;
}
+enum dc_status dcn35_patch_unknown_plane_state(struct dc_plane_state *plane_state)
+{
+ plane_state->tiling_info.gfxversion = DcGfxVersion9;
+ dcn20_patch_unknown_plane_state(plane_state);
+ return DC_OK;
+}
+
static struct resource_funcs dcn35_res_pool_funcs = {
.destroy = dcn35_destroy_resource_pool,
@@ -1775,9 +1782,11 @@ static struct resource_funcs dcn35_res_pool_funcs = {
.acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut,
.release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut,
.update_bw_bounding_box = dcn35_update_bw_bounding_box_fpu,
- .patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
+ .patch_unknown_plane_state = dcn35_patch_unknown_plane_state,
.get_panel_config_defaults = dcn35_get_panel_config_defaults,
.get_preferred_eng_id_dpia = dcn35_get_preferred_eng_id_dpia,
+ .get_det_buffer_size = dcn31_get_det_buffer_size,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
static bool dcn35_resource_construct(
@@ -1849,6 +1858,7 @@ static bool dcn35_resource_construct(
dc->caps.zstate_support = true;
dc->caps.ips_support = true;
dc->caps.max_v_total = (1 << 15) - 1;
+ dc->caps.vtotal_limited_by_fp2 = true;
/* Color pipeline capabilities */
dc->caps.color.dpp.dcn_arch = 1;
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h
index f97bb4cb3761..9d03a55d90cf 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.h
@@ -35,6 +35,7 @@
extern struct _vcs_dpi_ip_params_st dcn3_5_ip;
extern struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc;
+enum dc_status dcn35_patch_unknown_plane_state(struct dc_plane_state *plane_state);
struct dcn35_resource_pool {
struct resource_pool base;
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
index d87e2641cda1..14f7c3acdc96 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
@@ -1754,9 +1754,11 @@ static struct resource_funcs dcn351_res_pool_funcs = {
.acquire_post_bldn_3dlut = dcn30_acquire_post_bldn_3dlut,
.release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut,
.update_bw_bounding_box = dcn351_update_bw_bounding_box_fpu,
- .patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
+ .patch_unknown_plane_state = dcn35_patch_unknown_plane_state,
.get_panel_config_defaults = dcn35_get_panel_config_defaults,
.get_preferred_eng_id_dpia = dcn351_get_preferred_eng_id_dpia,
+ .get_det_buffer_size = dcn31_get_det_buffer_size,
+ .get_vstartup_for_pipe = dcn10_get_vstartup_for_pipe
};
static bool dcn351_resource_construct(
@@ -1828,6 +1830,7 @@ static bool dcn351_resource_construct(
dc->caps.zstate_support = true;
dc->caps.ips_support = true;
dc->caps.max_v_total = (1 << 15) - 1;
+ dc->caps.vtotal_limited_by_fp2 = true;
/* Color pipeline capabilities */
dc->caps.color.dpp.dcn_arch = 1;
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
index db93bac247c0..c1ebc6b1c937 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
@@ -726,6 +726,10 @@ static const struct dc_debug_options debug_defaults_drv = {
.disable_unbounded_requesting = false,
.enable_legacy_fast_update = false,
.dcc_meta_propagation_delay_us = 10,
+ .fams_version = {
+ .minor = 1,
+ .major = 2,
+ }, //v2.1
.fams2_config = {
.bits = {
.enable = true,
@@ -733,7 +737,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.enable_stall_recovery = true,
}
},
- .force_cositing = CHROMA_COSITING_TOPLEFT + 1,
+ .force_cositing = CHROMA_COSITING_NONE + 1,
};
static struct dce_aux *dcn401_aux_engine_create(
@@ -1293,6 +1297,29 @@ static struct hpo_dp_link_encoder *dcn401_hpo_dp_link_encoder_create(
return &hpo_dp_enc31->base;
}
+static unsigned int dcn401_calc_num_avail_chans_for_mall(struct dc *dc, unsigned int num_chans)
+{
+ unsigned int num_available_chans = 1;
+
+ /* channels for MALL must be a power of 2 */
+ while (num_chans > 1) {
+ num_available_chans = (num_available_chans << 1);
+ num_chans = (num_chans >> 1);
+ }
+
+ /* cannot be odd */
+ num_available_chans &= ~1;
+
+ /* clamp to max available channels for MALL per ASIC */
+ if (ASICREV_IS_GC_12_0_0_A0(dc->ctx->asic_id.hw_internal_rev)) {
+ num_available_chans = num_available_chans > 16 ? 16 : num_available_chans;
+ } else if (ASICREV_IS_GC_12_0_1_A0(dc->ctx->asic_id.hw_internal_rev)) {
+ num_available_chans = num_available_chans > 8 ? 8 : num_available_chans;
+ }
+
+ return num_available_chans;
+}
+
static struct dce_hwseq *dcn401_hwseq_create(
struct dc_context *ctx)
{
@@ -1588,6 +1615,14 @@ static void dcn401_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *b
memcpy(dml2_opt, &dc->dml2_options, sizeof(dc->dml2_options));
+ /* re-calculate the available MALL size if required */
+ if (bw_params->num_channels > 0) {
+ dc->caps.max_cab_allocation_bytes = dcn401_calc_num_avail_chans_for_mall(
+ dc, bw_params->num_channels) *
+ dc->caps.mall_size_per_mem_channel * 1024 * 1024;
+ dc->caps.mall_size_total = dc->caps.max_cab_allocation_bytes;
+ }
+
DC_FP_START();
dcn401_update_bw_bounding_box_fpu(dc, bw_params);
@@ -1605,6 +1640,7 @@ static void dcn401_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *b
enum dc_status dcn401_patch_unknown_plane_state(struct dc_plane_state *plane_state)
{
+ plane_state->tiling_info.gfxversion = DcGfxAddr3;
plane_state->tiling_info.gfx_addr3.swizzle = DC_ADDR3_SW_64KB_2D;
return DC_OK;
}
@@ -1704,27 +1740,9 @@ static int dcn401_get_power_profile(const struct dc_state *context)
return dpm_level;
}
-static unsigned int dcn401_calc_num_avail_chans_for_mall(struct dc *dc, unsigned int num_chans)
+static unsigned int dcn401_get_vstartup_for_pipe(struct pipe_ctx *pipe_ctx)
{
- unsigned int num_available_chans = 1;
-
- /* channels for MALL must be a power of 2 */
- while (num_chans > 1) {
- num_available_chans = (num_available_chans << 1);
- num_chans = (num_chans >> 1);
- }
-
- /* cannot be odd */
- num_available_chans &= ~1;
-
- /* clamp to max available channels for MALL per ASIC */
- if (ASICREV_IS_GC_12_0_0_A0(dc->ctx->asic_id.hw_internal_rev)) {
- num_available_chans = num_available_chans > 16 ? 16 : num_available_chans;
- } else if (ASICREV_IS_GC_12_0_1_A0(dc->ctx->asic_id.hw_internal_rev)) {
- num_available_chans = num_available_chans > 8 ? 8 : num_available_chans;
- }
-
- return num_available_chans;
+ return pipe_ctx->global_sync.dcn4x.vstartup_lines;
}
static struct resource_funcs dcn401_res_pool_funcs = {
@@ -1754,6 +1772,7 @@ static struct resource_funcs dcn401_res_pool_funcs = {
.build_pipe_pix_clk_params = dcn401_build_pipe_pix_clk_params,
.calculate_mall_ways_from_bytes = dcn32_calculate_mall_ways_from_bytes,
.get_power_profile = dcn401_get_power_profile,
+ .get_vstartup_for_pipe = dcn401_get_vstartup_for_pipe
};
static uint32_t read_pipe_fuses(struct dc_context *ctx)
@@ -1864,6 +1883,7 @@ static bool dcn401_resource_construct(
dc->caps.extended_aux_timeout_support = true;
dc->caps.dmcub_support = true;
dc->caps.max_v_total = (1 << 15) - 1;
+ dc->caps.vtotal_limited_by_fp2 = true;
if (ASICREV_IS_GC_12_0_1_A0(dc->ctx->asic_id.hw_internal_rev))
dc->caps.dcc_plane_width_limit = 7680;
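
Note: dcn401_update_bw_bounding_box() now re-derives the MALL/CAB budget from the reported channel count before the FPU bounding-box update. dcn401_calc_num_avail_chans_for_mall() floors the count to a power of two and clamps it per ASIC revision; a worked example with illustrative numbers (the channel count and per-channel MALL size are not taken from this patch):

    /* num_chans = 12, mall_size_per_mem_channel = 64 (MiB):
     *   power-of-two floor:        12 -> 8 usable channels
     *   GC 12.0.1 clamp (max 8):    8 (unchanged)
     *   max_cab_allocation_bytes =  8 * 64 * 1024 * 1024 = 512 MiB
     * mall_size_total is then set to the same byte count. */
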
diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c
index 73a65913cb12..38a9a0d68058 100644
--- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c
+++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c
@@ -11,6 +11,41 @@
#define IDENTITY_RATIO(ratio) (spl_fixpt_u2d19(ratio) == (1 << 19))
#define MIN_VIEWPORT_SIZE 12
+static bool spl_is_yuv420(enum spl_pixel_format format)
+{
+ if ((format >= SPL_PIXEL_FORMAT_420BPP8) &&
+ (format <= SPL_PIXEL_FORMAT_420BPP10))
+ return true;
+
+ return false;
+}
+
+static bool spl_is_rgb8(enum spl_pixel_format format)
+{
+ if (format == SPL_PIXEL_FORMAT_ARGB8888)
+ return true;
+
+ return false;
+}
+
+static bool spl_is_video_format(enum spl_pixel_format format)
+{
+ if (format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN
+ && format <= SPL_PIXEL_FORMAT_VIDEO_END)
+ return true;
+ else
+ return false;
+}
+
+static bool spl_is_subsampled_format(enum spl_pixel_format format)
+{
+ if (format >= SPL_PIXEL_FORMAT_SUBSAMPLED_BEGIN
+ && format <= SPL_PIXEL_FORMAT_SUBSAMPLED_END)
+ return true;
+ else
+ return false;
+}
+
static struct spl_rect intersect_rec(const struct spl_rect *r0, const struct spl_rect *r1)
{
struct spl_rect rec;
@@ -137,15 +172,32 @@ static struct spl_rect calculate_mpc_slice_in_timing_active(
struct spl_in *spl_in,
struct spl_rect *plane_clip_rec)
{
- int mpc_slice_count = spl_in->basic_in.mpc_combine_h;
- int mpc_slice_idx = spl_in->basic_in.mpc_combine_v;
+ bool use_recout_width_aligned =
+ spl_in->basic_in.num_h_slices_recout_width_align.use_recout_width_aligned;
+ int mpc_slice_count =
+ spl_in->basic_in.num_h_slices_recout_width_align.num_slices_recout_width.mpc_num_h_slices;
+ int recout_width_align =
+ spl_in->basic_in.num_h_slices_recout_width_align.num_slices_recout_width.mpc_recout_width_align;
+ int mpc_slice_idx = spl_in->basic_in.mpc_h_slice_index;
int epimo = mpc_slice_count - plane_clip_rec->width % mpc_slice_count - 1;
struct spl_rect mpc_rec;
- mpc_rec.width = plane_clip_rec->width / mpc_slice_count;
- mpc_rec.x = plane_clip_rec->x + mpc_rec.width * mpc_slice_idx;
- mpc_rec.height = plane_clip_rec->height;
- mpc_rec.y = plane_clip_rec->y;
+ if (use_recout_width_aligned) {
+ mpc_rec.width = recout_width_align;
+ if ((mpc_rec.width * (mpc_slice_idx + 1)) > plane_clip_rec->width) {
+ mpc_rec.width = plane_clip_rec->width % recout_width_align;
+ mpc_rec.x = plane_clip_rec->x + recout_width_align * mpc_slice_idx;
+ } else
+ mpc_rec.x = plane_clip_rec->x + mpc_rec.width * mpc_slice_idx;
+ mpc_rec.height = plane_clip_rec->height;
+ mpc_rec.y = plane_clip_rec->y;
+
+ } else {
+ mpc_rec.width = plane_clip_rec->width / mpc_slice_count;
+ mpc_rec.x = plane_clip_rec->x + mpc_rec.width * mpc_slice_idx;
+ mpc_rec.height = plane_clip_rec->height;
+ mpc_rec.y = plane_clip_rec->y;
+ }
SPL_ASSERT(mpc_slice_count == 1 ||
spl_in->basic_out.view_format != SPL_VIEW_3D_SIDE_BY_SIDE ||
mpc_rec.width % 2 == 0);
@@ -391,8 +443,7 @@ static void spl_calculate_scaling_ratios(struct spl_in *spl_in,
spl_scratch->scl_data.ratios.horz_c = spl_scratch->scl_data.ratios.horz;
spl_scratch->scl_data.ratios.vert_c = spl_scratch->scl_data.ratios.vert;
- if (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8
- || spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) {
+ if (spl_is_yuv420(spl_in->basic_in.format)) {
spl_scratch->scl_data.ratios.horz_c.value /= 2;
spl_scratch->scl_data.ratios.vert_c.value /= 2;
}
@@ -529,23 +580,6 @@ static void spl_calculate_init_and_vp(bool flip_scan_dir,
*vp_offset = src_size - *vp_offset - *vp_size;
}
-static bool spl_is_yuv420(enum spl_pixel_format format)
-{
- if ((format >= SPL_PIXEL_FORMAT_420BPP8) &&
- (format <= SPL_PIXEL_FORMAT_420BPP10))
- return true;
-
- return false;
-}
-
-static bool spl_is_rgb8(enum spl_pixel_format format)
-{
- if (format == SPL_PIXEL_FORMAT_ARGB8888)
- return true;
-
- return false;
-}
-
/*Calculate inits and viewport */
static void spl_calculate_inits_and_viewports(struct spl_in *spl_in,
struct spl_scratch *spl_scratch)
@@ -556,8 +590,7 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in,
struct spl_rect recout_clip_in_recout_dst;
struct spl_rect overlap_in_active_timing;
struct spl_rect odm_slice = calculate_odm_slice_in_timing_active(spl_in);
- int vpc_div = (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8
- || spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10) ? 2 : 1;
+ int vpc_div = spl_is_subsampled_format(spl_in->basic_in.format) ? 2 : 1;
bool orthogonal_rotation, flip_vert_scan_dir, flip_horz_scan_dir;
struct spl_fixed31_32 init_adj_h = spl_fixpt_zero;
struct spl_fixed31_32 init_adj_v = spl_fixpt_zero;
@@ -585,12 +618,7 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in,
&flip_vert_scan_dir,
&flip_horz_scan_dir);
- if (orthogonal_rotation) {
- spl_swap(src.width, src.height);
- spl_swap(flip_vert_scan_dir, flip_horz_scan_dir);
- }
-
- if (spl_is_yuv420(spl_in->basic_in.format)) {
+ if (spl_is_subsampled_format(spl_in->basic_in.format)) {
/* this gives the direction of the cositing (negative will move
* left, right otherwise)
*/
@@ -598,15 +626,15 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in,
switch (spl_in->basic_in.cositing) {
- case CHROMA_COSITING_LEFT:
- init_adj_h = spl_fixpt_zero;
+ case CHROMA_COSITING_TOPLEFT:
+ init_adj_h = spl_fixpt_from_fraction(sign, 4);
init_adj_v = spl_fixpt_from_fraction(sign, 4);
break;
- case CHROMA_COSITING_NONE:
+ case CHROMA_COSITING_LEFT:
init_adj_h = spl_fixpt_from_fraction(sign, 4);
- init_adj_v = spl_fixpt_from_fraction(sign, 4);
+ init_adj_v = spl_fixpt_zero;
break;
- case CHROMA_COSITING_TOPLEFT:
+ case CHROMA_COSITING_NONE:
default:
init_adj_h = spl_fixpt_zero;
init_adj_v = spl_fixpt_zero;
@@ -614,6 +642,12 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in,
}
}
+ if (orthogonal_rotation) {
+ spl_swap(src.width, src.height);
+ spl_swap(flip_vert_scan_dir, flip_horz_scan_dir);
+ spl_swap(init_adj_h, init_adj_v);
+ }
+
spl_calculate_init_and_vp(
flip_horz_scan_dir,
recout_clip_in_recout_dst.x,
@@ -678,7 +712,7 @@ static void spl_handle_3d_recout(struct spl_in *spl_in, struct spl_rect *recout)
* since 3d is special and needs to calculate vp as if there is no recout offset
* This may break with rotation, good thing we aren't mixing hw rotation and 3d
*/
- if (spl_in->basic_in.mpc_combine_v) {
+ if (spl_in->basic_in.mpc_h_slice_index) {
SPL_ASSERT(spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_0 ||
(spl_in->basic_out.view_format != SPL_VIEW_3D_TOP_AND_BOTTOM &&
spl_in->basic_out.view_format != SPL_VIEW_3D_SIDE_BY_SIDE));
@@ -698,24 +732,6 @@ static void spl_clamp_viewport(struct spl_rect *viewport)
viewport->width = MIN_VIEWPORT_SIZE;
}
-static bool spl_dscl_is_420_format(enum spl_pixel_format format)
-{
- if (format == SPL_PIXEL_FORMAT_420BPP8 ||
- format == SPL_PIXEL_FORMAT_420BPP10)
- return true;
- else
- return false;
-}
-
-static bool spl_dscl_is_video_format(enum spl_pixel_format format)
-{
- if (format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN
- && format <= SPL_PIXEL_FORMAT_VIDEO_END)
- return true;
- else
- return false;
-}
-
static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in,
const struct spl_scaler_data *data,
bool enable_isharp, bool enable_easf)
@@ -732,8 +748,8 @@ static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in,
&& !enable_isharp)
return SCL_MODE_SCALING_444_BYPASS;
- if (!spl_dscl_is_420_format(pixel_format)) {
- if (spl_dscl_is_video_format(pixel_format))
+ if (!spl_is_subsampled_format(pixel_format)) {
+ if (spl_is_video_format(pixel_format))
return SCL_MODE_SCALING_444_YCBCR_ENABLE;
else
return SCL_MODE_SCALING_444_RGB_ENABLE;
@@ -756,7 +772,7 @@ static bool spl_choose_lls_policy(enum spl_pixel_format format,
enum spl_transfer_func_predefined tf_predefined_type,
enum linear_light_scaling *lls_pref)
{
- if (spl_is_yuv420(format)) {
+ if (spl_is_video_format(format)) {
*lls_pref = LLS_PREF_NO;
if ((tf_type == SPL_TF_TYPE_PREDEFINED) ||
(tf_type == SPL_TF_TYPE_DISTRIBUTED_POINTS))
@@ -815,7 +831,7 @@ static bool enable_easf(struct spl_in *spl_in, struct spl_scratch *spl_scratch)
/* Check if video is in fullscreen mode */
static bool spl_is_video_fullscreen(struct spl_in *spl_in)
{
- if (spl_is_yuv420(spl_in->basic_in.format) && spl_in->is_fullscreen)
+ if (spl_is_video_format(spl_in->basic_in.format) && spl_in->is_fullscreen)
return true;
return false;
}
@@ -846,10 +862,10 @@ static bool spl_get_isharp_en(struct spl_in *spl_in,
* Apply sharpness to RGB and YUV (NV12/P010)
* surfaces based on policy setting
*/
- if (!spl_is_yuv420(spl_in->basic_in.format) &&
+ if (!spl_is_video_format(spl_in->basic_in.format) &&
(spl_in->sharpen_policy == SHARPEN_YUV))
return enable_isharp;
- else if ((spl_is_yuv420(spl_in->basic_in.format) && !fullscreen) &&
+ else if ((spl_is_video_format(spl_in->basic_in.format) && !fullscreen) &&
(spl_in->sharpen_policy == SHARPEN_RGB_FULLSCREEN_YUV))
return enable_isharp;
else if (!spl_in->is_fullscreen &&
@@ -882,8 +898,8 @@ static void spl_get_taps_non_adaptive_scaler(
if (in_taps->v_taps == 0) {
if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) > 1)
- spl_scratch->scl_data.taps.v_taps = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int(
- spl_scratch->scl_data.ratios.vert, 2)), 8);
+ spl_scratch->scl_data.taps.v_taps = spl_min(2 * spl_fixpt_ceil(
+ spl_scratch->scl_data.ratios.vert), 8);
else
spl_scratch->scl_data.taps.v_taps = 4;
} else
@@ -891,8 +907,8 @@ static void spl_get_taps_non_adaptive_scaler(
if (in_taps->v_taps_c == 0) {
if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) > 1)
- spl_scratch->scl_data.taps.v_taps_c = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int(
- spl_scratch->scl_data.ratios.vert_c, 2)), 8);
+ spl_scratch->scl_data.taps.v_taps_c = spl_min(2 * spl_fixpt_ceil(
+ spl_scratch->scl_data.ratios.vert_c), 8);
else
spl_scratch->scl_data.taps.v_taps_c = 4;
} else
@@ -932,7 +948,7 @@ static bool spl_get_optimal_number_of_taps(
int min_taps_y, min_taps_c;
enum lb_memory_config lb_config;
bool skip_easf = false;
- bool is_ycbcr = spl_dscl_is_video_format(spl_in->basic_in.format);
+ bool is_subsampled = spl_is_subsampled_format(spl_in->basic_in.format);
if (spl_scratch->scl_data.viewport.width > spl_scratch->scl_data.h_active &&
max_downscale_src_width != 0 &&
@@ -964,7 +980,7 @@ static bool spl_get_optimal_number_of_taps(
if (skip_easf)
spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps);
else {
- if (spl_is_yuv420(spl_in->basic_in.format)) {
+ if (spl_is_video_format(spl_in->basic_in.format)) {
spl_scratch->scl_data.taps.h_taps = 6;
spl_scratch->scl_data.taps.v_taps = 6;
spl_scratch->scl_data.taps.h_taps_c = 4;
@@ -982,8 +998,7 @@ static bool spl_get_optimal_number_of_taps(
min_taps_c = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c);
/* Use LB_MEMORY_CONFIG_3 for 4:2:0 */
- if ((spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8)
- || (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10))
+ if (spl_is_yuv420(spl_in->basic_in.format))
lb_config = LB_MEMORY_CONFIG_3;
else
lb_config = LB_MEMORY_CONFIG_0;
@@ -1039,13 +1054,11 @@ static bool spl_get_optimal_number_of_taps(
if (spl_scratch->scl_data.taps.h_taps_c == 5)
spl_scratch->scl_data.taps.h_taps_c = 4;
- if (spl_is_yuv420(spl_in->basic_in.format)) {
- if ((spl_scratch->scl_data.taps.h_taps <= 4) ||
- (spl_scratch->scl_data.taps.h_taps_c <= 3)) {
+ if (spl_is_video_format(spl_in->basic_in.format)) {
+ if (spl_scratch->scl_data.taps.h_taps <= 4) {
*enable_easf_v = false;
*enable_easf_h = false;
- } else if ((spl_scratch->scl_data.taps.v_taps <= 3) ||
- (spl_scratch->scl_data.taps.v_taps_c <= 3)) {
+ } else if (spl_scratch->scl_data.taps.v_taps <= 3) {
*enable_easf_v = false;
*enable_easf_h = true;
} else {
@@ -1086,10 +1099,10 @@ static bool spl_get_optimal_number_of_taps(
spl_scratch->scl_data.taps.h_taps = 1;
spl_scratch->scl_data.taps.v_taps = 1;
- if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c) && !is_ycbcr)
+ if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c) && !is_subsampled)
spl_scratch->scl_data.taps.h_taps_c = 1;
- if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c) && !is_ycbcr)
+ if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c) && !is_subsampled)
spl_scratch->scl_data.taps.v_taps_c = 1;
*enable_easf_v = false;
@@ -1103,11 +1116,11 @@ static bool spl_get_optimal_number_of_taps(
(IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert)))
spl_scratch->scl_data.taps.v_taps = 1;
- if ((!*enable_easf_h) && !is_ycbcr &&
+ if ((!*enable_easf_h) && !is_subsampled &&
(IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c)))
spl_scratch->scl_data.taps.h_taps_c = 1;
- if ((!*enable_easf_v) && !is_ycbcr &&
+ if ((!*enable_easf_v) && !is_subsampled &&
(IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c)))
spl_scratch->scl_data.taps.v_taps_c = 1;
}
@@ -1118,7 +1131,7 @@ static bool spl_get_optimal_number_of_taps(
static void spl_set_black_color_data(enum spl_pixel_format format,
struct scl_black_color *scl_black_color)
{
- bool ycbcr = spl_dscl_is_video_format(format);
+ bool ycbcr = spl_is_video_format(format);
if (ycbcr) {
scl_black_color->offset_rgb_y = BLACK_OFFSET_RGB_Y;
scl_black_color->offset_rgb_cbcr = BLACK_OFFSET_CBCR;
@@ -1585,7 +1598,7 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s
0x0; // fp1.5.10, C3 coefficient
}
- if (spl_is_yuv420(format)) { /* TODO: 0 = RGB, 1 = YUV */
+ if (spl_is_subsampled_format(format)) { /* TODO: 0 = RGB, 1 = YUV */
dscl_prog_data->easf_matrix_mode = 1;
/*
* 2-bit, BF3 chroma mode correction calculation mode
diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h
index 55d557df4aa5..467af9dd90de 100644
--- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h
+++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h
@@ -63,13 +63,13 @@ enum spl_pixel_format {
SPL_PIXEL_FORMAT_420BPP8,
SPL_PIXEL_FORMAT_420BPP10,
/*end of pixel format definition*/
- SPL_PIXEL_FORMAT_INVALID,
- SPL_PIXEL_FORMAT_422BPP8,
- SPL_PIXEL_FORMAT_422BPP10,
SPL_PIXEL_FORMAT_GRPH_BEGIN = SPL_PIXEL_FORMAT_INDEX8,
SPL_PIXEL_FORMAT_GRPH_END = SPL_PIXEL_FORMAT_FP16,
+ SPL_PIXEL_FORMAT_SUBSAMPLED_BEGIN = SPL_PIXEL_FORMAT_420BPP8,
+ SPL_PIXEL_FORMAT_SUBSAMPLED_END = SPL_PIXEL_FORMAT_420BPP10,
SPL_PIXEL_FORMAT_VIDEO_BEGIN = SPL_PIXEL_FORMAT_420BPP8,
SPL_PIXEL_FORMAT_VIDEO_END = SPL_PIXEL_FORMAT_420BPP10,
+ SPL_PIXEL_FORMAT_INVALID,
SPL_PIXEL_FORMAT_UNKNOWN
};
@@ -436,8 +436,14 @@ struct basic_in {
struct spl_rect clip_rect; // Clip rect
enum spl_rotation_angle rotation; // Rotation
bool horizontal_mirror; // Horizontal mirror
- int mpc_combine_h; // MPC Horizontal Combine Factor (split_count)
- int mpc_combine_v; // MPC Vertical Combine Factor (split_idx)
+ struct { // previous mpc_combine_h - split count
+ bool use_recout_width_aligned;
+ union {
+ int mpc_num_h_slices;
+ int mpc_recout_width_align;
+ } num_slices_recout_width;
+ } num_h_slices_recout_width_align;
+ int mpc_h_slice_index; // previous mpc_combine_v - split_idx
// Inputs for adaptive scaler - TODO
enum spl_transfer_func_type tf_type; /* Transfer function type */
enum spl_transfer_func_predefined tf_predefined_type; /* Transfer function predefined type */
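
Note: basic_in drops the mpc_combine_h/mpc_combine_v pair in favour of a slice descriptor (either a horizontal slice count or a fixed recout width alignment) plus mpc_h_slice_index. In the aligned mode, calculate_mpc_slice_in_timing_active() gives each slice recout_width_align pixels and the final slice the remainder. A worked example with assumed numbers (plane_clip_rec->x taken as 0):

    /* plane_clip_rec->width = 1000, recout_width_align = 384,
     * use_recout_width_aligned = true:
     *   slice 0: x = 0,   width = 384
     *   slice 1: x = 384, width = 384
     *   slice 2: x = 768, width = 1000 % 384 = 232   (3 * 384 > 1000)
     * use_recout_width_aligned = false, mpc_num_h_slices = 2:
     *   slice 0: x = 0,   width = 1000 / 2 = 500
     *   slice 1: x = 500, width = 500 */
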
diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
index b353c4ceb60d..4b3ccbca0da2 100644
--- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
@@ -69,6 +69,9 @@
#define DMUB_PC_SNAPSHOT_COUNT 10
+/* Default tracebuffer size if meta is absent. */
+#define DMUB_TRACE_BUFFER_SIZE (64 * 1024)
+
/* Forward declarations */
struct dmub_srv;
struct dmub_srv_common_regs;
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index b800a507d1e0..d0fe324cb537 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -431,7 +431,68 @@ union replay_debug_flags {
*/
uint32_t enable_ips_residency_profiling : 1;
- uint32_t reserved : 20;
+ /**
+ * 0x1000 (bit 12)
+ * @enable_coasting_vtotal_check: Enable Coasting_vtotal_check
+ */
+ uint32_t enable_coasting_vtotal_check : 1;
+ /**
+ * 0x2000 (bit 13)
+ * @enable_visual_confirm_debug: Enable Visual Confirm Debug
+ */
+ uint32_t enable_visual_confirm_debug : 1;
+
+ uint32_t reserved : 18;
+ } bitfields;
+
+ uint32_t u32All;
+};
+
+/**
+ * Flags record error state.
+ */
+union replay_visual_confirm_error_state_flags {
+ struct {
+ /**
+ * 0x1 (bit 0) - Desync Error flag.
+ */
+ uint32_t desync_error : 1;
+
+ /**
+ * 0x2 (bit 1) - State Transition Error flag.
+ */
+ uint32_t state_transition_error : 1;
+
+ /**
+ * 0x4 (bit 2) - Crc Error flag
+ */
+ uint32_t crc_error : 1;
+
+ /**
+ * 0x8 (bit 3) - Reserved
+ */
+ uint32_t reserved_3 : 1;
+
+ /**
+ * 0x10 (bit 4) - Incorrect Coasting vtotal checking --> use debug flag to control DPCD write.
+ * Added new debug flag to control DPCD.
+ */
+ uint32_t incorrect_vtotal_in_static_screen : 1;
+
+ /**
+ * 0x20 (bit 5) - No doubled Refresh Rate.
+ */
+ uint32_t no_double_rr : 1;
+
+ /**
+ * Reserved bit 6-7
+ */
+ uint32_t reserved_6_7 : 2;
+
+ /**
+ * Reserved bit 9-31
+ */
+ uint32_t reserved_9_31 : 24;
} bitfields;
uint32_t u32All;
@@ -475,11 +536,23 @@ union replay_hw_flags {
* Use TPS3 signal when restore main link.
*/
uint32_t force_wakeup_by_tps3 : 1;
+ /**
+ * @is_alpm_initialized: Indicates whether ALPM is initialized
+ */
+ uint32_t is_alpm_initialized : 1;
} bitfields;
uint32_t u32All;
};
+union fw_assisted_mclk_switch_version {
+ struct {
+ uint8_t minor : 5;
+ uint8_t major : 3;
+ };
+ uint8_t ver;
+};
+
/**
* DMUB feature capabilities.
* After DMUB init, driver will query FW capabilities prior to enabling certain features.
@@ -1823,52 +1896,11 @@ enum fams2_stream_type {
FAMS2_STREAM_TYPE_SUBVP = 4,
};
-/* dynamic stream state */
-struct dmub_fams2_legacy_stream_dynamic_state {
- uint8_t force_allow_at_vblank;
- uint8_t pad[3];
-};
-
-struct dmub_fams2_subvp_stream_dynamic_state {
- uint16_t viewport_start_hubp_vline;
- uint16_t viewport_height_hubp_vlines;
- uint16_t viewport_start_c_hubp_vline;
- uint16_t viewport_height_c_hubp_vlines;
- uint16_t phantom_viewport_height_hubp_vlines;
- uint16_t phantom_viewport_height_c_hubp_vlines;
- uint16_t microschedule_start_otg_vline;
- uint16_t mall_start_otg_vline;
- uint16_t mall_start_hubp_vline;
- uint16_t mall_start_c_hubp_vline;
- uint8_t force_allow_at_vblank_only;
- uint8_t pad[3];
-};
-
-struct dmub_fams2_drr_stream_dynamic_state {
- uint16_t stretched_vtotal;
- uint8_t use_cur_vtotal;
- uint8_t pad;
-};
-
-struct dmub_fams2_stream_dynamic_state {
- uint64_t ref_tick;
- uint32_t cur_vtotal;
- uint16_t adjusted_allow_end_otg_vline;
- uint8_t pad[2];
- struct dmub_optc_position ref_otg_pos;
- struct dmub_optc_position target_otg_pos;
- union {
- struct dmub_fams2_legacy_stream_dynamic_state legacy;
- struct dmub_fams2_subvp_stream_dynamic_state subvp;
- struct dmub_fams2_drr_stream_dynamic_state drr;
- } sub_state;
-};
-
/* static stream state */
struct dmub_fams2_legacy_stream_static_state {
uint8_t vactive_det_fill_delay_otg_vlines;
uint8_t programming_delay_otg_vlines;
-};
+}; //v0
struct dmub_fams2_subvp_stream_static_state {
uint16_t vratio_numerator;
@@ -1887,14 +1919,59 @@ struct dmub_fams2_subvp_stream_static_state {
uint8_t phantom_otg_inst;
uint8_t phantom_pipe_mask;
uint8_t phantom_plane_pipe_masks[DMUB_MAX_PHANTOM_PLANES]; // phantom pipe mask per plane (for flip passthrough)
-};
+}; //v0
struct dmub_fams2_drr_stream_static_state {
uint16_t nom_stretched_vtotal;
uint8_t programming_delay_otg_vlines;
uint8_t only_stretch_if_required;
uint8_t pad[2];
-};
+}; //v0
+
+struct dmub_fams2_cmd_legacy_stream_static_state {
+ uint16_t vactive_det_fill_delay_otg_vlines;
+ uint16_t programming_delay_otg_vlines;
+}; //v1
+
+struct dmub_fams2_cmd_subvp_stream_static_state {
+ uint16_t vratio_numerator;
+ uint16_t vratio_denominator;
+ uint16_t phantom_vtotal;
+ uint16_t phantom_vactive;
+ uint16_t programming_delay_otg_vlines;
+ uint16_t prefetch_to_mall_otg_vlines;
+ union {
+ struct {
+ uint8_t is_multi_planar : 1;
+ uint8_t is_yuv420 : 1;
+ } bits;
+ uint8_t all;
+ } config;
+ uint8_t phantom_otg_inst;
+ uint8_t phantom_pipe_mask;
+ uint8_t pad0;
+ uint8_t phantom_plane_pipe_masks[DMUB_MAX_PHANTOM_PLANES]; // phantom pipe mask per plane (for flip passthrough)
+ uint8_t pad1[4 - (DMUB_MAX_PHANTOM_PLANES % 4)];
+}; //v1
+
+struct dmub_fams2_cmd_drr_stream_static_state {
+ uint16_t nom_stretched_vtotal;
+ uint16_t programming_delay_otg_vlines;
+ uint8_t only_stretch_if_required;
+ uint8_t pad[3];
+}; //v1
+
+union dmub_fams2_stream_static_sub_state {
+ struct dmub_fams2_legacy_stream_static_state legacy;
+ struct dmub_fams2_subvp_stream_static_state subvp;
+ struct dmub_fams2_drr_stream_static_state drr;
+}; //v0
+
+union dmub_fams2_cmd_stream_static_sub_state {
+ struct dmub_fams2_cmd_legacy_stream_static_state legacy;
+ struct dmub_fams2_cmd_subvp_stream_static_state subvp;
+ struct dmub_fams2_cmd_drr_stream_static_state drr;
+}; //v1
struct dmub_fams2_stream_static_state {
enum fams2_stream_type type;
@@ -1924,13 +2001,45 @@ struct dmub_fams2_stream_static_state {
uint8_t pipe_mask; // pipe mask for the whole config
uint8_t num_planes;
uint8_t plane_pipe_masks[DMUB_MAX_PLANES]; // pipe mask per plane (for flip passthrough)
- uint8_t pad[DMUB_MAX_PLANES % 4];
+ uint8_t pad[4 - (DMUB_MAX_PLANES % 4)];
+ union dmub_fams2_stream_static_sub_state sub_state;
+}; //v0
+
+struct dmub_fams2_cmd_stream_static_base_state {
+ enum fams2_stream_type type;
+ uint32_t otg_vline_time_ns;
+ uint32_t otg_vline_time_ticks;
+ uint16_t htotal;
+ uint16_t vtotal; // nominal vtotal
+ uint16_t vblank_start;
+ uint16_t vblank_end;
+ uint16_t max_vtotal;
+ uint16_t allow_start_otg_vline;
+ uint16_t allow_end_otg_vline;
+ uint16_t drr_keepout_otg_vline; // after this vline, vtotal cannot be changed
+ uint16_t scheduling_delay_otg_vlines; // min time to budget for ready to microschedule start
+ uint16_t contention_delay_otg_vlines; // time to budget for contention on execution
+ uint16_t vline_int_ack_delay_otg_vlines; // min time to budget for vertical interrupt firing
+ uint16_t allow_to_target_delay_otg_vlines; // time from allow vline to target vline
union {
- struct dmub_fams2_legacy_stream_static_state legacy;
- struct dmub_fams2_subvp_stream_static_state subvp;
- struct dmub_fams2_drr_stream_static_state drr;
- } sub_state;
-};
+ struct {
+ uint8_t is_drr : 1; // stream is DRR enabled
+ uint8_t clamp_vtotal_min : 1; // clamp vtotal to min instead of nominal
+ uint8_t min_ttu_vblank_usable : 1; // if min ttu vblank is above wm, no force pstate is needed in blank
+ } bits;
+ uint8_t all;
+ } config;
+ uint8_t otg_inst;
+ uint8_t pipe_mask; // pipe mask for the whole config
+ uint8_t num_planes;
+ uint8_t plane_pipe_masks[DMUB_MAX_PLANES]; // pipe mask per plane (for flip passthrough)
+ uint8_t pad[4 - (DMUB_MAX_PLANES % 4)];
+}; //v1
+
+struct dmub_fams2_stream_static_state_v1 {
+ struct dmub_fams2_cmd_stream_static_base_state base;
+ union dmub_fams2_cmd_stream_static_sub_state sub_state;
+}; //v1
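For orientation, the v1 layout splits each stream's static state into a common base plus a per-type sub-state. A minimal hedged sketch of filling the combined v1 struct for a DRR stream follows; FAMS2_STREAM_TYPE_DRR is assumed here and every value is an illustrative placeholder, not taken from this patch:

	struct dmub_fams2_stream_static_state_v1 s = { 0 };

	s.base.type = FAMS2_STREAM_TYPE_DRR;	/* assumed enumerator */
	s.base.htotal = 2200;
	s.base.vtotal = 1125;			/* nominal vtotal */
	s.base.max_vtotal = 2250;
	s.base.config.bits.is_drr = 1;
	s.base.otg_inst = 0;
	s.base.pipe_mask = 0x1;
	s.base.num_planes = 1;
	s.base.plane_pipe_masks[0] = 0x1;

	s.sub_state.drr.nom_stretched_vtotal = 1125;
	s.sub_state.drr.programming_delay_otg_vlines = 2;
	s.sub_state.drr.only_stretch_if_required = 1;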
/**
* enum dmub_fams2_allow_delay_check_mode - macroscheduler mode for breaking on excessive
@@ -1970,7 +2079,11 @@ struct dmub_cmd_fams2_global_config {
union dmub_cmd_fams2_config {
struct dmub_cmd_fams2_global_config global;
- struct dmub_fams2_stream_static_state stream;
+ struct dmub_fams2_stream_static_state stream; //v0
+ union {
+ struct dmub_fams2_cmd_stream_static_base_state base;
+ union dmub_fams2_cmd_stream_static_sub_state sub_state;
+ } stream_v1; //v1
};
/**
@@ -3592,6 +3705,8 @@ enum dmub_cmd_replay_general_subtype {
*/
REPLAY_GENERAL_CMD_DISABLED_ADAPTIVE_SYNC_SDP,
REPLAY_GENERAL_CMD_DISABLED_DESYNC_ERROR_DETECTION,
+ REPLAY_GENERAL_CMD_UPDATE_ERROR_STATUS,
+ REPLAY_GENERAL_CMD_SET_LOW_RR_ACTIVATE,
};
/**
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
index a3f3ff5d49ac..15ea216e903d 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
@@ -61,10 +61,6 @@
/* Default state size if meta is absent. */
#define DMUB_FW_STATE_SIZE (64 * 1024)
-/* Default tracebuffer size if meta is absent. */
-#define DMUB_TRACE_BUFFER_SIZE (64 * 1024)
-
-
/* Default scratch mem size. */
#define DMUB_SCRATCH_MEM_SIZE (1024)
diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index f980a84dceef..2b3964529539 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -122,6 +122,17 @@ static unsigned int calc_duration_in_us_from_v_total(
return duration_in_us;
}
+static unsigned int calc_max_hardware_v_total(const struct dc_stream_state *stream)
+{
+ unsigned int max_hw_v_total = stream->ctx->dc->caps.max_v_total;
+
+ if (stream->ctx->dc->caps.vtotal_limited_by_fp2) {
+ max_hw_v_total -= stream->timing.v_front_porch + 1;
+ }
+
+ return max_hw_v_total;
+}
+
unsigned int mod_freesync_calc_v_total_from_refresh(
const struct dc_stream_state *stream,
unsigned int refresh_in_uhz)
@@ -1016,7 +1027,7 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync,
if (stream->ctx->dc->caps.max_v_total != 0 && stream->timing.h_total != 0) {
min_hardware_refresh_in_uhz = div64_u64((stream->timing.pix_clk_100hz * 100000000ULL),
- (stream->timing.h_total * (long long)stream->ctx->dc->caps.max_v_total));
+ (stream->timing.h_total * (long long)calc_max_hardware_v_total(stream)));
}
/* Limit minimum refresh rate to what can be supported by hardware */
min_refresh_in_uhz = min_hardware_refresh_in_uhz > in_config->min_refresh_in_uhz ?
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
index 95838c7ab054..29ccd3532d13 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
@@ -996,9 +996,9 @@ void set_replay_coasting_vtotal(struct dc_link *link,
link->replay_settings.coasting_vtotal_table[type] = vtotal;
}
-void set_replay_ips_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal)
+void set_replay_low_rr_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal)
{
- link->replay_settings.abm_with_ips_on_full_screen_video_pseudo_vtotal = vtotal;
+ link->replay_settings.low_rr_full_screen_video_pseudo_vtotal = vtotal;
}
void calculate_replay_link_off_frame_count(struct dc_link *link,
@@ -1039,3 +1039,8 @@ bool fill_custom_backlight_caps(unsigned int config_no, struct dm_acpi_atif_back
memcpy(caps->data_points, custom_backlight_profiles[config_no].data_points, data_points_size);
return true;
}
+
+void reset_replay_dsync_error_count(struct dc_link *link)
+{
+ link->replay_settings.replay_desync_error_fail_count = 0;
+}
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
index cac302e8fa10..758a8aa31fbe 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h
@@ -62,7 +62,7 @@ void set_replay_defer_update_coasting_vtotal(struct dc_link *link,
uint32_t vtotal);
void update_replay_coasting_vtotal_from_defer(struct dc_link *link,
enum replay_coasting_vtotal_type type);
-void set_replay_ips_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal);
+void set_replay_low_rr_full_screen_video_src_vtotal(struct dc_link *link, uint16_t vtotal);
void calculate_replay_link_off_frame_count(struct dc_link *link,
uint16_t vtotal, uint16_t htotal);
@@ -78,4 +78,5 @@ bool psr_su_set_dsc_slice_height(struct dc *dc, struct dc_link *link,
bool fill_custom_backlight_caps(unsigned int config_no,
struct dm_acpi_atif_backlight_caps *caps);
+void reset_replay_dsync_error_count(struct dc_link *link);
#endif /* MODULES_POWER_POWER_HELPERS_H_ */
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 7eefcb0f5070..05bdb4e020ae 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -344,6 +344,11 @@ enum DC_DEBUG_MASK {
* eDP display from ACPI _DDC method.
*/
DC_DISABLE_ACPI_EDID = 0x8000,
+
+ /*
+ * @DC_DISABLE_HDMI_CEC: If set, disable HDMI-CEC feature in amdgpu driver.
+ */
+ DC_DISABLE_HDMI_CEC = 0x10000,
};
enum amd_dpm_forced_level;
@@ -401,9 +406,9 @@ struct amd_ip_funcs {
int (*pre_soft_reset)(struct amdgpu_ip_block *ip_block);
int (*soft_reset)(struct amdgpu_ip_block *ip_block);
int (*post_soft_reset)(struct amdgpu_ip_block *ip_block);
- int (*set_clockgating_state)(void *handle,
+ int (*set_clockgating_state)(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
- int (*set_powergating_state)(void *handle,
+ int (*set_powergating_state)(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
void (*get_clockgating_state)(void *handle, u64 *flags);
void (*dump_ip_state)(struct amdgpu_ip_block *ip_block);
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_offset.h
index cae1a7e74323..73c5dd5e83d4 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_offset.h
@@ -19,8 +19,8 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#ifndef _dcn_2_0_3_OFFSET_HEADER
-#define _dcn_2_0_3_OFFSET_HEADER
+#ifndef _dcn_2_0_1_OFFSET_HEADER
+#define _dcn_2_0_1_OFFSET_HEADER
// addressBlock: dce_dc_dccg_dccg_dispdec
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_sh_mask.h
index ca1e1eb39256..290d807800a6 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_sh_mask.h
@@ -18,8 +18,8 @@
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#ifndef _dcn_2_0_3_SH_MASK_HEADER
-#define _dcn_2_0_3_SH_MASK_HEADER
+#ifndef _dcn_2_0_1_SH_MASK_HEADER
+#define _dcn_2_0_1_SH_MASK_HEADER
// addressBlock: dce_dc_dccg_dccg_dispdec
diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_offset.h
new file mode 100644
index 000000000000..0e8f12728d5f
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_offset.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _umc_8_14_0_OFFSET_HEADER
+#define _umc_8_14_0_OFFSET_HEADER
+
+#define regUMCCH0_GeccErrCntSel 0x0328
+#define regUMCCH0_GeccErrCntSel_BASE_IDX 0
+#define regUMCCH0_GeccErrCnt 0x0329
+#define regUMCCH0_GeccErrCnt_BASE_IDX 0
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_sh_mask.h
new file mode 100644
index 000000000000..5d723b5d9b87
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_sh_mask.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _umc_8_14_0_SH_MASK_HEADER
+#define _umc_8_14_0_SH_MASK_HEADER
+
+//UMCCH0_GeccErrCntSel
+#define UMCCH0_GeccErrCntSel__GeccErrInt__SHIFT 0xc
+#define UMCCH0_GeccErrCntSel__GeccErrCntEn__SHIFT 0xf
+#define UMCCH0_GeccErrCntSel__PoisonCntEn__SHIFT 0x10
+#define UMCCH0_GeccErrCntSel__GeccErrInt_MASK 0x00003000L
+#define UMCCH0_GeccErrCntSel__GeccErrCntEn_MASK 0x00008000L
+#define UMCCH0_GeccErrCntSel__PoisonCntEn_MASK 0x00030000L
+//UMCCH0_GeccErrCnt
+#define UMCCH0_GeccErrCnt__GeccErrCnt__SHIFT 0x0
+#define UMCCH0_GeccErrCnt__GeccUnCorrErrCnt__SHIFT 0x10
+#define UMCCH0_GeccErrCnt__GeccErrCnt_MASK 0x0000FFFFL
+#define UMCCH0_GeccErrCnt__GeccUnCorrErrCnt_MASK 0xFFFF0000L
+
+#endif
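These definitions follow the usual shift/mask register conventions. A hedged sketch of decoding both counters from a raw UMCCH0_GeccErrCnt read; the helper name is a placeholder and not part of the patch:

static void umc_v8_14_decode_gecc_err_cnt(uint32_t raw,
					  uint32_t *ce_cnt, uint32_t *ue_cnt)
{
	/* correctable count lives in bits [15:0], uncorrectable in [31:16] */
	*ce_cnt = (raw & UMCCH0_GeccErrCnt__GeccErrCnt_MASK) >>
		  UMCCH0_GeccErrCnt__GeccErrCnt__SHIFT;
	*ue_cnt = (raw & UMCCH0_GeccErrCnt__GeccUnCorrErrCnt_MASK) >>
		  UMCCH0_GeccErrCnt__GeccUnCorrErrCnt__SHIFT;
}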
diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h
index b0fc22383e28..0160d65f3f5e 100644
--- a/drivers/gpu/drm/amd/include/atomfirmware.h
+++ b/drivers/gpu/drm/amd/include/atomfirmware.h
@@ -1300,12 +1300,17 @@ struct atom_ext_display_path
//usCaps
enum ext_display_path_cap_def {
- EXT_DISPLAY_PATH_CAPS__HBR2_DISABLE = 0x0001,
- EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN = 0x0002,
- EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK = 0x007C,
- EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204 = (0x01 << 2), //PI redriver chip
- EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT = (0x02 << 2), //TI retimer chip
- EXT_DISPLAY_PATH_CAPS__HDMI20_PARADE_PS175 = (0x03 << 2) //Parade DP->HDMI recoverter chip
+ EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK = 0x007E,
+ AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK = 0x007E,
+ AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN = (0x01 << 1),
+ AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204 = (0x02 << 1),
+ AMD_EXT_DISPLAY_PATH_CAPS__DP_EARLY_8B10B_TPS2 = (0x03 << 1),
+ AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT = (0x04 << 1),
+ AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_PARADE_PS175 = (0x06 << 1),
+ EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN = (0x07 << 1),
+ EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204 = (0x08 << 1), //PI redriver chip
+ EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT = (0x09 << 1), //TI retimer chip
+ EXT_DISPLAY_PATH_CAPS__AMD_INTERNAL = (0x0a << 1), //AMD internal customer chip placeholder
};
struct atom_external_display_connection_info
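With the remapped caps encoding, the external-chip identifier occupies bits [6:1]. A hedged sketch of testing for the TI DP159 retimer under the new layout; the helper name is illustrative only:

static bool ext_path_has_dp159(uint16_t caps)
{
	/* isolate bits [6:1] and compare against the encoded chip id */
	return (caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) ==
	       AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT;
}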
diff --git a/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h b/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h
new file mode 100644
index 000000000000..64b553e7de1a
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __IRQSRCS_VCN_5_0_H__
+#define __IRQSRCS_VCN_5_0_H__
+
+#define VCN_5_0__SRCID__UVD_TRAP 114 // 0x72 UVD_TRAP
+#define VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE 119 // 0x77 Encoder General Purpose
+#define VCN_5_0__SRCID__UVD_ENC_LOW_LATENCY 120 // 0x78 Encoder Low Latency
+#define VCN_5_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT 124 // 0x7c UVD system message interrupt
+#define VCN_5_0__SRCID__JPEG_ENCODE 151 // 0x97 JRBC Encode interrupt
+#define VCN_5_0__SRCID__JPEG_DECODE 153 // 0x99 JRBC Decode interrupt
+#define VCN_5_0__SRCID__JPEG1_DECODE 149 // 0x95 JRBC1 Decode interrupt
+#define VCN_5_0__SRCID__JPEG2_DECODE 151 // 0x97 JRBC2 Decode interrupt
+#define VCN_5_0__SRCID__JPEG3_DECODE 171 // 0xab JRBC3 Decode interrupt
+#define VCN_5_0__SRCID__JPEG4_DECODE 172 // 0xac JRBC4 Decode interrupt
+#define VCN_5_0__SRCID__JPEG5_DECODE 173 // 0xad JRBC5 Decode interrupt
+#define VCN_5_0__SRCID__JPEG6_DECODE 174 // 0xae JRBC6 Decode interrupt
+#define VCN_5_0__SRCID__JPEG7_DECODE 175 // 0xaf JRBC7 Decode interrupt
+#define VCN_5_0__SRCID__JPEG8_DECODE 177 // 0xb1 JRBC8 Decode interrupt
+#define VCN_5_0__SRCID__JPEG9_DECODE 178 // 0xb2 JRBC9 Decode interrupt
+
+#define VCN_5_0__SRCID_UVD_POISON 160
+#define VCN_5_0__SRCID_DJPEG0_POISON 161
+#define VCN_5_0__SRCID_EJPEG0_POISON 162
+#endif
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 67a5de573943..9189dcb65188 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -164,6 +164,7 @@ enum amd_pp_task {
};
enum PP_SMC_POWER_PROFILE {
+ PP_SMC_POWER_PROFILE_UNKNOWN = -1,
PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT = 0x0,
PP_SMC_POWER_PROFILE_FULLSCREEN3D = 0x1,
PP_SMC_POWER_PROFILE_POWERSAVING = 0x2,
@@ -420,7 +421,9 @@ struct amd_pm_funcs {
int (*load_firmware)(void *handle);
int (*wait_for_fw_loading_complete)(void *handle);
int (*set_powergating_by_smu)(void *handle,
- uint32_t block_type, bool gate);
+ uint32_t block_type,
+ bool gate,
+ int inst);
int (*set_clockgating_by_smu)(void *handle, uint32_t msg_id);
int (*set_power_limit)(void *handle, uint32_t n);
int (*get_power_limit)(void *handle, uint32_t *limit,
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 9dc82f4d7c93..6a9e26905edf 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -70,13 +70,18 @@ int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low)
return ret;
}
-int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block_type, bool gate)
+int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev,
+ uint32_t block_type,
+ bool gate,
+ int inst)
{
int ret = 0;
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
enum ip_power_state pwr_state = gate ? POWER_STATE_OFF : POWER_STATE_ON;
+ bool is_vcn = (block_type == AMD_IP_BLOCK_TYPE_UVD || block_type == AMD_IP_BLOCK_TYPE_VCN);
- if (atomic_read(&adev->pm.pwr_state[block_type]) == pwr_state) {
+ if (atomic_read(&adev->pm.pwr_state[block_type]) == pwr_state &&
+ (!is_vcn || adev->vcn.num_vcn_inst == 1)) {
dev_dbg(adev->dev, "IP block%d already in the target %s state!",
block_type, gate ? "gate" : "ungate");
return 0;
@@ -88,7 +93,6 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block
case AMD_IP_BLOCK_TYPE_UVD:
case AMD_IP_BLOCK_TYPE_VCE:
case AMD_IP_BLOCK_TYPE_GFX:
- case AMD_IP_BLOCK_TYPE_VCN:
case AMD_IP_BLOCK_TYPE_SDMA:
case AMD_IP_BLOCK_TYPE_JPEG:
case AMD_IP_BLOCK_TYPE_GMC:
@@ -96,7 +100,12 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block
case AMD_IP_BLOCK_TYPE_VPE:
if (pp_funcs && pp_funcs->set_powergating_by_smu)
ret = (pp_funcs->set_powergating_by_smu(
- (adev)->powerplay.pp_handle, block_type, gate));
+ (adev)->powerplay.pp_handle, block_type, gate, 0));
+ break;
+ case AMD_IP_BLOCK_TYPE_VCN:
+ if (pp_funcs && pp_funcs->set_powergating_by_smu)
+ ret = (pp_funcs->set_powergating_by_smu(
+ (adev)->powerplay.pp_handle, block_type, gate, inst));
break;
default:
break;
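With the extra inst argument, VCN gating can now be driven per instance. A hedged usage sketch; the helper name and context are illustrative only:

static void example_gate_all_vcn(struct amdgpu_device *adev)
{
	int i;

	/* gate each VCN instance individually instead of all-at-once */
	for (i = 0; i < adev->vcn.num_vcn_inst; i++)
		amdgpu_dpm_enable_vcn(adev, false, i);
}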
@@ -566,7 +575,17 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
return;
}
- ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
+ ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable, 0);
+ if (ret)
+ DRM_ERROR("Dpm %s uvd failed, ret = %d. \n",
+ enable ? "enable" : "disable", ret);
+}
+
+void amdgpu_dpm_enable_vcn(struct amdgpu_device *adev, bool enable, int inst)
+{
+ int ret = 0;
+
+ ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCN, !enable, inst);
if (ret)
DRM_ERROR("Dpm %s uvd failed, ret = %d. \n",
enable ? "enable" : "disable", ret);
@@ -591,7 +610,7 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
return;
}
- ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
+ ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable, 0);
if (ret)
DRM_ERROR("Dpm %s vce failed, ret = %d. \n",
enable ? "enable" : "disable", ret);
@@ -601,7 +620,7 @@ void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable)
{
int ret = 0;
- ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_JPEG, !enable);
+ ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_JPEG, !enable, 0);
if (ret)
DRM_ERROR("Dpm %s jpeg failed, ret = %d. \n",
enable ? "enable" : "disable", ret);
@@ -611,7 +630,7 @@ void amdgpu_dpm_enable_vpe(struct amdgpu_device *adev, bool enable)
{
int ret = 0;
- ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VPE, !enable);
+ ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VPE, !enable, 0);
if (ret)
DRM_ERROR("Dpm %s vpe failed, ret = %d.\n",
enable ? "enable" : "disable", ret);
@@ -700,6 +719,21 @@ int amdgpu_dpm_send_rma_reason(struct amdgpu_device *adev)
return ret;
}
+int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask)
+{
+ struct smu_context *smu = adev->powerplay.pp_handle;
+ int ret;
+
+ if (!is_support_sw_smu(adev))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&adev->pm.mutex);
+ ret = smu_reset_sdma(smu, inst_mask);
+ mutex_unlock(&adev->pm.mutex);
+
+ return ret;
+}
+
int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
enum pp_clock_type type,
uint32_t *min,
@@ -953,6 +987,24 @@ enum amd_dpm_forced_level amdgpu_dpm_get_performance_level(struct amdgpu_device
return level;
}
+static void amdgpu_dpm_enter_umd_state(struct amdgpu_device *adev)
+{
+ /* enter UMD Pstate */
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
+ AMD_PG_STATE_UNGATE);
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
+ AMD_CG_STATE_UNGATE);
+}
+
+static void amdgpu_dpm_exit_umd_state(struct amdgpu_device *adev)
+{
+ /* exit UMD Pstate */
+ amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
+ AMD_CG_STATE_GATE);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
+ AMD_PG_STATE_GATE);
+}
+
int amdgpu_dpm_force_performance_level(struct amdgpu_device *adev,
enum amd_dpm_forced_level level)
{
@@ -973,6 +1025,10 @@ int amdgpu_dpm_force_performance_level(struct amdgpu_device *adev,
if (current_level == level)
return 0;
+ if (!(current_level & profile_mode_mask) &&
+ (level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT))
+ return -EINVAL;
+
if (adev->asic_type == CHIP_RAVEN) {
if (!(adev->apu_flags & AMD_APU_IS_RAVEN2)) {
if (current_level != AMD_DPM_FORCED_LEVEL_MANUAL &&
@@ -984,35 +1040,25 @@ int amdgpu_dpm_force_performance_level(struct amdgpu_device *adev,
}
}
- if (!(current_level & profile_mode_mask) &&
- (level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT))
- return -EINVAL;
-
- if (!(current_level & profile_mode_mask) &&
- (level & profile_mode_mask)) {
- /* enter UMD Pstate */
- amdgpu_device_ip_set_powergating_state(adev,
- AMD_IP_BLOCK_TYPE_GFX,
- AMD_PG_STATE_UNGATE);
- amdgpu_device_ip_set_clockgating_state(adev,
- AMD_IP_BLOCK_TYPE_GFX,
- AMD_CG_STATE_UNGATE);
- } else if ((current_level & profile_mode_mask) &&
- !(level & profile_mode_mask)) {
- /* exit UMD Pstate */
- amdgpu_device_ip_set_clockgating_state(adev,
- AMD_IP_BLOCK_TYPE_GFX,
- AMD_CG_STATE_GATE);
- amdgpu_device_ip_set_powergating_state(adev,
- AMD_IP_BLOCK_TYPE_GFX,
- AMD_PG_STATE_GATE);
- }
+ if (!(current_level & profile_mode_mask) && (level & profile_mode_mask))
+ amdgpu_dpm_enter_umd_state(adev);
+ else if ((current_level & profile_mode_mask) &&
+ !(level & profile_mode_mask))
+ amdgpu_dpm_exit_umd_state(adev);
mutex_lock(&adev->pm.mutex);
if (pp_funcs->force_performance_level(adev->powerplay.pp_handle,
level)) {
mutex_unlock(&adev->pm.mutex);
+ /* If the new level could not be applied, restore the previous UMD pstate */
+ if (!(current_level & profile_mode_mask) &&
+ (level & profile_mode_mask))
+ amdgpu_dpm_exit_umd_state(adev);
+ else if ((current_level & profile_mode_mask) &&
+ !(level & profile_mode_mask))
+ amdgpu_dpm_enter_umd_state(adev);
+
return -EINVAL;
}
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 136e8193867c..e8ae7681bf0a 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -1361,7 +1361,11 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
* create a custom set of heuristics, write a string of numbers to the file
* starting with the number of the custom profile along with a setting
* for each heuristic parameter. Due to differences across asic families
- * the heuristic parameters vary from family to family.
+ * the heuristic parameters vary from family to family. Additionally,
+ * you can apply the custom heuristics to different clock domains. Each
+ * clock domain is considered a distinct operation, so if you modify the
+ * gfxclk heuristics and then the memclk heuristics, all of the
+ * custom heuristics will be retained until you switch to another profile.
*
*/
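As a hedged illustration of the per-clock-domain behaviour described above: on arcturus-class parts the string is the custom profile number, a clock selector, and nine heuristic values, matching the arcturus handler later in this patch. The sysfs path, card index, and all values below are placeholders; the parameter count and meanings are family-specific:

#include <stdio.h>

int main(void)
{
	/* assumed sysfs node; the card index varies per system */
	FILE *f = fopen("/sys/class/drm/card0/device/pp_power_profile_mode", "w");

	if (!f)
		return 1;
	/* "<custom profile> <clock selector> <heuristic values...>" */
	fprintf(f, "6 0 30 90 1 30 0 0 30 0 6\n");	/* gfxclk heuristics */
	fflush(f);
	fprintf(f, "6 1 30 90 1 30 0 0 30 0 6\n");	/* memclk heuristics */
	fclose(f);					/* both sets remain applied */
	return 0;
}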
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index 363af8990aa2..1f5ac7e0230d 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -397,7 +397,7 @@ int amdgpu_dpm_get_apu_thermal_limit(struct amdgpu_device *adev, uint32_t *limit
int amdgpu_dpm_set_apu_thermal_limit(struct amdgpu_device *adev, uint32_t limit);
int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev,
- uint32_t block_type, bool gate);
+ uint32_t block_type, bool gate, int inst);
extern int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low);
@@ -446,6 +446,7 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev);
void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev);
void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable);
+void amdgpu_dpm_enable_vcn(struct amdgpu_device *adev, bool enable, int inst);
void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable);
void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable);
void amdgpu_dpm_enable_vpe(struct amdgpu_device *adev, bool enable);
@@ -601,5 +602,6 @@ int amdgpu_dpm_set_pm_policy(struct amdgpu_device *adev, int policy_type,
int policy_level);
ssize_t amdgpu_dpm_get_pm_policy_info(struct amdgpu_device *adev,
enum pp_pm_policy p_type, char *buf);
+int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask);
#endif
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
index 8908646ad620..67a8e22b1126 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
@@ -3177,13 +3177,13 @@ static int kv_dpm_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int kv_dpm_set_clockgating_state(void *handle,
+static int kv_dpm_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int kv_dpm_set_powergating_state(void *handle,
+static int kv_dpm_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -3276,7 +3276,9 @@ static int kv_dpm_read_sensor(void *handle, int idx,
}
static int kv_set_powergating_by_smu(void *handle,
- uint32_t block_type, bool gate)
+ uint32_t block_type,
+ bool gate,
+ int inst)
{
switch (block_type) {
case AMD_IP_BLOCK_TYPE_UVD:
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
index ee23a0f897c5..a87dcf0974bc 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
@@ -7709,7 +7709,8 @@ static int si_dpm_init_microcode(struct amdgpu_device *adev)
default: BUG();
}
- err = amdgpu_ucode_request(adev, &adev->pm.fw, "amdgpu/%s_smc.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_smc.bin", chip_name);
if (err) {
DRM_ERROR("si_smc: Failed to load firmware. err = %d\"%s_smc.bin\"\n",
err, chip_name);
@@ -7849,13 +7850,13 @@ static int si_dpm_wait_for_idle(struct amdgpu_ip_block *ip_block)
return 0;
}
-static int si_dpm_set_clockgating_state(void *handle,
+static int si_dpm_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int si_dpm_set_powergating_state(void *handle,
+static int si_dpm_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
index 26624a716fc6..686345f75f26 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
@@ -244,7 +244,7 @@ static bool pp_is_idle(void *handle)
return false;
}
-static int pp_set_powergating_state(void *handle,
+static int pp_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -267,7 +267,7 @@ static int pp_resume(struct amdgpu_ip_block *ip_block)
return hwmgr_resume(hwmgr);
}
-static int pp_set_clockgating_state(void *handle,
+static int pp_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
@@ -1227,7 +1227,9 @@ static void pp_dpm_powergate_sdma(void *handle, bool gate)
}
static int pp_set_powergating_by_smu(void *handle,
- uint32_t block_type, bool gate)
+ uint32_t block_type,
+ bool gate,
+ int inst)
{
int ret = 0;
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c
index fe24219c3bf4..4bd92fd782be 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c
@@ -992,6 +992,8 @@ int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctr
GetIndexIntoMasterTable(DATA, SMU_Info),
&size, &frev, &crev);
+ if (!psmu_info)
+ return -EINVAL;
for (i = 0; i < psmu_info->ucSclkEntryNum; i++) {
table->entry[i].ucVco_setting = psmu_info->asSclkFcwRangeEntry[i].ucVco_setting;
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c
index 3007b054c873..776d58ea63ae 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c
@@ -1120,13 +1120,14 @@ static int vega10_enable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr)
result = vega10_program_didt_config_registers(hwmgr, SEEDCForceStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT);
result |= vega10_program_didt_config_registers(hwmgr, SEEDCCtrlForceStallConfig_Vega10, VEGA10_CONFIGREG_DIDT);
if (0 != result)
- return result;
+ goto exit_safe_mode;
vega10_didt_set_mask(hwmgr, false);
+exit_safe_mode:
amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
- return 0;
+ return result;
}
static int vega10_disable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index b8355293518f..8ca793c222ff 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -72,6 +72,10 @@ static int smu_set_power_limit(void *handle, uint32_t limit);
static int smu_set_fan_speed_rpm(void *handle, uint32_t speed);
static int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled);
static int smu_set_mp1_state(void *handle, enum pp_mp1_state mp1_state);
+static void smu_power_profile_mode_get(struct smu_context *smu,
+ enum PP_SMC_POWER_PROFILE profile_mode);
+static void smu_power_profile_mode_put(struct smu_context *smu,
+ enum PP_SMC_POWER_PROFILE profile_mode);
static int smu_sys_get_pp_feature_mask(void *handle,
char *buf)
@@ -234,7 +238,8 @@ static bool is_vcn_enabled(struct amdgpu_device *adev)
}
static int smu_dpm_set_vcn_enable(struct smu_context *smu,
- bool enable)
+ bool enable,
+ int inst)
{
struct smu_power_context *smu_power = &smu->smu_power;
struct smu_power_gate *power_gate = &smu_power->power_gate;
@@ -249,12 +254,12 @@ static int smu_dpm_set_vcn_enable(struct smu_context *smu,
if (!smu->ppt_funcs->dpm_set_vcn_enable)
return 0;
- if (atomic_read(&power_gate->vcn_gated) ^ enable)
+ if (atomic_read(&power_gate->vcn_gated[inst]) ^ enable)
return 0;
- ret = smu->ppt_funcs->dpm_set_vcn_enable(smu, enable, 0xff);
+ ret = smu->ppt_funcs->dpm_set_vcn_enable(smu, enable, inst);
if (!ret)
- atomic_set(&power_gate->vcn_gated, !enable);
+ atomic_set(&power_gate->vcn_gated[inst], !enable);
return ret;
}
@@ -341,8 +346,9 @@ static int smu_set_mall_enable(struct smu_context *smu)
* smu_dpm_set_power_gate - power gate/ungate the specific IP block
*
* @handle: smu_context pointer
- * @block_type: the IP block to power gate/ungate
- * @gate: to power gate if true, ungate otherwise
+ * @block_type: the IP block to power gate/ungate
+ * @gate: to power gate if true, ungate otherwise
+ * @inst: the instance of the IP block to power gate/ungate
*
* This API uses no smu->mutex lock protection due to:
* 1. It is either called by other IP block(gfx/sdma/vcn/uvd/vce).
@@ -353,7 +359,8 @@ static int smu_set_mall_enable(struct smu_context *smu)
*/
static int smu_dpm_set_power_gate(void *handle,
uint32_t block_type,
- bool gate)
+ bool gate,
+ int inst)
{
struct smu_context *smu = handle;
int ret = 0;
@@ -372,10 +379,10 @@ static int smu_dpm_set_power_gate(void *handle,
*/
case AMD_IP_BLOCK_TYPE_UVD:
case AMD_IP_BLOCK_TYPE_VCN:
- ret = smu_dpm_set_vcn_enable(smu, !gate);
+ ret = smu_dpm_set_vcn_enable(smu, !gate, inst);
if (ret)
- dev_err(smu->adev->dev, "Failed to power %s VCN!\n",
- gate ? "gate" : "ungate");
+ dev_err(smu->adev->dev, "Failed to power %s VCN instance %d!\n",
+ gate ? "gate" : "ungate", inst);
break;
case AMD_IP_BLOCK_TYPE_GFX:
ret = smu_gfx_off_control(smu, gate);
@@ -720,6 +727,7 @@ static int smu_set_funcs(struct amdgpu_device *adev)
break;
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 14):
+ case IP_VERSION(13, 0, 12):
smu_v13_0_6_set_ppt_funcs(smu);
/* Enable pp_od_clk_voltage node */
smu->od_enabled = true;
@@ -760,6 +768,7 @@ static int smu_early_init(struct amdgpu_ip_block *ip_block)
smu->smu_baco.platform_support = false;
smu->smu_baco.maco_support = false;
smu->user_dpm_profile.fan_mode = -1;
+ smu->power_profile_mode = PP_SMC_POWER_PROFILE_UNKNOWN;
mutex_init(&smu->message_lock);
@@ -777,21 +786,25 @@ static int smu_set_default_dpm_table(struct smu_context *smu)
struct amdgpu_device *adev = smu->adev;
struct smu_power_context *smu_power = &smu->smu_power;
struct smu_power_gate *power_gate = &smu_power->power_gate;
- int vcn_gate, jpeg_gate;
+ int vcn_gate[AMDGPU_MAX_VCN_INSTANCES], jpeg_gate, i;
int ret = 0;
if (!smu->ppt_funcs->set_default_dpm_table)
return 0;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
- vcn_gate = atomic_read(&power_gate->vcn_gated);
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ vcn_gate[i] = atomic_read(&power_gate->vcn_gated[i]);
+ }
if (adev->pg_flags & AMD_PG_SUPPORT_JPEG)
jpeg_gate = atomic_read(&power_gate->jpeg_gated);
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
- ret = smu_dpm_set_vcn_enable(smu, true);
- if (ret)
- return ret;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ ret = smu_dpm_set_vcn_enable(smu, true, i);
+ if (ret)
+ return ret;
+ }
}
if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {
@@ -808,8 +821,10 @@ static int smu_set_default_dpm_table(struct smu_context *smu)
if (adev->pg_flags & AMD_PG_SUPPORT_JPEG)
smu_dpm_set_jpeg_enable(smu, !jpeg_gate);
err_out:
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
- smu_dpm_set_vcn_enable(smu, !vcn_gate);
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ smu_dpm_set_vcn_enable(smu, !vcn_gate[i], i);
+ }
return ret;
}
@@ -1244,11 +1259,26 @@ static bool smu_is_workload_profile_available(struct smu_context *smu,
return smu->workload_map && smu->workload_map[profile].valid_mapping;
}
+static void smu_init_power_profile(struct smu_context *smu)
+{
+ if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_UNKNOWN) {
+ if (smu->is_apu ||
+ !smu_is_workload_profile_available(
+ smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D))
+ smu->power_profile_mode =
+ PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
+ else
+ smu->power_profile_mode =
+ PP_SMC_POWER_PROFILE_FULLSCREEN3D;
+ }
+ smu_power_profile_mode_get(smu, smu->power_profile_mode);
+}
+
static int smu_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
struct smu_context *smu = adev->powerplay.pp_handle;
- int ret;
+ int i, ret;
smu->pool_size = adev->pm.smu_prv_buffer_size;
smu->smu_feature.feature_num = SMU_FEATURE_MAX;
@@ -1259,42 +1289,14 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block)
INIT_WORK(&smu->interrupt_work, smu_interrupt_work_fn);
atomic64_set(&smu->throttle_int_counter, 0);
smu->watermarks_bitmap = 0;
- smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
- smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
- smu->user_dpm_profile.user_workload_mask = 0;
- atomic_set(&smu->smu_power.power_gate.vcn_gated, 1);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ atomic_set(&smu->smu_power.power_gate.vcn_gated[i], 1);
atomic_set(&smu->smu_power.power_gate.jpeg_gated, 1);
atomic_set(&smu->smu_power.power_gate.vpe_gated, 1);
atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1);
- smu->workload_priority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0;
- smu->workload_priority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1;
- smu->workload_priority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2;
- smu->workload_priority[PP_SMC_POWER_PROFILE_VIDEO] = 3;
- smu->workload_priority[PP_SMC_POWER_PROFILE_VR] = 4;
- smu->workload_priority[PP_SMC_POWER_PROFILE_COMPUTE] = 5;
- smu->workload_priority[PP_SMC_POWER_PROFILE_CUSTOM] = 6;
-
- if (smu->is_apu ||
- !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) {
- smu->driver_workload_mask =
- 1 << smu->workload_priority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT];
- } else {
- smu->driver_workload_mask =
- 1 << smu->workload_priority[PP_SMC_POWER_PROFILE_FULLSCREEN3D];
- smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
- }
-
- smu->workload_mask = smu->driver_workload_mask |
- smu->user_dpm_profile.user_workload_mask;
- smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
- smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
- smu->workload_setting[2] = PP_SMC_POWER_PROFILE_POWERSAVING;
- smu->workload_setting[3] = PP_SMC_POWER_PROFILE_VIDEO;
- smu->workload_setting[4] = PP_SMC_POWER_PROFILE_VR;
- smu->workload_setting[5] = PP_SMC_POWER_PROFILE_COMPUTE;
- smu->workload_setting[6] = PP_SMC_POWER_PROFILE_CUSTOM;
+ smu_init_power_profile(smu);
smu->display_config = &adev->pm.pm_display_cfg;
smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO;
@@ -1347,6 +1349,11 @@ static int smu_sw_fini(struct amdgpu_ip_block *ip_block)
return ret;
}
+ if (smu->custom_profile_params) {
+ kfree(smu->custom_profile_params);
+ smu->custom_profile_params = NULL;
+ }
+
smu_fini_microcode(smu);
return 0;
@@ -1814,7 +1821,7 @@ static int smu_start_smc_engine(struct smu_context *smu)
static int smu_hw_init(struct amdgpu_ip_block *ip_block)
{
- int ret;
+ int i, ret;
struct amdgpu_device *adev = ip_block->adev;
struct smu_context *smu = adev->powerplay.pp_handle;
@@ -1840,7 +1847,8 @@ static int smu_hw_init(struct amdgpu_ip_block *ip_block)
ret = smu_set_gfx_imu_enable(smu);
if (ret)
return ret;
- smu_dpm_set_vcn_enable(smu, true);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ smu_dpm_set_vcn_enable(smu, true, i);
smu_dpm_set_jpeg_enable(smu, true);
smu_dpm_set_vpe_enable(smu, true);
smu_dpm_set_umsch_mm_enable(smu, true);
@@ -2038,12 +2046,13 @@ static int smu_hw_fini(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
struct smu_context *smu = adev->powerplay.pp_handle;
- int ret;
+ int i, ret;
if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
return 0;
- smu_dpm_set_vcn_enable(smu, false);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ smu_dpm_set_vcn_enable(smu, false, i);
smu_dpm_set_jpeg_enable(smu, false);
smu_dpm_set_vpe_enable(smu, false);
smu_dpm_set_umsch_mm_enable(smu, false);
@@ -2131,6 +2140,9 @@ static int smu_suspend(struct amdgpu_ip_block *ip_block)
if (!ret)
adev->gfx.gfx_off_entrycount = count;
+ /* clear this on suspend so it will get reprogrammed on resume */
+ smu->workload_mask = 0;
+
return 0;
}
@@ -2192,13 +2204,13 @@ static int smu_display_configuration_change(void *handle,
return 0;
}
-static int smu_set_clockgating_state(void *handle,
+static int smu_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int smu_set_powergating_state(void *handle,
+static int smu_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -2243,25 +2255,49 @@ static int smu_enable_umd_pstate(void *handle,
}
static int smu_bump_power_profile_mode(struct smu_context *smu,
- long *param,
- uint32_t param_size)
+ long *custom_params,
+ u32 custom_params_max_idx)
{
- int ret = 0;
+ u32 workload_mask = 0;
+ int i, ret = 0;
+
+ for (i = 0; i < PP_SMC_POWER_PROFILE_COUNT; i++) {
+ if (smu->workload_refcount[i])
+ workload_mask |= 1 << i;
+ }
+
+ if (smu->workload_mask == workload_mask)
+ return 0;
if (smu->ppt_funcs->set_power_profile_mode)
- ret = smu->ppt_funcs->set_power_profile_mode(smu, param, param_size);
+ ret = smu->ppt_funcs->set_power_profile_mode(smu, workload_mask,
+ custom_params,
+ custom_params_max_idx);
+
+ if (!ret)
+ smu->workload_mask = workload_mask;
return ret;
}
+static void smu_power_profile_mode_get(struct smu_context *smu,
+ enum PP_SMC_POWER_PROFILE profile_mode)
+{
+ smu->workload_refcount[profile_mode]++;
+}
+
+static void smu_power_profile_mode_put(struct smu_context *smu,
+ enum PP_SMC_POWER_PROFILE profile_mode)
+{
+ if (smu->workload_refcount[profile_mode])
+ smu->workload_refcount[profile_mode]--;
+}
+
static int smu_adjust_power_state_dynamic(struct smu_context *smu,
enum amd_dpm_forced_level level,
- bool skip_display_settings,
- bool init)
+ bool skip_display_settings)
{
int ret = 0;
- int index = 0;
- long workload[1];
struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
if (!skip_display_settings) {
@@ -2298,14 +2334,8 @@ static int smu_adjust_power_state_dynamic(struct smu_context *smu,
}
if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL &&
- smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) {
- index = fls(smu->workload_mask);
- index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
- workload[0] = smu->workload_setting[index];
-
- if (init || smu->power_profile_mode != workload[0])
- smu_bump_power_profile_mode(smu, workload, 0);
- }
+ smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM)
+ smu_bump_power_profile_mode(smu, NULL, 0);
return ret;
}
@@ -2324,13 +2354,13 @@ static int smu_handle_task(struct smu_context *smu,
ret = smu_pre_display_config_changed(smu);
if (ret)
return ret;
- ret = smu_adjust_power_state_dynamic(smu, level, false, false);
+ ret = smu_adjust_power_state_dynamic(smu, level, false);
break;
case AMD_PP_TASK_COMPLETE_INIT:
- ret = smu_adjust_power_state_dynamic(smu, level, true, true);
+ ret = smu_adjust_power_state_dynamic(smu, level, true);
break;
case AMD_PP_TASK_READJUST_POWER_STATE:
- ret = smu_adjust_power_state_dynamic(smu, level, true, false);
+ ret = smu_adjust_power_state_dynamic(smu, level, true);
break;
default:
break;
@@ -2352,12 +2382,11 @@ static int smu_handle_dpm_task(void *handle,
static int smu_switch_power_profile(void *handle,
enum PP_SMC_POWER_PROFILE type,
- bool en)
+ bool enable)
{
struct smu_context *smu = handle;
struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
- long workload[1];
- uint32_t index;
+ int ret;
if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
return -EOPNOTSUPP;
@@ -2365,24 +2394,21 @@ static int smu_switch_power_profile(void *handle,
if (!(type < PP_SMC_POWER_PROFILE_CUSTOM))
return -EINVAL;
- if (!en) {
- smu->driver_workload_mask &= ~(1 << smu->workload_priority[type]);
- index = fls(smu->workload_mask);
- index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
- workload[0] = smu->workload_setting[index];
- } else {
- smu->driver_workload_mask |= (1 << smu->workload_priority[type]);
- index = fls(smu->workload_mask);
- index = index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
- workload[0] = smu->workload_setting[index];
- }
-
- smu->workload_mask = smu->driver_workload_mask |
- smu->user_dpm_profile.user_workload_mask;
-
if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL &&
- smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM)
- smu_bump_power_profile_mode(smu, workload, 0);
+ smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) {
+ if (enable)
+ smu_power_profile_mode_get(smu, type);
+ else
+ smu_power_profile_mode_put(smu, type);
+ ret = smu_bump_power_profile_mode(smu, NULL, 0);
+ if (ret) {
+ if (enable)
+ smu_power_profile_mode_put(smu, type);
+ else
+ smu_power_profile_mode_get(smu, type);
+ return ret;
+ }
+ }
return 0;
}
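With the refcounted scheme, enable/disable pairs from independent callers compose safely. A hedged sketch of how a client brackets a workload; the compute-work call is a placeholder:

static void example_compute_burst(struct amdgpu_device *adev)
{
	/* hint the SMU while a burst of compute work is in flight; the refcount
	 * keeps this safe even if another client holds the same profile */
	amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_COMPUTE, true);
	submit_compute_work(adev);	/* placeholder for the real work */
	amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_COMPUTE, false);
}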
@@ -2966,9 +2992,10 @@ static int smu_read_sensor(void *handle,
int *size_arg)
{
struct smu_context *smu = handle;
+ struct amdgpu_device *adev = smu->adev;
struct smu_umd_pstate_table *pstate_table =
&smu->pstate_table;
- int ret = 0;
+ int i, ret = 0;
uint32_t *size, size_val;
if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
@@ -3014,7 +3041,13 @@ static int smu_read_sensor(void *handle,
*size = 4;
break;
case AMDGPU_PP_SENSOR_VCN_POWER_STATE:
- *(uint32_t *)data = atomic_read(&smu->smu_power.power_gate.vcn_gated) ? 0 : 1;
+ *(uint32_t *)data = 0;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (!atomic_read(&smu->smu_power.power_gate.vcn_gated[i])) {
+ *(uint32_t *)data = 1;
+ break;
+ }
+ }
*size = 4;
break;
case AMDGPU_PP_SENSOR_MIN_FAN_RPM:
@@ -3074,21 +3107,33 @@ static int smu_set_power_profile_mode(void *handle,
uint32_t param_size)
{
struct smu_context *smu = handle;
- int ret;
+ bool custom = false;
+ int ret = 0;
if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled ||
!smu->ppt_funcs->set_power_profile_mode)
return -EOPNOTSUPP;
- if (smu->user_dpm_profile.user_workload_mask &
- (1 << smu->workload_priority[param[param_size]]))
- return 0;
+ if (param[param_size] == PP_SMC_POWER_PROFILE_CUSTOM) {
+ custom = true;
+ /* clear frontend mask so custom changes propagate */
+ smu->workload_mask = 0;
+ }
- smu->user_dpm_profile.user_workload_mask =
- (1 << smu->workload_priority[param[param_size]]);
- smu->workload_mask = smu->user_dpm_profile.user_workload_mask |
- smu->driver_workload_mask;
- ret = smu_bump_power_profile_mode(smu, param, param_size);
+ if ((param[param_size] != smu->power_profile_mode) || custom) {
+ /* clear the old user preference */
+ smu_power_profile_mode_put(smu, smu->power_profile_mode);
+ /* set the new user preference */
+ smu_power_profile_mode_get(smu, param[param_size]);
+ ret = smu_bump_power_profile_mode(smu,
+ custom ? param : NULL,
+ custom ? param_size : 0);
+ if (ret)
+ smu_power_profile_mode_put(smu, param[param_size]);
+ else
+ /* store the user's preference */
+ smu->power_profile_mode = param[param_size];
+ }
return ret;
}
@@ -3870,3 +3915,13 @@ int smu_send_rma_reason(struct smu_context *smu)
return ret;
}
+
+int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask)
+{
+ int ret = 0;
+
+ if (smu->ppt_funcs && smu->ppt_funcs->reset_sdma)
+ ret = smu->ppt_funcs->reset_sdma(smu, inst_mask);
+
+ return ret;
+}
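A hedged usage sketch of the new SDMA reset path as seen through the dpm wrapper; the caller, instance choice, and error handling are illustrative only:

static void example_recover_sdma0(struct amdgpu_device *adev)
{
	/* ask the SMU to soft-reset SDMA instance 0; inst_mask is a bitmask */
	int r = amdgpu_dpm_reset_sdma(adev, 1u << 0);

	if (r == -EOPNOTSUPP)
		dev_dbg(adev->dev, "SDMA soft reset not supported\n");
	else if (r)
		dev_err(adev->dev, "SDMA soft reset failed: %d\n", r);
}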
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index d665c47f19b7..3630593bce61 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -240,7 +240,6 @@ struct smu_user_dpm_profile {
/* user clock state information */
uint32_t clk_mask[SMU_CLK_COUNT];
uint32_t clk_dependency;
- uint32_t user_workload_mask;
};
#define SMU_TABLE_INIT(tables, table_id, s, a, d) \
@@ -400,7 +399,7 @@ struct smu_dpm_context {
struct smu_power_gate {
bool uvd_gated;
bool vce_gated;
- atomic_t vcn_gated;
+ atomic_t vcn_gated[AMDGPU_MAX_VCN_INSTANCES];
atomic_t jpeg_gated;
atomic_t vpe_gated;
atomic_t umsch_mm_gated;
@@ -557,12 +556,13 @@ struct smu_context {
uint32_t hard_min_uclk_req_from_dal;
bool disable_uclk_switch;
+ /* asic agnostic workload mask */
uint32_t workload_mask;
- uint32_t driver_workload_mask;
- uint32_t workload_priority[WORKLOAD_POLICY_MAX];
- uint32_t workload_setting[WORKLOAD_POLICY_MAX];
+ /* default/user workload preference */
uint32_t power_profile_mode;
- uint32_t default_power_profile_mode;
+ uint32_t workload_refcount[PP_SMC_POWER_PROFILE_COUNT];
+ /* backend specific custom workload settings */
+ long *custom_profile_params;
bool pm_enabled;
bool is_apu;
@@ -733,9 +733,12 @@ struct pptable_funcs {
* @set_power_profile_mode: Set a power profile mode. Also used to
* create/set custom power profile modes.
* &input: Power profile mode parameters.
- * &size: Size of &input.
+ * &workload_mask: mask of workloads to enable
+ * &custom_params: custom profile parameters
+ * &custom_params_max_idx: max valid idx into custom_params
*/
- int (*set_power_profile_mode)(struct smu_context *smu, long *input, uint32_t size);
+ int (*set_power_profile_mode)(struct smu_context *smu, u32 workload_mask,
+ long *custom_params, u32 custom_params_max_idx);
/**
* @dpm_set_vcn_enable: Enable/disable VCN engine dynamic power
@@ -1370,6 +1373,11 @@ struct pptable_funcs {
int (*send_rma_reason)(struct smu_context *smu);
/**
+ * @reset_sdma: message SMU to soft reset sdma instance.
+ */
+ int (*reset_sdma)(struct smu_context *smu, uint32_t inst_mask);
+
+ /**
* @get_ecc_table: message SMU to get ECC INFO table.
*/
ssize_t (*get_ecc_info)(struct smu_context *smu, void *table);
@@ -1628,6 +1636,7 @@ void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev);
int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size);
int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size);
int smu_send_rma_reason(struct smu_context *smu);
+int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask);
int smu_set_pm_policy(struct smu_context *smu, enum pp_pm_policy p_type,
int level);
ssize_t smu_get_pm_policy_info(struct smu_context *smu,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
index 0f96b8c59a0e..274b3e1cc4fb 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
@@ -34,6 +34,8 @@
#define NUM_PCIE_BITRATES 4
#define NUM_XGMI_BITRATES 4
#define NUM_XGMI_WIDTHS 3
+#define NUM_SOC_P2S_TABLES 3
+#define NUM_TDP_GROUPS 4
typedef enum {
/*0*/ FEATURE_DATA_CALCULATION = 0,
@@ -80,8 +82,10 @@ typedef enum {
/*41*/ FEATURE_CXL_QOS = 41,
/*42*/ FEATURE_SOC_DC_RTC = 42,
/*43*/ FEATURE_GFX_DC_RTC = 43,
+/*44*/ FEATURE_DVM_MIN_PSM = 44,
+/*45*/ FEATURE_PRC = 45,
-/*44*/ NUM_FEATURES = 44
+/*46*/ NUM_FEATURES = 46
} FEATURE_LIST_e;
//enum for MPIO PCIe gen speed msgs
@@ -123,7 +127,7 @@ typedef enum {
VOLTAGE_GUARDBAND_COUNT
} GFX_GUARDBAND_e;
-#define SMU_METRICS_TABLE_VERSION 0xE
+#define SMU_METRICS_TABLE_VERSION 0xF
typedef struct __attribute__((packed, aligned(4))) {
uint32_t AccumulationCounter;
@@ -234,6 +238,9 @@ typedef struct __attribute__((packed, aligned(4))) {
//PCIE BW Data and error count
uint32_t PCIeOtherEndRecoveryAcc; // The Pcie counter itself is accumulated
+
+ //Total App Clock Counter
+ uint64_t GfxclkBelowHostLimitAcc[8];
} MetricsTableX_t;
typedef struct __attribute__((packed, aligned(4))) {
@@ -328,13 +335,14 @@ typedef struct __attribute__((packed, aligned(4))) {
uint32_t JpegBusy[32];
} MetricsTableA_t;
-#define SMU_VF_METRICS_TABLE_VERSION 0x3
+#define SMU_VF_METRICS_TABLE_VERSION 0x5
typedef struct __attribute__((packed, aligned(4))) {
uint32_t AccumulationCounter;
uint32_t InstGfxclk_TargFreq;
uint64_t AccGfxclk_TargFreq;
uint64_t AccGfxRsmuDpm_Busy;
+ uint64_t AccGfxclkBelowHostLimit;
} VfMetricsTable_t;
#endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
index 41cb681927e2..7b65a27fb302 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
@@ -93,7 +93,9 @@
#define PPSMC_MSG_SelectPLPDMode 0x40
#define PPSMC_MSG_RmaDueToBadPageThreshold 0x43
#define PPSMC_MSG_SelectPstatePolicy 0x44
-#define PPSMC_Message_Count 0x45
+#define PPSMC_MSG_ResetSDMA2 0x45
+#define PPSMC_MSG_ResetSDMA 0x4D
+#define PPSMC_Message_Count 0x4E
//PPSMC Reset Types for driver msg argument
#define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x1
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index a299dc4a8071..b0dab9797c70 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -275,7 +275,9 @@
__SMU_DUMMY_MAP(RmaDueToBadPageThreshold), \
__SMU_DUMMY_MAP(SelectPstatePolicy), \
__SMU_DUMMY_MAP(MALLPowerController), \
- __SMU_DUMMY_MAP(MALLPowerState),
+ __SMU_DUMMY_MAP(MALLPowerState), \
+ __SMU_DUMMY_MAP(ResetSDMA), \
+ __SMU_DUMMY_MAP(ResetSDMA2),
#undef __SMU_DUMMY_MAP
#define __SMU_DUMMY_MAP(type) SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index ae3563d71fa0..356d9422b411 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -303,5 +303,7 @@ int smu_v13_0_set_wbrf_exclusion_ranges(struct smu_context *smu,
int smu_v13_0_get_boot_freq_by_index(struct smu_context *smu,
enum smu_clk_type clk_type,
uint32_t *value);
+
+void smu_v13_0_interrupt_work(struct smu_context *smu);
#endif
#endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index 12125303bb79..8aa61a9f7778 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -1445,97 +1445,120 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu,
return size;
}
-static int arcturus_set_power_profile_mode(struct smu_context *smu,
- long *input,
- uint32_t size)
+#define ARCTURUS_CUSTOM_PARAMS_COUNT 10
+#define ARCTURUS_CUSTOM_PARAMS_CLOCK_COUNT 2
+#define ARCTURUS_CUSTOM_PARAMS_SIZE (ARCTURUS_CUSTOM_PARAMS_CLOCK_COUNT * ARCTURUS_CUSTOM_PARAMS_COUNT * sizeof(long))
+
+static int arcturus_set_power_profile_mode_coeff(struct smu_context *smu,
+ long *input)
{
DpmActivityMonitorCoeffInt_t activity_monitor;
- int workload_type = 0;
- uint32_t profile_mode = input[size];
- int ret = 0;
+ int ret, idx;
- if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
- dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode);
- return -EINVAL;
+ ret = smu_cmn_update_table(smu,
+ SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+ WORKLOAD_PPLIB_CUSTOM_BIT,
+ (void *)(&activity_monitor),
+ false);
+ if (ret) {
+ dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
+ return ret;
}
- if ((profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) &&
- (smu->smc_fw_version >= 0x360d00)) {
- if (size != 10)
- return -EINVAL;
+ idx = 0 * ARCTURUS_CUSTOM_PARAMS_COUNT;
+ if (input[idx]) {
+ /* Gfxclk */
+ activity_monitor.Gfx_FPS = input[idx + 1];
+ activity_monitor.Gfx_UseRlcBusy = input[idx + 2];
+ activity_monitor.Gfx_MinActiveFreqType = input[idx + 3];
+ activity_monitor.Gfx_MinActiveFreq = input[idx + 4];
+ activity_monitor.Gfx_BoosterFreqType = input[idx + 5];
+ activity_monitor.Gfx_BoosterFreq = input[idx + 6];
+ activity_monitor.Gfx_PD_Data_limit_c = input[idx + 7];
+ activity_monitor.Gfx_PD_Data_error_coeff = input[idx + 8];
+ activity_monitor.Gfx_PD_Data_error_rate_coeff = input[idx + 9];
+ }
+ idx = 1 * ARCTURUS_CUSTOM_PARAMS_COUNT;
+ if (input[idx]) {
+ /* Uclk */
+ activity_monitor.Mem_FPS = input[idx + 1];
+ activity_monitor.Mem_UseRlcBusy = input[idx + 2];
+ activity_monitor.Mem_MinActiveFreqType = input[idx + 3];
+ activity_monitor.Mem_MinActiveFreq = input[idx + 4];
+ activity_monitor.Mem_BoosterFreqType = input[idx + 5];
+ activity_monitor.Mem_BoosterFreq = input[idx + 6];
+ activity_monitor.Mem_PD_Data_limit_c = input[idx + 7];
+ activity_monitor.Mem_PD_Data_error_coeff = input[idx + 8];
+ activity_monitor.Mem_PD_Data_error_rate_coeff = input[idx + 9];
+ }
- ret = smu_cmn_update_table(smu,
- SMU_TABLE_ACTIVITY_MONITOR_COEFF,
- WORKLOAD_PPLIB_CUSTOM_BIT,
- (void *)(&activity_monitor),
- false);
- if (ret) {
- dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
- return ret;
- }
+ ret = smu_cmn_update_table(smu,
+ SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+ WORKLOAD_PPLIB_CUSTOM_BIT,
+ (void *)(&activity_monitor),
+ true);
+ if (ret) {
+ dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
+ return ret;
+ }
- switch (input[0]) {
- case 0: /* Gfxclk */
- activity_monitor.Gfx_FPS = input[1];
- activity_monitor.Gfx_UseRlcBusy = input[2];
- activity_monitor.Gfx_MinActiveFreqType = input[3];
- activity_monitor.Gfx_MinActiveFreq = input[4];
- activity_monitor.Gfx_BoosterFreqType = input[5];
- activity_monitor.Gfx_BoosterFreq = input[6];
- activity_monitor.Gfx_PD_Data_limit_c = input[7];
- activity_monitor.Gfx_PD_Data_error_coeff = input[8];
- activity_monitor.Gfx_PD_Data_error_rate_coeff = input[9];
- break;
- case 1: /* Uclk */
- activity_monitor.Mem_FPS = input[1];
- activity_monitor.Mem_UseRlcBusy = input[2];
- activity_monitor.Mem_MinActiveFreqType = input[3];
- activity_monitor.Mem_MinActiveFreq = input[4];
- activity_monitor.Mem_BoosterFreqType = input[5];
- activity_monitor.Mem_BoosterFreq = input[6];
- activity_monitor.Mem_PD_Data_limit_c = input[7];
- activity_monitor.Mem_PD_Data_error_coeff = input[8];
- activity_monitor.Mem_PD_Data_error_rate_coeff = input[9];
- break;
- default:
+ return ret;
+}
+
+static int arcturus_set_power_profile_mode(struct smu_context *smu,
+ u32 workload_mask,
+ long *custom_params,
+ u32 custom_params_max_idx)
+{
+ u32 backend_workload_mask = 0;
+ int ret, idx = -1, i;
+
+ smu_cmn_get_backend_workload_mask(smu, workload_mask,
+ &backend_workload_mask);
+
+ if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) {
+ if (smu->smc_fw_version < 0x360d00)
return -EINVAL;
+ if (!smu->custom_profile_params) {
+ smu->custom_profile_params =
+ kzalloc(ARCTURUS_CUSTOM_PARAMS_SIZE, GFP_KERNEL);
+ if (!smu->custom_profile_params)
+ return -ENOMEM;
}
-
- ret = smu_cmn_update_table(smu,
- SMU_TABLE_ACTIVITY_MONITOR_COEFF,
- WORKLOAD_PPLIB_CUSTOM_BIT,
- (void *)(&activity_monitor),
- true);
+ if (custom_params && custom_params_max_idx) {
+ if (custom_params_max_idx != ARCTURUS_CUSTOM_PARAMS_COUNT)
+ return -EINVAL;
+ if (custom_params[0] >= ARCTURUS_CUSTOM_PARAMS_CLOCK_COUNT)
+ return -EINVAL;
+ idx = custom_params[0] * ARCTURUS_CUSTOM_PARAMS_COUNT;
+ smu->custom_profile_params[idx] = 1;
+ for (i = 1; i < custom_params_max_idx; i++)
+ smu->custom_profile_params[idx + i] = custom_params[i];
+ }
+ ret = arcturus_set_power_profile_mode_coeff(smu,
+ smu->custom_profile_params);
if (ret) {
- dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
+ if (idx != -1)
+ smu->custom_profile_params[idx] = 0;
return ret;
}
- }
-
- /*
- * Conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT
- * Not all profile modes are supported on arcturus.
- */
- workload_type = smu_cmn_to_asic_specific_index(smu,
- CMN2ASIC_MAPPING_WORKLOAD,
- profile_mode);
- if (workload_type < 0) {
- dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on arcturus\n", profile_mode);
- return -EINVAL;
+ } else if (smu->custom_profile_params) {
+ memset(smu->custom_profile_params, 0, ARCTURUS_CUSTOM_PARAMS_SIZE);
}
ret = smu_cmn_send_smc_msg_with_param(smu,
- SMU_MSG_SetWorkloadMask,
- smu->workload_mask,
- NULL);
+ SMU_MSG_SetWorkloadMask,
+ backend_workload_mask,
+ NULL);
if (ret) {
- dev_err(smu->adev->dev, "Fail to set workload type %d\n", workload_type);
+ dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
+ workload_mask);
+ if (idx != -1)
+ smu->custom_profile_params[idx] = 0;
return ret;
}
- smu_cmn_assign_power_profile(smu);
-
- return 0;
+ return ret;
}
static int arcturus_set_performance_level(struct smu_context *smu,
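For reference, a minimal user-space sketch (not part of the patch) of how the flattened custom-profile buffer introduced above is laid out: one stride of ARCTURUS_CUSTOM_PARAMS_COUNT longs per clock, element 0 of each stride acts as a "customized" flag, the remaining entries hold the coefficients, and the first user value selects the clock and therefore the stride. Only the two counts are taken from the defines in this hunk; all other names and values are hypothetical.

#include <stdio.h>
#include <string.h>

#define PARAMS_COUNT 10   /* ARCTURUS_CUSTOM_PARAMS_COUNT */
#define CLOCK_COUNT   2   /* ARCTURUS_CUSTOM_PARAMS_CLOCK_COUNT */

int main(void)
{
	long params[CLOCK_COUNT * PARAMS_COUNT];
	/* hypothetical user input: clock id 1 (Uclk) plus nine coefficients */
	long input[PARAMS_COUNT] = { 1, 60, 1, 0, 500, 0, 800, 10, 2, 1 };
	int idx = input[0] * PARAMS_COUNT;
	int i;

	memset(params, 0, sizeof(params));
	params[idx] = 1;                    /* mark this clock's stride as customized */
	for (i = 1; i < PARAMS_COUNT; i++)  /* coefficients follow the flag */
		params[idx + i] = input[i];

	printf("Uclk stride starts at %d, flag=%ld, FPS=%ld\n",
	       idx, params[idx], params[idx + 1]);
	return 0;
}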
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index 211635dabed8..7fad5dfb39c4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -2006,90 +2006,122 @@ static int navi10_get_power_profile_mode(struct smu_context *smu, char *buf)
return size;
}
-static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
+#define NAVI10_CUSTOM_PARAMS_COUNT 10
+#define NAVI10_CUSTOM_PARAMS_CLOCKS_COUNT 3
+#define NAVI10_CUSTOM_PARAMS_SIZE (NAVI10_CUSTOM_PARAMS_CLOCKS_COUNT * NAVI10_CUSTOM_PARAMS_COUNT * sizeof(long))
+
+static int navi10_set_power_profile_mode_coeff(struct smu_context *smu,
+ long *input)
{
DpmActivityMonitorCoeffInt_t activity_monitor;
- int workload_type, ret = 0;
+ int ret, idx;
- smu->power_profile_mode = input[size];
+ ret = smu_cmn_update_table(smu,
+ SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
+ (void *)(&activity_monitor), false);
+ if (ret) {
+ dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
+ return ret;
+ }
- if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
- dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode);
- return -EINVAL;
+ idx = 0 * NAVI10_CUSTOM_PARAMS_COUNT;
+ if (input[idx]) {
+ /* Gfxclk */
+ activity_monitor.Gfx_FPS = input[idx + 1];
+ activity_monitor.Gfx_MinFreqStep = input[idx + 2];
+ activity_monitor.Gfx_MinActiveFreqType = input[idx + 3];
+ activity_monitor.Gfx_MinActiveFreq = input[idx + 4];
+ activity_monitor.Gfx_BoosterFreqType = input[idx + 5];
+ activity_monitor.Gfx_BoosterFreq = input[idx + 6];
+ activity_monitor.Gfx_PD_Data_limit_c = input[idx + 7];
+ activity_monitor.Gfx_PD_Data_error_coeff = input[idx + 8];
+ activity_monitor.Gfx_PD_Data_error_rate_coeff = input[idx + 9];
+ }
+ idx = 1 * NAVI10_CUSTOM_PARAMS_COUNT;
+ if (input[idx]) {
+ /* Socclk */
+ activity_monitor.Soc_FPS = input[idx + 1];
+ activity_monitor.Soc_MinFreqStep = input[idx + 2];
+ activity_monitor.Soc_MinActiveFreqType = input[idx + 3];
+ activity_monitor.Soc_MinActiveFreq = input[idx + 4];
+ activity_monitor.Soc_BoosterFreqType = input[idx + 5];
+ activity_monitor.Soc_BoosterFreq = input[idx + 6];
+ activity_monitor.Soc_PD_Data_limit_c = input[idx + 7];
+ activity_monitor.Soc_PD_Data_error_coeff = input[idx + 8];
+ activity_monitor.Soc_PD_Data_error_rate_coeff = input[idx + 9];
+ }
+ idx = 2 * NAVI10_CUSTOM_PARAMS_COUNT;
+ if (input[idx]) {
+ /* Memclk */
+ activity_monitor.Mem_FPS = input[idx + 1];
+ activity_monitor.Mem_MinFreqStep = input[idx + 2];
+ activity_monitor.Mem_MinActiveFreqType = input[idx + 3];
+ activity_monitor.Mem_MinActiveFreq = input[idx + 4];
+ activity_monitor.Mem_BoosterFreqType = input[idx + 5];
+ activity_monitor.Mem_BoosterFreq = input[idx + 6];
+ activity_monitor.Mem_PD_Data_limit_c = input[idx + 7];
+ activity_monitor.Mem_PD_Data_error_coeff = input[idx + 8];
+ activity_monitor.Mem_PD_Data_error_rate_coeff = input[idx + 9];
+ }
+
+ ret = smu_cmn_update_table(smu,
+ SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
+ (void *)(&activity_monitor), true);
+ if (ret) {
+ dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
+ return ret;
}
- if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
- if (size != 10)
- return -EINVAL;
+ return ret;
+}
- ret = smu_cmn_update_table(smu,
- SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
- (void *)(&activity_monitor), false);
- if (ret) {
- dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
- return ret;
- }
+static int navi10_set_power_profile_mode(struct smu_context *smu,
+ u32 workload_mask,
+ long *custom_params,
+ u32 custom_params_max_idx)
+{
+ u32 backend_workload_mask = 0;
+ int ret, idx = -1, i;
- switch (input[0]) {
- case 0: /* Gfxclk */
- activity_monitor.Gfx_FPS = input[1];
- activity_monitor.Gfx_MinFreqStep = input[2];
- activity_monitor.Gfx_MinActiveFreqType = input[3];
- activity_monitor.Gfx_MinActiveFreq = input[4];
- activity_monitor.Gfx_BoosterFreqType = input[5];
- activity_monitor.Gfx_BoosterFreq = input[6];
- activity_monitor.Gfx_PD_Data_limit_c = input[7];
- activity_monitor.Gfx_PD_Data_error_coeff = input[8];
- activity_monitor.Gfx_PD_Data_error_rate_coeff = input[9];
- break;
- case 1: /* Socclk */
- activity_monitor.Soc_FPS = input[1];
- activity_monitor.Soc_MinFreqStep = input[2];
- activity_monitor.Soc_MinActiveFreqType = input[3];
- activity_monitor.Soc_MinActiveFreq = input[4];
- activity_monitor.Soc_BoosterFreqType = input[5];
- activity_monitor.Soc_BoosterFreq = input[6];
- activity_monitor.Soc_PD_Data_limit_c = input[7];
- activity_monitor.Soc_PD_Data_error_coeff = input[8];
- activity_monitor.Soc_PD_Data_error_rate_coeff = input[9];
- break;
- case 2: /* Memclk */
- activity_monitor.Mem_FPS = input[1];
- activity_monitor.Mem_MinFreqStep = input[2];
- activity_monitor.Mem_MinActiveFreqType = input[3];
- activity_monitor.Mem_MinActiveFreq = input[4];
- activity_monitor.Mem_BoosterFreqType = input[5];
- activity_monitor.Mem_BoosterFreq = input[6];
- activity_monitor.Mem_PD_Data_limit_c = input[7];
- activity_monitor.Mem_PD_Data_error_coeff = input[8];
- activity_monitor.Mem_PD_Data_error_rate_coeff = input[9];
- break;
- default:
- return -EINVAL;
- }
+ smu_cmn_get_backend_workload_mask(smu, workload_mask,
+ &backend_workload_mask);
- ret = smu_cmn_update_table(smu,
- SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
- (void *)(&activity_monitor), true);
+ if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) {
+ if (!smu->custom_profile_params) {
+ smu->custom_profile_params = kzalloc(NAVI10_CUSTOM_PARAMS_SIZE, GFP_KERNEL);
+ if (!smu->custom_profile_params)
+ return -ENOMEM;
+ }
+ if (custom_params && custom_params_max_idx) {
+ if (custom_params_max_idx != NAVI10_CUSTOM_PARAMS_COUNT)
+ return -EINVAL;
+ if (custom_params[0] >= NAVI10_CUSTOM_PARAMS_CLOCKS_COUNT)
+ return -EINVAL;
+ idx = custom_params[0] * NAVI10_CUSTOM_PARAMS_COUNT;
+ smu->custom_profile_params[idx] = 1;
+ for (i = 1; i < custom_params_max_idx; i++)
+ smu->custom_profile_params[idx + i] = custom_params[i];
+ }
+ ret = navi10_set_power_profile_mode_coeff(smu,
+ smu->custom_profile_params);
if (ret) {
- dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
+ if (idx != -1)
+ smu->custom_profile_params[idx] = 0;
return ret;
}
+ } else if (smu->custom_profile_params) {
+ memset(smu->custom_profile_params, 0, NAVI10_CUSTOM_PARAMS_SIZE);
}
- /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
- workload_type = smu_cmn_to_asic_specific_index(smu,
- CMN2ASIC_MAPPING_WORKLOAD,
- smu->power_profile_mode);
- if (workload_type < 0)
- return -EINVAL;
-
ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask,
- smu->workload_mask, NULL);
- if (ret)
- dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__);
- else
- smu_cmn_assign_power_profile(smu);
+ backend_workload_mask, NULL);
+ if (ret) {
+ dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
+ workload_mask);
+ if (idx != -1)
+ smu->custom_profile_params[idx] = 0;
+ return ret;
+ }
return ret;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index d0ed0d060a8a..19a25fdc2f5b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -1157,19 +1157,15 @@ static int sienna_cichlid_dpm_set_vcn_enable(struct smu_context *smu,
int inst)
{
struct amdgpu_device *adev = smu->adev;
- int i, ret = 0;
+ int ret = 0;
- for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- /* vcn dpm on is a prerequisite for vcn power gate messages */
- if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_MM_DPM_PG_BIT)) {
- ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
- SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn,
- 0x10000 * i, NULL);
- if (ret)
- return ret;
- }
+ if (adev->vcn.harvest_config & (1 << inst))
+ return ret;
+ /* vcn dpm on is a prerequisite for vcn power gate messages */
+ if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_MM_DPM_PG_BIT)) {
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn,
+ 0x10000 * inst, NULL);
}
return ret;
@@ -1708,93 +1704,126 @@ static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char *
return size;
}
-static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
+#define SIENNA_CICHLID_CUSTOM_PARAMS_COUNT 10
+#define SIENNA_CICHLID_CUSTOM_PARAMS_CLOCK_COUNT 3
+#define SIENNA_CICHLID_CUSTOM_PARAMS_SIZE (SIENNA_CICHLID_CUSTOM_PARAMS_CLOCK_COUNT * SIENNA_CICHLID_CUSTOM_PARAMS_COUNT * sizeof(long))
+
+static int sienna_cichlid_set_power_profile_mode_coeff(struct smu_context *smu,
+ long *input)
{
DpmActivityMonitorCoeffIntExternal_t activity_monitor_external;
DpmActivityMonitorCoeffInt_t *activity_monitor =
&(activity_monitor_external.DpmActivityMonitorCoeffInt);
- int workload_type, ret = 0;
+ int ret, idx;
- smu->power_profile_mode = input[size];
+ ret = smu_cmn_update_table(smu,
+ SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
+ (void *)(&activity_monitor_external), false);
+ if (ret) {
+ dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
+ return ret;
+ }
- if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
- dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode);
- return -EINVAL;
+ idx = 0 * SIENNA_CICHLID_CUSTOM_PARAMS_COUNT;
+ if (input[idx]) {
+ /* Gfxclk */
+ activity_monitor->Gfx_FPS = input[idx + 1];
+ activity_monitor->Gfx_MinFreqStep = input[idx + 2];
+ activity_monitor->Gfx_MinActiveFreqType = input[idx + 3];
+ activity_monitor->Gfx_MinActiveFreq = input[idx + 4];
+ activity_monitor->Gfx_BoosterFreqType = input[idx + 5];
+ activity_monitor->Gfx_BoosterFreq = input[idx + 6];
+ activity_monitor->Gfx_PD_Data_limit_c = input[idx + 7];
+ activity_monitor->Gfx_PD_Data_error_coeff = input[idx + 8];
+ activity_monitor->Gfx_PD_Data_error_rate_coeff = input[idx + 9];
+ }
+ idx = 1 * SIENNA_CICHLID_CUSTOM_PARAMS_COUNT;
+ if (input[idx]) {
+ /* Socclk */
+ activity_monitor->Fclk_FPS = input[idx + 1];
+ activity_monitor->Fclk_MinFreqStep = input[idx + 2];
+ activity_monitor->Fclk_MinActiveFreqType = input[idx + 3];
+ activity_monitor->Fclk_MinActiveFreq = input[idx + 4];
+ activity_monitor->Fclk_BoosterFreqType = input[idx + 5];
+ activity_monitor->Fclk_BoosterFreq = input[idx + 6];
+ activity_monitor->Fclk_PD_Data_limit_c = input[idx + 7];
+ activity_monitor->Fclk_PD_Data_error_coeff = input[idx + 8];
+ activity_monitor->Fclk_PD_Data_error_rate_coeff = input[idx + 9];
+ }
+ idx = 2 * SIENNA_CICHLID_CUSTOM_PARAMS_COUNT;
+ if (input[idx]) {
+ /* Memclk */
+ activity_monitor->Mem_FPS = input[idx + 1];
+ activity_monitor->Mem_MinFreqStep = input[idx + 2];
+ activity_monitor->Mem_MinActiveFreqType = input[idx + 3];
+ activity_monitor->Mem_MinActiveFreq = input[idx + 4];
+ activity_monitor->Mem_BoosterFreqType = input[idx + 5];
+ activity_monitor->Mem_BoosterFreq = input[idx + 6];
+ activity_monitor->Mem_PD_Data_limit_c = input[idx + 7];
+ activity_monitor->Mem_PD_Data_error_coeff = input[idx + 8];
+ activity_monitor->Mem_PD_Data_error_rate_coeff = input[idx + 9];
}
- if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
- if (size != 10)
- return -EINVAL;
+ ret = smu_cmn_update_table(smu,
+ SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
+ (void *)(&activity_monitor_external), true);
+ if (ret) {
+ dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
+ return ret;
+ }
- ret = smu_cmn_update_table(smu,
- SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
- (void *)(&activity_monitor_external), false);
- if (ret) {
- dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
- return ret;
- }
+ return ret;
+}
- switch (input[0]) {
- case 0: /* Gfxclk */
- activity_monitor->Gfx_FPS = input[1];
- activity_monitor->Gfx_MinFreqStep = input[2];
- activity_monitor->Gfx_MinActiveFreqType = input[3];
- activity_monitor->Gfx_MinActiveFreq = input[4];
- activity_monitor->Gfx_BoosterFreqType = input[5];
- activity_monitor->Gfx_BoosterFreq = input[6];
- activity_monitor->Gfx_PD_Data_limit_c = input[7];
- activity_monitor->Gfx_PD_Data_error_coeff = input[8];
- activity_monitor->Gfx_PD_Data_error_rate_coeff = input[9];
- break;
- case 1: /* Socclk */
- activity_monitor->Fclk_FPS = input[1];
- activity_monitor->Fclk_MinFreqStep = input[2];
- activity_monitor->Fclk_MinActiveFreqType = input[3];
- activity_monitor->Fclk_MinActiveFreq = input[4];
- activity_monitor->Fclk_BoosterFreqType = input[5];
- activity_monitor->Fclk_BoosterFreq = input[6];
- activity_monitor->Fclk_PD_Data_limit_c = input[7];
- activity_monitor->Fclk_PD_Data_error_coeff = input[8];
- activity_monitor->Fclk_PD_Data_error_rate_coeff = input[9];
- break;
- case 2: /* Memclk */
- activity_monitor->Mem_FPS = input[1];
- activity_monitor->Mem_MinFreqStep = input[2];
- activity_monitor->Mem_MinActiveFreqType = input[3];
- activity_monitor->Mem_MinActiveFreq = input[4];
- activity_monitor->Mem_BoosterFreqType = input[5];
- activity_monitor->Mem_BoosterFreq = input[6];
- activity_monitor->Mem_PD_Data_limit_c = input[7];
- activity_monitor->Mem_PD_Data_error_coeff = input[8];
- activity_monitor->Mem_PD_Data_error_rate_coeff = input[9];
- break;
- default:
- return -EINVAL;
- }
+static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu,
+ u32 workload_mask,
+ long *custom_params,
+ u32 custom_params_max_idx)
+{
+ u32 backend_workload_mask = 0;
+ int ret, idx = -1, i;
+
+ smu_cmn_get_backend_workload_mask(smu, workload_mask,
+ &backend_workload_mask);
- ret = smu_cmn_update_table(smu,
- SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
- (void *)(&activity_monitor_external), true);
+ if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) {
+ if (!smu->custom_profile_params) {
+ smu->custom_profile_params =
+ kzalloc(SIENNA_CICHLID_CUSTOM_PARAMS_SIZE, GFP_KERNEL);
+ if (!smu->custom_profile_params)
+ return -ENOMEM;
+ }
+ if (custom_params && custom_params_max_idx) {
+ if (custom_params_max_idx != SIENNA_CICHLID_CUSTOM_PARAMS_COUNT)
+ return -EINVAL;
+ if (custom_params[0] >= SIENNA_CICHLID_CUSTOM_PARAMS_CLOCK_COUNT)
+ return -EINVAL;
+ idx = custom_params[0] * SIENNA_CICHLID_CUSTOM_PARAMS_COUNT;
+ smu->custom_profile_params[idx] = 1;
+ for (i = 1; i < custom_params_max_idx; i++)
+ smu->custom_profile_params[idx + i] = custom_params[i];
+ }
+ ret = sienna_cichlid_set_power_profile_mode_coeff(smu,
+ smu->custom_profile_params);
if (ret) {
- dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
+ if (idx != -1)
+ smu->custom_profile_params[idx] = 0;
return ret;
}
+ } else if (smu->custom_profile_params) {
+ memset(smu->custom_profile_params, 0, SIENNA_CICHLID_CUSTOM_PARAMS_SIZE);
}
- /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
- workload_type = smu_cmn_to_asic_specific_index(smu,
- CMN2ASIC_MAPPING_WORKLOAD,
- smu->power_profile_mode);
- if (workload_type < 0)
- return -EINVAL;
-
ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask,
- smu->workload_mask, NULL);
- if (ret)
- dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__);
- else
- smu_cmn_assign_power_profile(smu);
+ backend_workload_mask, NULL);
+ if (ret) {
+ dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
+ workload_mask);
+ if (idx != -1)
+ smu->custom_profile_params[idx] = 0;
+ return ret;
+ }
return ret;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index 480cf3cb204d..189c6a32b6bd 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -105,7 +105,8 @@ int smu_v11_0_init_microcode(struct smu_context *smu)
return 0;
amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix));
- err = amdgpu_ucode_request(adev, &adev->pm.fw, "amdgpu/%s.bin", ucode_prefix);
+ err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s.bin", ucode_prefix);
if (err)
goto out;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index f89c487dce72..a55ea76d7399 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -1056,42 +1056,27 @@ static int vangogh_get_power_profile_mode(struct smu_context *smu,
return size;
}
-static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
+static int vangogh_set_power_profile_mode(struct smu_context *smu,
+ u32 workload_mask,
+ long *custom_params,
+ u32 custom_params_max_idx)
{
- int workload_type, ret;
- uint32_t profile_mode = input[size];
+ u32 backend_workload_mask = 0;
+ int ret;
- if (profile_mode >= PP_SMC_POWER_PROFILE_COUNT) {
- dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode);
- return -EINVAL;
- }
-
- if (profile_mode == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT ||
- profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING)
- return 0;
-
- /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
- workload_type = smu_cmn_to_asic_specific_index(smu,
- CMN2ASIC_MAPPING_WORKLOAD,
- profile_mode);
- if (workload_type < 0) {
- dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on VANGOGH\n",
- profile_mode);
- return -EINVAL;
- }
+ smu_cmn_get_backend_workload_mask(smu, workload_mask,
+ &backend_workload_mask);
ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify,
- smu->workload_mask,
- NULL);
+ backend_workload_mask,
+ NULL);
if (ret) {
- dev_err_once(smu->adev->dev, "Fail to set workload type %d\n",
- workload_type);
+		dev_err_once(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
+ workload_mask);
return ret;
}
- smu_cmn_assign_power_profile(smu);
-
- return 0;
+ return ret;
}
static int vangogh_set_soft_freq_limited_range(struct smu_context *smu,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
index 75a9ea87f419..37d82a71a2d7 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
@@ -864,44 +864,27 @@ static int renoir_force_clk_levels(struct smu_context *smu,
return ret;
}
-static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
+static int renoir_set_power_profile_mode(struct smu_context *smu,
+ u32 workload_mask,
+ long *custom_params,
+ u32 custom_params_max_idx)
{
- int workload_type, ret;
- uint32_t profile_mode = input[size];
+ int ret;
+ u32 backend_workload_mask = 0;
- if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
- dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode);
- return -EINVAL;
- }
-
- if (profile_mode == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT ||
- profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING)
- return 0;
-
- /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
- workload_type = smu_cmn_to_asic_specific_index(smu,
- CMN2ASIC_MAPPING_WORKLOAD,
- profile_mode);
- if (workload_type < 0) {
- /*
- * TODO: If some case need switch to powersave/default power mode
- * then can consider enter WORKLOAD_COMPUTE/WORKLOAD_CUSTOM for power saving.
- */
- dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on RENOIR\n", profile_mode);
- return -EINVAL;
- }
+ smu_cmn_get_backend_workload_mask(smu, workload_mask,
+ &backend_workload_mask);
ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify,
- smu->workload_mask,
- NULL);
+ backend_workload_mask,
+ NULL);
if (ret) {
- dev_err_once(smu->adev->dev, "Fail to set workload type %d\n", workload_type);
+		dev_err_once(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
+ workload_mask);
return ret;
}
- smu_cmn_assign_power_profile(smu);
-
- return 0;
+ return ret;
}
static int renoir_set_peak_clock_by_device(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index 2bfea740dace..fbbdfa54f6a2 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -103,7 +103,8 @@ int smu_v13_0_init_microcode(struct smu_context *smu)
return 0;
amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix));
- err = amdgpu_ucode_request(adev, &adev->pm.fw, "amdgpu/%s.bin", ucode_prefix);
+ err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s.bin", ucode_prefix);
if (err)
goto out;
@@ -1320,11 +1321,11 @@ static int smu_v13_0_set_irq_state(struct amdgpu_device *adev,
return 0;
}
-static int smu_v13_0_ack_ac_dc_interrupt(struct smu_context *smu)
+void smu_v13_0_interrupt_work(struct smu_context *smu)
{
- return smu_cmn_send_smc_msg(smu,
- SMU_MSG_ReenableAcDcInterrupt,
- NULL);
+ smu_cmn_send_smc_msg(smu,
+ SMU_MSG_ReenableAcDcInterrupt,
+ NULL);
}
#define THM_11_0__SRCID__THM_DIG_THERM_L2H 0 /* ASIC_TEMP > CG_THERMAL_INT.DIG_THERM_INTH */
@@ -1377,12 +1378,12 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev,
switch (ctxid) {
case SMU_IH_INTERRUPT_CONTEXT_ID_AC:
dev_dbg(adev->dev, "Switched to AC mode!\n");
- smu_v13_0_ack_ac_dc_interrupt(smu);
+ schedule_work(&smu->interrupt_work);
adev->pm.ac_power = true;
break;
case SMU_IH_INTERRUPT_CONTEXT_ID_DC:
dev_dbg(adev->dev, "Switched to DC mode!\n");
- smu_v13_0_ack_ac_dc_interrupt(smu);
+ schedule_work(&smu->interrupt_work);
adev->pm.ac_power = false;
break;
case SMU_IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING:
@@ -2108,18 +2109,14 @@ int smu_v13_0_set_vcn_enable(struct smu_context *smu,
int inst)
{
struct amdgpu_device *adev = smu->adev;
- int i, ret = 0;
+ int ret = 0;
- for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
+ if (adev->vcn.harvest_config & (1 << inst))
+ return ret;
- ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
- SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn,
- i << 16U, NULL);
- if (ret)
- return ret;
- }
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn,
+ inst << 16U, NULL);
return ret;
}
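The AC/DC acknowledgement above moves out of the hard-IRQ path: smu_v13_0_irq_process() now only queues smu->interrupt_work, and the message that re-enables the interrupt is sent from process context, where it may sleep. A pattern sketch of that wiring follows; the handler and init-site names are assumptions, and only smu->interrupt_work, the ppt_funcs->interrupt_work hook and smu_v13_0_interrupt_work() come from this series.

static void example_interrupt_work_fn(struct work_struct *work)
{
	struct smu_context *smu =
		container_of(work, struct smu_context, interrupt_work);

	/* reaches smu_v13_0_interrupt_work() via the new ppt_funcs hook */
	if (smu->ppt_funcs && smu->ppt_funcs->interrupt_work)
		smu->ppt_funcs->interrupt_work(smu);
}

/* at init:             INIT_WORK(&smu->interrupt_work, example_interrupt_work_fn); */
/* in the IRQ handler:  schedule_work(&smu->interrupt_work);                        */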
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 80c6b1e523aa..0551a3311217 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2571,111 +2571,129 @@ static int smu_v13_0_0_get_power_profile_mode(struct smu_context *smu,
return size;
}
-static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
- long *input,
- uint32_t size)
+#define SMU_13_0_0_CUSTOM_PARAMS_COUNT 9
+#define SMU_13_0_0_CUSTOM_PARAMS_CLOCK_COUNT 2
+#define SMU_13_0_0_CUSTOM_PARAMS_SIZE (SMU_13_0_0_CUSTOM_PARAMS_CLOCK_COUNT * SMU_13_0_0_CUSTOM_PARAMS_COUNT * sizeof(long))
+
+static int smu_v13_0_0_set_power_profile_mode_coeff(struct smu_context *smu,
+ long *input)
{
DpmActivityMonitorCoeffIntExternal_t activity_monitor_external;
DpmActivityMonitorCoeffInt_t *activity_monitor =
&(activity_monitor_external.DpmActivityMonitorCoeffInt);
- int workload_type, ret = 0;
- u32 workload_mask;
-
- smu->power_profile_mode = input[size];
+ int ret, idx;
- if (smu->power_profile_mode >= PP_SMC_POWER_PROFILE_COUNT) {
- dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode);
- return -EINVAL;
+ ret = smu_cmn_update_table(smu,
+ SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+ WORKLOAD_PPLIB_CUSTOM_BIT,
+ (void *)(&activity_monitor_external),
+ false);
+ if (ret) {
+ dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
+ return ret;
}
- if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
- if (size != 9)
- return -EINVAL;
-
- ret = smu_cmn_update_table(smu,
- SMU_TABLE_ACTIVITY_MONITOR_COEFF,
- WORKLOAD_PPLIB_CUSTOM_BIT,
- (void *)(&activity_monitor_external),
- false);
- if (ret) {
- dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
- return ret;
- }
-
- switch (input[0]) {
- case 0: /* Gfxclk */
- activity_monitor->Gfx_FPS = input[1];
- activity_monitor->Gfx_MinActiveFreqType = input[2];
- activity_monitor->Gfx_MinActiveFreq = input[3];
- activity_monitor->Gfx_BoosterFreqType = input[4];
- activity_monitor->Gfx_BoosterFreq = input[5];
- activity_monitor->Gfx_PD_Data_limit_c = input[6];
- activity_monitor->Gfx_PD_Data_error_coeff = input[7];
- activity_monitor->Gfx_PD_Data_error_rate_coeff = input[8];
- break;
- case 1: /* Fclk */
- activity_monitor->Fclk_FPS = input[1];
- activity_monitor->Fclk_MinActiveFreqType = input[2];
- activity_monitor->Fclk_MinActiveFreq = input[3];
- activity_monitor->Fclk_BoosterFreqType = input[4];
- activity_monitor->Fclk_BoosterFreq = input[5];
- activity_monitor->Fclk_PD_Data_limit_c = input[6];
- activity_monitor->Fclk_PD_Data_error_coeff = input[7];
- activity_monitor->Fclk_PD_Data_error_rate_coeff = input[8];
- break;
- default:
- return -EINVAL;
- }
+ idx = 0 * SMU_13_0_0_CUSTOM_PARAMS_COUNT;
+ if (input[idx]) {
+ /* Gfxclk */
+ activity_monitor->Gfx_FPS = input[idx + 1];
+ activity_monitor->Gfx_MinActiveFreqType = input[idx + 2];
+ activity_monitor->Gfx_MinActiveFreq = input[idx + 3];
+ activity_monitor->Gfx_BoosterFreqType = input[idx + 4];
+ activity_monitor->Gfx_BoosterFreq = input[idx + 5];
+ activity_monitor->Gfx_PD_Data_limit_c = input[idx + 6];
+ activity_monitor->Gfx_PD_Data_error_coeff = input[idx + 7];
+ activity_monitor->Gfx_PD_Data_error_rate_coeff = input[idx + 8];
+ }
+ idx = 1 * SMU_13_0_0_CUSTOM_PARAMS_COUNT;
+ if (input[idx]) {
+ /* Fclk */
+ activity_monitor->Fclk_FPS = input[idx + 1];
+ activity_monitor->Fclk_MinActiveFreqType = input[idx + 2];
+ activity_monitor->Fclk_MinActiveFreq = input[idx + 3];
+ activity_monitor->Fclk_BoosterFreqType = input[idx + 4];
+ activity_monitor->Fclk_BoosterFreq = input[idx + 5];
+ activity_monitor->Fclk_PD_Data_limit_c = input[idx + 6];
+ activity_monitor->Fclk_PD_Data_error_coeff = input[idx + 7];
+ activity_monitor->Fclk_PD_Data_error_rate_coeff = input[idx + 8];
+ }
- ret = smu_cmn_update_table(smu,
- SMU_TABLE_ACTIVITY_MONITOR_COEFF,
- WORKLOAD_PPLIB_CUSTOM_BIT,
- (void *)(&activity_monitor_external),
- true);
- if (ret) {
- dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
- return ret;
- }
+ ret = smu_cmn_update_table(smu,
+ SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+ WORKLOAD_PPLIB_CUSTOM_BIT,
+ (void *)(&activity_monitor_external),
+ true);
+ if (ret) {
+ dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
+ return ret;
}
- /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
- workload_type = smu_cmn_to_asic_specific_index(smu,
- CMN2ASIC_MAPPING_WORKLOAD,
- smu->power_profile_mode);
+ return ret;
+}
- if (workload_type < 0)
- return -EINVAL;
+static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
+ u32 workload_mask,
+ long *custom_params,
+ u32 custom_params_max_idx)
+{
+ u32 backend_workload_mask = 0;
+ int workload_type, ret, idx = -1, i;
- workload_mask = 1 << workload_type;
+ smu_cmn_get_backend_workload_mask(smu, workload_mask,
+ &backend_workload_mask);
/* Add optimizations for SMU13.0.0/10. Reuse the power saving profile */
- if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) &&
- ((smu->adev->pm.fw_version == 0x004e6601) ||
- (smu->adev->pm.fw_version >= 0x004e7300))) ||
- (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
- smu->adev->pm.fw_version >= 0x00504500)) {
+ if ((workload_mask & (1 << PP_SMC_POWER_PROFILE_COMPUTE)) &&
+ ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) &&
+ ((smu->adev->pm.fw_version == 0x004e6601) ||
+ (smu->adev->pm.fw_version >= 0x004e7300))) ||
+ (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
+ smu->adev->pm.fw_version >= 0x00504500))) {
workload_type = smu_cmn_to_asic_specific_index(smu,
CMN2ASIC_MAPPING_WORKLOAD,
PP_SMC_POWER_PROFILE_POWERSAVING);
if (workload_type >= 0)
- workload_mask |= 1 << workload_type;
+ backend_workload_mask |= 1 << workload_type;
}
- smu->workload_mask |= workload_mask;
- ret = smu_cmn_send_smc_msg_with_param(smu,
- SMU_MSG_SetWorkloadMask,
- smu->workload_mask,
- NULL);
- if (!ret) {
- smu_cmn_assign_power_profile(smu);
- if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING) {
- workload_type = smu_cmn_to_asic_specific_index(smu,
- CMN2ASIC_MAPPING_WORKLOAD,
- PP_SMC_POWER_PROFILE_FULLSCREEN3D);
- smu->power_profile_mode = smu->workload_mask & (1 << workload_type)
- ? PP_SMC_POWER_PROFILE_FULLSCREEN3D
- : PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
+ if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) {
+ if (!smu->custom_profile_params) {
+ smu->custom_profile_params =
+ kzalloc(SMU_13_0_0_CUSTOM_PARAMS_SIZE, GFP_KERNEL);
+ if (!smu->custom_profile_params)
+ return -ENOMEM;
}
+ if (custom_params && custom_params_max_idx) {
+ if (custom_params_max_idx != SMU_13_0_0_CUSTOM_PARAMS_COUNT)
+ return -EINVAL;
+ if (custom_params[0] >= SMU_13_0_0_CUSTOM_PARAMS_CLOCK_COUNT)
+ return -EINVAL;
+ idx = custom_params[0] * SMU_13_0_0_CUSTOM_PARAMS_COUNT;
+ smu->custom_profile_params[idx] = 1;
+ for (i = 1; i < custom_params_max_idx; i++)
+ smu->custom_profile_params[idx + i] = custom_params[i];
+ }
+ ret = smu_v13_0_0_set_power_profile_mode_coeff(smu,
+ smu->custom_profile_params);
+ if (ret) {
+ if (idx != -1)
+ smu->custom_profile_params[idx] = 0;
+ return ret;
+ }
+ } else if (smu->custom_profile_params) {
+ memset(smu->custom_profile_params, 0, SMU_13_0_0_CUSTOM_PARAMS_SIZE);
+ }
+
+ ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_SetWorkloadMask,
+ backend_workload_mask,
+ NULL);
+ if (ret) {
+ dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
+ workload_mask);
+ if (idx != -1)
+ smu->custom_profile_params[idx] = 0;
+ return ret;
}
return ret;
@@ -3202,6 +3220,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
.is_asic_wbrf_supported = smu_v13_0_0_wbrf_support_check,
.enable_uclk_shadow = smu_v13_0_enable_uclk_shadow,
.set_wbrf_exclusion_ranges = smu_v13_0_set_wbrf_exclusion_ranges,
+ .interrupt_work = smu_v13_0_interrupt_work,
};
void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index ab3c93ddce46..8ab30b2f7119 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -119,6 +119,21 @@ static inline bool smu_v13_0_6_is_other_end_count_available(struct smu_context *
}
}
+static inline bool smu_v13_0_6_is_blw_host_limit_available(struct smu_context *smu)
+{
+ if (smu->adev->flags & AMD_IS_APU)
+ return smu->smc_fw_version >= 0x04556F00;
+
+ switch (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)) {
+ case IP_VERSION(13, 0, 6):
+ return smu->smc_fw_version >= 0x557900;
+ case IP_VERSION(13, 0, 14):
+ return smu->smc_fw_version >= 0x05551000;
+ default:
+ return false;
+ }
+}
+
struct mca_bank_ipid {
enum amdgpu_mca_ip ip;
uint16_t hwid;
@@ -193,6 +208,8 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU
MSG_MAP(SelectPLPDMode, PPSMC_MSG_SelectPLPDMode, 0),
MSG_MAP(RmaDueToBadPageThreshold, PPSMC_MSG_RmaDueToBadPageThreshold, 0),
MSG_MAP(SelectPstatePolicy, PPSMC_MSG_SelectPstatePolicy, 0),
+ MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0),
+ MSG_MAP(ResetSDMA2, PPSMC_MSG_ResetSDMA2, 0),
};
// clang-format on
@@ -304,7 +321,8 @@ static int smu_v13_0_6_init_microcode(struct smu_context *smu)
amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix,
sizeof(ucode_prefix));
- ret = amdgpu_ucode_request(adev, &adev->pm.fw, "amdgpu/%s.bin", ucode_prefix);
+ ret = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s.bin", ucode_prefix);
if (ret)
goto out;
@@ -2356,6 +2374,9 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
gpu_metrics->average_umc_activity =
SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization, flag));
+ gpu_metrics->mem_max_bandwidth =
+ SMUQ10_ROUND(GET_METRIC_FIELD(MaxDramBandwidth, flag));
+
gpu_metrics->curr_socket_power =
SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower, flag));
/* Energy counter reported in 15.259uJ (2^-16) units */
@@ -2494,6 +2515,11 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
SMUQ10_ROUND(metrics_x->GfxBusy[inst]);
gpu_metrics->xcp_stats[i].gfx_busy_acc[idx] =
SMUQ10_ROUND(metrics_x->GfxBusyAcc[inst]);
+
+ if (smu_v13_0_6_is_blw_host_limit_available(smu))
+ gpu_metrics->xcp_stats[i].gfx_below_host_limit_acc[idx] =
+ SMUQ10_ROUND(metrics_x->GfxclkBelowHostLimitAcc
+ [inst]);
idx++;
}
}
@@ -2716,6 +2742,41 @@ static int smu_v13_0_6_send_rma_reason(struct smu_context *smu)
return ret;
}
+static int smu_v13_0_6_reset_sdma(struct smu_context *smu, uint32_t inst_mask)
+{
+ uint32_t smu_program;
+ int ret = 0;
+
+ smu_program = (smu->smc_fw_version >> 24) & 0xff;
+ switch (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)) {
+ case IP_VERSION(13, 0, 6):
+ if (((smu_program == 7) && (smu->smc_fw_version > 0x07550700)) ||
+ ((smu_program == 0) && (smu->smc_fw_version > 0x00557700)))
+ ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_ResetSDMA, inst_mask, NULL);
+ else if ((smu_program == 4) &&
+ (smu->smc_fw_version > 0x4556e6c))
+ ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_ResetSDMA2, inst_mask, NULL);
+ break;
+ case IP_VERSION(13, 0, 14):
+ if ((smu_program == 5) &&
+ (smu->smc_fw_version > 0x05550f00))
+ ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_ResetSDMA2, inst_mask, NULL);
+ break;
+ default:
+ break;
+ }
+
+ if (ret)
+ dev_err(smu->adev->dev,
+ "failed to send ResetSDMA event with mask 0x%x\n",
+ inst_mask);
+
+ return ret;
+}
+
static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
{
struct smu_context *smu = adev->powerplay.pp_handle;
@@ -3385,6 +3446,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
.i2c_fini = smu_v13_0_6_i2c_control_fini,
.send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num,
.send_rma_reason = smu_v13_0_6_send_rma_reason,
+ .reset_sdma = smu_v13_0_6_reset_sdma,
};
void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu)
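The new reset_sdma callback above gates the SDMA soft-reset message on the firmware "program" stream, which lives in the top byte of smc_fw_version. A standalone sketch of that decoding follows; the version value is a made-up example, while the thresholds and message IDs mirror the IP_VERSION(13, 0, 6) branch of smu_v13_0_6_reset_sdma().

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t smc_fw_version = 0x07550800;            /* hypothetical program-7 build */
	uint32_t smu_program = (smc_fw_version >> 24) & 0xff;

	if ((smu_program == 7 && smc_fw_version > 0x07550700) ||
	    (smu_program == 0 && smc_fw_version > 0x00557700))
		printf("send PPSMC_MSG_ResetSDMA (0x4D)\n");
	else if (smu_program == 4 && smc_fw_version > 0x4556e6c)
		printf("send PPSMC_MSG_ResetSDMA2 (0x45)\n");
	else
		printf("SDMA reset not supported by this firmware\n");
	return 0;
}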
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 4fd0354bd312..55ef18517b0f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -2530,79 +2530,110 @@ out:
return result;
}
-static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
+#define SMU_13_0_7_CUSTOM_PARAMS_COUNT 8
+#define SMU_13_0_7_CUSTOM_PARAMS_CLOCK_COUNT 2
+#define SMU_13_0_7_CUSTOM_PARAMS_SIZE (SMU_13_0_7_CUSTOM_PARAMS_CLOCK_COUNT * SMU_13_0_7_CUSTOM_PARAMS_COUNT * sizeof(long))
+
+static int smu_v13_0_7_set_power_profile_mode_coeff(struct smu_context *smu,
+ long *input)
{
DpmActivityMonitorCoeffIntExternal_t activity_monitor_external;
DpmActivityMonitorCoeffInt_t *activity_monitor =
&(activity_monitor_external.DpmActivityMonitorCoeffInt);
- int workload_type, ret = 0;
+ int ret, idx;
- smu->power_profile_mode = input[size];
+ ret = smu_cmn_update_table(smu,
+ SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
+ (void *)(&activity_monitor_external), false);
+ if (ret) {
+ dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
+ return ret;
+ }
- if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_WINDOW3D) {
- dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode);
- return -EINVAL;
+ idx = 0 * SMU_13_0_7_CUSTOM_PARAMS_COUNT;
+ if (input[idx]) {
+ /* Gfxclk */
+ activity_monitor->Gfx_ActiveHystLimit = input[idx + 1];
+ activity_monitor->Gfx_IdleHystLimit = input[idx + 2];
+ activity_monitor->Gfx_FPS = input[idx + 3];
+ activity_monitor->Gfx_MinActiveFreqType = input[idx + 4];
+ activity_monitor->Gfx_BoosterFreqType = input[idx + 5];
+ activity_monitor->Gfx_MinActiveFreq = input[idx + 6];
+ activity_monitor->Gfx_BoosterFreq = input[idx + 7];
+ }
+ idx = 1 * SMU_13_0_7_CUSTOM_PARAMS_COUNT;
+ if (input[idx]) {
+ /* Fclk */
+ activity_monitor->Fclk_ActiveHystLimit = input[idx + 1];
+ activity_monitor->Fclk_IdleHystLimit = input[idx + 2];
+ activity_monitor->Fclk_FPS = input[idx + 3];
+ activity_monitor->Fclk_MinActiveFreqType = input[idx + 4];
+ activity_monitor->Fclk_BoosterFreqType = input[idx + 5];
+ activity_monitor->Fclk_MinActiveFreq = input[idx + 6];
+ activity_monitor->Fclk_BoosterFreq = input[idx + 7];
}
- if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
- if (size != 8)
- return -EINVAL;
+ ret = smu_cmn_update_table(smu,
+ SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
+ (void *)(&activity_monitor_external), true);
+ if (ret) {
+ dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
+ return ret;
+ }
- ret = smu_cmn_update_table(smu,
- SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
- (void *)(&activity_monitor_external), false);
- if (ret) {
- dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
- return ret;
- }
+ return ret;
+}
- switch (input[0]) {
- case 0: /* Gfxclk */
- activity_monitor->Gfx_ActiveHystLimit = input[1];
- activity_monitor->Gfx_IdleHystLimit = input[2];
- activity_monitor->Gfx_FPS = input[3];
- activity_monitor->Gfx_MinActiveFreqType = input[4];
- activity_monitor->Gfx_BoosterFreqType = input[5];
- activity_monitor->Gfx_MinActiveFreq = input[6];
- activity_monitor->Gfx_BoosterFreq = input[7];
- break;
- case 1: /* Fclk */
- activity_monitor->Fclk_ActiveHystLimit = input[1];
- activity_monitor->Fclk_IdleHystLimit = input[2];
- activity_monitor->Fclk_FPS = input[3];
- activity_monitor->Fclk_MinActiveFreqType = input[4];
- activity_monitor->Fclk_BoosterFreqType = input[5];
- activity_monitor->Fclk_MinActiveFreq = input[6];
- activity_monitor->Fclk_BoosterFreq = input[7];
- break;
- default:
- return -EINVAL;
+static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu,
+ u32 workload_mask,
+ long *custom_params,
+ u32 custom_params_max_idx)
+{
+ u32 backend_workload_mask = 0;
+ int ret, idx = -1, i;
+
+ smu_cmn_get_backend_workload_mask(smu, workload_mask,
+ &backend_workload_mask);
+
+ if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) {
+ if (!smu->custom_profile_params) {
+ smu->custom_profile_params =
+ kzalloc(SMU_13_0_7_CUSTOM_PARAMS_SIZE, GFP_KERNEL);
+ if (!smu->custom_profile_params)
+ return -ENOMEM;
}
-
- ret = smu_cmn_update_table(smu,
- SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
- (void *)(&activity_monitor_external), true);
+ if (custom_params && custom_params_max_idx) {
+ if (custom_params_max_idx != SMU_13_0_7_CUSTOM_PARAMS_COUNT)
+ return -EINVAL;
+ if (custom_params[0] >= SMU_13_0_7_CUSTOM_PARAMS_CLOCK_COUNT)
+ return -EINVAL;
+ idx = custom_params[0] * SMU_13_0_7_CUSTOM_PARAMS_COUNT;
+ smu->custom_profile_params[idx] = 1;
+ for (i = 1; i < custom_params_max_idx; i++)
+ smu->custom_profile_params[idx + i] = custom_params[i];
+ }
+ ret = smu_v13_0_7_set_power_profile_mode_coeff(smu,
+ smu->custom_profile_params);
if (ret) {
- dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
+ if (idx != -1)
+ smu->custom_profile_params[idx] = 0;
return ret;
}
+ } else if (smu->custom_profile_params) {
+ memset(smu->custom_profile_params, 0, SMU_13_0_7_CUSTOM_PARAMS_SIZE);
}
- /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
- workload_type = smu_cmn_to_asic_specific_index(smu,
- CMN2ASIC_MAPPING_WORKLOAD,
- smu->power_profile_mode);
- if (workload_type < 0)
- return -EINVAL;
-
ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask,
- smu->workload_mask, NULL);
+ backend_workload_mask, NULL);
- if (ret)
- dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__);
- else
- smu_cmn_assign_power_profile(smu);
+ if (ret) {
+ dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
+ workload_mask);
+ if (idx != -1)
+ smu->custom_profile_params[idx] = 0;
+ return ret;
+ }
return ret;
}
@@ -2766,6 +2797,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
.is_asic_wbrf_supported = smu_v13_0_7_wbrf_support_check,
.enable_uclk_shadow = smu_v13_0_enable_uclk_shadow,
.set_wbrf_exclusion_ranges = smu_v13_0_set_wbrf_exclusion_ranges,
+ .interrupt_work = smu_v13_0_interrupt_work,
};
void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu)
@@ -2779,4 +2811,5 @@ void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu)
smu->workload_map = smu_v13_0_7_workload_map;
smu->smc_driver_if_version = SMU13_0_7_DRIVER_IF_VERSION;
smu_v13_0_set_smu_mailbox_registers(smu);
+ smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
index a87040cb2f2e..9b2f4fe1578b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
@@ -79,7 +79,8 @@ int smu_v14_0_init_microcode(struct smu_context *smu)
return 0;
amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix));
- err = amdgpu_ucode_request(adev, &adev->pm.fw, "amdgpu/%s.bin", ucode_prefix);
+ err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s.bin", ucode_prefix);
if (err)
goto out;
@@ -1511,29 +1512,24 @@ int smu_v14_0_set_vcn_enable(struct smu_context *smu,
int inst)
{
struct amdgpu_device *adev = smu->adev;
- int i, ret = 0;
+ int ret = 0;
- for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
+ if (adev->vcn.harvest_config & (1 << inst))
+ return ret;
- if (smu->is_apu) {
- if (i == 0)
- ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
- SMU_MSG_PowerUpVcn0 : SMU_MSG_PowerDownVcn0,
- i << 16U, NULL);
- else if (i == 1)
- ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
- SMU_MSG_PowerUpVcn1 : SMU_MSG_PowerDownVcn1,
- i << 16U, NULL);
- } else {
+ if (smu->is_apu) {
+ if (inst == 0)
ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
- SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn,
- i << 16U, NULL);
- }
-
- if (ret)
- return ret;
+ SMU_MSG_PowerUpVcn0 : SMU_MSG_PowerDownVcn0,
+ inst << 16U, NULL);
+ else if (inst == 1)
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpVcn1 : SMU_MSG_PowerDownVcn1,
+ inst << 16U, NULL);
+ } else {
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn,
+ inst << 16U, NULL);
}
return ret;
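With smu_v14_0_set_vcn_enable() now taking a single instance, iteration over the VCN instances is expected to move to the caller. A sketch of that caller side, assuming the (smu, enable, inst) signature shown above; the helper name is hypothetical.

static int example_enable_all_vcn(struct smu_context *smu, bool enable)
{
	struct amdgpu_device *adev = smu->adev;
	int i, ret;

	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		/* harvested instances are skipped inside set_vcn_enable() itself */
		ret = smu_v14_0_set_vcn_enable(smu, enable, i);
		if (ret)
			return ret;
	}
	return 0;
}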
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
index 687a0f5ac94f..5cad09c5f2ff 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
@@ -1739,89 +1739,120 @@ static int smu_v14_0_2_get_power_profile_mode(struct smu_context *smu,
return size;
}
-static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu,
- long *input,
- uint32_t size)
+#define SMU_14_0_2_CUSTOM_PARAMS_COUNT 9
+#define SMU_14_0_2_CUSTOM_PARAMS_CLOCK_COUNT 2
+#define SMU_14_0_2_CUSTOM_PARAMS_SIZE (SMU_14_0_2_CUSTOM_PARAMS_CLOCK_COUNT * SMU_14_0_2_CUSTOM_PARAMS_COUNT * sizeof(long))
+
+static int smu_v14_0_2_set_power_profile_mode_coeff(struct smu_context *smu,
+ long *input)
{
DpmActivityMonitorCoeffIntExternal_t activity_monitor_external;
DpmActivityMonitorCoeffInt_t *activity_monitor =
&(activity_monitor_external.DpmActivityMonitorCoeffInt);
- int workload_type, ret = 0;
- uint32_t current_profile_mode = smu->power_profile_mode;
- smu->power_profile_mode = input[size];
+ int ret, idx;
- if (smu->power_profile_mode >= PP_SMC_POWER_PROFILE_COUNT) {
- dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode);
- return -EINVAL;
+ ret = smu_cmn_update_table(smu,
+ SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+ WORKLOAD_PPLIB_CUSTOM_BIT,
+ (void *)(&activity_monitor_external),
+ false);
+ if (ret) {
+ dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
+ return ret;
}
- if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
- if (size != 9)
- return -EINVAL;
+ idx = 0 * SMU_14_0_2_CUSTOM_PARAMS_COUNT;
+ if (input[idx]) {
+ /* Gfxclk */
+ activity_monitor->Gfx_FPS = input[idx + 1];
+ activity_monitor->Gfx_MinActiveFreqType = input[idx + 2];
+ activity_monitor->Gfx_MinActiveFreq = input[idx + 3];
+ activity_monitor->Gfx_BoosterFreqType = input[idx + 4];
+ activity_monitor->Gfx_BoosterFreq = input[idx + 5];
+ activity_monitor->Gfx_PD_Data_limit_c = input[idx + 6];
+ activity_monitor->Gfx_PD_Data_error_coeff = input[idx + 7];
+ activity_monitor->Gfx_PD_Data_error_rate_coeff = input[idx + 8];
+ }
+ idx = 1 * SMU_14_0_2_CUSTOM_PARAMS_COUNT;
+ if (input[idx]) {
+ /* Fclk */
+ activity_monitor->Fclk_FPS = input[idx + 1];
+ activity_monitor->Fclk_MinActiveFreqType = input[idx + 2];
+ activity_monitor->Fclk_MinActiveFreq = input[idx + 3];
+ activity_monitor->Fclk_BoosterFreqType = input[idx + 4];
+ activity_monitor->Fclk_BoosterFreq = input[idx + 5];
+ activity_monitor->Fclk_PD_Data_limit_c = input[idx + 6];
+ activity_monitor->Fclk_PD_Data_error_coeff = input[idx + 7];
+ activity_monitor->Fclk_PD_Data_error_rate_coeff = input[idx + 8];
+ }
- ret = smu_cmn_update_table(smu,
- SMU_TABLE_ACTIVITY_MONITOR_COEFF,
- WORKLOAD_PPLIB_CUSTOM_BIT,
- (void *)(&activity_monitor_external),
- false);
- if (ret) {
- dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
- return ret;
- }
+ ret = smu_cmn_update_table(smu,
+ SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+ WORKLOAD_PPLIB_CUSTOM_BIT,
+ (void *)(&activity_monitor_external),
+ true);
+ if (ret) {
+ dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
+ return ret;
+ }
- switch (input[0]) {
- case 0: /* Gfxclk */
- activity_monitor->Gfx_FPS = input[1];
- activity_monitor->Gfx_MinActiveFreqType = input[2];
- activity_monitor->Gfx_MinActiveFreq = input[3];
- activity_monitor->Gfx_BoosterFreqType = input[4];
- activity_monitor->Gfx_BoosterFreq = input[5];
- activity_monitor->Gfx_PD_Data_limit_c = input[6];
- activity_monitor->Gfx_PD_Data_error_coeff = input[7];
- activity_monitor->Gfx_PD_Data_error_rate_coeff = input[8];
- break;
- case 1: /* Fclk */
- activity_monitor->Fclk_FPS = input[1];
- activity_monitor->Fclk_MinActiveFreqType = input[2];
- activity_monitor->Fclk_MinActiveFreq = input[3];
- activity_monitor->Fclk_BoosterFreqType = input[4];
- activity_monitor->Fclk_BoosterFreq = input[5];
- activity_monitor->Fclk_PD_Data_limit_c = input[6];
- activity_monitor->Fclk_PD_Data_error_coeff = input[7];
- activity_monitor->Fclk_PD_Data_error_rate_coeff = input[8];
- break;
- default:
- return -EINVAL;
- }
+ return ret;
+}
- ret = smu_cmn_update_table(smu,
- SMU_TABLE_ACTIVITY_MONITOR_COEFF,
- WORKLOAD_PPLIB_CUSTOM_BIT,
- (void *)(&activity_monitor_external),
- true);
- if (ret) {
- dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
- return ret;
- }
- }
+static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu,
+ u32 workload_mask,
+ long *custom_params,
+ u32 custom_params_max_idx)
+{
+ u32 backend_workload_mask = 0;
+ int ret, idx = -1, i;
+
+ smu_cmn_get_backend_workload_mask(smu, workload_mask,
+ &backend_workload_mask);
- if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE)
+ /* disable deep sleep if compute is enabled */
+ if (workload_mask & (1 << PP_SMC_POWER_PROFILE_COMPUTE))
smu_v14_0_deep_sleep_control(smu, false);
- else if (current_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE)
+ else
smu_v14_0_deep_sleep_control(smu, true);
- /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
- workload_type = smu_cmn_to_asic_specific_index(smu,
- CMN2ASIC_MAPPING_WORKLOAD,
- smu->power_profile_mode);
- if (workload_type < 0)
- return -EINVAL;
+ if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) {
+ if (!smu->custom_profile_params) {
+ smu->custom_profile_params =
+ kzalloc(SMU_14_0_2_CUSTOM_PARAMS_SIZE, GFP_KERNEL);
+ if (!smu->custom_profile_params)
+ return -ENOMEM;
+ }
+ if (custom_params && custom_params_max_idx) {
+ if (custom_params_max_idx != SMU_14_0_2_CUSTOM_PARAMS_COUNT)
+ return -EINVAL;
+ if (custom_params[0] >= SMU_14_0_2_CUSTOM_PARAMS_CLOCK_COUNT)
+ return -EINVAL;
+ idx = custom_params[0] * SMU_14_0_2_CUSTOM_PARAMS_COUNT;
+ smu->custom_profile_params[idx] = 1;
+ for (i = 1; i < custom_params_max_idx; i++)
+ smu->custom_profile_params[idx + i] = custom_params[i];
+ }
+ ret = smu_v14_0_2_set_power_profile_mode_coeff(smu,
+ smu->custom_profile_params);
+ if (ret) {
+ if (idx != -1)
+ smu->custom_profile_params[idx] = 0;
+ return ret;
+ }
+ } else if (smu->custom_profile_params) {
+ memset(smu->custom_profile_params, 0, SMU_14_0_2_CUSTOM_PARAMS_SIZE);
+ }
ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask,
- smu->workload_mask, NULL);
-
- if (!ret)
- smu_cmn_assign_power_profile(smu);
+ backend_workload_mask, NULL);
+ if (ret) {
+ dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
+ workload_mask);
+ if (idx != -1)
+ smu->custom_profile_params[idx] = 0;
+ return ret;
+ }
return ret;
}
@@ -2065,7 +2096,7 @@ static int smu_v14_0_2_enable_gfx_features(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
- if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(14, 0, 2))
+ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 2))
return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_EnableAllSmuFeatures,
FEATURE_PWR_GFX, NULL);
else
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index dbbd3759bff3..9f55207ea9bc 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -1144,14 +1144,6 @@ int smu_cmn_set_mp1_state(struct smu_context *smu,
return ret;
}
-void smu_cmn_assign_power_profile(struct smu_context *smu)
-{
- uint32_t index;
- index = fls(smu->workload_mask);
- index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
- smu->power_profile_mode = smu->workload_setting[index];
-}
-
bool smu_cmn_is_audio_func_enabled(struct amdgpu_device *adev)
{
struct pci_dev *p = NULL;
@@ -1229,3 +1221,28 @@ void smu_cmn_generic_plpd_policy_desc(struct smu_dpm_policy *policy)
{
policy->desc = &xgmi_plpd_policy_desc;
}
+
+void smu_cmn_get_backend_workload_mask(struct smu_context *smu,
+ u32 workload_mask,
+ u32 *backend_workload_mask)
+{
+ int workload_type;
+ u32 profile_mode;
+
+ *backend_workload_mask = 0;
+
+ for (profile_mode = 0; profile_mode < PP_SMC_POWER_PROFILE_COUNT; profile_mode++) {
+ if (!(workload_mask & (1 << profile_mode)))
+ continue;
+
+ /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
+ workload_type = smu_cmn_to_asic_specific_index(smu,
+ CMN2ASIC_MAPPING_WORKLOAD,
+ profile_mode);
+
+ if (workload_type < 0)
+ continue;
+
+ *backend_workload_mask |= 1 << workload_type;
+ }
+}
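A hedged usage sketch of the new helper (the wrapper name is hypothetical; both callees come from this file): every profile bit set in the frontend mask is translated to its WORKLOAD_PPLIB_*_BIT and OR'd into the backend mask, profiles the ASIC does not map are silently dropped, and the result is handed to SMU_MSG_SetWorkloadMask exactly as the per-ASIC set_power_profile_mode() implementations above now do.

static int example_apply_profiles(struct smu_context *smu, u32 workload_mask)
{
	u32 backend_workload_mask = 0;

	smu_cmn_get_backend_workload_mask(smu, workload_mask,
					  &backend_workload_mask);

	/* unmapped profiles are simply absent from the backend mask */
	return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask,
					       backend_workload_mask, NULL);
}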
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
index 8a801e389659..a020277dec3e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
@@ -130,8 +130,6 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev);
int smu_cmn_set_mp1_state(struct smu_context *smu,
enum pp_mp1_state mp1_state);
-void smu_cmn_assign_power_profile(struct smu_context *smu);
-
/*
* Helper function to make sysfs_emit_at() happy. Align buf to
* the current page boundary and record the offset.
@@ -149,5 +147,9 @@ bool smu_cmn_is_audio_func_enabled(struct amdgpu_device *adev);
void smu_cmn_generic_soc_policy_desc(struct smu_dpm_policy *policy);
void smu_cmn_generic_plpd_policy_desc(struct smu_dpm_policy *policy);
+void smu_cmn_get_backend_workload_mask(struct smu_context *smu,
+ u32 workload_mask,
+ u32 *backend_workload_mask);
+
#endif
#endif