diff options
Diffstat (limited to 'drivers/gpu/drm/amd/display/dc/dml')
14 files changed, 912 insertions, 512 deletions
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 359f6e9a1da0..86a3b5bfd699 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -61,7 +61,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags) @@ -71,6 +70,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(fram CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/dcn314_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_ccflags) $(frame_warn_flag) @@ -82,7 +82,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_ccflags) -Wno-tautological-compare @@ -131,6 +130,7 @@ DML += dcn321/dcn321_fpu.o DML += dcn301/dcn301_fpu.o DML += dcn302/dcn302_fpu.o DML += dcn303/dcn303_fpu.o +DML += dcn314/dcn314_fpu.o DML += dsc/rc_calc_fpu.o DML += calcs/dcn_calcs.o calcs/dcn_calc_math.o calcs/dcn_calc_auto.o endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index ca44df4fca74..d34e0f1314d9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -30,6 +30,7 @@ #include "dchubbub.h" #include "dcn20/dcn20_resource.h" #include "dcn21/dcn21_resource.h" +#include "clk_mgr/dcn21/rn_clk_mgr.h" #include "dcn20_fpu.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c index 7ef66e511ec8..d211cf6d234c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c @@ -26,6 +26,7 @@ #include "clk_mgr.h" #include "dcn20/dcn20_resource.h" #include "dcn301/dcn301_resource.h" +#include "clk_mgr/dcn301/vg_clk_mgr.h" #include "dml/dcn20/dcn20_fpu.h" #include "dcn301_fpu.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c index e36cfa5985ea..149a1b17cdf3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c @@ -25,6 +25,9 @@ #include "resource.h" #include "clk_mgr.h" +#include "dcn31/dcn31_resource.h" +#include "dcn315/dcn315_resource.h" +#include "dcn316/dcn316_resource.h" #include "dml/dcn20/dcn20_fpu.h" #include "dcn31_fpu.h" @@ -114,7 +117,7 @@ struct _vcs_dpi_ip_params_st dcn3_1_ip = { .dcc_supported = true, }; -struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = { +static struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = { /*TODO: correct dispclk/dppclk voltage level determination*/ .clock_limits = { { @@ -259,7 +262,7 @@ struct _vcs_dpi_ip_params_st dcn3_15_ip = { .dcc_supported = true, }; -struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = { +static struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = { .sr_exit_time_us = 9.0, .sr_enter_plus_exit_time_us = 11.0, .sr_exit_z8_time_us = 50.0, @@ -355,7 +358,7 @@ struct _vcs_dpi_ip_params_st dcn3_16_ip = { .dcc_supported = true, }; -struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = { +static struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = { /*TODO: correct dispclk/dppclk voltage level determination*/ .clock_limits = { { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index 3fab19134480..d63b4209b14c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -26,7 +26,7 @@ #include "dc.h" #include "dc_link.h" #include "../display_mode_lib.h" -#include "dml/dcn30/display_mode_vba_30.h" +#include "../dcn30/display_mode_vba_30.h" #include "display_mode_vba_31.h" #include "../dml_inline_defs.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c index 66b82e4f05c6..35d10b4d018b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c @@ -27,7 +27,7 @@ #include "../display_mode_vba.h" #include "../dml_inline_defs.h" #include "display_rq_dlg_calc_31.h" -#include "dml/dcn30/display_mode_vba_30.h" +#include "../dcn30/display_mode_vba_30.h" static bool is_dual_plane(enum source_format_class source_format) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c new file mode 100644 index 000000000000..34a5d0f87b5f --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c @@ -0,0 +1,376 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "clk_mgr.h" +#include "resource.h" +#include "dcn31/dcn31_hubbub.h" +#include "dcn314_fpu.h" +#include "dml/dcn20/dcn20_fpu.h" +#include "dml/display_mode_vba.h" + +struct _vcs_dpi_ip_params_st dcn3_14_ip = { + .VBlankNomDefaultUS = 668, + .gpuvm_enable = 1, + .gpuvm_max_page_table_levels = 1, + .hostvm_enable = 1, + .hostvm_max_page_table_levels = 2, + .rob_buffer_size_kbytes = 64, + .det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE, + .config_return_buffer_size_in_kbytes = 1792, + .compressed_buffer_segment_size_in_kbytes = 64, + .meta_fifo_size_in_kentries = 32, + .zero_size_buffer_entries = 512, + .compbuf_reserved_space_64b = 256, + .compbuf_reserved_space_zs = 64, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, + .writeback_chunk_size_kbytes = 8, + .ptoi_supported = false, + .num_dsc = 4, + .maximum_dsc_bits_per_component = 10, + .dsc422_native_support = false, + .is_line_buffer_bpp_fixed = true, + .line_buffer_fixed_bpp = 48, + .line_buffer_size_bits = 789504, + .max_line_buffer_lines = 12, + .writeback_interface_buffer_size_kbytes = 90, + .max_num_dpp = 4, + .max_num_otg = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dpte_buffer_size_in_pte_reqs_luma = 64, + .dpte_buffer_size_in_pte_reqs_chroma = 34, + .dispclk_ramp_margin_percent = 1, + .max_inter_dcn_tile_repeaters = 8, + .cursor_buffer_size = 16, + .cursor_chunk_size = 2, + .writeback_line_buffer_buffer_size = 0, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .dppclk_delay_subtotal = 46, + .dppclk_delay_scl = 50, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_cnvc_formatter = 27, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 119, + .dynamic_metadata_vm_enabled = false, + .odm_combine_4to1_supported = false, + .dcc_supported = true, +}; + +struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = { + /*TODO: correct dispclk/dppclk voltage level determination*/ + .clock_limits = { + { + .state = 0, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 600.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 186.0, + .dtbclk_mhz = 600.0, + }, + { + .state = 1, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 209.0, + .dtbclk_mhz = 600.0, + }, + { + .state = 2, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 209.0, + .dtbclk_mhz = 600.0, + }, + { + .state = 3, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 371.0, + .dtbclk_mhz = 600.0, + }, + { + .state = 4, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 417.0, + .dtbclk_mhz = 600.0, + }, + }, + .num_states = 5, + .sr_exit_time_us = 9.0, + .sr_enter_plus_exit_time_us = 11.0, + .sr_exit_z8_time_us = 442.0, + .sr_enter_plus_exit_z8_time_us = 560.0, + .writeback_latency_us = 12.0, + .dram_channel_width_bytes = 4, + .round_trip_ping_latency_dcfclk_cycles = 106, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_sdp_bw_after_urgent = 80.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, + .max_avg_sdp_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_percent = 60.0, + .fabric_datapath_to_dcn_data_return_bytes = 32, + .return_bus_width_bytes = 64, + .downspread_percent = 0.38, + .dcn_downspread_percent = 0.5, + .gpuvm_min_page_size_bytes = 4096, + .hostvm_min_page_size_bytes = 4096, + .do_urgent_latency_adjustment = false, + .urgent_latency_adjustment_fabric_clock_component_us = 0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, +}; + + +void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params) +{ + struct clk_limit_table *clk_table = &bw_params->clk_table; + struct _vcs_dpi_voltage_scaling_st *clock_limits = + dcn3_14_soc.clock_limits; + unsigned int i, closest_clk_lvl; + int max_dispclk_mhz = 0, max_dppclk_mhz = 0; + int j; + + dc_assert_fp_enabled(); + + // Default clock levels are used for diags, which may lead to overclocking. + if (!IS_DIAG_DC(dc->ctx->dce_environment) && dc->config.use_default_clock_table == false) { + + dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; + dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count; + + if (bw_params->num_channels > 0) + dcn3_14_soc.num_chans = bw_params->num_channels; + + ASSERT(dcn3_14_soc.num_chans); + ASSERT(clk_table->num_entries); + + /* Prepass to find max clocks independent of voltage level. */ + for (i = 0; i < clk_table->num_entries; ++i) { + if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; + if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; + } + + for (i = 0; i < clk_table->num_entries; i++) { + /* loop backwards*/ + for (closest_clk_lvl = 0, j = dcn3_14_soc.num_states - 1; j >= 0; j--) { + if ((unsigned int) dcn3_14_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { + closest_clk_lvl = j; + break; + } + } + if (clk_table->num_entries == 1) { + /*smu gives one DPM level, let's take the highest one*/ + closest_clk_lvl = dcn3_14_soc.num_states - 1; + } + + clock_limits[i].state = i; + + /* Clocks dependent on voltage level. */ + clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + if (clk_table->num_entries == 1 && + clock_limits[i].dcfclk_mhz < dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { + /*SMU fix not released yet*/ + clock_limits[i].dcfclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; + } + clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; + + if (clk_table->entries[i].memclk_mhz && clk_table->entries[i].wck_ratio) + clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; + + /* Clocks independent of voltage level. */ + clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : + dcn3_14_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + + clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : + dcn3_14_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + + clock_limits[i].dram_bw_per_chan_gbps = dcn3_14_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + clock_limits[i].dscclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + clock_limits[i].dtbclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + clock_limits[i].phyclk_d18_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + clock_limits[i].phyclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + } + for (i = 0; i < clk_table->num_entries; i++) + dcn3_14_soc.clock_limits[i] = clock_limits[i]; + if (clk_table->num_entries) { + dcn3_14_soc.num_states = clk_table->num_entries; + } + } + + if (max_dispclk_mhz) { + dcn3_14_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; + dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; + } + + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) + dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31); + else + dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31_FPGA); +} + +static bool is_dual_plane(enum surface_pixel_format format) +{ + return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA; +} + +int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate) +{ + int i, pipe_cnt; + struct resource_context *res_ctx = &context->res_ctx; + struct pipe_ctx *pipe; + bool upscaled = false; + + dc_assert_fp_enabled(); + + dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_crtc_timing *timing; + + if (!res_ctx->pipe_ctx[i].stream) + continue; + pipe = &res_ctx->pipe_ctx[i]; + timing = &pipe->stream->timing; + + if (dc_extended_blank_supported(dc) && pipe->stream->adjust.v_total_max == pipe->stream->adjust.v_total_min + && pipe->stream->adjust.v_total_min > timing->v_total) + pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min; + + if (pipe->plane_state && + (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height || + pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width)) + upscaled = true; + + /* + * Immediate flip can be set dynamically after enabling the plane. + * We need to require support for immediate flip or underflow can be + * intermittently experienced depending on peak b/w requirements. + */ + pipes[pipe_cnt].pipe.src.immediate_flip = true; + + pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; + pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active; + pipes[pipe_cnt].pipe.src.gpuvm = true; + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; + pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; + pipes[pipe_cnt].pipe.src.dcc_rate = 3; + pipes[pipe_cnt].dout.dsc_input_bpc = 0; + + if (pipes[pipe_cnt].dout.dsc_enable) { + switch (timing->display_color_depth) { + case COLOR_DEPTH_888: + pipes[pipe_cnt].dout.dsc_input_bpc = 8; + break; + case COLOR_DEPTH_101010: + pipes[pipe_cnt].dout.dsc_input_bpc = 10; + break; + case COLOR_DEPTH_121212: + pipes[pipe_cnt].dout.dsc_input_bpc = 12; + break; + default: + ASSERT(0); + break; + } + } + + pipe_cnt++; + } + context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE; + + dc->config.enable_4to1MPC = false; + if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) { + if (is_dual_plane(pipe->plane_state->format) + && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) { + dc->config.enable_4to1MPC = true; + } else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) { + /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */ + context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; + pipes[0].pipe.src.unbounded_req_mode = true; + } + } else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count + && dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) { + context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64; + } else if (context->stream_count >= 3 && upscaled) { + context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + if (pipe->stream->signal == SIGNAL_TYPE_EDP && dc->debug.seamless_boot_odm_combine && + pipe->stream->apply_seamless_boot_optimization) { + + if (pipe->stream->apply_boot_odm_mode == dm_odm_combine_policy_2to1) { + context->bw_ctx.dml.vba.ODMCombinePolicy = dm_odm_combine_policy_2to1; + break; + } + } + } + + return pipe_cnt; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h new file mode 100644 index 000000000000..d32c5bb99f4c --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DCN314_FPU_H__ +#define __DCN314_FPU_H__ + +#define DCN3_14_DEFAULT_DET_SIZE 384 +#define DCN3_14_MAX_DET_SIZE 384 +#define DCN3_14_MIN_COMPBUF_SIZE_KB 128 +#define DCN3_14_CRB_SEGMENT_SIZE_KB 64 + +void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params); +int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 66453546e24f..8118cfc5b405 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -473,8 +473,11 @@ void dcn32_set_phantom_stream_timing(struct dc *dc, // DML calculation for MALL region doesn't take into account FW delay // and required pstate allow width for multi-display cases + /* Add 16 lines margin to the MALL REGION because SUB_VP_START_LINE must be aligned + * to 2 swaths (i.e. 16 lines) + */ phantom_vactive = get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx) + - pstate_width_fw_delay_lines; + pstate_width_fw_delay_lines + dc->caps.subvp_swath_height_margin_lines; // For backporch of phantom pipe, use vstartup of the main pipe phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); @@ -490,6 +493,7 @@ void dcn32_set_phantom_stream_timing(struct dc *dc, phantom_stream->timing.v_front_porch + phantom_stream->timing.v_sync_width + phantom_bp; + phantom_stream->timing.flags.DSC = 0; // Don't need DSC for phantom timing } /** @@ -983,9 +987,15 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, * DML favors voltage over p-state, but we're more interested in * supporting p-state over voltage. We can't support p-state in * prefetch mode > 0 so try capping the prefetch mode to start. + * Override present for testing. */ - context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = + if (dc->debug.dml_disallow_alternate_prefetch_modes) + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = dm_prefetch_support_uclk_fclk_and_stutter; + else + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = + dm_prefetch_support_uclk_fclk_and_stutter_if_possible; + *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); /* This may adjust vlevel and maxMpcComb */ if (*vlevel < context->bw_ctx.dml.soc.num_states) @@ -1014,7 +1024,9 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, * will not allow for switch in VBLANK. The DRR display must have it's VBLANK stretched * enough to support MCLK switching. */ - if (*vlevel == context->bw_ctx.dml.soc.num_states) { + if (*vlevel == context->bw_ctx.dml.soc.num_states && + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final == + dm_prefetch_support_uclk_fclk_and_stutter) { context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = dm_prefetch_support_stutter; /* There are params (such as FabricClock) that need to be recalculated @@ -1344,7 +1356,8 @@ bool dcn32_internal_validate_bw(struct dc *dc, int split[MAX_PIPES] = { 0 }; bool merge[MAX_PIPES] = { false }; bool newly_split[MAX_PIPES] = { false }; - int pipe_cnt, i, pipe_idx, vlevel; + int pipe_cnt, i, pipe_idx; + int vlevel = context->bw_ctx.dml.soc.num_states; struct vba_vars_st *vba = &context->bw_ctx.dml.vba; dc_assert_fp_enabled(); @@ -1373,17 +1386,22 @@ bool dcn32_internal_validate_bw(struct dc *dc, DC_FP_END(); } - if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states || - vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { + if (fast_validate || + (dc->debug.dml_disallow_alternate_prefetch_modes && + (vlevel == context->bw_ctx.dml.soc.num_states || + vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported))) { /* - * If mode is unsupported or there's still no p-state support then - * fall back to favoring voltage. + * If dml_disallow_alternate_prefetch_modes is false, then we have already + * tried alternate prefetch modes during full validation. + * + * If mode is unsupported or there is no p-state support, then + * fall back to favouring voltage. * - * If Prefetch mode 0 failed for this config, or passed with Max UCLK, try if - * supported with Prefetch mode 1 (dm_prefetch_support_fclk_and_stutter == 2) + * If Prefetch mode 0 failed for this config, or passed with Max UCLK, then try + * to support with Prefetch mode 1 (dm_prefetch_support_fclk_and_stutter == 2) */ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = - dm_prefetch_support_fclk_and_stutter; + dm_prefetch_support_fclk_and_stutter; vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); @@ -2098,6 +2116,13 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; } + if ((int)(dcn3_2_soc.fclk_change_latency_us * 1000) + != dc->bb_overrides.fclk_clock_change_latency_ns + && dc->bb_overrides.fclk_clock_change_latency_ns) { + dcn3_2_soc.fclk_change_latency_us = + dc->bb_overrides.fclk_clock_change_latency_ns / 1000; + } + if ((int)(dcn3_2_soc.dummy_pstate_latency_us * 1000) != dc->bb_overrides.dummy_clock_change_latency_ns && dc->bb_overrides.dummy_clock_change_latency_ns) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 890612db08dc..cb2025771646 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -221,7 +221,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman // VBA_DELTA // Calculate DET size, swath height dml32_CalculateSwathAndDETConfiguration( - &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration, mode_lib->vba.DETSizeOverride, mode_lib->vba.UsesMALLForPStateChange, mode_lib->vba.ConfigReturnBufferSizeInKByte, @@ -461,7 +460,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman { dml32_CalculateVMRowAndSwath( - &v->dummy_vars.dml32_CalculateVMRowAndSwath, mode_lib->vba.NumberOfActiveSurfaces, v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters, v->SurfaceSizeInMALL, @@ -757,9 +755,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelY = v->BytePerPixelY[k]; v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelC = v->BytePerPixelC[k]; v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; - v->ErrorResult[k] = dml32_CalculatePrefetchSchedule( - &v->dummy_vars.dml32_CalculatePrefetchSchedule, - v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, + v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, v->DSCDelay[k], mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelaySCL, @@ -1167,7 +1163,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency; dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( - &v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport, mode_lib->vba.USRRetrainingRequiredFinal, mode_lib->vba.UsesMALLForPStateChange, mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], @@ -1952,7 +1947,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } dml32_CalculateSwathAndDETConfiguration( - &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration, mode_lib->vba.DETSizeOverride, mode_lib->vba.UsesMALLForPStateChange, mode_lib->vba.ConfigReturnBufferSizeInKByte, @@ -2549,7 +2543,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } dml32_CalculateSwathAndDETConfiguration( - &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration, mode_lib->vba.DETSizeOverride, mode_lib->vba.UsesMALLForPStateChange, mode_lib->vba.ConfigReturnBufferSizeInKByte, @@ -2749,7 +2742,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l { dml32_CalculateVMRowAndSwath( - &v->dummy_vars.dml32_CalculateVMRowAndSwath, mode_lib->vba.NumberOfActiveSurfaces, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters, mode_lib->vba.SurfaceSizeInMALL, @@ -3266,7 +3258,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.NoTimeForPrefetch[i][j][k] = dml32_CalculatePrefetchSchedule( - &v->dummy_vars.dml32_CalculatePrefetchSchedule, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe, mode_lib->vba.DSCDelayPerState[i][k], @@ -3566,7 +3557,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l { dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( - &v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport, mode_lib->vba.USRRetrainingRequiredFinal, mode_lib->vba.UsesMALLForPStateChange, mode_lib->vba.PrefetchModePerState[i][j], diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 07f8f3b8626b..05fc14a47fba 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -391,7 +391,6 @@ void dml32_CalculateBytePerPixelAndBlockSizes( } // CalculateBytePerPixelAndBlockSizes void dml32_CalculateSwathAndDETConfiguration( - struct dml32_CalculateSwathAndDETConfiguration *st_vars, unsigned int DETSizeOverride[], enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], unsigned int ConfigReturnBufferSizeInKByte, @@ -456,10 +455,18 @@ void dml32_CalculateSwathAndDETConfiguration( bool ViewportSizeSupportPerSurface[], bool *ViewportSizeSupport) { + unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; + unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; + unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; + unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; + unsigned int RoundedUpSwathSizeBytesY; + unsigned int RoundedUpSwathSizeBytesC; + double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; + double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX]; unsigned int k; - - st_vars->TotalActiveDPP = 0; - st_vars->NoChromaSurfaces = true; + unsigned int TotalActiveDPP = 0; + bool NoChromaSurfaces = true; + unsigned int DETBufferSizeInKByteForSwathCalculation; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); @@ -494,43 +501,43 @@ void dml32_CalculateSwathAndDETConfiguration( DPPPerSurface, /* Output */ - st_vars->SwathWidthdoubleDPP, - st_vars->SwathWidthdoubleDPPChroma, + SwathWidthdoubleDPP, + SwathWidthdoubleDPPChroma, SwathWidth, SwathWidthChroma, - st_vars->MaximumSwathHeightY, - st_vars->MaximumSwathHeightC, + MaximumSwathHeightY, + MaximumSwathHeightC, swath_width_luma_ub, swath_width_chroma_ub); for (k = 0; k < NumberOfActiveSurfaces; ++k) { - st_vars->RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * st_vars->MaximumSwathHeightY[k]; - st_vars->RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * st_vars->MaximumSwathHeightC[k]; + RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; + RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]); - dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, st_vars->MaximumSwathHeightY[k]); + dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]); dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, - st_vars->RoundedUpMaxSwathSizeBytesY[k]); + RoundedUpMaxSwathSizeBytesY[k]); dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]); - dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, st_vars->MaximumSwathHeightC[k]); + dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]); dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, - st_vars->RoundedUpMaxSwathSizeBytesC[k]); + RoundedUpMaxSwathSizeBytesC[k]); #endif if (SourcePixelFormat[k] == dm_420_10) { - st_vars->RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesY[k], 256); - st_vars->RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesC[k], 256); + RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256); + RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256); } } for (k = 0; k < NumberOfActiveSurfaces; ++k) { - st_vars->TotalActiveDPP = st_vars->TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]); + TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]); if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) { - st_vars->NoChromaSurfaces = false; + NoChromaSurfaces = false; } } @@ -540,10 +547,10 @@ void dml32_CalculateSwathAndDETConfiguration( // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio] // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req - *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (st_vars->RoundedUpMaxSwathSizeBytesY[0]/512); + *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512); if (*CompBufReservedSpaceNeedAdjustment == 1) { - *CompBufReservedSpaceKBytes = ROBSizeKBytes - st_vars->RoundedUpMaxSwathSizeBytesY[0]/512; + *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512; } #ifdef __DML_VBA_DEBUG__ @@ -551,7 +558,7 @@ void dml32_CalculateSwathAndDETConfiguration( dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment); #endif - *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, st_vars->TotalActiveDPP, st_vars->NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); + *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); dml32_CalculateDETBufferSize(DETSizeOverride, UseMALLForPStateChange, @@ -566,8 +573,8 @@ void dml32_CalculateSwathAndDETConfiguration( SourcePixelFormat, ReadBandwidthLuma, ReadBandwidthChroma, - st_vars->RoundedUpMaxSwathSizeBytesY, - st_vars->RoundedUpMaxSwathSizeBytesC, + RoundedUpMaxSwathSizeBytesY, + RoundedUpMaxSwathSizeBytesC, DPPPerSurface, /* Output */ @@ -575,7 +582,7 @@ void dml32_CalculateSwathAndDETConfiguration( CompressedBufferSizeInkByte); #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, st_vars->TotalActiveDPP); + dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); @@ -586,42 +593,42 @@ void dml32_CalculateSwathAndDETConfiguration( *ViewportSizeSupport = true; for (k = 0; k < NumberOfActiveSurfaces; ++k) { - st_vars->DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == + DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k, - st_vars->DETBufferSizeInKByteForSwathCalculation); + DETBufferSizeInKByteForSwathCalculation); #endif - if (st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] <= - st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { - SwathHeightY[k] = st_vars->MaximumSwathHeightY[k]; - SwathHeightC[k] = st_vars->MaximumSwathHeightC[k]; - st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k]; - st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k]; - } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] && - st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] <= - st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { - SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2; - SwathHeightC[k] = st_vars->MaximumSwathHeightC[k]; - st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2; - st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k]; - } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] < 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] && - st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 <= - st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { - SwathHeightY[k] = st_vars->MaximumSwathHeightY[k]; - SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2; - st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k]; - st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2; + if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= + DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + SwathHeightY[k] = MaximumSwathHeightY[k]; + SwathHeightC[k] = MaximumSwathHeightC[k]; + RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; + RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; + } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && + RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= + DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + SwathHeightY[k] = MaximumSwathHeightY[k] / 2; + SwathHeightC[k] = MaximumSwathHeightC[k]; + RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; + RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; + } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && + RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= + DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + SwathHeightY[k] = MaximumSwathHeightY[k]; + SwathHeightC[k] = MaximumSwathHeightC[k] / 2; + RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; + RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; } else { - SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2; - SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2; - st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2; - st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2; + SwathHeightY[k] = MaximumSwathHeightY[k] / 2; + SwathHeightC[k] = MaximumSwathHeightC[k] / 2; + RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; + RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; } - if ((st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 > - st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) + if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > + DETBufferSizeInKByteForSwathCalculation * 1024 / 2) || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { *ViewportSizeSupport = false; @@ -636,7 +643,7 @@ void dml32_CalculateSwathAndDETConfiguration( #endif DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024; DETBufferSizeC[k] = 0; - } else if (st_vars->RoundedUpSwathSizeBytesY <= 1.5 * st_vars->RoundedUpSwathSizeBytesC) { + } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k); #endif @@ -654,11 +661,11 @@ void dml32_CalculateSwathAndDETConfiguration( dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]); dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, - k, st_vars->RoundedUpMaxSwathSizeBytesY[k]); + k, RoundedUpMaxSwathSizeBytesY[k]); dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, - k, st_vars->RoundedUpMaxSwathSizeBytesC[k]); - dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesY); - dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesC); + k, RoundedUpMaxSwathSizeBytesC[k]); + dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY); + dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC); dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]); @@ -1867,7 +1874,6 @@ void dml32_CalculateSurfaceSizeInMall( } // CalculateSurfaceSizeInMall void dml32_CalculateVMRowAndSwath( - struct dml32_CalculateVMRowAndSwath *st_vars, unsigned int NumberOfActiveSurfaces, DmlPipe myPipe[], unsigned int SurfaceSizeInMALL[], @@ -1933,6 +1939,21 @@ void dml32_CalculateVMRowAndSwath( unsigned int BIGK_FRAGMENT_SIZE[]) { unsigned int k; + unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX]; + unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX]; + unsigned int PDEAndMetaPTEBytesFrameY; + unsigned int PDEAndMetaPTEBytesFrameC; + unsigned int MetaRowByteY[DC__NUM_DPP__MAX]; + unsigned int MetaRowByteC[DC__NUM_DPP__MAX]; + unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX]; + unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX]; + unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX]; + unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX]; + unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; + unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX]; + unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; + unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX]; + bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX]; for (k = 0; k < NumberOfActiveSurfaces; ++k) { if (HostVMEnable == true) { @@ -1954,15 +1975,15 @@ void dml32_CalculateVMRowAndSwath( myPipe[k].SourcePixelFormat == dm_rgbe_alpha) { if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) && !IsVertical(myPipe[k].SourceRotation)) { - st_vars->PTEBufferSizeInRequestsForLuma[k] = + PTEBufferSizeInRequestsForLuma[k] = (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2; - st_vars->PTEBufferSizeInRequestsForChroma[k] = st_vars->PTEBufferSizeInRequestsForLuma[k]; + PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k]; } else { - st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; - st_vars->PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; + PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; + PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; } - st_vars->PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( + PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( myPipe[k].ViewportStationary, myPipe[k].DCCEnable, myPipe[k].DPPPerSurface, @@ -1982,21 +2003,21 @@ void dml32_CalculateVMRowAndSwath( GPUVMMaxPageTableLevels, GPUVMMinPageSizeKBytes[k], HostVMMinPageSize, - st_vars->PTEBufferSizeInRequestsForChroma[k], + PTEBufferSizeInRequestsForChroma[k], myPipe[k].PitchC, myPipe[k].DCCMetaPitchC, myPipe[k].BlockWidthC, myPipe[k].BlockHeightC, /* Output */ - &st_vars->MetaRowByteC[k], - &st_vars->PixelPTEBytesPerRowC[k], + &MetaRowByteC[k], + &PixelPTEBytesPerRowC[k], &dpte_row_width_chroma_ub[k], &dpte_row_height_chroma[k], &dpte_row_height_linear_chroma[k], - &st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k], - &st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k], - &st_vars->dpte_row_height_chroma_one_row_per_frame[k], + &PixelPTEBytesPerRowC_one_row_per_frame[k], + &dpte_row_width_chroma_ub_one_row_per_frame[k], + &dpte_row_height_chroma_one_row_per_frame[k], &meta_req_width_chroma[k], &meta_req_height_chroma[k], &meta_row_width_chroma[k], @@ -2024,19 +2045,19 @@ void dml32_CalculateVMRowAndSwath( &VInitPreFillC[k], &MaxNumSwathC[k]); } else { - st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; - st_vars->PTEBufferSizeInRequestsForChroma[k] = 0; - st_vars->PixelPTEBytesPerRowC[k] = 0; - st_vars->PDEAndMetaPTEBytesFrameC = 0; - st_vars->MetaRowByteC[k] = 0; + PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; + PTEBufferSizeInRequestsForChroma[k] = 0; + PixelPTEBytesPerRowC[k] = 0; + PDEAndMetaPTEBytesFrameC = 0; + MetaRowByteC[k] = 0; MaxNumSwathC[k] = 0; PrefetchSourceLinesC[k] = 0; - st_vars->dpte_row_height_chroma_one_row_per_frame[k] = 0; - st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; - st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; + dpte_row_height_chroma_one_row_per_frame[k] = 0; + dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; + PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; } - st_vars->PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( + PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( myPipe[k].ViewportStationary, myPipe[k].DCCEnable, myPipe[k].DPPPerSurface, @@ -2056,21 +2077,21 @@ void dml32_CalculateVMRowAndSwath( GPUVMMaxPageTableLevels, GPUVMMinPageSizeKBytes[k], HostVMMinPageSize, - st_vars->PTEBufferSizeInRequestsForLuma[k], + PTEBufferSizeInRequestsForLuma[k], myPipe[k].PitchY, myPipe[k].DCCMetaPitchY, myPipe[k].BlockWidthY, myPipe[k].BlockHeightY, /* Output */ - &st_vars->MetaRowByteY[k], - &st_vars->PixelPTEBytesPerRowY[k], + &MetaRowByteY[k], + &PixelPTEBytesPerRowY[k], &dpte_row_width_luma_ub[k], &dpte_row_height_luma[k], &dpte_row_height_linear_luma[k], - &st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k], - &st_vars->dpte_row_width_luma_ub_one_row_per_frame[k], - &st_vars->dpte_row_height_luma_one_row_per_frame[k], + &PixelPTEBytesPerRowY_one_row_per_frame[k], + &dpte_row_width_luma_ub_one_row_per_frame[k], + &dpte_row_height_luma_one_row_per_frame[k], &meta_req_width[k], &meta_req_height[k], &meta_row_width[k], @@ -2098,19 +2119,19 @@ void dml32_CalculateVMRowAndSwath( &VInitPreFillY[k], &MaxNumSwathY[k]); - PDEAndMetaPTEBytesFrame[k] = st_vars->PDEAndMetaPTEBytesFrameY + st_vars->PDEAndMetaPTEBytesFrameC; - MetaRowByte[k] = st_vars->MetaRowByteY[k] + st_vars->MetaRowByteC[k]; + PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; + MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k]; - if (st_vars->PixelPTEBytesPerRowY[k] <= 64 * st_vars->PTEBufferSizeInRequestsForLuma[k] && - st_vars->PixelPTEBytesPerRowC[k] <= 64 * st_vars->PTEBufferSizeInRequestsForChroma[k]) { + if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] && + PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) { PTEBufferSizeNotExceeded[k] = true; } else { PTEBufferSizeNotExceeded[k] = false; } - st_vars->one_row_per_frame_fits_in_buffer[k] = (st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * - st_vars->PTEBufferSizeInRequestsForLuma[k] && - st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * st_vars->PTEBufferSizeInRequestsForChroma[k]); + one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * + PTEBufferSizeInRequestsForLuma[k] && + PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]); } dml32_CalculateMALLUseForStaticScreen( @@ -2118,7 +2139,7 @@ void dml32_CalculateVMRowAndSwath( MALLAllocatedForDCN, UseMALLForStaticScreen, // mode SurfaceSizeInMALL, - st_vars->one_row_per_frame_fits_in_buffer, + one_row_per_frame_fits_in_buffer, /* Output */ UsesMALLForStaticScreen); // boolen @@ -2144,13 +2165,13 @@ void dml32_CalculateVMRowAndSwath( !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame); if (use_one_row_for_frame[k]) { - dpte_row_height_luma[k] = st_vars->dpte_row_height_luma_one_row_per_frame[k]; - dpte_row_width_luma_ub[k] = st_vars->dpte_row_width_luma_ub_one_row_per_frame[k]; - st_vars->PixelPTEBytesPerRowY[k] = st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k]; - dpte_row_height_chroma[k] = st_vars->dpte_row_height_chroma_one_row_per_frame[k]; - dpte_row_width_chroma_ub[k] = st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k]; - st_vars->PixelPTEBytesPerRowC[k] = st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k]; - PTEBufferSizeNotExceeded[k] = st_vars->one_row_per_frame_fits_in_buffer[k]; + dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k]; + dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k]; + PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k]; + dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k]; + dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k]; + PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k]; + PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k]; } if (MetaRowByte[k] <= DCCMetaBufferSizeBytes) @@ -2158,7 +2179,7 @@ void dml32_CalculateVMRowAndSwath( else DCCMetaBufferSizeNotExceeded[k] = false; - PixelPTEBytesPerRow[k] = st_vars->PixelPTEBytesPerRowY[k] + st_vars->PixelPTEBytesPerRowC[k]; + PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k]; if (use_one_row_for_frame[k]) PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2; @@ -2169,11 +2190,11 @@ void dml32_CalculateVMRowAndSwath( myPipe[k].VRatioChroma, myPipe[k].DCCEnable, myPipe[k].HTotal / myPipe[k].PixelClock, - st_vars->MetaRowByteY[k], st_vars->MetaRowByteC[k], + MetaRowByteY[k], MetaRowByteC[k], meta_row_height[k], meta_row_height_chroma[k], - st_vars->PixelPTEBytesPerRowY[k], - st_vars->PixelPTEBytesPerRowC[k], + PixelPTEBytesPerRowY[k], + PixelPTEBytesPerRowC[k], dpte_row_height_luma[k], dpte_row_height_chroma[k], @@ -2189,12 +2210,12 @@ void dml32_CalculateVMRowAndSwath( dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]); dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n", __func__, k, dpte_row_width_luma_ub[k]); - dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowY[k]); + dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]); dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n", __func__, k, dpte_row_height_chroma[k]); dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n", __func__, k, dpte_row_width_chroma_ub[k]); - dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowC[k]); + dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]); dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]); dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n", __func__, k, PTEBufferSizeNotExceeded[k]); @@ -3342,7 +3363,6 @@ double dml32_CalculateExtraLatency( } // CalculateExtraLatency bool dml32_CalculatePrefetchSchedule( - struct dml32_CalculatePrefetchSchedule *st_vars, double HostVMInefficiencyFactor, DmlPipe *myPipe, unsigned int DSCDelay, @@ -3406,18 +3426,45 @@ bool dml32_CalculatePrefetchSchedule( double *VReadyOffsetPix) { bool MyError = false; - - st_vars->TimeForFetchingMetaPTE = 0; - st_vars->TimeForFetchingRowInVBlank = 0; - st_vars->LinesToRequestPrefetchPixelData = 0; - st_vars->max_vratio_pre = __DML_MAX_VRATIO_PRE__; - st_vars->Tsw_est1 = 0; - st_vars->Tsw_est3 = 0; + unsigned int DPPCycles, DISPCLKCycles; + double DSTTotalPixelsAfterScaler; + double LineTime; + double dst_y_prefetch_equ; + double prefetch_bw_oto; + double Tvm_oto; + double Tr0_oto; + double Tvm_oto_lines; + double Tr0_oto_lines; + double dst_y_prefetch_oto; + double TimeForFetchingMetaPTE = 0; + double TimeForFetchingRowInVBlank = 0; + double LinesToRequestPrefetchPixelData = 0; + unsigned int HostVMDynamicLevelsTrips; + double trip_to_mem; + double Tvm_trips; + double Tr0_trips; + double Tvm_trips_rounded; + double Tr0_trips_rounded; + double Lsw_oto; + double Tpre_rounded; + double prefetch_bw_equ; + double Tvm_equ; + double Tr0_equ; + double Tdmbf; + double Tdmec; + double Tdmsks; + double prefetch_sw_bytes; + double bytes_pp; + double dep_bytes; + unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__; + double min_Lsw; + double Tsw_est1 = 0; + double Tsw_est3 = 0; if (GPUVMEnable == true && HostVMEnable == true) - st_vars->HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; + HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; else - st_vars->HostVMDynamicLevelsTrips = 0; + HostVMDynamicLevelsTrips = 0; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels); @@ -3440,19 +3487,19 @@ bool dml32_CalculatePrefetchSchedule( TSetup, /* output */ - &st_vars->Tdmbf, - &st_vars->Tdmec, - &st_vars->Tdmsks, + &Tdmbf, + &Tdmec, + &Tdmsks, VUpdateOffsetPix, VUpdateWidthPix, VReadyOffsetPix); - st_vars->LineTime = myPipe->HTotal / myPipe->PixelClock; - st_vars->trip_to_mem = UrgentLatency; - st_vars->Tvm_trips = UrgentExtraLatency + st_vars->trip_to_mem * (GPUVMPageTableLevels * (st_vars->HostVMDynamicLevelsTrips + 1) - 1); + LineTime = myPipe->HTotal / myPipe->PixelClock; + trip_to_mem = UrgentLatency; + Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); if (DynamicMetadataVMEnabled == true) - *Tdmdl = TWait + st_vars->Tvm_trips + st_vars->trip_to_mem; + *Tdmdl = TWait + Tvm_trips + trip_to_mem; else *Tdmdl = TWait + UrgentExtraLatency; @@ -3462,15 +3509,15 @@ bool dml32_CalculatePrefetchSchedule( #endif if (DynamicMetadataEnable == true) { - if (VStartup * st_vars->LineTime < *TSetup + *Tdmdl + st_vars->Tdmbf + st_vars->Tdmec + st_vars->Tdmsks) { + if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { *NotEnoughTimeForDynamicMetadata = true; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", - __func__, st_vars->Tdmbf); - dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec); + __func__, Tdmbf); + dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", - __func__, st_vars->Tdmsks); + __func__, Tdmsks); dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); #endif @@ -3482,21 +3529,21 @@ bool dml32_CalculatePrefetchSchedule( } *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && - GPUVMEnable == true ? TWait + st_vars->Tvm_trips : 0); + GPUVMEnable == true ? TWait + Tvm_trips : 0); if (myPipe->ScalerEnabled) - st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; + DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; else - st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; + DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; - st_vars->DPPCycles = st_vars->DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; + DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; - st_vars->DISPCLKCycles = DISPCLKDelaySubtotal; + DISPCLKCycles = DISPCLKDelaySubtotal; if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0) return true; - *DSTXAfterScaler = st_vars->DPPCycles * myPipe->PixelClock / myPipe->Dppclk + st_vars->DISPCLKCycles * + *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles * myPipe->PixelClock / myPipe->Dispclk + DSCDelay; *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0) @@ -3506,10 +3553,10 @@ bool dml32_CalculatePrefetchSchedule( + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0); #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: DPPCycles: %d\n", __func__, st_vars->DPPCycles); + dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk); - dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, st_vars->DISPCLKCycles); + dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk); dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode); @@ -3522,9 +3569,9 @@ bool dml32_CalculatePrefetchSchedule( else *DSTYAfterScaler = 0; - st_vars->DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; - *DSTYAfterScaler = dml_floor(st_vars->DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); - *DSTXAfterScaler = st_vars->DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); + DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; + *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); + *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler); @@ -3532,132 +3579,132 @@ bool dml32_CalculatePrefetchSchedule( MyError = false; - st_vars->Tr0_trips = st_vars->trip_to_mem * (st_vars->HostVMDynamicLevelsTrips + 1); + Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); if (GPUVMEnable == true) { - st_vars->Tvm_trips_rounded = dml_ceil(4.0 * st_vars->Tvm_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; - st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; + Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime; + Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; if (GPUVMPageTableLevels >= 3) { - *Tno_bw = UrgentExtraLatency + st_vars->trip_to_mem * - (double) ((GPUVMPageTableLevels - 2) * (st_vars->HostVMDynamicLevelsTrips + 1) - 1); + *Tno_bw = UrgentExtraLatency + trip_to_mem * + (double) ((GPUVMPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1); } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) { - st_vars->Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / st_vars->LineTime, 1.0) / - 4.0 * st_vars->LineTime; // VBA_ERROR + Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) / + 4.0 * LineTime; // VBA_ERROR *Tno_bw = UrgentExtraLatency; } else { *Tno_bw = 0; } } else if (myPipe->DCCEnable == true) { - st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0; - st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; + Tvm_trips_rounded = LineTime / 4.0; + Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; *Tno_bw = 0; } else { - st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0; - st_vars->Tr0_trips_rounded = st_vars->LineTime / 2.0; + Tvm_trips_rounded = LineTime / 4.0; + Tr0_trips_rounded = LineTime / 2.0; *Tno_bw = 0; } - st_vars->Tvm_trips_rounded = dml_max(st_vars->Tvm_trips_rounded, st_vars->LineTime / 4.0); - st_vars->Tr0_trips_rounded = dml_max(st_vars->Tr0_trips_rounded, st_vars->LineTime / 4.0); + Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0); + Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0); if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12) { - st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; + bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; } else { - st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; + bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; } - st_vars->prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; - st_vars->prefetch_bw_oto = dml_max(st_vars->bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, - st_vars->prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * st_vars->LineTime)); + prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, + prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); - st_vars->min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / st_vars->max_vratio_pre; - st_vars->min_Lsw = dml_max(st_vars->min_Lsw, 1.0); - st_vars->Lsw_oto = dml_ceil(4.0 * dml_max(st_vars->prefetch_sw_bytes / st_vars->prefetch_bw_oto / st_vars->LineTime, st_vars->min_Lsw), 1.0) / 4.0; + min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre; + min_Lsw = dml_max(min_Lsw, 1.0); + Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0; if (GPUVMEnable == true) { - st_vars->Tvm_oto = dml_max3( - st_vars->Tvm_trips, - *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / st_vars->prefetch_bw_oto, - st_vars->LineTime / 4.0); + Tvm_oto = dml_max3( + Tvm_trips, + *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, + LineTime / 4.0); } else - st_vars->Tvm_oto = st_vars->LineTime / 4.0; + Tvm_oto = LineTime / 4.0; if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { - st_vars->Tr0_oto = dml_max4( - st_vars->Tr0_trips, - (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto, - (st_vars->LineTime - st_vars->Tvm_oto)/2.0, - st_vars->LineTime / 4.0); + Tr0_oto = dml_max4( + Tr0_trips, + (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, + (LineTime - Tvm_oto)/2.0, + LineTime / 4.0); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, - (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto); - dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, st_vars->Tr0_trips); - dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, st_vars->LineTime - st_vars->Tvm_oto); - dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, st_vars->LineTime / 4); + (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto); + dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips); + dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto); + dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4); #endif } else - st_vars->Tr0_oto = (st_vars->LineTime - st_vars->Tvm_oto) / 2.0; + Tr0_oto = (LineTime - Tvm_oto) / 2.0; - st_vars->Tvm_oto_lines = dml_ceil(4.0 * st_vars->Tvm_oto / st_vars->LineTime, 1) / 4.0; - st_vars->Tr0_oto_lines = dml_ceil(4.0 * st_vars->Tr0_oto / st_vars->LineTime, 1) / 4.0; - st_vars->dst_y_prefetch_oto = st_vars->Tvm_oto_lines + 2 * st_vars->Tr0_oto_lines + st_vars->Lsw_oto; + Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; + Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; + dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; - st_vars->dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / st_vars->LineTime - + dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal); - dml_print("DML::%s: min_Lsw = %f\n", __func__, st_vars->min_Lsw); + dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw); dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw); dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency); - dml_print("DML::%s: trip_to_mem = %f\n", __func__, st_vars->trip_to_mem); + dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem); dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC); dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub); - dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, st_vars->prefetch_sw_bytes); - dml_print("DML::%s: bytes_pp = %f\n", __func__, st_vars->bytes_pp); + dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes); + dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp); dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); - dml_print("DML::%s: Tvm_trips = %f\n", __func__, st_vars->Tvm_trips); - dml_print("DML::%s: Tr0_trips = %f\n", __func__, st_vars->Tr0_trips); - dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, st_vars->prefetch_bw_oto); - dml_print("DML::%s: Tr0_oto = %f\n", __func__, st_vars->Tr0_oto); - dml_print("DML::%s: Tvm_oto = %f\n", __func__, st_vars->Tvm_oto); - dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, st_vars->Tvm_oto_lines); - dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, st_vars->Tr0_oto_lines); - dml_print("DML::%s: Lsw_oto = %f\n", __func__, st_vars->Lsw_oto); - dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, st_vars->dst_y_prefetch_oto); - dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, st_vars->dst_y_prefetch_equ); + dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); + dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); + dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); + dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); + dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); + dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines); + dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines); + dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto); + dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto); + dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ); #endif - st_vars->dst_y_prefetch_equ = dml_floor(4.0 * (st_vars->dst_y_prefetch_equ + 0.125), 1) / 4.0; - st_vars->Tpre_rounded = st_vars->dst_y_prefetch_equ * st_vars->LineTime; + dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; + Tpre_rounded = dst_y_prefetch_equ * LineTime; #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, st_vars->dst_y_prefetch_equ); - dml_print("DML::%s: LineTime: %f\n", __func__, st_vars->LineTime); + dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ); + dml_print("DML::%s: LineTime: %f\n", __func__, LineTime); dml_print("DML::%s: VStartup: %d\n", __func__, VStartup); dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", - __func__, VStartup * st_vars->LineTime); + __func__, VStartup * LineTime); dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup); dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc); - dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, st_vars->Tdmbf); - dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec); + dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); + dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm); dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n", __func__, *DSTYAfterScaler); #endif - st_vars->dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, + dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); - if (st_vars->prefetch_sw_bytes < st_vars->dep_bytes) - st_vars->prefetch_sw_bytes = 2 * st_vars->dep_bytes; + if (prefetch_sw_bytes < dep_bytes) + prefetch_sw_bytes = 2 * dep_bytes; *PrefetchBandwidth = 0; *DestinationLinesToRequestVMInVBlank = 0; @@ -3665,61 +3712,61 @@ bool dml32_CalculatePrefetchSchedule( *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; *RequiredPrefetchPixDataBWLuma = 0; - if (st_vars->dst_y_prefetch_equ > 1) { + if (dst_y_prefetch_equ > 1) { double PrefetchBandwidth1; double PrefetchBandwidth2; double PrefetchBandwidth3; double PrefetchBandwidth4; - if (st_vars->Tpre_rounded - *Tno_bw > 0) { + if (Tpre_rounded - *Tno_bw > 0) { PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor - + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - *Tno_bw); - st_vars->Tsw_est1 = st_vars->prefetch_sw_bytes / PrefetchBandwidth1; + + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); + Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; } else PrefetchBandwidth1 = 0; - if (VStartup == MaxVStartup && (st_vars->Tsw_est1 / st_vars->LineTime < st_vars->min_Lsw) - && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw > 0) { + if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw) + && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) - / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw); + / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); } - if (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded > 0) - PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + st_vars->prefetch_sw_bytes) / - (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded); + if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) + PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / + (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); else PrefetchBandwidth2 = 0; - if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded > 0) { + if (Tpre_rounded - Tvm_trips_rounded > 0) { PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor - + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded); - st_vars->Tsw_est3 = st_vars->prefetch_sw_bytes / PrefetchBandwidth3; + + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); + Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; } else PrefetchBandwidth3 = 0; if (VStartup == MaxVStartup && - (st_vars->Tsw_est3 / st_vars->LineTime < st_vars->min_Lsw) && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * - st_vars->LineTime - st_vars->Tvm_trips_rounded > 0) { + (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 * + LineTime - Tvm_trips_rounded > 0) { PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) - / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - st_vars->Tvm_trips_rounded); + / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); } - if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded > 0) { - PrefetchBandwidth4 = st_vars->prefetch_sw_bytes / - (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded); + if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) { + PrefetchBandwidth4 = prefetch_sw_bytes / + (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); } else { PrefetchBandwidth4 = 0; } #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: Tpre_rounded: %f\n", __func__, st_vars->Tpre_rounded); + dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw); - dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, st_vars->Tvm_trips_rounded); - dml_print("DML::%s: Tsw_est1: %f\n", __func__, st_vars->Tsw_est1); - dml_print("DML::%s: Tsw_est3: %f\n", __func__, st_vars->Tsw_est3); + dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); + dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1); + dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3); dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1); dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2); dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); @@ -3732,9 +3779,9 @@ bool dml32_CalculatePrefetchSchedule( if (PrefetchBandwidth1 > 0) { if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 - >= st_vars->Tvm_trips_rounded + >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) - / PrefetchBandwidth1 >= st_vars->Tr0_trips_rounded) { + / PrefetchBandwidth1 >= Tr0_trips_rounded) { Case1OK = true; } else { Case1OK = false; @@ -3745,9 +3792,9 @@ bool dml32_CalculatePrefetchSchedule( if (PrefetchBandwidth2 > 0) { if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 - >= st_vars->Tvm_trips_rounded + >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) - / PrefetchBandwidth2 < st_vars->Tr0_trips_rounded) { + / PrefetchBandwidth2 < Tr0_trips_rounded) { Case2OK = true; } else { Case2OK = false; @@ -3758,9 +3805,9 @@ bool dml32_CalculatePrefetchSchedule( if (PrefetchBandwidth3 > 0) { if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < - st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * + Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= - st_vars->Tr0_trips_rounded) { + Tr0_trips_rounded) { Case3OK = true; } else { Case3OK = false; @@ -3770,80 +3817,80 @@ bool dml32_CalculatePrefetchSchedule( } if (Case1OK) - st_vars->prefetch_bw_equ = PrefetchBandwidth1; + prefetch_bw_equ = PrefetchBandwidth1; else if (Case2OK) - st_vars->prefetch_bw_equ = PrefetchBandwidth2; + prefetch_bw_equ = PrefetchBandwidth2; else if (Case3OK) - st_vars->prefetch_bw_equ = PrefetchBandwidth3; + prefetch_bw_equ = PrefetchBandwidth3; else - st_vars->prefetch_bw_equ = PrefetchBandwidth4; + prefetch_bw_equ = PrefetchBandwidth4; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); - dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, st_vars->prefetch_bw_equ); + dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); #endif - if (st_vars->prefetch_bw_equ > 0) { + if (prefetch_bw_equ > 0) { if (GPUVMEnable == true) { - st_vars->Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * - HostVMInefficiencyFactor / st_vars->prefetch_bw_equ, - st_vars->Tvm_trips, st_vars->LineTime / 4); + Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * + HostVMInefficiencyFactor / prefetch_bw_equ, + Tvm_trips, LineTime / 4); } else { - st_vars->Tvm_equ = st_vars->LineTime / 4; + Tvm_equ = LineTime / 4; } if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { - st_vars->Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * - HostVMInefficiencyFactor) / st_vars->prefetch_bw_equ, st_vars->Tr0_trips, - (st_vars->LineTime - st_vars->Tvm_equ) / 2, st_vars->LineTime / 4); + Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * + HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips, + (LineTime - Tvm_equ) / 2, LineTime / 4); } else { - st_vars->Tr0_equ = (st_vars->LineTime - st_vars->Tvm_equ) / 2; + Tr0_equ = (LineTime - Tvm_equ) / 2; } } else { - st_vars->Tvm_equ = 0; - st_vars->Tr0_equ = 0; + Tvm_equ = 0; + Tr0_equ = 0; #ifdef __DML_VBA_DEBUG__ dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__); #endif } } - if (st_vars->dst_y_prefetch_oto < st_vars->dst_y_prefetch_equ) { - *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_oto; - st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_oto; - st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_oto; - *PrefetchBandwidth = st_vars->prefetch_bw_oto; + if (dst_y_prefetch_oto < dst_y_prefetch_equ) { + *DestinationLinesForPrefetch = dst_y_prefetch_oto; + TimeForFetchingMetaPTE = Tvm_oto; + TimeForFetchingRowInVBlank = Tr0_oto; + *PrefetchBandwidth = prefetch_bw_oto; } else { - *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_equ; - st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_equ; - st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_equ; - *PrefetchBandwidth = st_vars->prefetch_bw_equ; + *DestinationLinesForPrefetch = dst_y_prefetch_equ; + TimeForFetchingMetaPTE = Tvm_equ; + TimeForFetchingRowInVBlank = Tr0_equ; + *PrefetchBandwidth = prefetch_bw_equ; } - *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * st_vars->TimeForFetchingMetaPTE / st_vars->LineTime, 1.0) / 4.0; + *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; *DestinationLinesToRequestRowInVBlank = - dml_ceil(4.0 * st_vars->TimeForFetchingRowInVBlank / st_vars->LineTime, 1.0) / 4.0; + dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; - st_vars->LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - + LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); - dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, st_vars->TimeForFetchingRowInVBlank); - dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); + dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); + dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); - dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, st_vars->LinesToRequestPrefetchPixelData); + dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); #endif - if (st_vars->LinesToRequestPrefetchPixelData >= 1 && st_vars->prefetch_bw_equ > 0) { - *VRatioPrefetchY = (double) PrefetchSourceLinesY / st_vars->LinesToRequestPrefetchPixelData; + if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) { + *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); @@ -3851,12 +3898,12 @@ bool dml32_CalculatePrefetchSchedule( dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY); #endif if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { - if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { + if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { *VRatioPrefetchY = dml_max((double) PrefetchSourceLinesY / - st_vars->LinesToRequestPrefetchPixelData, + LinesToRequestPrefetchPixelData, (double) MaxNumSwathY * SwathHeightY / - (st_vars->LinesToRequestPrefetchPixelData - + (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0)); *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); } else { @@ -3870,7 +3917,7 @@ bool dml32_CalculatePrefetchSchedule( #endif } - *VRatioPrefetchC = (double) PrefetchSourceLinesC / st_vars->LinesToRequestPrefetchPixelData; + *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); #ifdef __DML_VBA_DEBUG__ @@ -3879,11 +3926,11 @@ bool dml32_CalculatePrefetchSchedule( dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC); #endif if ((SwathHeightC > 4)) { - if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { + if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { *VRatioPrefetchC = dml_max(*VRatioPrefetchC, (double) MaxNumSwathC * SwathHeightC / - (st_vars->LinesToRequestPrefetchPixelData - + (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0)); *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); } else { @@ -3898,25 +3945,25 @@ bool dml32_CalculatePrefetchSchedule( } *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY - / st_vars->LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub - / st_vars->LineTime; + / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub + / LineTime; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); - dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); + dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma); #endif *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / - st_vars->LinesToRequestPrefetchPixelData + LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC - * swath_width_chroma_ub / st_vars->LineTime; + * swath_width_chroma_ub / LineTime; } else { MyError = true; #ifdef __DML_VBA_DEBUG__ dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n", - __func__, st_vars->LinesToRequestPrefetchPixelData); + __func__, LinesToRequestPrefetchPixelData); #endif *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; @@ -3925,15 +3972,15 @@ bool dml32_CalculatePrefetchSchedule( } #ifdef __DML_VBA_DEBUG__ dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", - (double)st_vars->LinesToRequestPrefetchPixelData * st_vars->LineTime + - 2.0*st_vars->TimeForFetchingRowInVBlank + st_vars->TimeForFetchingMetaPTE); - dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", st_vars->TimeForFetchingMetaPTE); + (double)LinesToRequestPrefetchPixelData * LineTime + + 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); + dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); dml_print("DML: To: %fus - time for propagation from scaler to optc\n", - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime); + (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); - dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * st_vars->LineTime - - st_vars->TimeForFetchingMetaPTE - 2*st_vars->TimeForFetchingRowInVBlank - (*DSTYAfterScaler + - ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime - TWait - TCalc - *TSetup); + dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - + TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler + + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); #endif @@ -3941,7 +3988,7 @@ bool dml32_CalculatePrefetchSchedule( MyError = true; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", - __func__, st_vars->dst_y_prefetch_equ); + __func__, dst_y_prefetch_equ); #endif } @@ -3957,10 +4004,10 @@ bool dml32_CalculatePrefetchSchedule( dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); - dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); + dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); #endif prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / - (*DestinationLinesToRequestVMInVBlank * st_vars->LineTime); + (*DestinationLinesToRequestVMInVBlank * LineTime); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); #endif @@ -3977,7 +4024,7 @@ bool dml32_CalculatePrefetchSchedule( prefetch_row_bw = 0; } else if (*DestinationLinesToRequestRowInVBlank > 0) { prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / - (*DestinationLinesToRequestRowInVBlank * st_vars->LineTime); + (*DestinationLinesToRequestRowInVBlank * LineTime); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); @@ -4000,12 +4047,12 @@ bool dml32_CalculatePrefetchSchedule( if (MyError) { *PrefetchBandwidth = 0; - st_vars->TimeForFetchingMetaPTE = 0; - st_vars->TimeForFetchingRowInVBlank = 0; + TimeForFetchingMetaPTE = 0; + TimeForFetchingRowInVBlank = 0; *DestinationLinesToRequestVMInVBlank = 0; *DestinationLinesToRequestRowInVBlank = 0; *DestinationLinesForPrefetch = 0; - st_vars->LinesToRequestPrefetchPixelData = 0; + LinesToRequestPrefetchPixelData = 0; *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; *RequiredPrefetchPixDataBWLuma = 0; @@ -4159,7 +4206,6 @@ void dml32_CalculateFlipSchedule( } // CalculateFlipSchedule void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( - struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars, bool USRRetrainingRequiredFinal, enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], unsigned int PrefetchMode, @@ -4221,15 +4267,37 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( double ActiveDRAMClockChangeLatencyMargin[]) { unsigned int i, j, k; - - st_vars->SurfaceWithMinActiveFCLKChangeMargin = 0; - st_vars->DRAMClockChangeSupportNumber = 0; - st_vars->DRAMClockChangeMethod = 0; - st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; - st_vars->MinActiveFCLKChangeMargin = 0.; - st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; - st_vars->TotalPixelBW = 0.0; - st_vars->TotalActiveWriteback = 0; + unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0; + unsigned int DRAMClockChangeSupportNumber = 0; + unsigned int LastSurfaceWithoutMargin; + unsigned int DRAMClockChangeMethod = 0; + bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; + double MinActiveFCLKChangeMargin = 0.; + double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; + double ActiveClockChangeLatencyHidingY; + double ActiveClockChangeLatencyHidingC; + double ActiveClockChangeLatencyHiding; + double EffectiveDETBufferSizeY; + double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX]; + double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX]; + double TotalPixelBW = 0.0; + bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX]; + double EffectiveLBLatencyHidingY; + double EffectiveLBLatencyHidingC; + double LinesInDETY[DC__NUM_DPP__MAX]; + double LinesInDETC[DC__NUM_DPP__MAX]; + unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; + unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX]; + double FullDETBufferingTimeY; + double FullDETBufferingTimeC; + double WritebackDRAMClockChangeLatencyMargin; + double WritebackFCLKChangeLatencyMargin; + double WritebackLatencyHiding; + bool SameTimingForFCLKChange; + + unsigned int TotalActiveWriteback = 0; + unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; + unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency; Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency @@ -4261,13 +4329,13 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( #endif - st_vars->TotalActiveWriteback = 0; + TotalActiveWriteback = 0; for (k = 0; k < NumberOfActiveSurfaces; ++k) { if (WritebackEnable[k] == true) - st_vars->TotalActiveWriteback = st_vars->TotalActiveWriteback + 1; + TotalActiveWriteback = TotalActiveWriteback + 1; } - if (st_vars->TotalActiveWriteback <= 1) { + if (TotalActiveWriteback <= 1) { Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency; } else { Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency @@ -4277,7 +4345,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark + mmSOCParameters.USRRetrainingLatency; - if (st_vars->TotalActiveWriteback <= 1) { + if (TotalActiveWriteback <= 1) { Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.WritebackLatency; Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency @@ -4307,14 +4375,14 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( #endif for (k = 0; k < NumberOfActiveSurfaces; ++k) { - st_vars->TotalPixelBW = st_vars->TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + + TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]); } for (k = 0; k < NumberOfActiveSurfaces; ++k) { - st_vars->LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1); - st_vars->LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1); + LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1); + LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1); #ifdef __DML_VBA_DEBUG__ @@ -4325,72 +4393,72 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( dml_print("DML::%s: k=%d, VTaps = %d\n", __func__, k, VTaps[k]); #endif - st_vars->EffectiveLBLatencyHidingY = st_vars->LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]); - st_vars->EffectiveLBLatencyHidingC = st_vars->LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); - st_vars->EffectiveDETBufferSizeY = DETBufferSizeY[k]; + EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]); + EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); + EffectiveDETBufferSizeY = DETBufferSizeY[k]; if (UnboundedRequestEnabled) { - st_vars->EffectiveDETBufferSizeY = st_vars->EffectiveDETBufferSizeY + EffectiveDETBufferSizeY = EffectiveDETBufferSizeY + CompressedBufferSizeInkByte * 1024 * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k]) - / (HTotal[k] / PixelClock[k]) / st_vars->TotalPixelBW; + / (HTotal[k] / PixelClock[k]) / TotalPixelBW; } - st_vars->LinesInDETY[k] = (double) st_vars->EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; - st_vars->LinesInDETYRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETY[k], SwathHeightY[k]); - st_vars->FullDETBufferingTimeY = st_vars->LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; + LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; + LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); + FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; - st_vars->ActiveClockChangeLatencyHidingY = st_vars->EffectiveLBLatencyHidingY + st_vars->FullDETBufferingTimeY + ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k]; if (NumberOfActiveSurfaces > 1) { - st_vars->ActiveClockChangeLatencyHidingY = st_vars->ActiveClockChangeLatencyHidingY + ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k]; } if (BytePerPixelDETC[k] > 0) { - st_vars->LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; - st_vars->LinesInDETCRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETC[k], SwathHeightC[k]); - st_vars->FullDETBufferingTimeC = st_vars->LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) + LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; + LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]); + FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatioChroma[k]; - st_vars->ActiveClockChangeLatencyHidingC = st_vars->EffectiveLBLatencyHidingC + st_vars->FullDETBufferingTimeC + ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k]; if (NumberOfActiveSurfaces > 1) { - st_vars->ActiveClockChangeLatencyHidingC = st_vars->ActiveClockChangeLatencyHidingC + ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k]; } - st_vars->ActiveClockChangeLatencyHiding = dml_min(st_vars->ActiveClockChangeLatencyHidingY, - st_vars->ActiveClockChangeLatencyHidingC); + ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY, + ActiveClockChangeLatencyHidingC); } else { - st_vars->ActiveClockChangeLatencyHiding = st_vars->ActiveClockChangeLatencyHidingY; + ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY; } - ActiveDRAMClockChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark + ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark - Watermark->DRAMClockChangeWatermark; - st_vars->ActiveFCLKChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark + ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark - Watermark->FCLKChangeWatermark; - st_vars->USRRetrainingLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark; + USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark; if (WritebackEnable[k]) { - st_vars->WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024 + WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4); if (WritebackPixelFormat[k] == dm_444_64) - st_vars->WritebackLatencyHiding = st_vars->WritebackLatencyHiding / 2; + WritebackLatencyHiding = WritebackLatencyHiding / 2; - st_vars->WritebackDRAMClockChangeLatencyMargin = st_vars->WritebackLatencyHiding + WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding - Watermark->WritebackDRAMClockChangeWatermark; - st_vars->WritebackFCLKChangeLatencyMargin = st_vars->WritebackLatencyHiding + WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding - Watermark->WritebackFCLKChangeWatermark; ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k], - st_vars->WritebackFCLKChangeLatencyMargin); - st_vars->ActiveFCLKChangeLatencyMargin[k] = dml_min(st_vars->ActiveFCLKChangeLatencyMargin[k], - st_vars->WritebackDRAMClockChangeLatencyMargin); + WritebackFCLKChangeLatencyMargin); + ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k], + WritebackDRAMClockChangeLatencyMargin); } MaxActiveDRAMClockChangeLatencySupported[k] = (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ? @@ -4409,41 +4477,41 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] && VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal && (DRRDisplay[i] || DRRDisplay[j]))) { - st_vars->SynchronizedSurfaces[i][j] = true; + SynchronizedSurfaces[i][j] = true; } else { - st_vars->SynchronizedSurfaces[i][j] = false; + SynchronizedSurfaces[i][j] = false; } } } for (k = 0; k < NumberOfActiveSurfaces; ++k) { if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && - (!st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin || - st_vars->ActiveFCLKChangeLatencyMargin[k] < st_vars->MinActiveFCLKChangeMargin)) { - st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; - st_vars->MinActiveFCLKChangeMargin = st_vars->ActiveFCLKChangeLatencyMargin[k]; - st_vars->SurfaceWithMinActiveFCLKChangeMargin = k; + (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin || + ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) { + FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; + MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k]; + SurfaceWithMinActiveFCLKChangeMargin = k; } } - *MinActiveFCLKChangeLatencySupported = st_vars->MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; + *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; - st_vars->SameTimingForFCLKChange = true; + SameTimingForFCLKChange = true; for (k = 0; k < NumberOfActiveSurfaces; ++k) { - if (!st_vars->SynchronizedSurfaces[k][st_vars->SurfaceWithMinActiveFCLKChangeMargin]) { + if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) { if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && - (st_vars->SameTimingForFCLKChange || - st_vars->ActiveFCLKChangeLatencyMargin[k] < - st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { - st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = st_vars->ActiveFCLKChangeLatencyMargin[k]; + (SameTimingForFCLKChange || + ActiveFCLKChangeLatencyMargin[k] < + SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { + SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k]; } - st_vars->SameTimingForFCLKChange = false; + SameTimingForFCLKChange = false; } } - if (st_vars->MinActiveFCLKChangeMargin > 0) { + if (MinActiveFCLKChangeMargin > 0) { *FCLKChangeSupport = dm_fclock_change_vactive; - } else if ((st_vars->SameTimingForFCLKChange || st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && + } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && (PrefetchMode <= 1)) { *FCLKChangeSupport = dm_fclock_change_vblank; } else { @@ -4453,7 +4521,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( *USRRetrainingSupport = true; for (k = 0; k < NumberOfActiveSurfaces; ++k) { if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && - (st_vars->USRRetrainingLatencyMargin[k] < 0)) { + (USRRetrainingLatencyMargin[k] < 0)) { *USRRetrainingSupport = false; } } @@ -4464,42 +4532,42 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe && ActiveDRAMClockChangeLatencyMargin[k] < 0) { if (PrefetchMode > 0) { - st_vars->DRAMClockChangeSupportNumber = 2; - } else if (st_vars->DRAMClockChangeSupportNumber == 0) { - st_vars->DRAMClockChangeSupportNumber = 1; - st_vars->LastSurfaceWithoutMargin = k; - } else if (st_vars->DRAMClockChangeSupportNumber == 1 && - !st_vars->SynchronizedSurfaces[st_vars->LastSurfaceWithoutMargin][k]) { - st_vars->DRAMClockChangeSupportNumber = 2; + DRAMClockChangeSupportNumber = 2; + } else if (DRAMClockChangeSupportNumber == 0) { + DRAMClockChangeSupportNumber = 1; + LastSurfaceWithoutMargin = k; + } else if (DRAMClockChangeSupportNumber == 1 && + !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) { + DRAMClockChangeSupportNumber = 2; } } } for (k = 0; k < NumberOfActiveSurfaces; ++k) { if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) - st_vars->DRAMClockChangeMethod = 1; + DRAMClockChangeMethod = 1; else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) - st_vars->DRAMClockChangeMethod = 2; + DRAMClockChangeMethod = 2; } - if (st_vars->DRAMClockChangeMethod == 0) { - if (st_vars->DRAMClockChangeSupportNumber == 0) + if (DRAMClockChangeMethod == 0) { + if (DRAMClockChangeSupportNumber == 0) *DRAMClockChangeSupport = dm_dram_clock_change_vactive; - else if (st_vars->DRAMClockChangeSupportNumber == 1) + else if (DRAMClockChangeSupportNumber == 1) *DRAMClockChangeSupport = dm_dram_clock_change_vblank; else *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; - } else if (st_vars->DRAMClockChangeMethod == 1) { - if (st_vars->DRAMClockChangeSupportNumber == 0) + } else if (DRAMClockChangeMethod == 1) { + if (DRAMClockChangeSupportNumber == 0) *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame; - else if (st_vars->DRAMClockChangeSupportNumber == 1) + else if (DRAMClockChangeSupportNumber == 1) *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame; else *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; } else { - if (st_vars->DRAMClockChangeSupportNumber == 0) + if (DRAMClockChangeSupportNumber == 0) *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp; - else if (st_vars->DRAMClockChangeSupportNumber == 1) + else if (DRAMClockChangeSupportNumber == 1) *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp; else *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; @@ -4513,7 +4581,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1); src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]); - src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + st_vars->LBLatencyHidingSourceLinesY[k]; + src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k]; sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k]; #ifdef __DML_VBA_DEBUG__ @@ -4521,7 +4589,7 @@ dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DET dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); -dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, st_vars->LBLatencyHidingSourceLinesY[k]); +dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]); dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate); dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l); dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l); @@ -4532,7 +4600,7 @@ dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l if (BytePerPixelDETC[k] > 0) { src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]); - src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + st_vars->LBLatencyHidingSourceLinesC[k]; + src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k]; sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k]; SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index 37a314ce284b..d293856ba906 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -30,7 +30,6 @@ #include "os_types.h" #include "../dc_features.h" #include "../display_mode_structs.h" -#include "dml/display_mode_vba.h" unsigned int dml32_dscceComputeDelay( unsigned int bpc, @@ -82,7 +81,6 @@ void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput( double *DPPCLKUsingSingleDPP); void dml32_CalculateSwathAndDETConfiguration( - struct dml32_CalculateSwathAndDETConfiguration *st_vars, unsigned int DETSizeOverride[], enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], unsigned int ConfigReturnBufferSizeInKByte, @@ -362,7 +360,6 @@ void dml32_CalculateSurfaceSizeInMall( bool *ExceededMALLSize); void dml32_CalculateVMRowAndSwath( - struct dml32_CalculateVMRowAndSwath *st_vars, unsigned int NumberOfActiveSurfaces, DmlPipe myPipe[], unsigned int SurfaceSizeInMALL[], @@ -715,7 +712,6 @@ double dml32_CalculateExtraLatency( unsigned int HostVMMaxNonCachedPageTableLevels); bool dml32_CalculatePrefetchSchedule( - struct dml32_CalculatePrefetchSchedule *st_vars, double HostVMInefficiencyFactor, DmlPipe *myPipe, unsigned int DSCDelay, @@ -811,7 +807,6 @@ void dml32_CalculateFlipSchedule( bool *ImmediateFlipSupportedForPipe); void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( - struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars, bool USRRetrainingRequiredFinal, enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], unsigned int PrefetchMode, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index 84b4b00f29cb..c87091683b5d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -498,6 +498,13 @@ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; } + if ((int)(dcn3_21_soc.fclk_change_latency_us * 1000) + != dc->bb_overrides.fclk_clock_change_latency_ns + && dc->bb_overrides.fclk_clock_change_latency_ns) { + dcn3_21_soc.fclk_change_latency_us = + dc->bb_overrides.fclk_clock_change_latency_ns / 1000; + } + if ((int)(dcn3_21_soc.dummy_pstate_latency_us * 1000) != dc->bb_overrides.dummy_clock_change_latency_ns && dc->bb_overrides.dummy_clock_change_latency_ns) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 8460aefe7b6d..492aec634b68 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -182,108 +182,6 @@ void Calculate256BBlockSizes( unsigned int *BlockWidth256BytesY, unsigned int *BlockWidth256BytesC); -struct dml32_CalculateSwathAndDETConfiguration { - unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; - unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; - unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; - unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; - unsigned int RoundedUpSwathSizeBytesY; - unsigned int RoundedUpSwathSizeBytesC; - double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; - double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX]; - unsigned int TotalActiveDPP; - bool NoChromaSurfaces; - unsigned int DETBufferSizeInKByteForSwathCalculation; -}; - -struct dml32_CalculateVMRowAndSwath { - unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX]; - unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX]; - unsigned int PDEAndMetaPTEBytesFrameY; - unsigned int PDEAndMetaPTEBytesFrameC; - unsigned int MetaRowByteY[DC__NUM_DPP__MAX]; - unsigned int MetaRowByteC[DC__NUM_DPP__MAX]; - unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX]; - unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX]; - unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX]; - bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX]; -}; - -struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport { - unsigned int SurfaceWithMinActiveFCLKChangeMargin; - unsigned int DRAMClockChangeSupportNumber; - unsigned int LastSurfaceWithoutMargin; - unsigned int DRAMClockChangeMethod; - bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin; - double MinActiveFCLKChangeMargin; - double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank; - double ActiveClockChangeLatencyHidingY; - double ActiveClockChangeLatencyHidingC; - double ActiveClockChangeLatencyHiding; - double EffectiveDETBufferSizeY; - double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX]; - double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX]; - double TotalPixelBW; - bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX]; - double EffectiveLBLatencyHidingY; - double EffectiveLBLatencyHidingC; - double LinesInDETY[DC__NUM_DPP__MAX]; - double LinesInDETC[DC__NUM_DPP__MAX]; - unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; - unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX]; - double FullDETBufferingTimeY; - double FullDETBufferingTimeC; - double WritebackDRAMClockChangeLatencyMargin; - double WritebackFCLKChangeLatencyMargin; - double WritebackLatencyHiding; - bool SameTimingForFCLKChange; - unsigned int TotalActiveWriteback; - unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; - unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; -}; - -struct dml32_CalculatePrefetchSchedule { - unsigned int DPPCycles, DISPCLKCycles; - double DSTTotalPixelsAfterScaler; - double LineTime; - double dst_y_prefetch_equ; - double prefetch_bw_oto; - double Tvm_oto; - double Tr0_oto; - double Tvm_oto_lines; - double Tr0_oto_lines; - double dst_y_prefetch_oto; - double TimeForFetchingMetaPTE; - double TimeForFetchingRowInVBlank; - double LinesToRequestPrefetchPixelData; - unsigned int HostVMDynamicLevelsTrips; - double trip_to_mem; - double Tvm_trips; - double Tr0_trips; - double Tvm_trips_rounded; - double Tr0_trips_rounded; - double Lsw_oto; - double Tpre_rounded; - double prefetch_bw_equ; - double Tvm_equ; - double Tr0_equ; - double Tdmbf; - double Tdmec; - double Tdmsks; - double prefetch_sw_bytes; - double bytes_pp; - double dep_bytes; - unsigned int max_vratio_pre; - double min_Lsw; - double Tsw_est1; - double Tsw_est3; -}; - struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation { unsigned int dummy_integer_array[2][DC__NUM_DPP__MAX]; double dummy_single_array[2][DC__NUM_DPP__MAX]; @@ -355,10 +253,6 @@ struct dummy_vars { struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation; struct dml32_ModeSupportAndSystemConfigurationFull dml32_ModeSupportAndSystemConfigurationFull; - struct dml32_CalculateSwathAndDETConfiguration dml32_CalculateSwathAndDETConfiguration; - struct dml32_CalculateVMRowAndSwath dml32_CalculateVMRowAndSwath; - struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport; - struct dml32_CalculatePrefetchSchedule dml32_CalculatePrefetchSchedule; }; struct vba_vars_st { |