From 97041ed37718dc9ba30aa23ca74093dc93ac89fb Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Thu, 13 Apr 2023 16:22:51 +0200 Subject: drm/amdgpu: Increase GFX6 graphics ring size. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To ensure it supports 192 IBs per submission, so we can keep a simplified IB limit in the follow up patch without having to look at IP or GPU version. Reviewed-by: Christian König Signed-off-by: Bas Nieuwenhuizen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index c41219e23151..d9ce4d1c50e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3073,7 +3073,7 @@ static int gfx_v6_0_sw_init(void *handle) ring = &adev->gfx.gfx_ring[i]; ring->ring_obj = NULL; sprintf(ring->name, "gfx"); - r = amdgpu_ring_init(adev, ring, 1024, + r = amdgpu_ring_init(adev, ring, 2048, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, AMDGPU_RING_PRIO_DEFAULT, NULL); -- cgit v1.2.3 From d51ac6d0a23caf1005cb640f8533161c5d2dd0c0 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Tue, 24 May 2022 11:22:49 +0800 Subject: drm/amdgpu: add xcc index argument to select_sh_se function v2 v1: To support multiple XCD case (Le) v2: introduce xcc index to gfx_v11_0_select_sh_se (Hawking) Signed-off-by: Le Ma Reviewed-by: Hawking Zhang Signed-off-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 16 +++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 +-- drivers/gpu/drm/amd/amdgpu/cik.c | 4 +-- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 16 +++++------ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 8 +++--- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 24 ++++++++--------- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 33 ++++++++++++----------- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 33 ++++++++++++----------- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 30 ++++++++++----------- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 20 +++++++------- drivers/gpu/drm/amd/amdgpu/nv.c | 4 +-- drivers/gpu/drm/amd/amdgpu/si.c | 4 +-- drivers/gpu/drm/amd/amdgpu/soc15.c | 4 +-- drivers/gpu/drm/amd/amdgpu/soc21.c | 4 +-- drivers/gpu/drm/amd/amdgpu/vi.c | 4 +-- 18 files changed, 110 insertions(+), 106 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index ebb35633058c..ae06d1f2af93 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -787,7 +787,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, for (se_idx = 0; se_idx < se_cnt; se_idx++) { for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) { - amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff, 0); queue_map = RREG32_SOC15(GC, 0, mmSPI_CSQ_WF_ACTIVE_STATUS); /* @@ -820,7 +820,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, } } - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); soc15_grbm_select(adev, 0, 0, 0, 0, 0); unlock_spi_csq_mutexes(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index f60753f97ac5..cc64ae550bc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -136,7 +136,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, } mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, se_bank, - sh_bank, instance_bank); + sh_bank, instance_bank, 0); } else if (use_ring) { mutex_lock(&adev->srbm_mutex); amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid); @@ -169,7 +169,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, end: if (use_bank) { - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } else if (use_ring) { amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0); @@ -263,7 +263,7 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, rd->id.grbm.se, rd->id.grbm.sh, - rd->id.grbm.instance); + rd->id.grbm.instance, 0); } if (rd->id.use_srbm) { @@ -295,7 +295,7 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off } end: if (rd->id.use_grbm) { - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -907,13 +907,13 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, /* switch to the specific se/sh/cu */ mutex_lock(&adev->grbm_idx_mutex); - amdgpu_gfx_select_se_sh(adev, se, sh, cu); + amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0); x = 0; if (adev->gfx.funcs->read_wave_data) adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x); - amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0); mutex_unlock(&adev->grbm_idx_mutex); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); @@ -1001,7 +1001,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, /* switch to the specific se/sh/cu */ mutex_lock(&adev->grbm_idx_mutex); - amdgpu_gfx_select_se_sh(adev, se, sh, cu); + amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0); if (bank == 0) { if (adev->gfx.funcs->read_wave_vgprs) @@ -1011,7 +1011,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size>>2, data); } - amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0); mutex_unlock(&adev->grbm_idx_mutex); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index a9e41d7970ea..77e2f714e357 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -237,7 +237,7 @@ struct amdgpu_gfx_funcs { /* get the gpu clock counter */ uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev); void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, - u32 sh_num, u32 instance); + u32 sh_num, u32 instance, int xcc_id); void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields); void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, @@ -386,7 +386,7 @@ struct amdgpu_gfx { }; #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) -#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance)) +#define amdgpu_gfx_select_se_sh(adev, se, sh, instance, xcc_id) ((adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance), (xcc_id))) #define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid)) #define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index de6d10390ab2..5641cf05d856 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1141,12 +1141,12 @@ static uint32_t cik_get_register_value(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0); val = RREG32(reg_offset); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); return val; } else { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index accc0a7251b9..323f5b8927ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3490,7 +3490,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev); static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, - u32 sh_num, u32 instance); + u32 sh_num, u32 instance, int xcc_id); static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev); static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev); @@ -4712,7 +4712,7 @@ static int gfx_v10_0_sw_fini(void *handle) } static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, - u32 sh_num, u32 instance) + u32 sh_num, u32 instance, int xcc_id) { u32 data; @@ -4772,13 +4772,13 @@ static void gfx_v10_0_setup_rb(struct amdgpu_device *adev) (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 6))) && ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1)) continue; - gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0); data = gfx_v10_0_get_rb_active_bitmap(adev); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * rb_bitmap_width_per_sh); } } - gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); adev->gfx.config.backend_enable_mask = active_rbs; @@ -4907,7 +4907,7 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0); wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev); /* * Set corresponding TCP bits for the inactive WGPs in @@ -4940,7 +4940,7 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) } } - gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -9540,7 +9540,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, mask = 1; ao_bitmap = 0; counter = 0; - gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0); if (i < 4 && j < 2) gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh( adev, disable_masks[i * 2 + j]); @@ -9561,7 +9561,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } } - gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 4fbefe236fc7..50d0ff9ca259 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -112,7 +112,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev); static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, - u32 sh_num, u32 instance); + u32 sh_num, u32 instance, int xcc_id); static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev); static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); @@ -1484,7 +1484,7 @@ static int gfx_v11_0_sw_fini(void *handle) } static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, - u32 sh_num, u32 instance) + u32 sh_num, u32 instance, int xcc_id) { u32 data; @@ -6473,7 +6473,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { mask = 1; counter = 0; - gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0); if (i < 8 && j < 2) gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh( adev, disable_masks[i * 2 + j]); @@ -6505,7 +6505,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, active_cu_number += counter; } } - gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index d9ce4d1c50e4..7cb72bf1acdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1285,7 +1285,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) } static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, - u32 sh_num, u32 instance) + u32 sh_num, u32 instance, int xcc_id) { u32 data; @@ -1438,12 +1438,12 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, } /* GRBM_GFX_INDEX has a different offset on SI */ - gfx_v6_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff); + gfx_v6_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0); WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); } /* GRBM_GFX_INDEX has a different offset on SI */ - gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); } static void gfx_v6_0_setup_rb(struct amdgpu_device *adev) @@ -1459,14 +1459,14 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0); data = gfx_v6_0_get_rb_active_bitmap(adev); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * rb_bitmap_width_per_sh); } } - gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); adev->gfx.config.backend_enable_mask = active_rbs; adev->gfx.config.num_rbs = hweight32(active_rbs); @@ -1487,7 +1487,7 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev) /* cache the values for userspace */ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0); adev->gfx.config.rb_config[i][j].rb_backend_disable = RREG32(mmCC_RB_BACKEND_DISABLE); adev->gfx.config.rb_config[i][j].user_rb_backend_disable = @@ -1496,7 +1496,7 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev) RREG32(mmPA_SC_RASTER_CONFIG); } } - gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -1535,7 +1535,7 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0); data = RREG32(mmSPI_STATIC_THREAD_MGMT_3); active_cu = gfx_v6_0_get_cu_enabled(adev); @@ -1550,7 +1550,7 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev) } } } - gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -2391,7 +2391,7 @@ static void gfx_v6_0_enable_lbpw(struct amdgpu_device *adev, bool enable) WREG32_FIELD(RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0); if (!enable) { - gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32(mmSPI_LB_CU_MASK, 0x00ff); } } @@ -3571,7 +3571,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev) mask = 1; ao_bitmap = 0; counter = 0; - gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0); if (i < 4 && j < 2) gfx_v6_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * 2 + j]); @@ -3593,7 +3593,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev) } } - gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 46740ad9a80f..d055e44eee1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1552,7 +1552,8 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev) * Select which SE, SH combinations to address. */ static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, - u32 se_num, u32 sh_num, u32 instance) + u32 se_num, u32 sh_num, u32 instance, + int xcc_id) { u32 data; @@ -1732,13 +1733,13 @@ gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev, } /* GRBM_GFX_INDEX has a different offset on CI+ */ - gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0); WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); } /* GRBM_GFX_INDEX has a different offset on CI+ */ - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); } /** @@ -1761,13 +1762,13 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0); data = gfx_v7_0_get_rb_active_bitmap(adev); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * rb_bitmap_width_per_sh); } } - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); adev->gfx.config.backend_enable_mask = active_rbs; adev->gfx.config.num_rbs = hweight32(active_rbs); @@ -1790,7 +1791,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) /* cache the values for userspace */ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0); adev->gfx.config.rb_config[i][j].rb_backend_disable = RREG32(mmCC_RB_BACKEND_DISABLE); adev->gfx.config.rb_config[i][j].user_rb_backend_disable = @@ -1801,7 +1802,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) RREG32(mmPA_SC_RASTER_CONFIG_1); } } - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -1911,7 +1912,7 @@ static void gfx_v7_0_constants_init(struct amdgpu_device *adev) * making sure that the following register writes will be broadcasted * to all the shaders */ - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ @@ -3301,7 +3302,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0); for (k = 0; k < adev->usec_timeout; k++) { if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) break; @@ -3309,7 +3310,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev) } } } - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | @@ -3474,7 +3475,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev) WREG32(mmRLC_LB_CNTR_MAX, 0x00008000); mutex_lock(&adev->grbm_idx_mutex); - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff); WREG32(mmRLC_LB_PARAMS, 0x00600408); WREG32(mmRLC_LB_CNTL, 0x80000004); @@ -3530,7 +3531,7 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable) tmp = gfx_v7_0_halt_rlc(adev); mutex_lock(&adev->grbm_idx_mutex); - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | @@ -3584,7 +3585,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable) tmp = gfx_v7_0_halt_rlc(adev); mutex_lock(&adev->grbm_idx_mutex); - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | @@ -3635,7 +3636,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable) tmp = gfx_v7_0_halt_rlc(adev); mutex_lock(&adev->grbm_idx_mutex); - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK; @@ -5115,7 +5116,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) mask = 1; ao_bitmap = 0; counter = 0; - gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0); if (i < 4 && j < 2) gfx_v7_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * 2 + j]); @@ -5136,7 +5137,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } } - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 8a43e87de49f..b60480876149 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3395,7 +3395,8 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) } static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, - u32 se_num, u32 sh_num, u32 instance) + u32 se_num, u32 sh_num, u32 instance, + int xcc_id) { u32 data; @@ -3579,13 +3580,13 @@ gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev, } /* GRBM_GFX_INDEX has a different offset on VI */ - gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0); WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); } /* GRBM_GFX_INDEX has a different offset on VI */ - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); } static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) @@ -3601,13 +3602,13 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0); data = gfx_v8_0_get_rb_active_bitmap(adev); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * rb_bitmap_width_per_sh); } } - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); adev->gfx.config.backend_enable_mask = active_rbs; adev->gfx.config.num_rbs = hweight32(active_rbs); @@ -3630,7 +3631,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) /* cache the values for userspace */ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0); adev->gfx.config.rb_config[i][j].rb_backend_disable = RREG32(mmCC_RB_BACKEND_DISABLE); adev->gfx.config.rb_config[i][j].user_rb_backend_disable = @@ -3641,7 +3642,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) RREG32(mmPA_SC_RASTER_CONFIG_1); } } - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -3788,7 +3789,7 @@ static void gfx_v8_0_constants_init(struct amdgpu_device *adev) * making sure that the following register writes will be broadcasted * to all the shaders */ - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32(mmPA_SC_FIFO_SIZE, (adev->gfx.config.sc_prim_fifo_size_frontend << @@ -3819,7 +3820,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0); for (k = 0; k < adev->usec_timeout; k++) { if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) break; @@ -3827,7 +3828,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) } if (k == adev->usec_timeout) { gfx_v8_0_select_se_sh(adev, 0xffffffff, - 0xffffffff, 0xffffffff); + 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); DRM_INFO("Timeout wait for RLC serdes %u,%u\n", i, j); @@ -3835,7 +3836,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) } } } - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | @@ -5481,7 +5482,7 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, { uint32_t data; - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); @@ -6723,11 +6724,11 @@ static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data, */ if (from_wq) { mutex_lock(&adev->grbm_idx_mutex); - gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id); + gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id, 0); sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE); - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -7116,7 +7117,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) mask = 1; ao_bitmap = 0; counter = 0; - gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0); if (i < 4 && j < 2) gfx_v8_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * 2 + j]); @@ -7137,7 +7138,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } } - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 4939fd61355b..2fa7adef18a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1504,7 +1504,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) mask = 1; cu_bitmap = 0; counter = 0; - amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (cu_info->bitmap[i][j] & mask) { @@ -1523,7 +1523,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) cu_info->ao_cu_bitmap[i][j] = cu_bitmap; } } - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -1545,7 +1545,7 @@ static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); /* set mmRLC_LB_PARAMS = 0x003F_1006 */ @@ -1594,7 +1594,7 @@ static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); /* set mmRLC_LB_PARAMS = 0x003F_1006 */ @@ -2241,7 +2241,7 @@ static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) } void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, - u32 instance) + u32 instance, int xcc_id) { u32 data; @@ -2290,13 +2290,13 @@ static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); data = gfx_v9_0_get_rb_active_bitmap(adev); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * rb_bitmap_width_per_sh); } } - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); adev->gfx.config.backend_enable_mask = active_rbs; @@ -2433,7 +2433,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); for (k = 0; k < adev->usec_timeout; k++) { if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) break; @@ -2441,7 +2441,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) } if (k == adev->usec_timeout) { amdgpu_gfx_select_se_sh(adev, 0xffffffff, - 0xffffffff, 0xffffffff); + 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); DRM_INFO("Timeout wait for RLC serdes %u,%u\n", i, j); @@ -2449,7 +2449,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) } } } - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | @@ -6608,7 +6608,7 @@ static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev) for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { - amdgpu_gfx_select_se_sh(adev, j, 0x0, k); + amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0); RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); } } @@ -6670,7 +6670,7 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { - amdgpu_gfx_select_se_sh(adev, j, 0, k); + amdgpu_gfx_select_se_sh(adev, j, 0, k, 0); reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); if (reg_value) @@ -6685,7 +6685,7 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, err_data->ce_count += sec_count; err_data->ue_count += ded_count; - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); gfx_v9_0_query_utc_edc_status(adev, err_data); @@ -7145,7 +7145,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, mask = 1; ao_bitmap = 0; counter = 0; - amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); gfx_v9_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); bitmap = gfx_v9_0_get_cu_active_bitmap(adev); @@ -7178,7 +7178,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; } } - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h index dfe8d4841f58..f9f6edc5e558 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h @@ -27,6 +27,6 @@ extern const struct amdgpu_ip_block_version gfx_v9_0_ip_block; void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, - u32 instance); + u32 instance, int xcc_id); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index 93438770ca1a..d648a29c33e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -777,7 +777,7 @@ void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev) { u32 tmp; - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); tmp = 0; tmp = REG_SET_FIELD(tmp, GC_THROTTLE_CTRL, PATTERN_MODE, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index f1c04140e717..b67be666f38a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -568,7 +568,8 @@ static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev) static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, - u32 instance) + u32 instance, + int xcc_id) { u32 data; @@ -591,7 +592,7 @@ static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev, else data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); - WREG32_SOC15_RLC_SHADOW_EX(reg, GC, 0, regGRBM_GFX_INDEX, data); + WREG32_SOC15_RLC_SHADOW_EX(reg, GC, xcc_id, regGRBM_GFX_INDEX, data); } static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) @@ -907,13 +908,13 @@ static void gfx_v9_4_3_setup_rb(struct amdgpu_device *adev, int xcc_id) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff); + gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff, xcc_id); data = gfx_v9_4_3_get_rb_active_bitmap(adev); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * rb_bitmap_width_per_sh); } } - gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id); mutex_unlock(&adev->grbm_idx_mutex); adev->gfx.config.backend_enable_mask = active_rbs; @@ -1146,7 +1147,7 @@ static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff); + gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff, xcc_id); for (k = 0; k < adev->usec_timeout; k++) { if (RREG32_SOC15(GC, 0, regRLC_SERDES_CU_MASTER_BUSY) == 0) break; @@ -1154,7 +1155,8 @@ static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev, } if (k == adev->usec_timeout) { gfx_v9_4_3_select_se_sh(adev, 0xffffffff, - 0xffffffff, 0xffffffff); + 0xffffffff, 0xffffffff, + xcc_id); mutex_unlock(&adev->grbm_idx_mutex); DRM_INFO("Timeout wait for RLC serdes %u,%u\n", i, j); @@ -1162,7 +1164,7 @@ static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev, } } } - gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id); mutex_unlock(&adev->grbm_idx_mutex); mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | @@ -3065,7 +3067,7 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, mask = 1; ao_bitmap = 0; counter = 0; - gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff); + gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff, 0); gfx_v9_4_3_set_user_cu_inactive_bitmap( adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev); @@ -3098,7 +3100,7 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; } } - gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 47420b403871..148049782f50 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -371,12 +371,12 @@ static uint32_t nv_read_indexed_register(struct amdgpu_device *adev, u32 se_num, mutex_lock(&adev->grbm_idx_mutex); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0); val = RREG32(reg_offset); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); return val; } diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 7f99e130acd0..f64b87b11b1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1181,12 +1181,12 @@ static uint32_t si_get_register_value(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0); val = RREG32(reg_offset); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); return val; } else { diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 4b79a8933476..4d1487a9836c 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -363,12 +363,12 @@ static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_n mutex_lock(&adev->grbm_idx_mutex); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0); val = RREG32(reg_offset); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); return val; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 514bfc705d5a..6ef4be9322d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -288,12 +288,12 @@ static uint32_t soc21_read_indexed_register(struct amdgpu_device *adev, u32 se_n mutex_lock(&adev->grbm_idx_mutex); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0); val = RREG32(reg_offset); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); return val; } diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 531f173ade2d..8e70581960fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -762,12 +762,12 @@ static uint32_t vi_get_register_value(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0); val = RREG32(reg_offset); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); return val; } else { -- cgit v1.2.3 From 553f973a0d7bbe95ea5da46979d926a9c0ada109 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Tue, 11 Oct 2022 09:52:58 -0400 Subject: drm/amd/amdgpu: Update debugfs for XCC support (v3) This patch updates the 'regs2' interface for MMIO registers to add a new IOCTL command for a 'v2' state data that includes the XCC ID. This patch then updates amdgpu_gfx_select_se_sh() and amdgpu_gfx_select_me_pipe_q() (and the implementations in the gfx drivers) to support an additional parameter. This patch then creates a new debugfs interface "gprwave" which is a merge of shader GPR and wave status access. This new inteface uses an IOCTL to select banks as well as XCC identity. (v2) Fix missing xcc_id in wave_ind function (v3) Fix pm runtime calls and mutex locking (v4) Fix bad label Signed-off-by: Tom St Denis Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 202 +++++++++++++++++++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 10 +- drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h | 36 ++++- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 8 +- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 8 +- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 60 ++++----- 10 files changed, 275 insertions(+), 75 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index df94cd2c4b39..e94d0cf3f793 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -139,7 +139,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, sh_bank, instance_bank, 0); } else if (use_ring) { mutex_lock(&adev->srbm_mutex); - amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid); + amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid, 0); } if (pm_pg_lock) @@ -172,7 +172,7 @@ end: amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } else if (use_ring) { - amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0); + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } @@ -263,14 +263,14 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off } mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, rd->id.grbm.se, - rd->id.grbm.sh, - rd->id.grbm.instance, 0); + rd->id.grbm.sh, + rd->id.grbm.instance, rd->id.xcc_id); } if (rd->id.use_srbm) { mutex_lock(&adev->srbm_mutex); amdgpu_gfx_select_me_pipe_q(adev, rd->id.srbm.me, rd->id.srbm.pipe, - rd->id.srbm.queue, rd->id.srbm.vmid); + rd->id.srbm.queue, rd->id.srbm.vmid, rd->id.xcc_id); } if (rd->id.pg_lock) @@ -296,12 +296,12 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off } end: if (rd->id.use_grbm) { - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, rd->id.xcc_id); mutex_unlock(&adev->grbm_idx_mutex); } if (rd->id.use_srbm) { - amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0); + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, rd->id.xcc_id); mutex_unlock(&adev->srbm_mutex); } @@ -320,19 +320,45 @@ end: static long amdgpu_debugfs_regs2_ioctl(struct file *f, unsigned int cmd, unsigned long data) { struct amdgpu_debugfs_regs2_data *rd = f->private_data; + struct amdgpu_debugfs_regs2_iocdata v1_data; int r; + mutex_lock(&rd->lock); + switch (cmd) { - case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE: - mutex_lock(&rd->lock); - r = copy_from_user(&rd->id, (struct amdgpu_debugfs_regs2_iocdata *)data, + case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE_V2: + r = copy_from_user(&rd->id, (struct amdgpu_debugfs_regs2_iocdata_v2 *)data, sizeof(rd->id)); - mutex_unlock(&rd->lock); - return r ? -EINVAL : 0; + if (r) + r = -EINVAL; + goto done; + case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE: + r = copy_from_user(&v1_data, (struct amdgpu_debugfs_regs2_iocdata *)data, + sizeof(v1_data)); + if (r) { + r = -EINVAL; + goto done; + } + goto v1_copy; default: - return -EINVAL; - } - return 0; + r = -EINVAL; + goto done; + } + +v1_copy: + rd->id.use_srbm = v1_data.use_srbm; + rd->id.use_grbm = v1_data.use_grbm; + rd->id.pg_lock = v1_data.pg_lock; + rd->id.grbm.se = v1_data.grbm.se; + rd->id.grbm.sh = v1_data.grbm.sh; + rd->id.grbm.instance = v1_data.grbm.instance; + rd->id.srbm.me = v1_data.srbm.me; + rd->id.srbm.pipe = v1_data.srbm.pipe; + rd->id.srbm.queue = v1_data.srbm.queue; + rd->id.xcc_id = 0; +done: + mutex_unlock(&rd->lock); + return r; } static ssize_t amdgpu_debugfs_regs2_read(struct file *f, char __user *buf, size_t size, loff_t *pos) @@ -345,6 +371,135 @@ static ssize_t amdgpu_debugfs_regs2_write(struct file *f, const char __user *buf return amdgpu_debugfs_regs2_op(f, (char __user *)buf, *pos, size, 1); } +static int amdgpu_debugfs_gprwave_open(struct inode *inode, struct file *file) +{ + struct amdgpu_debugfs_gprwave_data *rd; + + rd = kzalloc(sizeof *rd, GFP_KERNEL); + if (!rd) + return -ENOMEM; + rd->adev = file_inode(file)->i_private; + file->private_data = rd; + mutex_init(&rd->lock); + + return 0; +} + +static int amdgpu_debugfs_gprwave_release(struct inode *inode, struct file *file) +{ + struct amdgpu_debugfs_gprwave_data *rd = file->private_data; + mutex_destroy(&rd->lock); + kfree(file->private_data); + return 0; +} + +static ssize_t amdgpu_debugfs_gprwave_read(struct file *f, char __user *buf, size_t size, loff_t *pos) +{ + struct amdgpu_debugfs_gprwave_data *rd = f->private_data; + struct amdgpu_device *adev = rd->adev; + ssize_t result = 0; + int r; + uint32_t *data, x; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + r = amdgpu_virt_enable_access_debugfs(adev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + data = kcalloc(1024, sizeof(*data), GFP_KERNEL); + if (!data) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + amdgpu_virt_disable_access_debugfs(adev); + return -ENOMEM; + } + + /* switch to the specific se/sh/cu */ + mutex_lock(&adev->grbm_idx_mutex); + amdgpu_gfx_select_se_sh(adev, rd->id.se, rd->id.sh, rd->id.cu, rd->id.xcc_id); + + if (!rd->id.gpr_or_wave) { + x = 0; + if (adev->gfx.funcs->read_wave_data) + adev->gfx.funcs->read_wave_data(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, data, &x); + } else { + x = size >> 2; + if (rd->id.gpr.vpgr_or_sgpr) { + if (adev->gfx.funcs->read_wave_vgprs) + adev->gfx.funcs->read_wave_vgprs(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, rd->id.gpr.thread, *pos, size>>2, data); + } else { + if (adev->gfx.funcs->read_wave_sgprs) + adev->gfx.funcs->read_wave_sgprs(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, *pos, size>>2, data); + } + } + + amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, rd->id.xcc_id); + mutex_unlock(&adev->grbm_idx_mutex); + + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + if (!x) { + result = -EINVAL; + goto done; + } + + while (size && (*pos < x * 4)) { + uint32_t value; + + value = data[*pos >> 2]; + r = put_user(value, (uint32_t *)buf); + if (r) { + result = r; + goto done; + } + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + +done: + amdgpu_virt_disable_access_debugfs(adev); + kfree(data); + return result; +} + +static long amdgpu_debugfs_gprwave_ioctl(struct file *f, unsigned int cmd, unsigned long data) +{ + struct amdgpu_debugfs_gprwave_data *rd = f->private_data; + int r; + + mutex_lock(&rd->lock); + + switch (cmd) { + case AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE: + r = copy_from_user(&rd->id, (struct amdgpu_debugfs_gprwave_iocdata *)data, sizeof rd->id); + if (r) + return r ? -EINVAL : 0; + goto done; + default: + r = -EINVAL; + goto done; + } + +done: + mutex_unlock(&rd->lock); + return r; +} + + + /** * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register @@ -913,7 +1068,7 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, x = 0; if (adev->gfx.funcs->read_wave_data) - adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x); + adev->gfx.funcs->read_wave_data(adev, 0, simd, wave, data, &x); amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0); mutex_unlock(&adev->grbm_idx_mutex); @@ -1007,10 +1162,10 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, if (bank == 0) { if (adev->gfx.funcs->read_wave_vgprs) - adev->gfx.funcs->read_wave_vgprs(adev, simd, wave, thread, offset, size>>2, data); + adev->gfx.funcs->read_wave_vgprs(adev, 0, simd, wave, thread, offset, size>>2, data); } else { if (adev->gfx.funcs->read_wave_sgprs) - adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size>>2, data); + adev->gfx.funcs->read_wave_sgprs(adev, 0, simd, wave, offset, size>>2, data); } amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0); @@ -1341,6 +1496,15 @@ static const struct file_operations amdgpu_debugfs_regs2_fops = { .llseek = default_llseek }; +static const struct file_operations amdgpu_debugfs_gprwave_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = amdgpu_debugfs_gprwave_ioctl, + .read = amdgpu_debugfs_gprwave_read, + .open = amdgpu_debugfs_gprwave_open, + .release = amdgpu_debugfs_gprwave_release, + .llseek = default_llseek +}; + static const struct file_operations amdgpu_debugfs_regs_fops = { .owner = THIS_MODULE, .read = amdgpu_debugfs_regs_read, @@ -1418,6 +1582,7 @@ static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops = { static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_regs_fops, &amdgpu_debugfs_regs2_fops, + &amdgpu_debugfs_gprwave_fops, &amdgpu_debugfs_regs_didt_fops, &amdgpu_debugfs_regs_pcie_fops, &amdgpu_debugfs_regs_smc_fops, @@ -1434,6 +1599,7 @@ static const struct file_operations *debugfs_regs[] = { static const char * const debugfs_regs_names[] = { "amdgpu_regs", "amdgpu_regs2", + "amdgpu_gprwave", "amdgpu_regs_didt", "amdgpu_regs_pcie", "amdgpu_regs_smc", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 93f9875154db..2b2d75763875 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -247,16 +247,16 @@ struct amdgpu_gfx_funcs { uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev); void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance, int xcc_id); - void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd, + void (*read_wave_data)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields); - void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, + void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst); - void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, + void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst); void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, - u32 queue, u32 vmid); + u32 queue, u32 vmid, u32 xcc_id); void (*init_spm_golden)(struct amdgpu_device *adev); void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable); int (*get_gfx_shadow_info)(struct amdgpu_device *adev, @@ -405,7 +405,7 @@ struct amdgpu_gfx { #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) #define amdgpu_gfx_select_se_sh(adev, se, sh, instance, xcc_id) ((adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance), (xcc_id))) -#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid)) +#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid, xcc_id) ((adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid), (xcc_id))) #define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev)) #define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si))) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h index 919d9d401750..107f9bb0e24f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h @@ -35,17 +35,51 @@ struct amdgpu_debugfs_regs2_iocdata { } srbm; }; +struct amdgpu_debugfs_regs2_iocdata_v2 { + __u32 use_srbm, use_grbm, pg_lock; + struct { + __u32 se, sh, instance; + } grbm; + struct { + __u32 me, pipe, queue, vmid; + } srbm; + u32 xcc_id; +}; + +struct amdgpu_debugfs_gprwave_iocdata { + u32 gpr_or_wave, se, sh, cu, wave, simd, xcc_id; + struct { + u32 thread, vpgr_or_sgpr; + } gpr; +}; + /* * MMIO debugfs state data (per file* handle) */ struct amdgpu_debugfs_regs2_data { struct amdgpu_device *adev; struct mutex lock; - struct amdgpu_debugfs_regs2_iocdata id; + struct amdgpu_debugfs_regs2_iocdata_v2 id; +}; + +struct amdgpu_debugfs_gprwave_data { + struct amdgpu_device *adev; + struct mutex lock; + struct amdgpu_debugfs_gprwave_iocdata id; }; enum AMDGPU_DEBUGFS_REGS2_CMDS { AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0, + AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2, +}; + +enum AMDGPU_DEBUGFS_GPRWAVE_CMDS { + AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE=0, }; +//reg2 interface #define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE, struct amdgpu_debugfs_regs2_iocdata) +#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE_V2 _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2, struct amdgpu_debugfs_regs2_iocdata_v2) + +//gprwave interface +#define AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE, struct amdgpu_debugfs_gprwave_iocdata) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 7b585141e10e..89158c72753e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4291,7 +4291,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave, *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); } -static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* in gfx10 the SIMD_ID is specified as part of the INSTANCE * field when performing a select_se_sh so it should be @@ -4318,7 +4318,7 @@ static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE); } -static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { @@ -4329,7 +4329,7 @@ static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, dst); } -static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst) @@ -4340,7 +4340,7 @@ static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v10_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { nv_grbm_select(adev, me, pipe, q, vm); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 790df2cc3480..4b7224de879e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -765,7 +765,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave, *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA); } -static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* in gfx11 the SIMD_ID is specified as part of the INSTANCE * field when performing a select_se_sh so it should be @@ -791,7 +791,7 @@ static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE); } -static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { @@ -802,7 +802,7 @@ static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, dst); } -static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst) @@ -813,7 +813,7 @@ static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { soc21_grbm_select(adev, me, pipe, q, vm); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 7cb72bf1acdd..809558c718e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -2968,7 +2968,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, *(out++) = RREG32(mmSQ_IND_DATA); } -static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* type 0 wave data */ dst[(*no_fields)++] = 0; @@ -2993,7 +2993,7 @@ static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } -static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { @@ -3003,7 +3003,7 @@ static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v6_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { DRM_INFO("Not implemented\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index d56dda5fc588..0f0c12bbe228 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4112,7 +4112,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, *(out++) = RREG32(mmSQ_IND_DATA); } -static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* type 0 wave data */ dst[(*no_fields)++] = 0; @@ -4137,7 +4137,7 @@ static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } -static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { @@ -4147,7 +4147,7 @@ static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { cik_srbm_select(adev, me, pipe, q, vm); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 2ae7f167985f..6d0589dc1d6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3419,7 +3419,7 @@ static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, } static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { vi_srbm_select(adev, me, pipe, q, vm); } @@ -5217,7 +5217,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, *(out++) = RREG32(mmSQ_IND_DATA); } -static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* type 0 wave data */ dst[(*no_fields)++] = 0; @@ -5242,7 +5242,7 @@ static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } -static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index da69177dc76f..cc005e3bcd40 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1788,7 +1788,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); } -static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* type 1 wave data */ dst[(*no_fields)++] = 1; @@ -1809,7 +1809,7 @@ static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } -static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { @@ -1818,7 +1818,7 @@ static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, start + SQIND_WAVE_SGPRS_OFFSET, size, dst); } -static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst) @@ -1829,7 +1829,7 @@ static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { soc15_grbm_select(adev, me, pipe, q, vm, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index c5ffcd70ec7b..76e3571ec5c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -536,21 +536,21 @@ static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev, WREG32_SOC15_RLC_SHADOW_EX(reg, GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX, data); } -static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) +static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t address) { - WREG32_SOC15_RLC(GC, GET_INST(GC, 0), regSQ_IND_INDEX, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX, (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | (address << SQ_IND_INDEX__INDEX__SHIFT) | (SQ_IND_INDEX__FORCE_READ_MASK)); - return RREG32_SOC15(GC, GET_INST(GC, 0), regSQ_IND_DATA); + return RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA); } -static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, +static void wave_read_regs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t regno, uint32_t num, uint32_t *out) { - WREG32_SOC15_RLC(GC, GET_INST(GC, 0), regSQ_IND_INDEX, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX, (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | (regno << SQ_IND_INDEX__INDEX__SHIFT) | @@ -558,53 +558,53 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, (SQ_IND_INDEX__FORCE_READ_MASK) | (SQ_IND_INDEX__AUTO_INCR_MASK)); while (num--) - *(out++) = RREG32_SOC15(GC, GET_INST(GC, 0), regSQ_IND_DATA); + *(out++) = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA); } static void gfx_v9_4_3_read_wave_data(struct amdgpu_device *adev, - uint32_t simd, uint32_t wave, + uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* type 1 wave data */ dst[(*no_fields)++] = 1; - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); -} - -static void gfx_v9_4_3_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_STATUS); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_HW_ID); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW0); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW1); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_GPR_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_LDS_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_TRAPSTS); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_STS); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_DBG0); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_M0); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_MODE); +} + +static void gfx_v9_4_3_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { - wave_read_regs(adev, simd, wave, 0, + wave_read_regs(adev, xcc_id, simd, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size, dst); } -static void gfx_v9_4_3_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v9_4_3_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst) { - wave_read_regs(adev, simd, wave, thread, + wave_read_regs(adev, xcc_id, simd, wave, thread, start + SQIND_WAVE_VGPRS_OFFSET, size, dst); } static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { - soc15_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, 0)); + soc15_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, xcc_id)); } static enum amdgpu_gfx_partition -- cgit v1.2.3 From c6a64ad9b7f7182b5e2439a740574300b2e61951 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 30 May 2023 11:52:45 +0530 Subject: drm/amdgpu: Initialize xcc mask For ASICs which are not initialized through discovery, initialize GFX cluster as 1. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1 + 3 files changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 809558c718e3..da6caff78c22 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3028,6 +3028,7 @@ static int gfx_v6_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->gfx.xcc_mask = 1; adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS; adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), GFX6_NUM_COMPUTE_RINGS); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 0f0c12bbe228..20fcd86a3e79 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4179,6 +4179,7 @@ static int gfx_v7_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->gfx.xcc_mask = 1; adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS; adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 2f1ef75e126c..51c1745c8369 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -5262,6 +5262,7 @@ static int gfx_v8_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->gfx.xcc_mask = 1; adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); -- cgit v1.2.3