diff options
Diffstat (limited to 'drivers/gpu/drm/radeon/si.c')
-rw-r--r-- | drivers/gpu/drm/radeon/si.c | 1337 |
1 files changed, 1296 insertions, 41 deletions
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index a1b0da6b5808..234906709067 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -32,40 +32,43 @@ #include "sid.h" #include "atom.h" #include "si_blit_shaders.h" +#include "clearstate_si.h" +#include "radeon_ucode.h" -#define SI_PFP_UCODE_SIZE 2144 -#define SI_PM4_UCODE_SIZE 2144 -#define SI_CE_UCODE_SIZE 2144 -#define SI_RLC_UCODE_SIZE 2048 -#define SI_MC_UCODE_SIZE 7769 -#define OLAND_MC_UCODE_SIZE 7863 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin"); MODULE_FIRMWARE("radeon/TAHITI_me.bin"); MODULE_FIRMWARE("radeon/TAHITI_ce.bin"); MODULE_FIRMWARE("radeon/TAHITI_mc.bin"); MODULE_FIRMWARE("radeon/TAHITI_rlc.bin"); +MODULE_FIRMWARE("radeon/TAHITI_smc.bin"); MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin"); MODULE_FIRMWARE("radeon/PITCAIRN_me.bin"); MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin"); MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin"); MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin"); +MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin"); MODULE_FIRMWARE("radeon/VERDE_pfp.bin"); MODULE_FIRMWARE("radeon/VERDE_me.bin"); MODULE_FIRMWARE("radeon/VERDE_ce.bin"); MODULE_FIRMWARE("radeon/VERDE_mc.bin"); MODULE_FIRMWARE("radeon/VERDE_rlc.bin"); +MODULE_FIRMWARE("radeon/VERDE_smc.bin"); MODULE_FIRMWARE("radeon/OLAND_pfp.bin"); MODULE_FIRMWARE("radeon/OLAND_me.bin"); MODULE_FIRMWARE("radeon/OLAND_ce.bin"); MODULE_FIRMWARE("radeon/OLAND_mc.bin"); MODULE_FIRMWARE("radeon/OLAND_rlc.bin"); +MODULE_FIRMWARE("radeon/OLAND_smc.bin"); MODULE_FIRMWARE("radeon/HAINAN_pfp.bin"); MODULE_FIRMWARE("radeon/HAINAN_me.bin"); MODULE_FIRMWARE("radeon/HAINAN_ce.bin"); MODULE_FIRMWARE("radeon/HAINAN_mc.bin"); MODULE_FIRMWARE("radeon/HAINAN_rlc.bin"); +MODULE_FIRMWARE("radeon/HAINAN_smc.bin"); +static void si_pcie_gen3_enable(struct radeon_device *rdev); +static void si_program_aspm(struct radeon_device *rdev); extern int r600_ih_ring_alloc(struct radeon_device *rdev); extern void r600_ih_ring_fini(struct radeon_device *rdev); 
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev); @@ -75,6 +78,228 @@ extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev); extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev); extern bool evergreen_is_display_hung(struct radeon_device *rdev); +static const u32 verde_rlc_save_restore_register_list[] = +{ + (0x8000 << 16) | (0x98f4 >> 2), + 0x00000000, + (0x8040 << 16) | (0x98f4 >> 2), + 0x00000000, + (0x8000 << 16) | (0xe80 >> 2), + 0x00000000, + (0x8040 << 16) | (0xe80 >> 2), + 0x00000000, + (0x8000 << 16) | (0x89bc >> 2), + 0x00000000, + (0x8040 << 16) | (0x89bc >> 2), + 0x00000000, + (0x8000 << 16) | (0x8c1c >> 2), + 0x00000000, + (0x8040 << 16) | (0x8c1c >> 2), + 0x00000000, + (0x9c00 << 16) | (0x98f0 >> 2), + 0x00000000, + (0x9c00 << 16) | (0xe7c >> 2), + 0x00000000, + (0x8000 << 16) | (0x9148 >> 2), + 0x00000000, + (0x8040 << 16) | (0x9148 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9150 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x897c >> 2), + 0x00000000, + (0x9c00 << 16) | (0x8d8c >> 2), + 0x00000000, + (0x9c00 << 16) | (0xac54 >> 2), + 0X00000000, + 0x3, + (0x9c00 << 16) | (0x98f8 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9910 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9914 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9918 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x991c >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9920 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9924 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9928 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x992c >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9930 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9934 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9938 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x993c >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9940 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9944 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9948 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x994c >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9950 >> 2), 
+ 0x00000000, + (0x9c00 << 16) | (0x9954 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9958 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x995c >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9960 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9964 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9968 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x996c >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9970 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9974 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9978 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x997c >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9980 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9984 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9988 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x998c >> 2), + 0x00000000, + (0x9c00 << 16) | (0x8c00 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x8c14 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x8c04 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x8c08 >> 2), + 0x00000000, + (0x8000 << 16) | (0x9b7c >> 2), + 0x00000000, + (0x8040 << 16) | (0x9b7c >> 2), + 0x00000000, + (0x8000 << 16) | (0xe84 >> 2), + 0x00000000, + (0x8040 << 16) | (0xe84 >> 2), + 0x00000000, + (0x8000 << 16) | (0x89c0 >> 2), + 0x00000000, + (0x8040 << 16) | (0x89c0 >> 2), + 0x00000000, + (0x8000 << 16) | (0x914c >> 2), + 0x00000000, + (0x8040 << 16) | (0x914c >> 2), + 0x00000000, + (0x8000 << 16) | (0x8c20 >> 2), + 0x00000000, + (0x8040 << 16) | (0x8c20 >> 2), + 0x00000000, + (0x8000 << 16) | (0x9354 >> 2), + 0x00000000, + (0x8040 << 16) | (0x9354 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9060 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9364 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9100 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x913c >> 2), + 0x00000000, + (0x8000 << 16) | (0x90e0 >> 2), + 0x00000000, + (0x8000 << 16) | (0x90e4 >> 2), + 0x00000000, + (0x8000 << 16) | (0x90e8 >> 2), + 0x00000000, + (0x8040 << 16) | (0x90e0 >> 2), + 0x00000000, + (0x8040 << 16) | (0x90e4 >> 2), + 0x00000000, + (0x8040 << 16) | (0x90e8 >> 2), + 0x00000000, + (0x9c00 << 16) | 
(0x8bcc >> 2), + 0x00000000, + (0x9c00 << 16) | (0x8b24 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x88c4 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x8e50 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x8c0c >> 2), + 0x00000000, + (0x9c00 << 16) | (0x8e58 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x8e5c >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9508 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x950c >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9494 >> 2), + 0x00000000, + (0x9c00 << 16) | (0xac0c >> 2), + 0x00000000, + (0x9c00 << 16) | (0xac10 >> 2), + 0x00000000, + (0x9c00 << 16) | (0xac14 >> 2), + 0x00000000, + (0x9c00 << 16) | (0xae00 >> 2), + 0x00000000, + (0x9c00 << 16) | (0xac08 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x88d4 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x88c8 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x88cc >> 2), + 0x00000000, + (0x9c00 << 16) | (0x89b0 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x8b10 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x8a14 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9830 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9834 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9838 >> 2), + 0x00000000, + (0x9c00 << 16) | (0x9a10 >> 2), + 0x00000000, + (0x8000 << 16) | (0x9870 >> 2), + 0x00000000, + (0x8000 << 16) | (0x9874 >> 2), + 0x00000000, + (0x8001 << 16) | (0x9870 >> 2), + 0x00000000, + (0x8001 << 16) | (0x9874 >> 2), + 0x00000000, + (0x8040 << 16) | (0x9870 >> 2), + 0x00000000, + (0x8040 << 16) | (0x9874 >> 2), + 0x00000000, + (0x8041 << 16) | (0x9870 >> 2), + 0x00000000, + (0x8041 << 16) | (0x9874 >> 2), + 0x00000000, + 0x00000000 +}; + static const u32 tahiti_golden_rlc_registers[] = { 0xc424, 0xffffffff, 0x00601005, @@ -1320,6 +1545,7 @@ static int si_init_microcode(struct radeon_device *rdev) const char *chip_name; const char *rlc_chip_name; size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size; + size_t smc_req_size; char fw_name[30]; int err; @@ -1341,6 +1567,7 @@ static int si_init_microcode(struct radeon_device *rdev) 
ce_req_size = SI_CE_UCODE_SIZE * 4; rlc_req_size = SI_RLC_UCODE_SIZE * 4; mc_req_size = SI_MC_UCODE_SIZE * 4; + smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4); break; case CHIP_PITCAIRN: chip_name = "PITCAIRN"; @@ -1350,6 +1577,7 @@ static int si_init_microcode(struct radeon_device *rdev) ce_req_size = SI_CE_UCODE_SIZE * 4; rlc_req_size = SI_RLC_UCODE_SIZE * 4; mc_req_size = SI_MC_UCODE_SIZE * 4; + smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4); break; case CHIP_VERDE: chip_name = "VERDE"; @@ -1359,6 +1587,7 @@ static int si_init_microcode(struct radeon_device *rdev) ce_req_size = SI_CE_UCODE_SIZE * 4; rlc_req_size = SI_RLC_UCODE_SIZE * 4; mc_req_size = SI_MC_UCODE_SIZE * 4; + smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4); break; case CHIP_OLAND: chip_name = "OLAND"; @@ -1368,6 +1597,7 @@ static int si_init_microcode(struct radeon_device *rdev) ce_req_size = SI_CE_UCODE_SIZE * 4; rlc_req_size = SI_RLC_UCODE_SIZE * 4; mc_req_size = OLAND_MC_UCODE_SIZE * 4; + smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4); break; case CHIP_HAINAN: chip_name = "HAINAN"; @@ -1377,6 +1607,7 @@ static int si_init_microcode(struct radeon_device *rdev) ce_req_size = SI_CE_UCODE_SIZE * 4; rlc_req_size = SI_RLC_UCODE_SIZE * 4; mc_req_size = OLAND_MC_UCODE_SIZE * 4; + smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4); break; default: BUG(); } @@ -1439,6 +1670,17 @@ static int si_init_microcode(struct radeon_device *rdev) err = -EINVAL; } + snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name); + err = request_firmware(&rdev->smc_fw, fw_name, &pdev->dev); + if (err) + goto out; + if (rdev->smc_fw->size != smc_req_size) { + printk(KERN_ERR + "si_smc: Bogus length %zu in firmware \"%s\"\n", + rdev->smc_fw->size, fw_name); + err = -EINVAL; + } + out: platform_device_unregister(pdev); @@ -1457,6 +1699,8 @@ out: rdev->rlc_fw = NULL; release_firmware(rdev->mc_fw); rdev->mc_fw = NULL; + release_firmware(rdev->smc_fw); + rdev->smc_fw = NULL; } return err; } @@ -1792,7 +2036,8 @@ static void 
dce6_program_watermarks(struct radeon_device *rdev, u32 lb_size, u32 num_heads) { struct drm_display_mode *mode = &radeon_crtc->base.mode; - struct dce6_wm_params wm; + struct dce6_wm_params wm_low, wm_high; + u32 dram_channels; u32 pixel_period; u32 line_time = 0; u32 latency_watermark_a = 0, latency_watermark_b = 0; @@ -1808,38 +2053,83 @@ static void dce6_program_watermarks(struct radeon_device *rdev, priority_a_cnt = 0; priority_b_cnt = 0; - wm.yclk = rdev->pm.current_mclk * 10; - wm.sclk = rdev->pm.current_sclk * 10; - wm.disp_clk = mode->clock; - wm.src_width = mode->crtc_hdisplay; - wm.active_time = mode->crtc_hdisplay * pixel_period; - wm.blank_time = line_time - wm.active_time; - wm.interlaced = false; - if (mode->flags & DRM_MODE_FLAG_INTERLACE) - wm.interlaced = true; - wm.vsc = radeon_crtc->vsc; - wm.vtaps = 1; - if (radeon_crtc->rmx_type != RMX_OFF) - wm.vtaps = 2; - wm.bytes_per_pixel = 4; /* XXX: get this from fb config */ - wm.lb_size = lb_size; if (rdev->family == CHIP_ARUBA) - wm.dram_channels = evergreen_get_number_of_dram_channels(rdev); + dram_channels = evergreen_get_number_of_dram_channels(rdev); else - wm.dram_channels = si_get_number_of_dram_channels(rdev); - wm.num_heads = num_heads; + dram_channels = si_get_number_of_dram_channels(rdev); + + /* watermark for high clocks */ + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { + wm_high.yclk = + radeon_dpm_get_mclk(rdev, false) * 10; + wm_high.sclk = + radeon_dpm_get_sclk(rdev, false) * 10; + } else { + wm_high.yclk = rdev->pm.current_mclk * 10; + wm_high.sclk = rdev->pm.current_sclk * 10; + } + + wm_high.disp_clk = mode->clock; + wm_high.src_width = mode->crtc_hdisplay; + wm_high.active_time = mode->crtc_hdisplay * pixel_period; + wm_high.blank_time = line_time - wm_high.active_time; + wm_high.interlaced = false; + if (mode->flags & DRM_MODE_FLAG_INTERLACE) + wm_high.interlaced = true; + wm_high.vsc = radeon_crtc->vsc; + wm_high.vtaps = 1; + if (radeon_crtc->rmx_type != 
RMX_OFF) + wm_high.vtaps = 2; + wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */ + wm_high.lb_size = lb_size; + wm_high.dram_channels = dram_channels; + wm_high.num_heads = num_heads; + + /* watermark for low clocks */ + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { + wm_low.yclk = + radeon_dpm_get_mclk(rdev, true) * 10; + wm_low.sclk = + radeon_dpm_get_sclk(rdev, true) * 10; + } else { + wm_low.yclk = rdev->pm.current_mclk * 10; + wm_low.sclk = rdev->pm.current_sclk * 10; + } + + wm_low.disp_clk = mode->clock; + wm_low.src_width = mode->crtc_hdisplay; + wm_low.active_time = mode->crtc_hdisplay * pixel_period; + wm_low.blank_time = line_time - wm_low.active_time; + wm_low.interlaced = false; + if (mode->flags & DRM_MODE_FLAG_INTERLACE) + wm_low.interlaced = true; + wm_low.vsc = radeon_crtc->vsc; + wm_low.vtaps = 1; + if (radeon_crtc->rmx_type != RMX_OFF) + wm_low.vtaps = 2; + wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */ + wm_low.lb_size = lb_size; + wm_low.dram_channels = dram_channels; + wm_low.num_heads = num_heads; /* set for high clocks */ - latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535); + latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535); /* set for low clocks */ - /* wm.yclk = low clk; wm.sclk = low clk */ - latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535); + latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535); /* possibly force display priority to high */ /* should really do this at mode validation time... 
*/ - if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) || - !dce6_average_bandwidth_vs_available_bandwidth(&wm) || - !dce6_check_latency_hiding(&wm) || + if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) || + !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) || + !dce6_check_latency_hiding(&wm_high) || + (rdev->disp_priority == 2)) { + DRM_DEBUG_KMS("force priority to high\n"); + priority_a_cnt |= PRIORITY_ALWAYS_ON; + priority_b_cnt |= PRIORITY_ALWAYS_ON; + } + if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) || + !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) || + !dce6_check_latency_hiding(&wm_low) || (rdev->disp_priority == 2)) { DRM_DEBUG_KMS("force priority to high\n"); priority_a_cnt |= PRIORITY_ALWAYS_ON; @@ -1895,6 +2185,10 @@ static void dce6_program_watermarks(struct radeon_device *rdev, WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt); WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt); + /* save values for DPM */ + radeon_crtc->line_time = line_time; + radeon_crtc->wm_high = latency_watermark_a; + radeon_crtc->wm_low = latency_watermark_b; } void dce6_bandwidth_update(struct radeon_device *rdev) @@ -3535,8 +3829,8 @@ static void si_mc_program(struct radeon_device *rdev) } } -static void si_vram_gtt_location(struct radeon_device *rdev, - struct radeon_mc *mc) +void si_vram_gtt_location(struct radeon_device *rdev, + struct radeon_mc *mc) { if (mc->mc_vram_size > 0xFFC0000000ULL) { /* leave room for at least 1024M GTT */ @@ -4282,6 +4576,450 @@ void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) } /* + * Power and clock gating + */ +static void si_wait_for_rlc_serdes(struct radeon_device *rdev) +{ + int i; + + for (i = 0; i < rdev->usec_timeout; i++) { + if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0) + break; + udelay(1); + } + + for (i = 0; i < rdev->usec_timeout; i++) { + if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0) + break; + 
udelay(1); + } +} + +static void si_enable_gui_idle_interrupt(struct radeon_device *rdev, + bool enable) +{ + u32 tmp = RREG32(CP_INT_CNTL_RING0); + u32 mask; + int i; + + if (enable) + tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + else + tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); + WREG32(CP_INT_CNTL_RING0, tmp); + + if (!enable) { + /* read a gfx register */ + tmp = RREG32(DB_DEPTH_INFO); + + mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS; + for (i = 0; i < rdev->usec_timeout; i++) { + if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS)) + break; + udelay(1); + } + } +} + +static void si_set_uvd_dcm(struct radeon_device *rdev, + bool sw_mode) +{ + u32 tmp, tmp2; + + tmp = RREG32(UVD_CGC_CTRL); + tmp &= ~(CLK_OD_MASK | CG_DT_MASK); + tmp |= DCM | CG_DT(1) | CLK_OD(4); + + if (sw_mode) { + tmp &= ~0x7ffff800; + tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7); + } else { + tmp |= 0x7ffff800; + tmp2 = 0; + } + + WREG32(UVD_CGC_CTRL, tmp); + WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2); +} + +static void si_init_uvd_internal_cg(struct radeon_device *rdev) +{ + bool hw_mode = true; + + if (hw_mode) { + si_set_uvd_dcm(rdev, false); + } else { + u32 tmp = RREG32(UVD_CGC_CTRL); + tmp &= ~DCM; + WREG32(UVD_CGC_CTRL, tmp); + } +} + +static u32 si_halt_rlc(struct radeon_device *rdev) +{ + u32 data, orig; + + orig = data = RREG32(RLC_CNTL); + + if (data & RLC_ENABLE) { + data &= ~RLC_ENABLE; + WREG32(RLC_CNTL, data); + + si_wait_for_rlc_serdes(rdev); + } + + return orig; +} + +static void si_update_rlc(struct radeon_device *rdev, u32 rlc) +{ + u32 tmp; + + tmp = RREG32(RLC_CNTL); + if (tmp != rlc) + WREG32(RLC_CNTL, rlc); +} + +static void si_enable_dma_pg(struct radeon_device *rdev, bool enable) +{ + u32 data, orig; + + orig = data = RREG32(DMA_PG); + if (enable) + data |= PG_CNTL_ENABLE; + else + data &= ~PG_CNTL_ENABLE; + if (orig != data) + WREG32(DMA_PG, data); +} + +static void si_init_dma_pg(struct 
radeon_device *rdev) +{ + u32 tmp; + + WREG32(DMA_PGFSM_WRITE, 0x00002000); + WREG32(DMA_PGFSM_CONFIG, 0x100010ff); + + for (tmp = 0; tmp < 5; tmp++) + WREG32(DMA_PGFSM_WRITE, 0); +} + +static void si_enable_gfx_cgpg(struct radeon_device *rdev, + bool enable) +{ + u32 tmp; + + if (enable) { + tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10); + WREG32(RLC_TTOP_D, tmp); + + tmp = RREG32(RLC_PG_CNTL); + tmp |= GFX_PG_ENABLE; + WREG32(RLC_PG_CNTL, tmp); + + tmp = RREG32(RLC_AUTO_PG_CTRL); + tmp |= AUTO_PG_EN; + WREG32(RLC_AUTO_PG_CTRL, tmp); + } else { + tmp = RREG32(RLC_AUTO_PG_CTRL); + tmp &= ~AUTO_PG_EN; + WREG32(RLC_AUTO_PG_CTRL, tmp); + + tmp = RREG32(DB_RENDER_CONTROL); + } +} + +static void si_init_gfx_cgpg(struct radeon_device *rdev) +{ + u32 tmp; + + WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8); + + tmp = RREG32(RLC_PG_CNTL); + tmp |= GFX_PG_SRC; + WREG32(RLC_PG_CNTL, tmp); + + WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8); + + tmp = RREG32(RLC_AUTO_PG_CTRL); + + tmp &= ~GRBM_REG_SGIT_MASK; + tmp |= GRBM_REG_SGIT(0x700); + tmp &= ~PG_AFTER_GRBM_REG_ST_MASK; + WREG32(RLC_AUTO_PG_CTRL, tmp); +} + +static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh) +{ + u32 mask = 0, tmp, tmp1; + int i; + + si_select_se_sh(rdev, se, sh); + tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG); + tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG); + si_select_se_sh(rdev, 0xffffffff, 0xffffffff); + + tmp &= 0xffff0000; + + tmp |= tmp1; + tmp >>= 16; + + for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) { + mask <<= 1; + mask |= 1; + } + + return (~tmp) & mask; +} + +static void si_init_ao_cu_mask(struct radeon_device *rdev) +{ + u32 i, j, k, active_cu_number = 0; + u32 mask, counter, cu_bitmap; + u32 tmp = 0; + + for (i = 0; i < rdev->config.si.max_shader_engines; i++) { + for (j = 0; j < rdev->config.si.max_sh_per_se; j++) { + mask = 1; + cu_bitmap = 0; + counter = 0; + for (k = 0; k < 
rdev->config.si.max_cu_per_sh; k++) { + if (si_get_cu_active_bitmap(rdev, i, j) & mask) { + if (counter < 2) + cu_bitmap |= mask; + counter++; + } + mask <<= 1; + } + + active_cu_number += counter; + tmp |= (cu_bitmap << (i * 16 + j * 8)); + } + } + + WREG32(RLC_PG_AO_CU_MASK, tmp); + + tmp = RREG32(RLC_MAX_PG_CU); + tmp &= ~MAX_PU_CU_MASK; + tmp |= MAX_PU_CU(active_cu_number); + WREG32(RLC_MAX_PG_CU, tmp); +} + +static void si_enable_cgcg(struct radeon_device *rdev, + bool enable) +{ + u32 data, orig, tmp; + + orig = data = RREG32(RLC_CGCG_CGLS_CTRL); + + si_enable_gui_idle_interrupt(rdev, enable); + + if (enable) { + WREG32(RLC_GCPM_GENERAL_3, 0x00000080); + + tmp = si_halt_rlc(rdev); + + WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff); + WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff); + WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff); + + si_wait_for_rlc_serdes(rdev); + + si_update_rlc(rdev, tmp); + + WREG32(RLC_SERDES_WR_CTRL, 0x007000ff); + + data |= CGCG_EN | CGLS_EN; + } else { + RREG32(CB_CGTT_SCLK_CTRL); + RREG32(CB_CGTT_SCLK_CTRL); + RREG32(CB_CGTT_SCLK_CTRL); + RREG32(CB_CGTT_SCLK_CTRL); + + data &= ~(CGCG_EN | CGLS_EN); + } + + if (orig != data) + WREG32(RLC_CGCG_CGLS_CTRL, data); +} + +static void si_enable_mgcg(struct radeon_device *rdev, + bool enable) +{ + u32 data, orig, tmp = 0; + + if (enable) { + orig = data = RREG32(CGTS_SM_CTRL_REG); + data = 0x96940200; + if (orig != data) + WREG32(CGTS_SM_CTRL_REG, data); + + orig = data = RREG32(CP_MEM_SLP_CNTL); + data |= CP_MEM_LS_EN; + if (orig != data) + WREG32(CP_MEM_SLP_CNTL, data); + + orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); + data &= 0xffffffc0; + if (orig != data) + WREG32(RLC_CGTT_MGCG_OVERRIDE, data); + + tmp = si_halt_rlc(rdev); + + WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff); + WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff); + WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff); + + si_update_rlc(rdev, tmp); + } else { + orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); + data |= 0x00000003; + if (orig != 
data) + WREG32(RLC_CGTT_MGCG_OVERRIDE, data); + + data = RREG32(CP_MEM_SLP_CNTL); + if (data & CP_MEM_LS_EN) { + data &= ~CP_MEM_LS_EN; + WREG32(CP_MEM_SLP_CNTL, data); + } + orig = data = RREG32(CGTS_SM_CTRL_REG); + data |= LS_OVERRIDE | OVERRIDE; + if (orig != data) + WREG32(CGTS_SM_CTRL_REG, data); + + tmp = si_halt_rlc(rdev); + + WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff); + WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff); + WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff); + + si_update_rlc(rdev, tmp); + } +} + +static void si_enable_uvd_mgcg(struct radeon_device *rdev, + bool enable) +{ + u32 orig, data, tmp; + + if (enable) { + tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL); + tmp |= 0x3fff; + WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp); + + orig = data = RREG32(UVD_CGC_CTRL); + data |= DCM; + if (orig != data) + WREG32(UVD_CGC_CTRL, data); + + WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0); + WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0); + } else { + tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL); + tmp &= ~0x3fff; + WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp); + + orig = data = RREG32(UVD_CGC_CTRL); + data &= ~DCM; + if (orig != data) + WREG32(UVD_CGC_CTRL, data); + + WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff); + WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff); + } +} + +static const u32 mc_cg_registers[] = +{ + MC_HUB_MISC_HUB_CG, + MC_HUB_MISC_SIP_CG, + MC_HUB_MISC_VM_CG, + MC_XPB_CLK_GAT, + ATC_MISC_CG, + MC_CITF_MISC_WR_CG, + MC_CITF_MISC_RD_CG, + MC_CITF_MISC_VM_CG, + VM_L2_CG, +}; + +static void si_enable_mc_ls(struct radeon_device *rdev, + bool enable) +{ + int i; + u32 orig, data; + + for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) { + orig = data = RREG32(mc_cg_registers[i]); + if (enable) + data |= MC_LS_ENABLE; + else + data &= ~MC_LS_ENABLE; + if (data != orig) + WREG32(mc_cg_registers[i], data); + } +} + + +static void si_init_cg(struct radeon_device *rdev) +{ + bool has_uvd = true; + + si_enable_mgcg(rdev, true); + si_enable_cgcg(rdev, 
true); + /* disable MC LS on Tahiti */ + if (rdev->family == CHIP_TAHITI) + si_enable_mc_ls(rdev, false); + if (has_uvd) { + si_enable_uvd_mgcg(rdev, true); + si_init_uvd_internal_cg(rdev); + } +} + +static void si_fini_cg(struct radeon_device *rdev) +{ + bool has_uvd = true; + + if (has_uvd) + si_enable_uvd_mgcg(rdev, false); + si_enable_cgcg(rdev, false); + si_enable_mgcg(rdev, false); +} + +static void si_init_pg(struct radeon_device *rdev) +{ + bool has_pg = false; + + /* only cape verde supports PG */ + if (rdev->family == CHIP_VERDE) + has_pg = true; + + if (has_pg) { + si_init_ao_cu_mask(rdev); + si_init_dma_pg(rdev); + si_enable_dma_pg(rdev, true); + si_init_gfx_cgpg(rdev); + si_enable_gfx_cgpg(rdev, true); + } else { + WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8); + WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8); + } +} + +static void si_fini_pg(struct radeon_device *rdev) +{ + bool has_pg = false; + + /* only cape verde supports PG */ + if (rdev->family == CHIP_VERDE) + has_pg = true; + + if (has_pg) { + si_enable_dma_pg(rdev, false); + si_enable_gfx_cgpg(rdev, false); + } +} + +/* * RLC */ void si_rlc_fini(struct radeon_device *rdev) @@ -4313,8 +5051,15 @@ void si_rlc_fini(struct radeon_device *rdev) } } +#define RLC_CLEAR_STATE_END_MARKER 0x00000001 + int si_rlc_init(struct radeon_device *rdev) { + volatile u32 *dst_ptr; + u32 dws, data, i, j, k, reg_num; + u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index; + u64 reg_list_mc_addr; + const struct cs_section_def *cs_data = si_cs_data; int r; /* save restore block */ @@ -4335,18 +5080,44 @@ int si_rlc_init(struct radeon_device *rdev) } r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM, &rdev->rlc.save_restore_gpu_addr); - radeon_bo_unreserve(rdev->rlc.save_restore_obj); if (r) { + radeon_bo_unreserve(rdev->rlc.save_restore_obj); dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r); si_rlc_fini(rdev); return r; } + if 
(rdev->family == CHIP_VERDE) { + r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void **)&rdev->rlc.sr_ptr); + if (r) { + dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r); + si_rlc_fini(rdev); + return r; + } + /* write the sr buffer */ + dst_ptr = rdev->rlc.sr_ptr; + for (i = 0; i < ARRAY_SIZE(verde_rlc_save_restore_register_list); i++) { + dst_ptr[i] = verde_rlc_save_restore_register_list[i]; + } + radeon_bo_kunmap(rdev->rlc.save_restore_obj); + } + radeon_bo_unreserve(rdev->rlc.save_restore_obj); + /* clear state block */ + reg_list_num = 0; + dws = 0; + for (i = 0; cs_data[i].section != NULL; i++) { + for (j = 0; cs_data[i].section[j].extent != NULL; j++) { + reg_list_num++; + dws += cs_data[i].section[j].reg_count; + } + } + reg_list_blk_index = (3 * reg_list_num + 2); + dws += reg_list_blk_index; + if (rdev->rlc.clear_state_obj == NULL) { - r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, NULL, - &rdev->rlc.clear_state_obj); + r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj); if (r) { dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r); si_rlc_fini(rdev); @@ -4360,24 +5131,113 @@ int si_rlc_init(struct radeon_device *rdev) } r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM, &rdev->rlc.clear_state_gpu_addr); - radeon_bo_unreserve(rdev->rlc.clear_state_obj); if (r) { + + radeon_bo_unreserve(rdev->rlc.clear_state_obj); dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r); si_rlc_fini(rdev); return r; } + r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void **)&rdev->rlc.cs_ptr); + if (r) { + dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r); + si_rlc_fini(rdev); + return r; + } + /* set up the cs buffer */ + dst_ptr = rdev->rlc.cs_ptr; + reg_list_hdr_blk_index = 0; + reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4); + data = upper_32_bits(reg_list_mc_addr); + dst_ptr[reg_list_hdr_blk_index] = data; + 
reg_list_hdr_blk_index++; + for (i = 0; cs_data[i].section != NULL; i++) { + for (j = 0; cs_data[i].section[j].extent != NULL; j++) { + reg_num = cs_data[i].section[j].reg_count; + data = reg_list_mc_addr & 0xffffffff; + dst_ptr[reg_list_hdr_blk_index] = data; + reg_list_hdr_blk_index++; + + data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff; + dst_ptr[reg_list_hdr_blk_index] = data; + reg_list_hdr_blk_index++; + + data = 0x08000000 | (reg_num * 4); + dst_ptr[reg_list_hdr_blk_index] = data; + reg_list_hdr_blk_index++; + + for (k = 0; k < reg_num; k++) { + data = cs_data[i].section[j].extent[k]; + dst_ptr[reg_list_blk_index + k] = data; + } + reg_list_mc_addr += reg_num * 4; + reg_list_blk_index += reg_num; + } + } + dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER; + + radeon_bo_kunmap(rdev->rlc.clear_state_obj); + radeon_bo_unreserve(rdev->rlc.clear_state_obj); return 0; } +static void si_rlc_reset(struct radeon_device *rdev) +{ + u32 tmp = RREG32(GRBM_SOFT_RESET); + + tmp |= SOFT_RESET_RLC; + WREG32(GRBM_SOFT_RESET, tmp); + udelay(50); + tmp &= ~SOFT_RESET_RLC; + WREG32(GRBM_SOFT_RESET, tmp); + udelay(50); +} + static void si_rlc_stop(struct radeon_device *rdev) { WREG32(RLC_CNTL, 0); + + si_enable_gui_idle_interrupt(rdev, false); + + si_wait_for_rlc_serdes(rdev); } static void si_rlc_start(struct radeon_device *rdev) { WREG32(RLC_CNTL, RLC_ENABLE); + + si_enable_gui_idle_interrupt(rdev, true); + + udelay(50); +} + +static bool si_lbpw_supported(struct radeon_device *rdev) +{ + u32 tmp; + + /* Enable LBPW only for DDR3 */ + tmp = RREG32(MC_SEQ_MISC0); + if ((tmp & 0xF0000000) == 0xB0000000) + return true; + return false; +} + +static void si_enable_lbpw(struct radeon_device *rdev, bool enable) +{ + u32 tmp; + + tmp = RREG32(RLC_LB_CNTL); + if (enable) + tmp |= LOAD_BALANCE_ENABLE; + else + tmp &= ~LOAD_BALANCE_ENABLE; + WREG32(RLC_LB_CNTL, tmp); + + if (!enable) { + si_select_se_sh(rdev, 0xffffffff, 0xffffffff); + WREG32(SPI_LB_CU_MASK, 
0x00ff);
+	}
 }
 
 static int si_rlc_resume(struct radeon_device *rdev)
@@ -4390,14 +5250,18 @@ static int si_rlc_resume(struct radeon_device *rdev)
 
 	si_rlc_stop(rdev);
 
+	si_rlc_reset(rdev);
+
+	si_init_pg(rdev);
+
+	si_init_cg(rdev);
+
 	WREG32(RLC_RL_BASE, 0);
 	WREG32(RLC_RL_SIZE, 0);
 	WREG32(RLC_LB_CNTL, 0);
 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
 	WREG32(RLC_LB_CNTR_INIT, 0);
-
-	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
-	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
+	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
 
 	WREG32(RLC_MC_CNTL, 0);
 	WREG32(RLC_UCODE_CNTL, 0);
@@ -4409,6 +5273,8 @@ static int si_rlc_resume(struct radeon_device *rdev)
 	}
 	WREG32(RLC_UCODE_ADDR, 0);
 
+	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
+
 	si_rlc_start(rdev);
 
 	return 0;
@@ -4578,6 +5444,7 @@ int si_irq_set(struct radeon_device *rdev)
 	u32 grbm_int_cntl = 0;
 	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
 	u32 dma_cntl, dma_cntl1;
+	u32 thermal_int = 0;
 
 	if (!rdev->irq.installed) {
 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -4603,6 +5470,9 @@ int si_irq_set(struct radeon_device *rdev)
 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
 
+	thermal_int = RREG32(CG_THERMAL_INT) &
+		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
+
 	/* enable CP interrupts on all rings */
 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
 		DRM_DEBUG("si_irq_set: sw int gfx\n");
@@ -4689,6 +5559,11 @@ int si_irq_set(struct radeon_device *rdev)
 
 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
 
+	if (rdev->irq.dpm_thermal) {
+		DRM_DEBUG("dpm thermal\n");
+		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
+	}
+
 	if (rdev->num_crtc >= 2) {
 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
@@ -4724,6 +5599,8 @@ int si_irq_set(struct radeon_device *rdev)
 		WREG32(DC_HPD6_INT_CONTROL, hpd6);
 	}
 
+	WREG32(CG_THERMAL_INT, thermal_int);
+
 	return 0;
 }
 
@@ -4888,6 +5765,7 @@ int si_irq_process(struct radeon_device *rdev)
 	u32 src_id, src_data, ring_id;
 	u32 ring_index;
 	bool queue_hotplug = false;
+	bool queue_thermal = false;
 
 	if (!rdev->ih.enabled || rdev->shutdown)
 		return IRQ_NONE;
@@ -5158,6 +6036,16 @@ restart_ih:
 			DRM_DEBUG("IH: DMA trap\n");
 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
 			break;
+		case 230: /* thermal low to high */
+			DRM_DEBUG("IH: thermal low to high\n");
+			rdev->pm.dpm.thermal.high_to_low = false;
+			queue_thermal = true;
+			break;
+		case 231: /* thermal high to low */
+			DRM_DEBUG("IH: thermal high to low\n");
+			rdev->pm.dpm.thermal.high_to_low = true;
+			queue_thermal = true;
+			break;
 		case 233: /* GUI IDLE */
 			DRM_DEBUG("IH: GUI idle\n");
 			break;
@@ -5176,6 +6064,8 @@ restart_ih:
 	}
 	if (queue_hotplug)
 		schedule_work(&rdev->hotplug_work);
+	if (queue_thermal && rdev->pm.dpm_enabled)
+		schedule_work(&rdev->pm.dpm.thermal.work);
 	rdev->ih.rptr = rptr;
 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
 	atomic_set(&rdev->ih.lock, 0);
@@ -5270,6 +6160,11 @@ static int si_startup(struct radeon_device *rdev)
 	struct radeon_ring *ring;
 	int r;
 
+	/* enable pcie gen2/3 link */
+	si_pcie_gen3_enable(rdev);
+	/* enable aspm */
+	si_program_aspm(rdev);
+
 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
 	    !rdev->rlc_fw || !rdev->mc_fw) {
 		r = si_init_microcode(rdev);
@@ -5609,6 +6504,8 @@ void si_fini(struct radeon_device *rdev)
 	cayman_dma_fini(rdev);
 	si_irq_fini(rdev);
 	si_rlc_fini(rdev);
+	si_fini_cg(rdev);
+	si_fini_pg(rdev);
 	radeon_wb_fini(rdev);
 	radeon_vm_manager_fini(rdev);
 	radeon_ib_pool_fini(rdev);
@@ -5735,3 +6632,393 @@ int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
 
 	return 0;
 }
+
+/**
+ * si_pcie_gen3_enable - raise the PCIE link to gen2/gen3 speeds
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Returns early when radeon.pcie_gen2=0, on IGP or non-PCIE parts, when
+ * the GPU has no upstream bridge, when the speed cap mask has neither
+ * DRM_PCIE_SPEED_50 nor DRM_PCIE_SPEED_80, or when the link already runs
+ * at the best rate in the mask.  For gen3 the equalization is redone (at
+ * most 10 times) before the target link speed is written to LNKCTL2 and
+ * a speed change is initiated and polled for rdev->usec_timeout rounds.
+ */
+static void si_pcie_gen3_enable(struct radeon_device *rdev)
+{
+	struct pci_dev *root = rdev->pdev->bus->self;
+	int bridge_pos, gpu_pos;
+	u32 speed_cntl, mask, current_data_rate;
+	int ret, i;
+	u16 tmp16;
+
+	/* bus->self is NULL on a root bus (e.g. VM passthrough): no bridge to retrain */
+	if (!root)
+		return;
+
+	if (radeon_pcie_gen2 == 0)
+		return;
+
+	if (rdev->flags & RADEON_IS_IGP)
+		return;
+
+	if (!(rdev->flags & RADEON_IS_PCIE))
+		return;
+
+	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
+	if (ret != 0)
+		return;
+
+	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
+		return;
+
+	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
+		LC_CURRENT_DATA_RATE_SHIFT;
+	if (mask & DRM_PCIE_SPEED_80) {
+		if (current_data_rate == 2) {
+			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
+			return;
+		}
+		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
+	} else if (mask & DRM_PCIE_SPEED_50) {
+		if (current_data_rate == 1) {
+			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
+			return;
+		}
+		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
+	}
+
+	bridge_pos = pci_pcie_cap(root);
+	if (!bridge_pos)
+		return;
+
+	gpu_pos = pci_pcie_cap(rdev->pdev);
+	if (!gpu_pos)
+		return;
+
+	if (mask & DRM_PCIE_SPEED_80) {
+		/* re-try equalization if gen3 is not already enabled */
+		if (current_data_rate != 2) {
+			u16 bridge_cfg, gpu_cfg;
+			u16 bridge_cfg2, gpu_cfg2;
+			u32 max_lw, current_lw, tmp;
+
+			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
+			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+
+			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
+			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+
+			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
+			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+
+			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
+			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
+			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
+
+			if (current_lw < max_lw) {
+				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+				if (tmp & LC_RENEGOTIATION_SUPPORT) {
+					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
+					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
+					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
+					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
+				}
+			}
+
+			for (i = 0; i < 10; i++) {
+				/* check status */
+				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
+					break;
+
+				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
+				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+
+				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
+				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+
+				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
+				tmp |= LC_SET_QUIESCE;
+				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+
+				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
+				tmp |= LC_REDO_EQ;
+				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+
+				mdelay(100);
+
+				/* linkctl */
+				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
+				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
+				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+
+				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
+				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
+				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+
+				/* linkctl2 */
+				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
+				tmp16 &= ~((1 << 4) | (7 << 9));
+				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
+				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
+
+				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
+				tmp16 &= ~((1 << 4) | (7 << 9));
+				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
+				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+
+				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
+				tmp &= ~LC_SET_QUIESCE;
+				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+			}
+		}
+	}
+
+	/* set the link speed */
+	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
+	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
+	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
+
+	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
+	tmp16 &= ~0xf;
+	if (mask & DRM_PCIE_SPEED_80)
+		tmp16 |= 3; /* gen3 */
+	else if (mask & DRM_PCIE_SPEED_50)
+		tmp16 |= 2; /* gen2 */
+	else
+		tmp16 |= 1; /* gen1 */
+	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+
+	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
+	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
+			break;
+		udelay(1);
+	}
+}
+
+/**
+ * si_program_aspm - program the ASPM related PCIE link registers
+ *
+ * @rdev: radeon_device pointer
+ *
+ * No-op on non-PCIE parts.  Sets the LC_L0S_INACTIVITY/LC_L1_INACTIVITY
+ * fields, the PIF PLL power-state fields used with L1 and, when the
+ * upstream bridge reports PCI_EXP_LNKCAP_CLKPM, the LC_ALLOW_PDWN_IN_*
+ * bits and related clock selects.  The disable_* locals are hard-coded
+ * to false in this version.
+ */
+static void si_program_aspm(struct radeon_device *rdev)
+{
+	u32 data, orig;
+	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
+	bool disable_clkreq = false;
+
+	if (!(rdev->flags & RADEON_IS_PCIE))
+		return;
+
+	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
+	data &= ~LC_XMIT_N_FTS_MASK;
+	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
+	if (orig != data)
+		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
+
+	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
+	data |= LC_GO_TO_RECOVERY;
+	if (orig != data)
+		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
+
+	orig = data = RREG32_PCIE(PCIE_P_CNTL);
+	data |= P_IGNORE_EDB_ERR;
+	if (orig != data)
+		WREG32_PCIE(PCIE_P_CNTL, data);
+
+	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
+	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
+	data |= LC_PMI_TO_L1_DIS;
+	if (!disable_l0s)
+		data |= LC_L0S_INACTIVITY(7);
+
+	if (!disable_l1) {
+		data |= LC_L1_INACTIVITY(7);
+		data &= ~LC_PMI_TO_L1_DIS;
+		if (orig != data)
+			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+
+		if (!disable_plloff_in_l1) {
+			bool clk_req_support;
+
+			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
+			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
+			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+			if (orig != data)
+				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
+
+			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
+			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
+			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+			if (orig != data)
+				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
+
+			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
+			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
+			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+			if (orig != data)
+				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
+
+			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
+			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
+			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+			if (orig != data)
+				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
+
+			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
+				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
+				data &= ~PLL_RAMP_UP_TIME_0_MASK;
+				if (orig != data)
+					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
+
+				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
+				data &= ~PLL_RAMP_UP_TIME_1_MASK;
+				if (orig != data)
+					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
+
+				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
+				data &= ~PLL_RAMP_UP_TIME_2_MASK;
+				if (orig != data)
+					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
+
+				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
+				data &= ~PLL_RAMP_UP_TIME_3_MASK;
+				if (orig != data)
+					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
+
+				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
+				data &= ~PLL_RAMP_UP_TIME_0_MASK;
+				if (orig != data)
+					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
+
+				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
+				data &= ~PLL_RAMP_UP_TIME_1_MASK;
+				if (orig != data)
+					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
+
+				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
+				data &= ~PLL_RAMP_UP_TIME_2_MASK;
+				if (orig != data)
+					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
+
+				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
+				data &= ~PLL_RAMP_UP_TIME_3_MASK;
+				if (orig != data)
+					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
+			}
+			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
+			data |= LC_DYN_LANES_PWR_STATE(3);
+			if (orig != data)
+				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
+
+			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
+			data &= ~LS2_EXIT_TIME_MASK;
+			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
+				data |= LS2_EXIT_TIME(5);
+			if (orig != data)
+				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
+
+			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
+			data &= ~LS2_EXIT_TIME_MASK;
+			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
+				data |= LS2_EXIT_TIME(5);
+			if (orig != data)
+				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
+
+			/*
+			 * CLKREQ# support is read from the upstream bridge; on a
+			 * root bus (e.g. VM passthrough) there is none and
+			 * bus->self is NULL, so leave clock request disabled.
+			 */
+			if (!disable_clkreq && rdev->pdev->bus->self) {
+				struct pci_dev *root = rdev->pdev->bus->self;
+				u32 lnkcap;
+
+				clk_req_support = false;
+				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
+				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
+					clk_req_support = true;
+			} else {
+				clk_req_support = false;
+			}
+
+			if (clk_req_support) {
+				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
+				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
+				if (orig != data)
+					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
+
+				orig = data = RREG32(THM_CLK_CNTL);
+				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
+				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
+				if (orig != data)
+					WREG32(THM_CLK_CNTL, data);
+
+				orig = data = RREG32(MISC_CLK_CNTL);
+				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
+				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
+				if (orig != data)
+					WREG32(MISC_CLK_CNTL, data);
+
+				orig = data = RREG32(CG_CLKPIN_CNTL);
+				data &= ~BCLK_AS_XCLK;
+				if (orig != data)
+					WREG32(CG_CLKPIN_CNTL, data);
+
+				orig = data = RREG32(CG_CLKPIN_CNTL_2);
+				data &= ~FORCE_BIF_REFCLK_EN;
+				if (orig != data)
+					WREG32(CG_CLKPIN_CNTL_2, data);
+
+				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
+				data &= ~MPLL_CLKOUT_SEL_MASK;
+				data |= MPLL_CLKOUT_SEL(4);
+				if (orig != data)
+					WREG32(MPLL_BYPASSCLK_SEL, data);
+
+				orig = data = RREG32(SPLL_CNTL_MODE);
+				data &= ~SPLL_REFCLK_SEL_MASK;
+				if (orig != data)
+					WREG32(SPLL_CNTL_MODE, data);
+			}
+		}
+	} else {
+		if (orig != data)
+			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+	}
+
+	orig = data = RREG32_PCIE(PCIE_CNTL2);
+	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
+	if (orig != data)
+		WREG32_PCIE(PCIE_CNTL2, data);
+
+	if (!disable_l0s) {
+		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
+		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
+			data = RREG32_PCIE(PCIE_LC_STATUS1);
+			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
+				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
+				data &= ~LC_L0S_INACTIVITY_MASK;
+				if (orig != data)
+					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+			}
+		}
+	}
+}