summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-05-03 11:44:24 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-05-03 11:44:24 -0700
commit2f34c1231bfc9f2550f934acb268ac7315fb3837 (patch)
treeff8114b3b4ec4723a11b041c6b74c389e9f0eeb9 /drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
parenta3719f34fdb664ffcfaec2160ef20fca7becf2ee (diff)
parent8b03d1ed2c43a2ba5ef3381322ee4515b97381bf (diff)
downloadlinux-2f34c1231bfc9f2550f934acb268ac7315fb3837.tar.gz
linux-2f34c1231bfc9f2550f934acb268ac7315fb3837.tar.bz2
linux-2f34c1231bfc9f2550f934acb268ac7315fb3837.zip
Merge tag 'drm-for-v4.12' of git://people.freedesktop.org/~airlied/linux
Pull drm u pdates from Dave Airlie: "This is the main drm pull request for v4.12. Apart from two fixes pulls, everything should have been in drm-next for at least 2 weeks. The biggest thing in here is AMD released the public headers for their upcoming VEGA GPUs. These as always are quite a sizeable chunk of header files. They've also added initial non-display support for those GPUs, though they aren't available in production yet. Otherwise it's pretty much normal. New bridge drivers: - megachips-stdpxxxx-ge-b850v3-fw LVDS->DP++ - generic LVDS bridge support. Core: - Displayport link train failure reporting to userspace - debugfs interface cleaned up - subsystem TODO in kerneldoc now - Extended fbdev support (flipping and vblank wait) - drm_platform removed - EDP CRC support in helper - HF-VSDB SCDC support in EDID parser - Lots of code cleanups and header extraction - Thunderbolt external GPU awareness - Atomic helper improvements - Documentation improvements panel: - Sitronix and Samsung new panel support amdgpu: - Preliminary vega10 support - Multi-level page table support - GPU sensor support for userspace - PRT support for sparse buffers - SR-IOV improvements - Non-contig VRAM CPU mapping i915: - Atomic modesetting enabled by default on Gen5+ - LSPCON improvements - Atomic state handling for cdclk - GPU reset improvements - In-kernel unit tests - Geminilake improvements and color manager support - Designware i2c fixes - vblank evasion improvements - Hotplug safe connector iterators - GVT scheduler QoS support - GVT Kabylake support nouveau: - Acceleration support for Pascal (GP10x). - Rearchitecture of code handling proprietary signed firmware - Fix GTX 970 with odd MMU configuration - GP10B support - GP107 acceleration support vmwgfx: - Atomic modesetting support for vmwgfx omapdrm: - Support for render nodes - Refactor omapdss code - Fix some probe ordering issues - Fix too dark RGB565 rendering sunxi: - prelim rework for multiple pipes. mali-dp: - Color management support - Plane scaling - Power management improvements imx-drm: - Prefetch Resolve Engine/Gasket on i.MX6QP - Deferred plane disabling - Separate alpha support mediatek: - Mediatek SoC MT2701 support rcar-du: - Gen3 HDMI support msm: - 4k support for newer chips - OPP bindings for gpu - prep work for per-process pagetables vc4: - HDMI audio support - fixes qxl: - minor fixes. dw-hdmi: - PHY improvements - CSC fixes - Amlogic GX SoC support" * tag 'drm-for-v4.12' of git://people.freedesktop.org/~airlied/linux: (1778 commits) drm/nouveau/fb/gf100-: Fix 32 bit wraparound in new ram detection drm/nouveau/secboot/gm20b: fix the error return code in gm20b_secboot_tegra_read_wpr() drm/nouveau/kms: Increase max retries in scanout position queries. drm/nouveau/bios/bitP: check that table is long enough for optional pointers drm/nouveau/fifo/nv40: no ctxsw for pre-nv44 mpeg engine drm: mali-dp: use div_u64 for expensive 64-bit divisions drm/i915: Confirm the request is still active before adding it to the await drm/i915: Avoid busy-spinning on VLV_GLTC_PW_STATUS mmio drm/i915/selftests: Allocate inode/file dynamically drm/i915: Fix system hang with EI UP masked on Haswell drm/i915: checking for NULL instead of IS_ERR() in mock selftests drm/i915: Perform link quality check unconditionally during long pulse drm/i915: Fix use after free in lpe_audio_platdev_destroy() drm/i915: Use the right mapping_gfp_mask for final shmem allocation drm/i915: Make legacy cursor updates more unsynced drm/i915: Apply a cond_resched() to the saturated signaler drm/i915: Park the signaler before sleeping drm: mali-dp: Check the mclk rate and allow up/down scaling drm: mali-dp: Enable image enhancement when scaling drm: mali-dp: Add plane upscaling support ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c60
1 files changed, 43 insertions, 17 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 1f9354541f29..8a8bc2fe6f2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -972,9 +972,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
out:
if (err) {
- printk(KERN_ERR
- "gfx7: Failed to load firmware \"%s\"\n",
- fw_name);
+ pr_err("gfx7: Failed to load firmware \"%s\"\n", fw_name);
release_firmware(adev->gfx.pfp_fw);
adev->gfx.pfp_fw = NULL;
release_firmware(adev->gfx.me_fw);
@@ -1876,6 +1874,11 @@ static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev)
mutex_unlock(&adev->srbm_mutex);
}
+static void gfx_v7_0_config_init(struct amdgpu_device *adev)
+{
+ adev->gfx.config.double_offchip_lds_buf = 1;
+}
+
/**
* gfx_v7_0_gpu_init - setup the 3D engine
*
@@ -1886,7 +1889,8 @@ static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev)
*/
static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
{
- u32 tmp, sh_mem_cfg;
+ u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base;
+ u32 tmp;
int i;
WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));
@@ -1899,6 +1903,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
gfx_v7_0_setup_rb(adev);
gfx_v7_0_get_cu_info(adev);
+ gfx_v7_0_config_init(adev);
/* set HW defaults for 3D engine */
WREG32(mmCP_MEQ_THRESHOLDS,
@@ -1916,15 +1921,32 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
/* where to put LDS, scratch, GPUVM in FSA64 space */
sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, DEFAULT_MTYPE,
+ MTYPE_NC);
+ sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, APE1_MTYPE,
+ MTYPE_UC);
+ sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, PRIVATE_ATC, 0);
+
+ sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
+ SWIZZLE_ENABLE, 1);
+ sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
+ ELEMENT_SIZE, 1);
+ sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
+ INDEX_STRIDE, 3);
mutex_lock(&adev->srbm_mutex);
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < adev->vm_manager.num_ids; i++) {
+ if (i == 0)
+ sh_mem_base = 0;
+ else
+ sh_mem_base = adev->mc.shared_aperture_start >> 48;
cik_srbm_select(adev, 0, 0, 0, i);
/* CP and shaders */
WREG32(mmSH_MEM_CONFIG, sh_mem_cfg);
WREG32(mmSH_MEM_APE1_BASE, 1);
WREG32(mmSH_MEM_APE1_LIMIT, 0);
- WREG32(mmSH_MEM_BASES, 0);
+ WREG32(mmSH_MEM_BASES, sh_mem_base);
+ WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
}
cik_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
@@ -2607,7 +2629,7 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
/* Initialize the ring buffer's read and write pointers */
WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
ring->wptr = 0;
- WREG32(mmCP_RB0_WPTR, ring->wptr);
+ WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
/* set the wb address wether it's enabled or not */
rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
@@ -2636,12 +2658,12 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
return 0;
}
-static u32 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
+static u64 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
{
return ring->adev->wb.wb[ring->rptr_offs];
}
-static u32 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+static u64 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
@@ -2652,11 +2674,11 @@ static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- WREG32(mmCP_RB0_WPTR, ring->wptr);
+ WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
(void)RREG32(mmCP_RB0_WPTR);
}
-static u32 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
+static u64 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
/* XXX check if swapping is necessary on BE */
return ring->adev->wb.wb[ring->wptr_offs];
@@ -2667,8 +2689,8 @@ static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
/* XXX check if swapping is necessary on BE */
- adev->wb.wb[ring->wptr_offs] = ring->wptr;
- WDOORBELL32(ring->doorbell_index, ring->wptr);
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
/**
@@ -3138,7 +3160,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
ring->wptr = 0;
- mqd->queue_state.cp_hqd_pq_wptr = ring->wptr;
+ mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
@@ -4647,17 +4669,19 @@ static int gfx_v7_0_sw_init(void *handle)
int i, r;
/* EOP Event */
- r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
if (r)
return r;
/* Privileged reg */
- r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
+ &adev->gfx.priv_reg_irq);
if (r)
return r;
/* Privileged inst */
- r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
+ &adev->gfx.priv_inst_irq);
if (r)
return r;
@@ -5184,6 +5208,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
.type = AMDGPU_RING_TYPE_GFX,
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = false,
.get_rptr = gfx_v7_0_ring_get_rptr,
.get_wptr = gfx_v7_0_ring_get_wptr_gfx,
.set_wptr = gfx_v7_0_ring_set_wptr_gfx,
@@ -5214,6 +5239,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
.type = AMDGPU_RING_TYPE_COMPUTE,
.align_mask = 0xff,
.nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = false,
.get_rptr = gfx_v7_0_ring_get_rptr,
.get_wptr = gfx_v7_0_ring_get_wptr_compute,
.set_wptr = gfx_v7_0_ring_set_wptr_compute,