diff options
-rw-r--r-- | drivers/gpu/drm/radeon/Makefile | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/evergreen.c | 51 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/evergreen_blit_kms.c | 776 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/evergreen_blit_shaders.c | 359 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/evergreen_blit_shaders.h | 35 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/evergreend.h | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/r600_blit_kms.c | 26 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon.h | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_asic.c | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_asic.h | 8 |
10 files changed, 1221 insertions, 55 deletions
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index aebe00875041..6cae4f2028d2 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -65,7 +65,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \ r200.o radeon_legacy_tv.o r600_cs.o r600_blit.o r600_blit_shaders.o \ r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \ - evergreen.o evergreen_cs.o + evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o radeon-$(CONFIG_COMPAT) += radeon_ioc32.o radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 4c82cc830271..aee61ae24402 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2062,26 +2062,13 @@ static int evergreen_startup(struct radeon_device *rdev) return r; } evergreen_gpu_init(rdev); -#if 0 - if (!rdev->r600_blit.shader_obj) { - r = r600_blit_init(rdev); - if (r) { - DRM_ERROR("radeon: failed blitter (%d).\n", r); - return r; - } - } - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); - if (unlikely(r != 0)) - return r; - r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, - &rdev->r600_blit.shader_gpu_addr); - radeon_bo_unreserve(rdev->r600_blit.shader_obj); + r = evergreen_blit_init(rdev); if (r) { - DRM_ERROR("failed to pin blit object %d\n", r); - return r; + evergreen_blit_fini(rdev); + rdev->asic->copy = NULL; + dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); } -#endif /* allocate wb buffer */ r = radeon_wb_init(rdev); @@ -2139,23 +2126,43 @@ int evergreen_resume(struct radeon_device *rdev) int evergreen_suspend(struct radeon_device *rdev) { -#if 0 int r; -#endif + /* FIXME: we should wait for ring to be empty */ r700_cp_stop(rdev); rdev->cp.ready = false; evergreen_irq_suspend(rdev); radeon_wb_disable(rdev); evergreen_pcie_gart_disable(rdev); -#if 0 + /* unpin shaders bo */ r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); if (likely(r == 0)) { radeon_bo_unpin(rdev->r600_blit.shader_obj); radeon_bo_unreserve(rdev->r600_blit.shader_obj); } -#endif + + return 0; +} + +int evergreen_copy_blit(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_pages, struct radeon_fence *fence) +{ + int r; + + mutex_lock(&rdev->r600_blit.mutex); + rdev->r600_blit.vb_ib = NULL; + r = evergreen_blit_prepare_copy(rdev, num_pages * RADEON_GPU_PAGE_SIZE); + if (r) { + if (rdev->r600_blit.vb_ib) + radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); + mutex_unlock(&rdev->r600_blit.mutex); + return r; + } + evergreen_kms_blit_copy(rdev, src_offset, dst_offset, num_pages * RADEON_GPU_PAGE_SIZE); + evergreen_blit_done_copy(rdev, fence); + mutex_unlock(&rdev->r600_blit.mutex); return 0; } @@ -2286,7 +2293,7 @@ int evergreen_init(struct radeon_device *rdev) void evergreen_fini(struct radeon_device *rdev) { - /*r600_blit_fini(rdev);*/ + evergreen_blit_fini(rdev); r700_cp_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c new file mode 100644 index 000000000000..ce1ae4a2aa54 --- /dev/null +++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c @@ -0,0 +1,776 @@ +/* + * Copyright 2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Alex Deucher <alexander.deucher@amd.com> + */ + +#include "drmP.h" +#include "drm.h" +#include "radeon_drm.h" +#include "radeon.h" + +#include "evergreend.h" +#include "evergreen_blit_shaders.h" + +#define DI_PT_RECTLIST 0x11 +#define DI_INDEX_SIZE_16_BIT 0x0 +#define DI_SRC_SEL_AUTO_INDEX 0x2 + +#define FMT_8 0x1 +#define FMT_5_6_5 0x8 +#define FMT_8_8_8_8 0x1a +#define COLOR_8 0x1 +#define COLOR_5_6_5 0x8 +#define COLOR_8_8_8_8 0x1a + +/* emits 17 */ +static void +set_render_target(struct radeon_device *rdev, int format, + int w, int h, u64 gpu_addr) +{ + u32 cb_color_info; + int pitch, slice; + + h = ALIGN(h, 8); + if (h < 8) + h = 8; + + cb_color_info = ((format << 2) | (1 << 24)); + pitch = (w / 8) - 1; + slice = ((w * h) / 64) - 1; + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 15)); + radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_START) >> 2); + radeon_ring_write(rdev, gpu_addr >> 8); + radeon_ring_write(rdev, pitch); + radeon_ring_write(rdev, slice); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, cb_color_info); + radeon_ring_write(rdev, (1 << 4)); + radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16)); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); +} + +/* emits 5dw */ +static void +cp_set_surface_sync(struct radeon_device *rdev, + u32 sync_type, u32 size, + u64 mc_addr) +{ + u32 cp_coher_size; + + if (size == 0xffffffff) + cp_coher_size = 0xffffffff; + else + cp_coher_size = ((size + 255) >> 8); + + radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3)); + radeon_ring_write(rdev, sync_type); + radeon_ring_write(rdev, cp_coher_size); + radeon_ring_write(rdev, mc_addr >> 8); + radeon_ring_write(rdev, 10); /* poll interval */ +} + +/* emits 11dw + 1 surface sync = 16dw */ +static void +set_shaders(struct radeon_device *rdev) +{ + u64 gpu_addr; + + /* VS */ + gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset; + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 3)); + radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_START) >> 2); + radeon_ring_write(rdev, gpu_addr >> 8); + radeon_ring_write(rdev, 2); + radeon_ring_write(rdev, 0); + + /* PS */ + gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset; + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 4)); + radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_START) >> 2); + radeon_ring_write(rdev, gpu_addr >> 8); + radeon_ring_write(rdev, 1); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 2); + + gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset; + cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr); +} + +/* emits 10 + 1 sync (5) = 15 */ +static void +set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr) +{ + u32 sq_vtx_constant_word2, sq_vtx_constant_word3; + + /* high addr, stride */ + sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8)); + /* xyzw swizzles */ + sq_vtx_constant_word3 = (0 << 3) | (1 << 6) | (2 << 9) | (3 << 12); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8)); + radeon_ring_write(rdev, 0x580); + radeon_ring_write(rdev, gpu_addr & 0xffffffff); + radeon_ring_write(rdev, 48 - 1); /* size */ + radeon_ring_write(rdev, sq_vtx_constant_word2); + radeon_ring_write(rdev, sq_vtx_constant_word3); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30); + + if (rdev->family == CHIP_CEDAR) + cp_set_surface_sync(rdev, + PACKET3_TC_ACTION_ENA, 48, gpu_addr); + else + cp_set_surface_sync(rdev, + PACKET3_VC_ACTION_ENA, 48, gpu_addr); + +} + +/* emits 10 */ +static void +set_tex_resource(struct radeon_device *rdev, + int format, int w, int h, int pitch, + u64 gpu_addr) +{ + u32 sq_tex_resource_word0, sq_tex_resource_word1; + u32 sq_tex_resource_word4, sq_tex_resource_word7; + + if (h < 1) + h = 1; + + sq_tex_resource_word0 = (1 << 0); /* 2D */ + sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) | + ((w - 1) << 18)); + sq_tex_resource_word1 = ((h - 1) << 0); + /* xyzw swizzles */ + sq_tex_resource_word4 = (0 << 16) | (1 << 19) | (2 << 22) | (3 << 25); + + sq_tex_resource_word7 = format | (SQ_TEX_VTX_VALID_TEXTURE << 30); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8)); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, sq_tex_resource_word0); + radeon_ring_write(rdev, sq_tex_resource_word1); + radeon_ring_write(rdev, gpu_addr >> 8); + radeon_ring_write(rdev, gpu_addr >> 8); + radeon_ring_write(rdev, sq_tex_resource_word4); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, sq_tex_resource_word7); +} + +/* emits 12 */ +static void +set_scissors(struct radeon_device *rdev, int x1, int y1, + int x2, int y2) +{ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); + radeon_ring_write(rdev, (x1 << 0) | (y1 << 16)); + radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); + radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31)); + radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); + + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2); + radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31)); + radeon_ring_write(rdev, (x2 << 0) | (y2 << 16)); +} + +/* emits 10 */ +static void +draw_auto(struct radeon_device *rdev) +{ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_START) >> 2); + radeon_ring_write(rdev, DI_PT_RECTLIST); + + radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0)); + radeon_ring_write(rdev, DI_INDEX_SIZE_16_BIT); + + radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0)); + radeon_ring_write(rdev, 1); + + radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1)); + radeon_ring_write(rdev, 3); + radeon_ring_write(rdev, DI_SRC_SEL_AUTO_INDEX); + +} + +/* emits 20 */ +static void +set_default_state(struct radeon_device *rdev) +{ + u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3; + u32 sq_thread_resource_mgmt, sq_thread_resource_mgmt_2; + u32 sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3; + int num_ps_gprs, num_vs_gprs, num_temp_gprs; + int num_gs_gprs, num_es_gprs, num_hs_gprs, num_ls_gprs; + int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads; + int num_hs_threads, num_ls_threads; + int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries; + int num_hs_stack_entries, num_ls_stack_entries; + u64 gpu_addr; + int dwords; + + switch (rdev->family) { + case CHIP_CEDAR: + default: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 96; + num_vs_threads = 16; + num_gs_threads = 16; + num_es_threads = 16; + num_hs_threads = 16; + num_ls_threads = 16; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_REDWOOD: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_JUNIPER: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 85; + num_vs_stack_entries = 85; + num_gs_stack_entries = 85; + num_es_stack_entries = 85; + num_hs_stack_entries = 85; + num_ls_stack_entries = 85; + break; + case CHIP_CYPRESS: + case CHIP_HEMLOCK: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 85; + num_vs_stack_entries = 85; + num_gs_stack_entries = 85; + num_es_stack_entries = 85; + num_hs_stack_entries = 85; + num_ls_stack_entries = 85; + break; + } + + if (rdev->family == CHIP_CEDAR) + sq_config = 0; + else + sq_config = VC_ENABLE; + + sq_config |= (EXPORT_SRC_C | + CS_PRIO(0) | + LS_PRIO(0) | + HS_PRIO(0) | + PS_PRIO(0) | + VS_PRIO(1) | + GS_PRIO(2) | + ES_PRIO(3)); + + sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) | + NUM_VS_GPRS(num_vs_gprs) | + NUM_CLAUSE_TEMP_GPRS(num_temp_gprs)); + sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) | + NUM_ES_GPRS(num_es_gprs)); + sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) | + NUM_LS_GPRS(num_ls_gprs)); + sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) | + NUM_VS_THREADS(num_vs_threads) | + NUM_GS_THREADS(num_gs_threads) | + NUM_ES_THREADS(num_es_threads)); + sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) | + NUM_LS_THREADS(num_ls_threads)); + sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) | + NUM_VS_STACK_ENTRIES(num_vs_stack_entries)); + sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) | + NUM_ES_STACK_ENTRIES(num_es_stack_entries)); + sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) | + NUM_LS_STACK_ENTRIES(num_ls_stack_entries)); + + /* emit an IB pointing at default state */ + dwords = ALIGN(rdev->r600_blit.state_len, 0x10); + gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset; + radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + radeon_ring_write(rdev, gpu_addr & 0xFFFFFFFC); + radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF); + radeon_ring_write(rdev, dwords); + + /* disable dyn gprs */ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2); + radeon_ring_write(rdev, 0); + + /* SQ config */ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11)); + radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2); + radeon_ring_write(rdev, sq_config); + radeon_ring_write(rdev, sq_gpr_resource_mgmt_1); + radeon_ring_write(rdev, sq_gpr_resource_mgmt_2); + radeon_ring_write(rdev, sq_gpr_resource_mgmt_3); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, sq_thread_resource_mgmt); + radeon_ring_write(rdev, sq_thread_resource_mgmt_2); + radeon_ring_write(rdev, sq_stack_resource_mgmt_1); + radeon_ring_write(rdev, sq_stack_resource_mgmt_2); + radeon_ring_write(rdev, sq_stack_resource_mgmt_3); +} + +static inline uint32_t i2f(uint32_t input) +{ + u32 result, i, exponent, fraction; + + if ((input & 0x3fff) == 0) + result = 0; /* 0 is a special case */ + else { + exponent = 140; /* exponent biased by 127; */ + fraction = (input & 0x3fff) << 10; /* cheat and only + handle numbers below 2^^15 */ + for (i = 0; i < 14; i++) { + if (fraction & 0x800000) + break; + else { + fraction = fraction << 1; /* keep + shifting left until top bit = 1 */ + exponent = exponent - 1; + } + } + result = exponent << 23 | (fraction & 0x7fffff); /* mask + off top bit; assumed 1 */ + } + return result; +} + +int evergreen_blit_init(struct radeon_device *rdev) +{ + u32 obj_size; + int r, dwords; + void *ptr; + u32 packet2s[16]; + int num_packet2s = 0; + + /* pin copy shader into vram if already initialized */ + if (rdev->r600_blit.shader_obj) + goto done; + + mutex_init(&rdev->r600_blit.mutex); + rdev->r600_blit.state_offset = 0; + + rdev->r600_blit.state_len = evergreen_default_size; + + dwords = rdev->r600_blit.state_len; + while (dwords & 0xf) { + packet2s[num_packet2s++] = PACKET2(0); + dwords++; + } + + obj_size = dwords * 4; + obj_size = ALIGN(obj_size, 256); + + rdev->r600_blit.vs_offset = obj_size; + obj_size += evergreen_vs_size * 4; + obj_size = ALIGN(obj_size, 256); + + rdev->r600_blit.ps_offset = obj_size; + obj_size += evergreen_ps_size * 4; + obj_size = ALIGN(obj_size, 256); + + r = radeon_bo_create(rdev, NULL, obj_size, true, RADEON_GEM_DOMAIN_VRAM, + &rdev->r600_blit.shader_obj); + if (r) { + DRM_ERROR("evergreen failed to allocate shader\n"); + return r; + } + + DRM_DEBUG("evergreen blit allocated bo %08x vs %08x ps %08x\n", + obj_size, + rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset); + + r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); + if (unlikely(r != 0)) + return r; + r = radeon_bo_kmap(rdev->r600_blit.shader_obj, &ptr); + if (r) { + DRM_ERROR("failed to map blit object %d\n", r); + return r; + } + + memcpy_toio(ptr + rdev->r600_blit.state_offset, + evergreen_default_state, rdev->r600_blit.state_len * 4); + + if (num_packet2s) + memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4), + packet2s, num_packet2s * 4); + memcpy(ptr + rdev->r600_blit.vs_offset, evergreen_vs, evergreen_vs_size * 4); + memcpy(ptr + rdev->r600_blit.ps_offset, evergreen_ps, evergreen_ps_size * 4); + radeon_bo_kunmap(rdev->r600_blit.shader_obj); + radeon_bo_unreserve(rdev->r600_blit.shader_obj); + +done: + r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); + if (unlikely(r != 0)) + return r; + r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, + &rdev->r600_blit.shader_gpu_addr); + radeon_bo_unreserve(rdev->r600_blit.shader_obj); + if (r) { + dev_err(rdev->dev, "(%d) pin blit object failed\n", r); + return r; + } + return 0; +} + +void evergreen_blit_fini(struct radeon_device *rdev) +{ + int r; + + if (rdev->r600_blit.shader_obj == NULL) + return; + /* If we can't reserve the bo, unref should be enough to destroy + * it when it becomes idle. + */ + r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); + if (!r) { + radeon_bo_unpin(rdev->r600_blit.shader_obj); + radeon_bo_unreserve(rdev->r600_blit.shader_obj); + } + radeon_bo_unref(&rdev->r600_blit.shader_obj); +} + +static int evergreen_vb_ib_get(struct radeon_device *rdev) +{ + int r; + r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib); + if (r) { + DRM_ERROR("failed to get IB for vertex buffer\n"); + return r; + } + + rdev->r600_blit.vb_total = 64*1024; + rdev->r600_blit.vb_used = 0; + return 0; +} + +static void evergreen_vb_ib_put(struct radeon_device *rdev) +{ + radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence); + radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); +} + +int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes) +{ + int r; + int ring_size, line_size; + int max_size; + /* loops of emits + fence emit possible */ + int dwords_per_loop = 74, num_loops; + + r = evergreen_vb_ib_get(rdev); + if (r) + return r; + + /* 8 bpp vs 32 bpp for xfer unit */ + if (size_bytes & 3) + line_size = 8192; + else + line_size = 8192 * 4; + + max_size = 8192 * line_size; + + /* major loops cover the max size transfer */ + num_loops = ((size_bytes + max_size) / max_size); + /* minor loops cover the extra non aligned bits */ + num_loops += ((size_bytes % line_size) ? 1 : 0); + /* calculate number of loops correctly */ + ring_size = num_loops * dwords_per_loop; + /* set default + shaders */ + ring_size += 36; /* shaders + def state */ + ring_size += 10; /* fence emit for VB IB */ + ring_size += 5; /* done copy */ + ring_size += 10; /* fence emit for done copy */ + r = radeon_ring_lock(rdev, ring_size); + if (r) + return r; + + set_default_state(rdev); /* 20 */ + set_shaders(rdev); /* 16 */ + return 0; +} + +void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence) +{ + int r; + + if (rdev->r600_blit.vb_ib) + evergreen_vb_ib_put(rdev); + + if (fence) + r = radeon_fence_emit(rdev, fence); + + radeon_ring_unlock_commit(rdev); +} + +void evergreen_kms_blit_copy(struct radeon_device *rdev, + u64 src_gpu_addr, u64 dst_gpu_addr, + int size_bytes) +{ + int max_bytes; + u64 vb_gpu_addr; + u32 *vb; + + DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr, + size_bytes, rdev->r600_blit.vb_used); + vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used); + if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) { + max_bytes = 8192; + + while (size_bytes) { + int cur_size = size_bytes; + int src_x = src_gpu_addr & 255; + int dst_x = dst_gpu_addr & 255; + int h = 1; + src_gpu_addr = src_gpu_addr & ~255; + dst_gpu_addr = dst_gpu_addr & ~255; + + if (!src_x && !dst_x) { + h = (cur_size / max_bytes); + if (h > 8192) + h = 8192; + if (h == 0) + h = 1; + else + cur_size = max_bytes; + } else { + if (cur_size > max_bytes) + cur_size = max_bytes; + if (cur_size > (max_bytes - dst_x)) + cur_size = (max_bytes - dst_x); + if (cur_size > (max_bytes - src_x)) + cur_size = (max_bytes - src_x); + } + + if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { + WARN_ON(1); + } + + vb[0] = i2f(dst_x); + vb[1] = 0; + vb[2] = i2f(src_x); + vb[3] = 0; + + vb[4] = i2f(dst_x); + vb[5] = i2f(h); + vb[6] = i2f(src_x); + vb[7] = i2f(h); + + vb[8] = i2f(dst_x + cur_size); + vb[9] = i2f(h); + vb[10] = i2f(src_x + cur_size); + vb[11] = i2f(h); + + /* src 10 */ + set_tex_resource(rdev, FMT_8, + src_x + cur_size, h, src_x + cur_size, + src_gpu_addr); + + /* 5 */ + cp_set_surface_sync(rdev, + PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); + + + /* dst 17 */ + set_render_target(rdev, COLOR_8, + dst_x + cur_size, h, + dst_gpu_addr); + + /* scissors 12 */ + set_scissors(rdev, dst_x, 0, dst_x + cur_size, h); + + /* 15 */ + vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; + set_vtx_resource(rdev, vb_gpu_addr); + + /* draw 10 */ + draw_auto(rdev); + + /* 5 */ + cp_set_surface_sync(rdev, + PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, + cur_size * h, dst_gpu_addr); + + vb += 12; + rdev->r600_blit.vb_used += 12 * 4; + + src_gpu_addr += cur_size * h; + dst_gpu_addr += cur_size * h; + size_bytes -= cur_size * h; + } + } else { + max_bytes = 8192 * 4; + + while (size_bytes) { + int cur_size = size_bytes; + int src_x = (src_gpu_addr & 255); + int dst_x = (dst_gpu_addr & 255); + int h = 1; + src_gpu_addr = src_gpu_addr & ~255; + dst_gpu_addr = dst_gpu_addr & ~255; + + if (!src_x && !dst_x) { + h = (cur_size / max_bytes); + if (h > 8192) + h = 8192; + if (h == 0) + h = 1; + else + cur_size = max_bytes; + } else { + if (cur_size > max_bytes) + cur_size = max_bytes; + if (cur_size > (max_bytes - dst_x)) + cur_size = (max_bytes - dst_x); + if (cur_size > (max_bytes - src_x)) + cur_size = (max_bytes - src_x); + } + + if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { + WARN_ON(1); + } + + vb[0] = i2f(dst_x / 4); + vb[1] = 0; + vb[2] = i2f(src_x / 4); + vb[3] = 0; + + vb[4] = i2f(dst_x / 4); + vb[5] = i2f(h); + vb[6] = i2f(src_x / 4); + vb[7] = i2f(h); + + vb[8] = i2f((dst_x + cur_size) / 4); + vb[9] = i2f(h); + vb[10] = i2f((src_x + cur_size) / 4); + vb[11] = i2f(h); + + /* src 10 */ + set_tex_resource(rdev, FMT_8_8_8_8, + (src_x + cur_size) / 4, + h, (src_x + cur_size) / 4, + src_gpu_addr); + /* 5 */ + cp_set_surface_sync(rdev, + PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); + + /* dst 17 */ + set_render_target(rdev, COLOR_8_8_8_8, + (dst_x + cur_size) / 4, h, + dst_gpu_addr); + + /* scissors 12 */ + set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h); + + /* Vertex buffer setup 15 */ + vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; + set_vtx_resource(rdev, vb_gpu_addr); + + /* draw 10 */ + draw_auto(rdev); + + /* 5 */ + cp_set_surface_sync(rdev, + PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, + cur_size * h, dst_gpu_addr); + + /* 74 ring dwords per loop */ + vb += 12; + rdev->r600_blit.vb_used += 12 * 4; + + src_gpu_addr += cur_size * h; + dst_gpu_addr += cur_size * h; + size_bytes -= cur_size * h; + } + } +} + diff --git a/drivers/gpu/drm/radeon/evergreen_blit_shaders.c b/drivers/gpu/drm/radeon/evergreen_blit_shaders.c new file mode 100644 index 000000000000..5d5045027b46 --- /dev/null +++ b/drivers/gpu/drm/radeon/evergreen_blit_shaders.c @@ -0,0 +1,359 @@ +/* + * Copyright 2010 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Alex Deucher <alexander.deucher@amd.com> + */ + +#include <linux/types.h> +#include <linux/kernel.h> + +/* + * evergreen cards need to use the 3D engine to blit data which requires + * quite a bit of hw state setup. Rather than pull the whole 3D driver + * (which normally generates the 3D state) into the DRM, we opt to use + * statically generated state tables. The regsiter state and shaders + * were hand generated to support blitting functionality. See the 3D + * driver or documentation for descriptions of the registers and + * shader instructions. + */ + +const u32 evergreen_default_state[] = +{ + 0xc0012800, /* CONTEXT_CONTROL */ + 0x80000000, + 0x80000000, + + 0xc0016900, + 0x0000023b, + 0x00000000, /* SQ_LDS_ALLOC_PS */ + + 0xc0066900, + 0x00000240, + 0x00000000, /* SQ_ESGS_RING_ITEMSIZE */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + + 0xc0046900, + 0x00000247, + 0x00000000, /* SQ_GS_VERT_ITEMSIZE */ + 0x00000000, + 0x00000000, + 0x00000000, + + 0xc0026f00, + 0x00000000, + 0x00000000, /* SQ_VTX_BASE_VTX_LOC */ + 0x00000000, + + 0xc0026900, + 0x00000010, + 0x00000000, /* DB_Z_INFO */ + 0x00000000, /* DB_STENCIL_INFO */ + + + 0xc0016900, + 0x00000200, + 0x00000000, /* DB_DEPTH_CONTROL */ + + 0xc0066900, + 0x00000000, + 0x00000060, /* DB_RENDER_CONTROL */ + 0x00000000, /* DB_COUNT_CONTROL */ + 0x00000000, /* DB_DEPTH_VIEW */ + 0x0000002a, /* DB_RENDER_OVERRIDE */ + 0x00000000, /* DB_RENDER_OVERRIDE2 */ + 0x00000000, /* DB_HTILE_DATA_BASE */ + + 0xc0026900, + 0x0000000a, + 0x00000000, /* DB_STENCIL_CLEAR */ + 0x00000000, /* DB_DEPTH_CLEAR */ + + 0xc0016900, + 0x000002dc, + 0x0000aa00, /* DB_ALPHA_TO_MASK */ + + 0xc0016900, + 0x00000080, + 0x00000000, /* PA_SC_WINDOW_OFFSET */ + + 0xc00d6900, + 0x00000083, + 0x0000ffff, /* PA_SC_CLIPRECT_RULE */ + 0x00000000, /* PA_SC_CLIPRECT_0_TL */ + 0x20002000, /* PA_SC_CLIPRECT_0_BR */ + 0x00000000, + 0x20002000, + 0x00000000, + 0x20002000, + 0x00000000, + 0x20002000, + 0xaaaaaaaa, /* PA_SC_EDGERULE */ + 0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */ + 0x0000000f, /* CB_TARGET_MASK */ + 0x0000000f, /* CB_SHADER_MASK */ + + 0xc0226900, + 0x00000094, + 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */ + 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */ + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x00000000, /* PA_SC_VPORT_ZMIN_0 */ + 0x3f800000, /* PA_SC_VPORT_ZMAX_0 */ + + 0xc0016900, + 0x000000d4, + 0x00000000, /* SX_MISC */ + + 0xc0026900, + 0x00000292, + 0x00000000, /* PA_SC_MODE_CNTL_0 */ + 0x00000000, /* PA_SC_MODE_CNTL_1 */ + + 0xc0106900, + 0x00000300, + 0x00000000, /* PA_SC_LINE_CNTL */ + 0x00000000, /* PA_SC_AA_CONFIG */ + 0x00000005, /* PA_SU_VTX_CNTL */ + 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */ + 0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */ + 0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */ + 0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */ + 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_0 */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_7 */ + 0xffffffff, /* PA_SC_AA_MASK */ + + 0xc00d6900, + 0x00000202, + 0x00cc0010, /* CB_COLOR_CONTROL */ + 0x00000210, /* DB_SHADER_CONTROL */ + 0x00010000, /* PA_CL_CLIP_CNTL */ + 0x00000004, /* PA_SU_SC_MODE_CNTL */ + 0x00000100, /* PA_CL_VTE_CNTL */ + 0x00000000, /* PA_CL_VS_OUT_CNTL */ + 0x00000000, /* PA_CL_NANINF_CNTL */ + 0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */ + 0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */ + 0x00000000, /* PA_SU_PRIM_FILTER_CNTL */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* SQ_DYN_GPR_RESOURCE_LIMIT_1 */ + + 0xc0066900, + 0x000002de, + 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + + 0xc0016900, + 0x00000229, + 0x00000000, /* SQ_PGM_START_FS */ + + 0xc0016900, + 0x0000022a, + 0x00000000, /* SQ_PGM_RESOURCES_FS */ + + 0xc0096900, + 0x00000100, + 0x00ffffff, /* VGT_MAX_VTX_INDX */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* SX_ALPHA_TEST_CONTROL */ + 0x00000000, /* CB_BLEND_RED */ + 0x00000000, /* CB_BLEND_GREEN */ + 0x00000000, /* CB_BLEND_BLUE */ + 0x00000000, /* CB_BLEND_ALPHA */ + + 0xc0026900, + 0x000002a8, + 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */ + 0x00000000, /* */ + + 0xc0026900, + 0x000002ad, + 0x00000000, /* VGT_REUSE_OFF */ + 0x00000000, /* */ + + 0xc0116900, + 0x00000280, + 0x00000000, /* PA_SU_POINT_SIZE */ + 0x00000000, /* PA_SU_POINT_MINMAX */ + 0x00000008, /* PA_SU_LINE_CNTL */ + 0x00000000, /* PA_SC_LINE_STIPPLE */ + 0x00000000, /* VGT_OUTPUT_PATH_CNTL */ + 0x00000000, /* VGT_HOS_CNTL */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* VGT_GS_MODE */ + + 0xc0016900, + 0x000002a1, + 0x00000000, /* VGT_PRIMITIVEID_EN */ + + 0xc0016900, + 0x000002a5, + 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */ + + 0xc0016900, + 0x000002d5, + 0x00000000, /* VGT_SHADER_STAGES_EN */ + + 0xc0026900, + 0x000002e5, + 0x00000000, /* VGT_STRMOUT_CONFIG */ + 0x00000000, /* */ + + 0xc0016900, + 0x000001e0, + 0x00000000, /* CB_BLEND0_CONTROL */ + + 0xc0016900, + 0x000001b1, + 0x00000000, /* SPI_VS_OUT_CONFIG */ + + 0xc0016900, + 0x00000187, + 0x00000000, /* SPI_VS_OUT_ID_0 */ + + 0xc0016900, + 0x00000191, + 0x00000100, /* SPI_PS_INPUT_CNTL_0 */ + + 0xc00b6900, + 0x000001b3, + 0x20000001, /* SPI_PS_IN_CONTROL_0 */ + 0x00000000, /* SPI_PS_IN_CONTROL_1 */ + 0x00000000, /* SPI_INTERP_CONTROL_0 */ + 0x00000000, /* SPI_INPUT_Z */ + 0x00000000, /* SPI_FOG_CNTL */ + 0x00100000, /* SPI_BARYC_CNTL */ + 0x00000000, /* SPI_PS_IN_CONTROL_2 */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + 0x00000000, /* */ + + 0xc0036e00, /* SET_SAMPLER */ + 0x00000000, + 0x00000012, + 0x00000000, + 0x00000000, +}; + +const u32 evergreen_vs[] = +{ + 0x00000004, + 0x80800400, + 0x0000a03c, + 0x95000688, + 0x00004000, + 0x15200688, + 0x00000000, + 0x00000000, + 0x3c000000, + 0x67961001, + 0x00080000, + 0x00000000, + 0x1c000000, + 0x67961000, + 0x00000008, + 0x00000000, +}; + +const u32 evergreen_ps[] = +{ + 0x00000003, + 0xa00c0000, + 0x00000008, + 0x80400000, + 0x00000000, + 0x95200688, + 0x00380400, + 0x00146b10, + 0x00380000, + 0x20146b10, + 0x00380400, + 0x40146b00, + 0x80380000, + 0x60146b00, + 0x00000000, + 0x00000000, + 0x00000010, + 0x000d1000, + 0xb0800000, + 0x00000000, +}; + +const u32 evergreen_ps_size = ARRAY_SIZE(evergreen_ps); +const u32 evergreen_vs_size = ARRAY_SIZE(evergreen_vs); +const u32 evergreen_default_size = ARRAY_SIZE(evergreen_default_state); diff --git a/drivers/gpu/drm/radeon/evergreen_blit_shaders.h b/drivers/gpu/drm/radeon/evergreen_blit_shaders.h new file mode 100644 index 000000000000..bb8d6c751595 --- /dev/null +++ b/drivers/gpu/drm/radeon/evergreen_blit_shaders.h @@ -0,0 +1,35 @@ +/* + * Copyright 2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef EVERGREEN_BLIT_SHADERS_H +#define EVERGREEN_BLIT_SHADERS_H + +extern const u32 evergreen_ps[]; +extern const u32 evergreen_vs[]; +extern const u32 evergreen_default_state[]; + +extern const u32 evergreen_ps_size, evergreen_vs_size; +extern const u32 evergreen_default_size; + +#endif diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index 9b7532dd30f7..319aa9752d40 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -802,6 +802,11 @@ #define SQ_ALU_CONST_CACHE_LS_14 0x28f78 #define SQ_ALU_CONST_CACHE_LS_15 0x28f7c +#define PA_SC_SCREEN_SCISSOR_TL 0x28030 +#define PA_SC_GENERIC_SCISSOR_TL 0x28240 +#define PA_SC_WINDOW_SCISSOR_TL 0x28204 +#define VGT_PRIMITIVE_TYPE 0x8958 + #define DB_DEPTH_CONTROL 0x28800 #define DB_DEPTH_VIEW 0x28008 #define DB_HTILE_DATA_BASE 0x28014 diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index 90394df63009..2a4747d9747c 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -565,7 +565,7 @@ void r600_blit_fini(struct radeon_device *rdev) radeon_bo_unref(&rdev->r600_blit.shader_obj); } -int r600_vb_ib_get(struct radeon_device *rdev) +static int r600_vb_ib_get(struct radeon_device *rdev) { int r; r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib); @@ -579,7 +579,7 @@ int r600_vb_ib_get(struct radeon_device *rdev) return 0; } -void r600_vb_ib_put(struct radeon_device *rdev) +static void r600_vb_ib_put(struct radeon_device *rdev) { radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence); radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); @@ -683,17 +683,6 @@ void r600_kms_blit_copy(struct radeon_device *rdev, if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { WARN_ON(1); - -#if 0 - r600_vb_ib_put(rdev); - - r600_nomm_put_vb(dev); - r600_nomm_get_vb(dev); - if (!dev_priv->blit_vb) - return; - set_shaders(dev); - vb = r600_nomm_get_vb_ptr(dev); -#endif } vb[0] = i2f(dst_x); @@ -778,17 +767,6 @@ void r600_kms_blit_copy(struct radeon_device *rdev, if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { WARN_ON(1); } -#if 0 - if ((rdev->blit_vb->used + 48) > rdev->blit_vb->total) { - r600_nomm_put_vb(dev); - r600_nomm_get_vb(dev); - if (!rdev->blit_vb) - return; - - set_shaders(dev); - vb = r600_nomm_get_vb_ptr(dev); - } -#endif vb[0] = i2f(dst_x / 4); vb[1] = 0; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8e5cb2c4fa7e..2edd52ece226 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1130,6 +1130,12 @@ void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence) void r600_kms_blit_copy(struct radeon_device *rdev, u64 src_gpu_addr, u64 dst_gpu_addr, int size_bytes); +/* evergreen blit */ +int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes); +void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence); +void evergreen_kms_blit_copy(struct radeon_device *rdev, + u64 src_gpu_addr, u64 dst_gpu_addr, + int size_bytes); static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg) { @@ -1471,6 +1477,8 @@ extern void r700_cp_stop(struct radeon_device *rdev); extern void r700_cp_fini(struct radeon_device *rdev); extern void evergreen_disable_interrupt_state(struct radeon_device *rdev); extern int evergreen_irq_set(struct radeon_device *rdev); +extern int evergreen_blit_init(struct radeon_device *rdev); +extern void evergreen_blit_fini(struct radeon_device *rdev); /* radeon_acpi.c */ #if defined(CONFIG_ACPI) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 25e1dd197791..64fb89ecbf74 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -726,9 +726,9 @@ static struct radeon_asic evergreen_asic = { .get_vblank_counter = &evergreen_get_vblank_counter, .fence_ring_emit = &r600_fence_ring_emit, .cs_parse = &evergreen_cs_parse, - .copy_blit = NULL, - .copy_dma = NULL, - .copy = NULL, + .copy_blit = &evergreen_copy_blit, + .copy_dma = &evergreen_copy_blit, + .copy = &evergreen_copy_blit, .get_engine_clock = &radeon_atom_get_engine_clock, .set_engine_clock = &radeon_atom_set_engine_clock, .get_memory_clock = &radeon_atom_get_memory_clock, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 6d3b055c02fd..740988244143 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -254,11 +254,6 @@ void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); int r600_cs_parse(struct radeon_cs_parser *p); void r600_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); -int r600_copy_dma(struct radeon_device *rdev, - uint64_t src_offset, - uint64_t dst_offset, - unsigned num_pages, - struct radeon_fence *fence); int r600_irq_process(struct radeon_device *rdev); int r600_irq_set(struct radeon_device *rdev); bool r600_gpu_is_lockup(struct radeon_device *rdev); @@ -304,6 +299,9 @@ int evergreen_resume(struct radeon_device *rdev); bool evergreen_gpu_is_lockup(struct radeon_device *rdev); int evergreen_asic_reset(struct radeon_device *rdev); void evergreen_bandwidth_update(struct radeon_device *rdev); +int evergreen_copy_blit(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_pages, struct radeon_fence *fence); void evergreen_hpd_init(struct radeon_device *rdev); void evergreen_hpd_fini(struct radeon_device *rdev); bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd); |