summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/radeon
diff options
context:
space:
mode:
authorJiri Kosina <jkosina@suse.cz>2013-01-29 10:48:30 +0100
committerJiri Kosina <jkosina@suse.cz>2013-01-29 10:48:30 +0100
commit617677295b53a40d0e54aac4cbbc216ffbc755dd (patch)
tree51b9e87213243ed5efff252c8e8d8fec4eebc588 /drivers/gpu/drm/radeon
parent5c8d1b68e01a144813e38795fe6dbe7ebb506131 (diff)
parent6abb7c25775b7fb2225ad0508236d63ca710e65f (diff)
downloadlinux-617677295b53a40d0e54aac4cbbc216ffbc755dd.tar.gz
linux-617677295b53a40d0e54aac4cbbc216ffbc755dd.tar.bz2
linux-617677295b53a40d0e54aac4cbbc216ffbc755dd.zip
Merge branch 'master' into for-next
Conflicts: drivers/devfreq/exynos4_bus.c Sync with Linus' tree to be able to apply patches that are against newer code (mvneta).
Diffstat (limited to 'drivers/gpu/drm/radeon')
-rw-r--r--drivers/gpu/drm/radeon/atombios_crtc.c46
-rw-r--r--drivers/gpu/drm/radeon/atombios_dp.c149
-rw-r--r--drivers/gpu/drm/radeon/atombios_encoders.c4
-rw-r--r--drivers/gpu/drm/radeon/evergreen.c312
-rw-r--r--drivers/gpu/drm/radeon/evergreen_cs.c774
-rw-r--r--drivers/gpu/drm/radeon/evergreend.h149
-rw-r--r--drivers/gpu/drm/radeon/ni.c469
-rw-r--r--drivers/gpu/drm/radeon/nid.h87
-rw-r--r--drivers/gpu/drm/radeon/r100.c23
-rw-r--r--drivers/gpu/drm/radeon/r600.c571
-rw-r--r--drivers/gpu/drm/radeon/r600_cp.c7
-rw-r--r--drivers/gpu/drm/radeon/r600_cs.c405
-rw-r--r--drivers/gpu/drm/radeon/r600_reg.h9
-rw-r--r--drivers/gpu/drm/radeon/r600d.h86
-rw-r--r--drivers/gpu/drm/radeon/radeon.h47
-rw-r--r--drivers/gpu/drm/radeon/radeon_agp.c5
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.c198
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.h38
-rw-r--r--drivers/gpu/drm/radeon/radeon_atpx_handler.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_combios.c57
-rw-r--r--drivers/gpu/drm/radeon/radeon_connectors.c100
-rw-r--r--drivers/gpu/drm/radeon/radeon_cp.c14
-rw-r--r--drivers/gpu/drm/radeon/radeon_cs.c22
-rw-r--r--drivers/gpu/drm/radeon/radeon_cursor.c17
-rw-r--r--drivers/gpu/drm/radeon/radeon_device.c46
-rw-r--r--drivers/gpu/drm/radeon/radeon_display.c17
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.c24
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.h1
-rw-r--r--drivers/gpu/drm/radeon/radeon_fence.c51
-rw-r--r--drivers/gpu/drm/radeon/radeon_gart.c1
-rw-r--r--drivers/gpu/drm/radeon/radeon_i2c.c10
-rw-r--r--drivers/gpu/drm/radeon/radeon_kms.c16
-rw-r--r--drivers/gpu/drm/radeon/radeon_legacy_crtc.c15
-rw-r--r--drivers/gpu/drm/radeon/radeon_legacy_encoders.c183
-rw-r--r--drivers/gpu/drm/radeon/radeon_mode.h7
-rw-r--r--drivers/gpu/drm/radeon/radeon_object.c34
-rw-r--r--drivers/gpu/drm/radeon/radeon_object.h2
-rw-r--r--drivers/gpu/drm/radeon/radeon_pm.c15
-rw-r--r--drivers/gpu/drm/radeon/radeon_prime.c1
-rw-r--r--drivers/gpu/drm/radeon/radeon_ring.c26
-rw-r--r--drivers/gpu/drm/radeon/radeon_semaphore.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_test.c37
-rw-r--r--drivers/gpu/drm/radeon/radeon_ttm.c40
-rw-r--r--drivers/gpu/drm/radeon/reg_srcs/rv5152
-rw-r--r--drivers/gpu/drm/radeon/rv515.c122
-rw-r--r--drivers/gpu/drm/radeon/rv770.c105
-rw-r--r--drivers/gpu/drm/radeon/rv770d.h71
-rw-r--r--drivers/gpu/drm/radeon/si.c440
-rw-r--r--drivers/gpu/drm/radeon/sid.h138
49 files changed, 4350 insertions, 651 deletions
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index 2e566e123e9e..9175615bbd8a 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -561,6 +561,8 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
/* use frac fb div on APUs */
if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev))
radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
+ if (ASIC_IS_DCE32(rdev) && mode->clock > 165000)
+ radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
} else {
radeon_crtc->pll_flags |= RADEON_PLL_LEGACY;
@@ -1697,34 +1699,22 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc)
DRM_ERROR("unable to allocate a PPLL\n");
return ATOM_PPLL_INVALID;
} else {
- if (ASIC_IS_AVIVO(rdev)) {
- /* in DP mode, the DP ref clock can come from either PPLL
- * depending on the asic:
- * DCE3: PPLL1 or PPLL2
- */
- if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(radeon_crtc->encoder))) {
- /* use the same PPLL for all DP monitors */
- pll = radeon_get_shared_dp_ppll(crtc);
- if (pll != ATOM_PPLL_INVALID)
- return pll;
- } else {
- /* use the same PPLL for all monitors with the same clock */
- pll = radeon_get_shared_nondp_ppll(crtc);
- if (pll != ATOM_PPLL_INVALID)
- return pll;
- }
- /* all other cases */
- pll_in_use = radeon_get_pll_use_mask(crtc);
- if (!(pll_in_use & (1 << ATOM_PPLL1)))
- return ATOM_PPLL1;
- if (!(pll_in_use & (1 << ATOM_PPLL2)))
- return ATOM_PPLL2;
- DRM_ERROR("unable to allocate a PPLL\n");
- return ATOM_PPLL_INVALID;
- } else {
- /* on pre-R5xx asics, the crtc to pll mapping is hardcoded */
- return radeon_crtc->crtc_id;
- }
+ /* on pre-R5xx asics, the crtc to pll mapping is hardcoded */
+ /* some atombios (observed in some DCE2/DCE3) code have a bug,
+ * the matching btw pll and crtc is done through
+ * PCLK_CRTC[1|2]_CNTL (0x480/0x484) but atombios code use the
+ * pll (1 or 2) to select which register to write. ie if using
+ * pll1 it will use PCLK_CRTC1_CNTL (0x480) and if using pll2
+ * it will use PCLK_CRTC2_CNTL (0x484), it then use crtc id to
+ * choose which value to write. Which is reverse order from
+ * register logic. So only case that works is when pllid is
+ * same as crtcid or when both pll and crtc are enabled and
+ * both use same clock.
+ *
+ * So just return crtc id as if crtc and pll were hard linked
+ * together even if they aren't
+ */
+ return radeon_crtc->crtc_id;
}
}
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index d5699fe4f1e8..064023bed480 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -34,8 +34,7 @@
/* move these to drm_dp_helper.c/h */
#define DP_LINK_CONFIGURATION_SIZE 9
-#define DP_LINK_STATUS_SIZE 6
-#define DP_DPCD_SIZE 8
+#define DP_DPCD_SIZE DP_RECEIVER_CAP_SIZE
static char *voltage_names[] = {
"0.4V", "0.6V", "0.8V", "1.2V"
@@ -290,78 +289,6 @@ int radeon_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode,
/***** general DP utility functions *****/
-static u8 dp_link_status(u8 link_status[DP_LINK_STATUS_SIZE], int r)
-{
- return link_status[r - DP_LANE0_1_STATUS];
-}
-
-static u8 dp_get_lane_status(u8 link_status[DP_LINK_STATUS_SIZE],
- int lane)
-{
- int i = DP_LANE0_1_STATUS + (lane >> 1);
- int s = (lane & 1) * 4;
- u8 l = dp_link_status(link_status, i);
- return (l >> s) & 0xf;
-}
-
-static bool dp_clock_recovery_ok(u8 link_status[DP_LINK_STATUS_SIZE],
- int lane_count)
-{
- int lane;
- u8 lane_status;
-
- for (lane = 0; lane < lane_count; lane++) {
- lane_status = dp_get_lane_status(link_status, lane);
- if ((lane_status & DP_LANE_CR_DONE) == 0)
- return false;
- }
- return true;
-}
-
-static bool dp_channel_eq_ok(u8 link_status[DP_LINK_STATUS_SIZE],
- int lane_count)
-{
- u8 lane_align;
- u8 lane_status;
- int lane;
-
- lane_align = dp_link_status(link_status,
- DP_LANE_ALIGN_STATUS_UPDATED);
- if ((lane_align & DP_INTERLANE_ALIGN_DONE) == 0)
- return false;
- for (lane = 0; lane < lane_count; lane++) {
- lane_status = dp_get_lane_status(link_status, lane);
- if ((lane_status & DP_CHANNEL_EQ_BITS) != DP_CHANNEL_EQ_BITS)
- return false;
- }
- return true;
-}
-
-static u8 dp_get_adjust_request_voltage(u8 link_status[DP_LINK_STATUS_SIZE],
- int lane)
-
-{
- int i = DP_ADJUST_REQUEST_LANE0_1 + (lane >> 1);
- int s = ((lane & 1) ?
- DP_ADJUST_VOLTAGE_SWING_LANE1_SHIFT :
- DP_ADJUST_VOLTAGE_SWING_LANE0_SHIFT);
- u8 l = dp_link_status(link_status, i);
-
- return ((l >> s) & 0x3) << DP_TRAIN_VOLTAGE_SWING_SHIFT;
-}
-
-static u8 dp_get_adjust_request_pre_emphasis(u8 link_status[DP_LINK_STATUS_SIZE],
- int lane)
-{
- int i = DP_ADJUST_REQUEST_LANE0_1 + (lane >> 1);
- int s = ((lane & 1) ?
- DP_ADJUST_PRE_EMPHASIS_LANE1_SHIFT :
- DP_ADJUST_PRE_EMPHASIS_LANE0_SHIFT);
- u8 l = dp_link_status(link_status, i);
-
- return ((l >> s) & 0x3) << DP_TRAIN_PRE_EMPHASIS_SHIFT;
-}
-
#define DP_VOLTAGE_MAX DP_TRAIN_VOLTAGE_SWING_1200
#define DP_PRE_EMPHASIS_MAX DP_TRAIN_PRE_EMPHASIS_9_5
@@ -374,8 +301,8 @@ static void dp_get_adjust_train(u8 link_status[DP_LINK_STATUS_SIZE],
int lane;
for (lane = 0; lane < lane_count; lane++) {
- u8 this_v = dp_get_adjust_request_voltage(link_status, lane);
- u8 this_p = dp_get_adjust_request_pre_emphasis(link_status, lane);
+ u8 this_v = drm_dp_get_adjust_request_voltage(link_status, lane);
+ u8 this_p = drm_dp_get_adjust_request_pre_emphasis(link_status, lane);
DRM_DEBUG_KMS("requested signal parameters: lane %d voltage %s pre_emph %s\n",
lane,
@@ -420,37 +347,6 @@ static int dp_get_max_dp_pix_clock(int link_rate,
return (link_rate * lane_num * 8) / bpp;
}
-static int dp_get_max_link_rate(u8 dpcd[DP_DPCD_SIZE])
-{
- switch (dpcd[DP_MAX_LINK_RATE]) {
- case DP_LINK_BW_1_62:
- default:
- return 162000;
- case DP_LINK_BW_2_7:
- return 270000;
- case DP_LINK_BW_5_4:
- return 540000;
- }
-}
-
-static u8 dp_get_max_lane_number(u8 dpcd[DP_DPCD_SIZE])
-{
- return dpcd[DP_MAX_LANE_COUNT] & DP_MAX_LANE_COUNT_MASK;
-}
-
-static u8 dp_get_dp_link_rate_coded(int link_rate)
-{
- switch (link_rate) {
- case 162000:
- default:
- return DP_LINK_BW_1_62;
- case 270000:
- return DP_LINK_BW_2_7;
- case 540000:
- return DP_LINK_BW_5_4;
- }
-}
-
/***** radeon specific DP functions *****/
/* First get the min lane# when low rate is used according to pixel clock
@@ -462,8 +358,8 @@ static int radeon_dp_get_dp_lane_number(struct drm_connector *connector,
int pix_clock)
{
int bpp = convert_bpc_to_bpp(radeon_get_monitor_bpc(connector));
- int max_link_rate = dp_get_max_link_rate(dpcd);
- int max_lane_num = dp_get_max_lane_number(dpcd);
+ int max_link_rate = drm_dp_max_link_rate(dpcd);
+ int max_lane_num = drm_dp_max_lane_count(dpcd);
int lane_num;
int max_dp_pix_clock;
@@ -500,7 +396,7 @@ static int radeon_dp_get_dp_link_clock(struct drm_connector *connector,
return 540000;
}
- return dp_get_max_link_rate(dpcd);
+ return drm_dp_max_link_rate(dpcd);
}
static u8 radeon_dp_encoder_service(struct radeon_device *rdev,
@@ -551,14 +447,15 @@ static void radeon_dp_probe_oui(struct radeon_connector *radeon_connector)
bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector)
{
struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv;
- u8 msg[25];
+ u8 msg[DP_DPCD_SIZE];
int ret, i;
- ret = radeon_dp_aux_native_read(radeon_connector, DP_DPCD_REV, msg, 8, 0);
+ ret = radeon_dp_aux_native_read(radeon_connector, DP_DPCD_REV, msg,
+ DP_DPCD_SIZE, 0);
if (ret > 0) {
- memcpy(dig_connector->dpcd, msg, 8);
+ memcpy(dig_connector->dpcd, msg, DP_DPCD_SIZE);
DRM_DEBUG_KMS("DPCD: ");
- for (i = 0; i < 8; i++)
+ for (i = 0; i < DP_DPCD_SIZE; i++)
DRM_DEBUG_KMS("%02x ", msg[i]);
DRM_DEBUG_KMS("\n");
@@ -664,7 +561,7 @@ bool radeon_dp_needs_link_train(struct radeon_connector *radeon_connector)
if (!radeon_dp_get_link_status(radeon_connector, link_status))
return false;
- if (dp_channel_eq_ok(link_status, dig->dp_lane_count))
+ if (drm_dp_channel_eq_ok(link_status, dig->dp_lane_count))
return false;
return true;
}
@@ -677,9 +574,8 @@ struct radeon_dp_link_train_info {
int enc_id;
int dp_clock;
int dp_lane_count;
- int rd_interval;
bool tp3_supported;
- u8 dpcd[8];
+ u8 dpcd[DP_RECEIVER_CAP_SIZE];
u8 train_set[4];
u8 link_status[DP_LINK_STATUS_SIZE];
u8 tries;
@@ -765,7 +661,7 @@ static int radeon_dp_link_train_init(struct radeon_dp_link_train_info *dp_info)
radeon_write_dpcd_reg(dp_info->radeon_connector, DP_LANE_COUNT_SET, tmp);
/* set the link rate on the sink */
- tmp = dp_get_dp_link_rate_coded(dp_info->dp_clock);
+ tmp = drm_dp_link_rate_to_bw_code(dp_info->dp_clock);
radeon_write_dpcd_reg(dp_info->radeon_connector, DP_LINK_BW_SET, tmp);
/* start training on the source */
@@ -821,17 +717,14 @@ static int radeon_dp_link_train_cr(struct radeon_dp_link_train_info *dp_info)
dp_info->tries = 0;
voltage = 0xff;
while (1) {
- if (dp_info->rd_interval == 0)
- udelay(100);
- else
- mdelay(dp_info->rd_interval * 4);
+ drm_dp_link_train_clock_recovery_delay(dp_info->dpcd);
if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) {
DRM_ERROR("displayport link status failed\n");
break;
}
- if (dp_clock_recovery_ok(dp_info->link_status, dp_info->dp_lane_count)) {
+ if (drm_dp_clock_recovery_ok(dp_info->link_status, dp_info->dp_lane_count)) {
clock_recovery = true;
break;
}
@@ -886,17 +779,14 @@ static int radeon_dp_link_train_ce(struct radeon_dp_link_train_info *dp_info)
dp_info->tries = 0;
channel_eq = false;
while (1) {
- if (dp_info->rd_interval == 0)
- udelay(400);
- else
- mdelay(dp_info->rd_interval * 4);
+ drm_dp_link_train_channel_eq_delay(dp_info->dpcd);
if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) {
DRM_ERROR("displayport link status failed\n");
break;
}
- if (dp_channel_eq_ok(dp_info->link_status, dp_info->dp_lane_count)) {
+ if (drm_dp_channel_eq_ok(dp_info->link_status, dp_info->dp_lane_count)) {
channel_eq = true;
break;
}
@@ -974,14 +864,13 @@ void radeon_dp_link_train(struct drm_encoder *encoder,
else
dp_info.enc_id |= ATOM_DP_CONFIG_LINK_A;
- dp_info.rd_interval = radeon_read_dpcd_reg(radeon_connector, DP_TRAINING_AUX_RD_INTERVAL);
tmp = radeon_read_dpcd_reg(radeon_connector, DP_MAX_LANE_COUNT);
if (ASIC_IS_DCE5(rdev) && (tmp & DP_TPS3_SUPPORTED))
dp_info.tp3_supported = true;
else
dp_info.tp3_supported = false;
- memcpy(dp_info.dpcd, dig_connector->dpcd, 8);
+ memcpy(dp_info.dpcd, dig_connector->dpcd, DP_RECEIVER_CAP_SIZE);
dp_info.rdev = rdev;
dp_info.encoder = encoder;
dp_info.connector = connector;
diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index ba498f8e47a2..4552d4aff317 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -340,7 +340,7 @@ static bool radeon_atom_mode_fixup(struct drm_encoder *encoder,
((radeon_encoder->active_device & (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) ||
(radeon_encoder_get_dp_bridge_encoder_id(encoder) != ENCODER_OBJECT_ID_NONE))) {
struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
- radeon_dp_set_link_config(connector, mode);
+ radeon_dp_set_link_config(connector, adjusted_mode);
}
return true;
@@ -1625,7 +1625,7 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode)
atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_SETUP, 0, 0);
atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0);
/* some early dce3.2 boards have a bug in their transmitter control table */
- if ((rdev->family != CHIP_RV710) || (rdev->family != CHIP_RV730))
+ if ((rdev->family != CHIP_RV710) && (rdev->family != CHIP_RV730))
atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE_OUTPUT, 0, 0);
}
if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(encoder)) && connector) {
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 42509492717e..4d0e60adbc6d 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -1330,6 +1330,8 @@ void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *sav
break;
udelay(1);
}
+ } else {
+ save->crtc_enabled[i] = false;
}
}
@@ -1372,7 +1374,7 @@ void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *s
WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
for (i = 0; i < rdev->num_crtc; i++) {
- if (save->crtc_enabled) {
+ if (save->crtc_enabled[i]) {
if (ASIC_IS_DCE6(rdev)) {
tmp = RREG32(EVERGREEN_CRTC_BLANK_CONTROL + crtc_offsets[i]);
tmp |= EVERGREEN_CRTC_BLANK_DATA_EN;
@@ -1819,7 +1821,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
case CHIP_SUMO:
rdev->config.evergreen.num_ses = 1;
rdev->config.evergreen.max_pipes = 4;
- rdev->config.evergreen.max_tile_pipes = 2;
+ rdev->config.evergreen.max_tile_pipes = 4;
if (rdev->pdev->device == 0x9648)
rdev->config.evergreen.max_simds = 3;
else if ((rdev->pdev->device == 0x9647) ||
@@ -1842,7 +1844,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
rdev->config.evergreen.sc_prim_fifo_size = 0x40;
rdev->config.evergreen.sc_hiz_tile_fifo_size = 0x30;
rdev->config.evergreen.sc_earlyz_tile_fifo_size = 0x130;
- gb_addr_config = REDWOOD_GB_ADDR_CONFIG_GOLDEN;
+ gb_addr_config = SUMO_GB_ADDR_CONFIG_GOLDEN;
break;
case CHIP_SUMO2:
rdev->config.evergreen.num_ses = 1;
@@ -1864,7 +1866,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
rdev->config.evergreen.sc_prim_fifo_size = 0x40;
rdev->config.evergreen.sc_hiz_tile_fifo_size = 0x30;
rdev->config.evergreen.sc_earlyz_tile_fifo_size = 0x130;
- gb_addr_config = REDWOOD_GB_ADDR_CONFIG_GOLDEN;
+ gb_addr_config = SUMO2_GB_ADDR_CONFIG_GOLDEN;
break;
case CHIP_BARTS:
rdev->config.evergreen.num_ses = 2;
@@ -1912,7 +1914,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
break;
case CHIP_CAICOS:
rdev->config.evergreen.num_ses = 1;
- rdev->config.evergreen.max_pipes = 4;
+ rdev->config.evergreen.max_pipes = 2;
rdev->config.evergreen.max_tile_pipes = 2;
rdev->config.evergreen.max_simds = 2;
rdev->config.evergreen.max_backends = 1 * rdev->config.evergreen.num_ses;
@@ -2032,6 +2034,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
WREG32(GB_ADDR_CONFIG, gb_addr_config);
WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
WREG32(HDP_ADDR_CONFIG, gb_addr_config);
+ WREG32(DMA_TILING_CONFIG, gb_addr_config);
tmp = gb_addr_config & NUM_PIPES_MASK;
tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.evergreen.max_backends,
@@ -2303,22 +2306,20 @@ bool evergreen_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *rin
return radeon_ring_test_lockup(rdev, ring);
}
-static int evergreen_gpu_soft_reset(struct radeon_device *rdev)
+static void evergreen_gpu_soft_reset_gfx(struct radeon_device *rdev)
{
- struct evergreen_mc_save save;
u32 grbm_reset = 0;
if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
- return 0;
+ return;
- dev_info(rdev->dev, "GPU softreset \n");
- dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS = 0x%08X\n",
RREG32(GRBM_STATUS));
- dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS_SE0 = 0x%08X\n",
RREG32(GRBM_STATUS_SE0));
- dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS_SE1 = 0x%08X\n",
RREG32(GRBM_STATUS_SE1));
- dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
+ dev_info(rdev->dev, " SRBM_STATUS = 0x%08X\n",
RREG32(SRBM_STATUS));
dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n",
RREG32(CP_STALLED_STAT1));
@@ -2328,10 +2329,7 @@ static int evergreen_gpu_soft_reset(struct radeon_device *rdev)
RREG32(CP_BUSY_STAT));
dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n",
RREG32(CP_STAT));
- evergreen_mc_stop(rdev, &save);
- if (evergreen_mc_wait_for_idle(rdev)) {
- dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
- }
+
/* Disable CP parsing/prefetching */
WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT);
@@ -2355,15 +2353,14 @@ static int evergreen_gpu_soft_reset(struct radeon_device *rdev)
udelay(50);
WREG32(GRBM_SOFT_RESET, 0);
(void)RREG32(GRBM_SOFT_RESET);
- /* Wait a little for things to settle down */
- udelay(50);
- dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
+
+ dev_info(rdev->dev, " GRBM_STATUS = 0x%08X\n",
RREG32(GRBM_STATUS));
- dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS_SE0 = 0x%08X\n",
RREG32(GRBM_STATUS_SE0));
- dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS_SE1 = 0x%08X\n",
RREG32(GRBM_STATUS_SE1));
- dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
+ dev_info(rdev->dev, " SRBM_STATUS = 0x%08X\n",
RREG32(SRBM_STATUS));
dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n",
RREG32(CP_STALLED_STAT1));
@@ -2373,13 +2370,71 @@ static int evergreen_gpu_soft_reset(struct radeon_device *rdev)
RREG32(CP_BUSY_STAT));
dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n",
RREG32(CP_STAT));
+}
+
+static void evergreen_gpu_soft_reset_dma(struct radeon_device *rdev)
+{
+ u32 tmp;
+
+ if (RREG32(DMA_STATUS_REG) & DMA_IDLE)
+ return;
+
+ dev_info(rdev->dev, " R_00D034_DMA_STATUS_REG = 0x%08X\n",
+ RREG32(DMA_STATUS_REG));
+
+ /* Disable DMA */
+ tmp = RREG32(DMA_RB_CNTL);
+ tmp &= ~DMA_RB_ENABLE;
+ WREG32(DMA_RB_CNTL, tmp);
+
+ /* Reset dma */
+ WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
+ RREG32(SRBM_SOFT_RESET);
+ udelay(50);
+ WREG32(SRBM_SOFT_RESET, 0);
+
+ dev_info(rdev->dev, " R_00D034_DMA_STATUS_REG = 0x%08X\n",
+ RREG32(DMA_STATUS_REG));
+}
+
+static int evergreen_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
+{
+ struct evergreen_mc_save save;
+
+ if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
+ reset_mask &= ~(RADEON_RESET_GFX | RADEON_RESET_COMPUTE);
+
+ if (RREG32(DMA_STATUS_REG) & DMA_IDLE)
+ reset_mask &= ~RADEON_RESET_DMA;
+
+ if (reset_mask == 0)
+ return 0;
+
+ dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
+
+ evergreen_mc_stop(rdev, &save);
+ if (evergreen_mc_wait_for_idle(rdev)) {
+ dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
+ }
+
+ if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE))
+ evergreen_gpu_soft_reset_gfx(rdev);
+
+ if (reset_mask & RADEON_RESET_DMA)
+ evergreen_gpu_soft_reset_dma(rdev);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+
evergreen_mc_resume(rdev, &save);
return 0;
}
int evergreen_asic_reset(struct radeon_device *rdev)
{
- return evergreen_gpu_soft_reset(rdev);
+ return evergreen_gpu_soft_reset(rdev, (RADEON_RESET_GFX |
+ RADEON_RESET_COMPUTE |
+ RADEON_RESET_DMA));
}
/* Interrupts */
@@ -2401,8 +2456,12 @@ void evergreen_disable_interrupt_state(struct radeon_device *rdev)
CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
cayman_cp_int_cntl_setup(rdev, 1, 0);
cayman_cp_int_cntl_setup(rdev, 2, 0);
+ tmp = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE;
+ WREG32(CAYMAN_DMA1_CNTL, tmp);
} else
WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+ tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
+ WREG32(DMA_CNTL, tmp);
WREG32(GRBM_INT_CNTL, 0);
WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
@@ -2455,6 +2514,7 @@ int evergreen_irq_set(struct radeon_device *rdev)
u32 grbm_int_cntl = 0;
u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
u32 afmt1 = 0, afmt2 = 0, afmt3 = 0, afmt4 = 0, afmt5 = 0, afmt6 = 0;
+ u32 dma_cntl, dma_cntl1 = 0;
if (!rdev->irq.installed) {
WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -2482,6 +2542,8 @@ int evergreen_irq_set(struct radeon_device *rdev)
afmt5 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK;
afmt6 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK;
+ dma_cntl = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
+
if (rdev->family >= CHIP_CAYMAN) {
/* enable CP interrupts on all rings */
if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
@@ -2504,6 +2566,19 @@ int evergreen_irq_set(struct radeon_device *rdev)
}
}
+ if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
+ DRM_DEBUG("r600_irq_set: sw int dma\n");
+ dma_cntl |= TRAP_ENABLE;
+ }
+
+ if (rdev->family >= CHIP_CAYMAN) {
+ dma_cntl1 = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE;
+ if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
+ DRM_DEBUG("r600_irq_set: sw int dma1\n");
+ dma_cntl1 |= TRAP_ENABLE;
+ }
+ }
+
if (rdev->irq.crtc_vblank_int[0] ||
atomic_read(&rdev->irq.pflip[0])) {
DRM_DEBUG("evergreen_irq_set: vblank 0\n");
@@ -2589,6 +2664,12 @@ int evergreen_irq_set(struct radeon_device *rdev)
cayman_cp_int_cntl_setup(rdev, 2, cp_int_cntl2);
} else
WREG32(CP_INT_CNTL, cp_int_cntl);
+
+ WREG32(DMA_CNTL, dma_cntl);
+
+ if (rdev->family >= CHIP_CAYMAN)
+ WREG32(CAYMAN_DMA1_CNTL, dma_cntl1);
+
WREG32(GRBM_INT_CNTL, grbm_int_cntl);
WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
@@ -3091,6 +3172,16 @@ restart_ih:
break;
}
break;
+ case 146:
+ case 147:
+ dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
+ dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
+ RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
+ dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
+ RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
+ /* reset addr and status */
+ WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
+ break;
case 176: /* CP_INT in ring buffer */
case 177: /* CP_INT in IB1 */
case 178: /* CP_INT in IB2 */
@@ -3114,9 +3205,19 @@ restart_ih:
} else
radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
break;
+ case 224: /* DMA trap event */
+ DRM_DEBUG("IH: DMA trap\n");
+ radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
+ break;
case 233: /* GUI IDLE */
DRM_DEBUG("IH: GUI idle\n");
break;
+ case 244: /* DMA trap event */
+ if (rdev->family >= CHIP_CAYMAN) {
+ DRM_DEBUG("IH: DMA1 trap\n");
+ radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
+ }
+ break;
default:
DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
break;
@@ -3142,6 +3243,143 @@ restart_ih:
return IRQ_HANDLED;
}
+/**
+ * evergreen_dma_fence_ring_emit - emit a fence on the DMA ring
+ *
+ * @rdev: radeon_device pointer
+ * @fence: radeon fence object
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed (evergreen-SI).
+ */
+void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
+ struct radeon_fence *fence)
+{
+ struct radeon_ring *ring = &rdev->ring[fence->ring];
+ u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
+ /* write the fence */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
+ radeon_ring_write(ring, addr & 0xfffffffc);
+ radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
+ radeon_ring_write(ring, fence->seq);
+ /* generate an interrupt */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
+ /* flush HDP */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+ radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
+ radeon_ring_write(ring, 1);
+}
+
+/**
+ * evergreen_dma_ring_ib_execute - schedule an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring (evergreen).
+ */
+void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
+ struct radeon_ib *ib)
+{
+ struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+ if (rdev->wb.enabled) {
+ u32 next_rptr = ring->wptr + 4;
+ while ((next_rptr & 7) != 5)
+ next_rptr++;
+ next_rptr += 3;
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+ radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+ radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+ radeon_ring_write(ring, next_rptr);
+ }
+
+ /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+ * Pad as necessary with NOPs.
+ */
+ while ((ring->wptr & 7) != 5)
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
+ radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+ radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+
+}
+
+/**
+ * evergreen_copy_dma - copy pages using the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU paging using the DMA engine (evergreen-cayman).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+int evergreen_copy_dma(struct radeon_device *rdev,
+ uint64_t src_offset, uint64_t dst_offset,
+ unsigned num_gpu_pages,
+ struct radeon_fence **fence)
+{
+ struct radeon_semaphore *sem = NULL;
+ int ring_index = rdev->asic->copy.dma_ring_index;
+ struct radeon_ring *ring = &rdev->ring[ring_index];
+ u32 size_in_dw, cur_size_in_dw;
+ int i, num_loops;
+ int r = 0;
+
+ r = radeon_semaphore_create(rdev, &sem);
+ if (r) {
+ DRM_ERROR("radeon: moving bo (%d).\n", r);
+ return r;
+ }
+
+ size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
+ num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff);
+ r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
+ if (r) {
+ DRM_ERROR("radeon: moving bo (%d).\n", r);
+ radeon_semaphore_free(rdev, &sem, NULL);
+ return r;
+ }
+
+ if (radeon_fence_need_sync(*fence, ring->idx)) {
+ radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+ ring->idx);
+ radeon_fence_note_sync(*fence, ring->idx);
+ } else {
+ radeon_semaphore_free(rdev, &sem, NULL);
+ }
+
+ for (i = 0; i < num_loops; i++) {
+ cur_size_in_dw = size_in_dw;
+ if (cur_size_in_dw > 0xFFFFF)
+ cur_size_in_dw = 0xFFFFF;
+ size_in_dw -= cur_size_in_dw;
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
+ radeon_ring_write(ring, dst_offset & 0xfffffffc);
+ radeon_ring_write(ring, src_offset & 0xfffffffc);
+ radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
+ radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
+ src_offset += cur_size_in_dw * 4;
+ dst_offset += cur_size_in_dw * 4;
+ }
+
+ r = radeon_fence_emit(rdev, fence, ring->idx);
+ if (r) {
+ radeon_ring_unlock_undo(rdev, ring);
+ return r;
+ }
+
+ radeon_ring_unlock_commit(rdev, ring);
+ radeon_semaphore_free(rdev, &sem, *fence);
+
+ return r;
+}
+
static int evergreen_startup(struct radeon_device *rdev)
{
struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
@@ -3205,6 +3443,12 @@ static int evergreen_startup(struct radeon_device *rdev)
return r;
}
+ r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+ if (r) {
+ dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+ return r;
+ }
+
/* Enable IRQ */
r = r600_irq_init(rdev);
if (r) {
@@ -3219,12 +3463,23 @@ static int evergreen_startup(struct radeon_device *rdev)
0, 0xfffff, RADEON_CP_PACKET2);
if (r)
return r;
+
+ ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+ r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+ DMA_RB_RPTR, DMA_RB_WPTR,
+ 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+ if (r)
+ return r;
+
r = evergreen_cp_load_microcode(rdev);
if (r)
return r;
r = evergreen_cp_resume(rdev);
if (r)
return r;
+ r = r600_dma_resume(rdev);
+ if (r)
+ return r;
r = radeon_ib_pool_init(rdev);
if (r) {
@@ -3271,11 +3526,9 @@ int evergreen_resume(struct radeon_device *rdev)
int evergreen_suspend(struct radeon_device *rdev)
{
- struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-
r600_audio_fini(rdev);
r700_cp_stop(rdev);
- ring->ready = false;
+ r600_dma_stop(rdev);
evergreen_irq_suspend(rdev);
radeon_wb_disable(rdev);
evergreen_pcie_gart_disable(rdev);
@@ -3352,6 +3605,9 @@ int evergreen_init(struct radeon_device *rdev)
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
+ rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
+ r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
+
rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024);
@@ -3364,6 +3620,7 @@ int evergreen_init(struct radeon_device *rdev)
if (r) {
dev_err(rdev->dev, "disabling GPU acceleration\n");
r700_cp_fini(rdev);
+ r600_dma_fini(rdev);
r600_irq_fini(rdev);
radeon_wb_fini(rdev);
radeon_ib_pool_fini(rdev);
@@ -3391,6 +3648,7 @@ void evergreen_fini(struct radeon_device *rdev)
r600_audio_fini(rdev);
r600_blit_fini(rdev);
r700_cp_fini(rdev);
+ r600_dma_fini(rdev);
r600_irq_fini(rdev);
radeon_wb_fini(rdev);
radeon_ib_pool_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 30271b641913..7a445666e71f 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -34,6 +34,8 @@
#define MAX(a,b) (((a)>(b))?(a):(b))
#define MIN(a,b) (((a)<(b))?(a):(b))
+int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
+ struct radeon_cs_reloc **cs_reloc);
static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
struct radeon_cs_reloc **cs_reloc);
@@ -264,7 +266,7 @@ static int evergreen_surface_check_2d(struct radeon_cs_parser *p,
/* macro tile width & height */
palign = (8 * surf->bankw * track->npipes) * surf->mtilea;
halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea;
- mtileb = (palign / 8) * (halign / 8) * tileb;;
+ mtileb = (palign / 8) * (halign / 8) * tileb;
mtile_pr = surf->nbx / palign;
mtile_ps = (mtile_pr * surf->nby) / halign;
surf->layer_size = mtile_ps * mtileb * slice_pt;
@@ -507,20 +509,28 @@ static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
/* height is npipes htiles aligned == npipes * 8 pixel aligned */
nby = round_up(nby, track->npipes * 8);
} else {
+ /* always assume 8x8 htile */
+ /* align is htile align * 8, htile align vary according to
+ * number of pipe and tile width and nby
+ */
switch (track->npipes) {
case 8:
+ /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
nbx = round_up(nbx, 64 * 8);
nby = round_up(nby, 64 * 8);
break;
case 4:
+ /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
nbx = round_up(nbx, 64 * 8);
nby = round_up(nby, 32 * 8);
break;
case 2:
+ /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
nbx = round_up(nbx, 32 * 8);
nby = round_up(nby, 32 * 8);
break;
case 1:
+ /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
nbx = round_up(nbx, 32 * 8);
nby = round_up(nby, 16 * 8);
break;
@@ -531,9 +541,10 @@ static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
}
}
/* compute number of htile */
- nbx = nbx / 8;
- nby = nby / 8;
- size = nbx * nby * 4;
+ nbx = nbx >> 3;
+ nby = nby >> 3;
+ /* size must be aligned on npipes * 2K boundary */
+ size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
size += track->htile_offset;
if (size > radeon_bo_size(track->htile_bo)) {
@@ -1790,6 +1801,8 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
case DB_HTILE_SURFACE:
/* 8x8 only */
track->htile_surface = radeon_get_ib_value(p, idx);
+ /* force 8x8 htile width and height */
+ ib[idx] |= 3;
track->db_dirty = true;
break;
case CB_IMMED0_BASE:
@@ -2232,6 +2245,107 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
ib[idx+2] = upper_32_bits(offset) & 0xff;
}
break;
+ case PACKET3_CP_DMA:
+ {
+ u32 command, size, info;
+ u64 offset, tmp;
+ if (pkt->count != 4) {
+ DRM_ERROR("bad CP DMA\n");
+ return -EINVAL;
+ }
+ command = radeon_get_ib_value(p, idx+4);
+ size = command & 0x1fffff;
+ info = radeon_get_ib_value(p, idx+1);
+ if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
+ (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
+ ((((info & 0x00300000) >> 20) == 0) &&
+ (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
+ ((((info & 0x60000000) >> 29) == 0) &&
+ (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
+ /* non mem to mem copies requires dw aligned count */
+ if (size % 4) {
+ DRM_ERROR("CP DMA command requires dw count alignment\n");
+ return -EINVAL;
+ }
+ }
+ if (command & PACKET3_CP_DMA_CMD_SAS) {
+ /* src address space is register */
+ /* GDS is ok */
+ if (((info & 0x60000000) >> 29) != 1) {
+ DRM_ERROR("CP DMA SAS not supported\n");
+ return -EINVAL;
+ }
+ } else {
+ if (command & PACKET3_CP_DMA_CMD_SAIC) {
+ DRM_ERROR("CP DMA SAIC only supported for registers\n");
+ return -EINVAL;
+ }
+ /* src address space is memory */
+ if (((info & 0x60000000) >> 29) == 0) {
+ r = evergreen_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("bad CP DMA SRC\n");
+ return -EINVAL;
+ }
+
+ tmp = radeon_get_ib_value(p, idx) +
+ ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
+
+ offset = reloc->lobj.gpu_offset + tmp;
+
+ if ((tmp + size) > radeon_bo_size(reloc->robj)) {
+ dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
+ tmp + size, radeon_bo_size(reloc->robj));
+ return -EINVAL;
+ }
+
+ ib[idx] = offset;
+ ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
+ } else if (((info & 0x60000000) >> 29) != 2) {
+ DRM_ERROR("bad CP DMA SRC_SEL\n");
+ return -EINVAL;
+ }
+ }
+ if (command & PACKET3_CP_DMA_CMD_DAS) {
+ /* dst address space is register */
+ /* GDS is ok */
+ if (((info & 0x00300000) >> 20) != 1) {
+ DRM_ERROR("CP DMA DAS not supported\n");
+ return -EINVAL;
+ }
+ } else {
+ /* dst address space is memory */
+ if (command & PACKET3_CP_DMA_CMD_DAIC) {
+ DRM_ERROR("CP DMA DAIC only supported for registers\n");
+ return -EINVAL;
+ }
+ if (((info & 0x00300000) >> 20) == 0) {
+ r = evergreen_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("bad CP DMA DST\n");
+ return -EINVAL;
+ }
+
+ tmp = radeon_get_ib_value(p, idx+2) +
+ ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
+
+ offset = reloc->lobj.gpu_offset + tmp;
+
+ if ((tmp + size) > radeon_bo_size(reloc->robj)) {
+ dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
+ tmp + size, radeon_bo_size(reloc->robj));
+ return -EINVAL;
+ }
+
+ ib[idx+2] = offset;
+ ib[idx+3] = upper_32_bits(offset) & 0xff;
+ } else {
+ DRM_ERROR("bad CP DMA DST_SEL\n");
+ return -EINVAL;
+ }
+ }
+ break;
+ }
case PACKET3_SURFACE_SYNC:
if (pkt->count != 3) {
DRM_ERROR("bad SURFACE_SYNC\n");
@@ -2540,6 +2654,35 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
ib[idx+4] = upper_32_bits(offset) & 0xff;
}
break;
+ case PACKET3_MEM_WRITE:
+ {
+ u64 offset;
+
+ if (pkt->count != 3) {
+ DRM_ERROR("bad MEM_WRITE (invalid count)\n");
+ return -EINVAL;
+ }
+ r = evergreen_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
+ return -EINVAL;
+ }
+ offset = radeon_get_ib_value(p, idx+0);
+ offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
+ if (offset & 0x7) {
+ DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
+ return -EINVAL;
+ }
+ if ((offset + 8) > radeon_bo_size(reloc->robj)) {
+ DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
+ offset + 8, radeon_bo_size(reloc->robj));
+ return -EINVAL;
+ }
+ offset += reloc->lobj.gpu_offset;
+ ib[idx+0] = offset;
+ ib[idx+1] = upper_32_bits(offset) & 0xff;
+ break;
+ }
case PACKET3_COPY_DW:
if (pkt->count != 4) {
DRM_ERROR("bad COPY_DW (invalid count)\n");
@@ -2715,6 +2858,455 @@ int evergreen_cs_parse(struct radeon_cs_parser *p)
return 0;
}
+/*
+ * DMA
+ */
+
+#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
+#define GET_DMA_COUNT(h) ((h) & 0x000fffff)
+#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
+#define GET_DMA_NEW(h) (((h) & 0x04000000) >> 26)
+#define GET_DMA_MISC(h) (((h) & 0x0700000) >> 20)
+
+/**
+ * evergreen_dma_cs_parse() - parse the DMA IB
+ * @p: parser structure holding parsing context.
+ *
+ * Parses the DMA IB from the CS ioctl and updates
+ * the GPU addresses based on the reloc information and
+ * checks for errors. (Evergreen-Cayman)
+ * Returns 0 for success and an error on failure.
+ **/
+int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
+{
+ struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
+ struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
+ u32 header, cmd, count, tiled, new_cmd, misc;
+ volatile u32 *ib = p->ib.ptr;
+ u32 idx, idx_value;
+ u64 src_offset, dst_offset, dst2_offset;
+ int r;
+
+ do {
+ if (p->idx >= ib_chunk->length_dw) {
+ DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
+ p->idx, ib_chunk->length_dw);
+ return -EINVAL;
+ }
+ idx = p->idx;
+ header = radeon_get_ib_value(p, idx);
+ cmd = GET_DMA_CMD(header);
+ count = GET_DMA_COUNT(header);
+ tiled = GET_DMA_T(header);
+ new_cmd = GET_DMA_NEW(header);
+ misc = GET_DMA_MISC(header);
+
+ switch (cmd) {
+ case DMA_PACKET_WRITE:
+ r = r600_dma_cs_next_reloc(p, &dst_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_WRITE\n");
+ return -EINVAL;
+ }
+ if (tiled) {
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ p->idx += count + 7;
+ } else {
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += count + 3;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
+ dst_offset, radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ break;
+ case DMA_PACKET_COPY:
+ r = r600_dma_cs_next_reloc(p, &src_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ if (tiled) {
+ idx_value = radeon_get_ib_value(p, idx + 2);
+ if (new_cmd) {
+ switch (misc) {
+ case 0:
+ /* L2T, frame to fields */
+ if (idx_value & (1 << 31)) {
+ DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+ if (r) {
+ DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ dst2_offset = ib[idx+2];
+ dst2_offset <<= 8;
+ src_offset = ib[idx+8];
+ src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
+ dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+ ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += 10;
+ break;
+ case 1:
+ /* L2T, T2L partial */
+ if (p->family < CHIP_CAYMAN) {
+ DRM_ERROR("L2T, T2L Partial is cayman only !\n");
+ return -EINVAL;
+ }
+ /* detile bit */
+ if (idx_value & (1 << 31)) {
+ /* tiled src, linear dst */
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+ ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ } else {
+ /* linear src, tiled dst */
+ ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ }
+ p->idx += 12;
+ break;
+ case 3:
+ /* L2T, broadcast */
+ if (idx_value & (1 << 31)) {
+ DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+ if (r) {
+ DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ dst2_offset = ib[idx+2];
+ dst2_offset <<= 8;
+ src_offset = ib[idx+8];
+ src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
+ dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+ ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += 10;
+ break;
+ case 4:
+ /* L2T, T2L */
+ /* detile bit */
+ if (idx_value & (1 << 31)) {
+ /* tiled src, linear dst */
+ src_offset = ib[idx+1];
+ src_offset <<= 8;
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+ dst_offset = ib[idx+7];
+ dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+ ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ } else {
+ /* linear src, tiled dst */
+ src_offset = ib[idx+7];
+ src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+ ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ }
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ p->idx += 9;
+ break;
+ case 5:
+ /* T2T partial */
+ if (p->family < CHIP_CAYMAN) {
+ DRM_ERROR("L2T, T2L Partial is cayman only !\n");
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+ ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ p->idx += 13;
+ break;
+ case 7:
+ /* L2T, broadcast */
+ if (idx_value & (1 << 31)) {
+ DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+ if (r) {
+ DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ dst2_offset = ib[idx+2];
+ dst2_offset <<= 8;
+ src_offset = ib[idx+8];
+ src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
+ dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
+ ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += 10;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ } else {
+ switch (misc) {
+ case 0:
+ /* detile bit */
+ if (idx_value & (1 << 31)) {
+ /* tiled src, linear dst */
+ src_offset = ib[idx+1];
+ src_offset <<= 8;
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+ dst_offset = ib[idx+7];
+ dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+ ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ } else {
+ /* linear src, tiled dst */
+ src_offset = ib[idx+7];
+ src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
+ ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ }
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ p->idx += 9;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ }
+ } else {
+ if (new_cmd) {
+ switch (misc) {
+ case 0:
+ /* L2L, byte */
+ src_offset = ib[idx+2];
+ src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+ if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
+ src_offset + count, radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
+ dst_offset + count, radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
+ ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
+ ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += 5;
+ break;
+ case 1:
+ /* L2L, partial */
+ if (p->family < CHIP_CAYMAN) {
+ DRM_ERROR("L2L Partial is cayman only !\n");
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
+ ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
+ ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+
+ p->idx += 9;
+ break;
+ case 4:
+ /* L2L, dw, broadcast */
+ r = r600_dma_cs_next_reloc(p, &dst2_reloc);
+ if (r) {
+ DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+ dst2_offset = ib[idx+2];
+ dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32;
+ src_offset = ib[idx+3];
+ src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
+ dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += 7;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ } else {
+ /* L2L, dw */
+ src_offset = ib[idx+2];
+ src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += 5;
+ }
+ }
+ break;
+ case DMA_PACKET_CONSTANT_FILL:
+ r = r600_dma_cs_next_reloc(p, &dst_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
+ return -EINVAL;
+ }
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
+ dst_offset, radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
+ p->idx += 4;
+ break;
+ case DMA_PACKET_NOP:
+ p->idx += 1;
+ break;
+ default:
+ DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
+ return -EINVAL;
+ }
+ } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+#if 0
+ for (r = 0; r < p->ib->length_dw; r++) {
+ printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
+ mdelay(1);
+ }
+#endif
+ return 0;
+}
+
/* vm parser */
static bool evergreen_vm_reg_valid(u32 reg)
{
@@ -2724,7 +3316,11 @@ static bool evergreen_vm_reg_valid(u32 reg)
/* check config regs */
switch (reg) {
+ case WAIT_UNTIL:
case GRBM_GFX_INDEX:
+ case CP_STRMOUT_CNTL:
+ case CP_COHER_CNTL:
+ case CP_COHER_SIZE:
case VGT_VTX_VECT_EJECT_REG:
case VGT_CACHE_INVALIDATION:
case VGT_GS_VERTEX_REUSE:
@@ -2840,6 +3436,7 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev,
u32 idx = pkt->idx + 1;
u32 idx_value = ib[idx];
u32 start_reg, end_reg, reg, i;
+ u32 command, info;
switch (pkt->opcode) {
case PACKET3_NOP:
@@ -2914,6 +3511,64 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev,
return -EINVAL;
}
break;
+ case PACKET3_CP_DMA:
+ command = ib[idx + 4];
+ info = ib[idx + 1];
+ if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
+ (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
+ ((((info & 0x00300000) >> 20) == 0) &&
+ (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
+ ((((info & 0x60000000) >> 29) == 0) &&
+ (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
+ /* non mem to mem copies requires dw aligned count */
+ if ((command & 0x1fffff) % 4) {
+ DRM_ERROR("CP DMA command requires dw count alignment\n");
+ return -EINVAL;
+ }
+ }
+ if (command & PACKET3_CP_DMA_CMD_SAS) {
+ /* src address space is register */
+ if (((info & 0x60000000) >> 29) == 0) {
+ start_reg = idx_value << 2;
+ if (command & PACKET3_CP_DMA_CMD_SAIC) {
+ reg = start_reg;
+ if (!evergreen_vm_reg_valid(reg)) {
+ DRM_ERROR("CP DMA Bad SRC register\n");
+ return -EINVAL;
+ }
+ } else {
+ for (i = 0; i < (command & 0x1fffff); i++) {
+ reg = start_reg + (4 * i);
+ if (!evergreen_vm_reg_valid(reg)) {
+ DRM_ERROR("CP DMA Bad SRC register\n");
+ return -EINVAL;
+ }
+ }
+ }
+ }
+ }
+ if (command & PACKET3_CP_DMA_CMD_DAS) {
+ /* dst address space is register */
+ if (((info & 0x00300000) >> 20) == 0) {
+ start_reg = ib[idx + 2];
+ if (command & PACKET3_CP_DMA_CMD_DAIC) {
+ reg = start_reg;
+ if (!evergreen_vm_reg_valid(reg)) {
+ DRM_ERROR("CP DMA Bad DST register\n");
+ return -EINVAL;
+ }
+ } else {
+ for (i = 0; i < (command & 0x1fffff); i++) {
+ reg = start_reg + (4 * i);
+ if (!evergreen_vm_reg_valid(reg)) {
+ DRM_ERROR("CP DMA Bad DST register\n");
+ return -EINVAL;
+ }
+ }
+ }
+ }
+ }
+ break;
default:
return -EINVAL;
}
@@ -2955,3 +3610,114 @@ int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
return ret;
}
+
+/**
+ * evergreen_dma_ib_parse() - parse the DMA IB for VM
+ * @rdev: radeon_device pointer
+ * @ib: radeon_ib pointer
+ *
+ * Parses the DMA IB from the VM CS ioctl
+ * checks for errors. (Cayman-SI)
+ * Returns 0 for success and an error on failure.
+ **/
+int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+ u32 idx = 0;
+ u32 header, cmd, count, tiled, new_cmd, misc;
+
+ do {
+ header = ib->ptr[idx];
+ cmd = GET_DMA_CMD(header);
+ count = GET_DMA_COUNT(header);
+ tiled = GET_DMA_T(header);
+ new_cmd = GET_DMA_NEW(header);
+ misc = GET_DMA_MISC(header);
+
+ switch (cmd) {
+ case DMA_PACKET_WRITE:
+ if (tiled)
+ idx += count + 7;
+ else
+ idx += count + 3;
+ break;
+ case DMA_PACKET_COPY:
+ if (tiled) {
+ if (new_cmd) {
+ switch (misc) {
+ case 0:
+ /* L2T, frame to fields */
+ idx += 10;
+ break;
+ case 1:
+ /* L2T, T2L partial */
+ idx += 12;
+ break;
+ case 3:
+ /* L2T, broadcast */
+ idx += 10;
+ break;
+ case 4:
+ /* L2T, T2L */
+ idx += 9;
+ break;
+ case 5:
+ /* T2T partial */
+ idx += 13;
+ break;
+ case 7:
+ /* L2T, broadcast */
+ idx += 10;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ } else {
+ switch (misc) {
+ case 0:
+ idx += 9;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ }
+ } else {
+ if (new_cmd) {
+ switch (misc) {
+ case 0:
+ /* L2L, byte */
+ idx += 5;
+ break;
+ case 1:
+ /* L2L, partial */
+ idx += 9;
+ break;
+ case 4:
+ /* L2L, dw, broadcast */
+ idx += 7;
+ break;
+ default:
+ DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
+ return -EINVAL;
+ }
+ } else {
+ /* L2L, dw */
+ idx += 5;
+ }
+ }
+ break;
+ case DMA_PACKET_CONSTANT_FILL:
+ idx += 4;
+ break;
+ case DMA_PACKET_NOP:
+ idx += 1;
+ break;
+ default:
+ DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
+ return -EINVAL;
+ }
+ } while (idx < ib->length_dw);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index df542f1a5dfb..0bfd0e9e469b 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -45,6 +45,8 @@
#define TURKS_GB_ADDR_CONFIG_GOLDEN 0x02010002
#define CEDAR_GB_ADDR_CONFIG_GOLDEN 0x02010001
#define CAICOS_GB_ADDR_CONFIG_GOLDEN 0x02010001
+#define SUMO_GB_ADDR_CONFIG_GOLDEN 0x02010002
+#define SUMO2_GB_ADDR_CONFIG_GOLDEN 0x02010002
/* Registers */
@@ -91,6 +93,10 @@
#define FB_READ_EN (1 << 0)
#define FB_WRITE_EN (1 << 1)
+#define CP_STRMOUT_CNTL 0x84FC
+
+#define CP_COHER_CNTL 0x85F0
+#define CP_COHER_SIZE 0x85F4
#define CP_COHER_BASE 0x85F8
#define CP_STALLED_STAT1 0x8674
#define CP_STALLED_STAT2 0x8678
@@ -351,6 +357,54 @@
# define AFMT_MPEG_INFO_UPDATE (1 << 10)
#define AFMT_GENERIC0_7 0x7138
+/* DCE4/5 ELD audio interface */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR0 0x5f84 /* LPCM */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR1 0x5f88 /* AC3 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR2 0x5f8c /* MPEG1 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR3 0x5f90 /* MP3 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR4 0x5f94 /* MPEG2 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR5 0x5f98 /* AAC */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR6 0x5f9c /* DTS */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR7 0x5fa0 /* ATRAC */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR8 0x5fa4 /* one bit audio - leave at 0 (default) */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR9 0x5fa8 /* Dolby Digital */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR10 0x5fac /* DTS-HD */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR11 0x5fb0 /* MAT-MLP */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR12 0x5fb4 /* DTS */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR13 0x5fb8 /* WMA Pro */
+# define MAX_CHANNELS(x) (((x) & 0x7) << 0)
+/* max channels minus one. 7 = 8 channels */
+# define SUPPORTED_FREQUENCIES(x) (((x) & 0xff) << 8)
+# define DESCRIPTOR_BYTE_2(x) (((x) & 0xff) << 16)
+# define SUPPORTED_FREQUENCIES_STEREO(x) (((x) & 0xff) << 24) /* LPCM only */
+/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO
+ * bit0 = 32 kHz
+ * bit1 = 44.1 kHz
+ * bit2 = 48 kHz
+ * bit3 = 88.2 kHz
+ * bit4 = 96 kHz
+ * bit5 = 176.4 kHz
+ * bit6 = 192 kHz
+ */
+
+#define AZ_HOT_PLUG_CONTROL 0x5e78
+# define AZ_FORCE_CODEC_WAKE (1 << 0)
+# define PIN0_JACK_DETECTION_ENABLE (1 << 4)
+# define PIN1_JACK_DETECTION_ENABLE (1 << 5)
+# define PIN2_JACK_DETECTION_ENABLE (1 << 6)
+# define PIN3_JACK_DETECTION_ENABLE (1 << 7)
+# define PIN0_UNSOLICITED_RESPONSE_ENABLE (1 << 8)
+# define PIN1_UNSOLICITED_RESPONSE_ENABLE (1 << 9)
+# define PIN2_UNSOLICITED_RESPONSE_ENABLE (1 << 10)
+# define PIN3_UNSOLICITED_RESPONSE_ENABLE (1 << 11)
+# define CODEC_HOT_PLUG_ENABLE (1 << 12)
+# define PIN0_AUDIO_ENABLED (1 << 24)
+# define PIN1_AUDIO_ENABLED (1 << 25)
+# define PIN2_AUDIO_ENABLED (1 << 26)
+# define PIN3_AUDIO_ENABLED (1 << 27)
+# define AUDIO_ENABLED (1 << 31)
+
+
#define GC_USER_SHADER_PIPE_CONFIG 0x8954
#define INACTIVE_QD_PIPES(x) ((x) << 8)
#define INACTIVE_QD_PIPES_MASK 0x0000FF00
@@ -647,6 +701,7 @@
#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1)
#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4)
#define VM_CONTEXT1_CNTL 0x1414
+#define VM_CONTEXT1_CNTL2 0x1434
#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x153C
#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x157C
#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x155C
@@ -668,6 +723,8 @@
#define CACHE_UPDATE_MODE(x) ((x) << 6)
#define VM_L2_STATUS 0x140C
#define L2_BUSY (1 << 0)
+#define VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x14FC
+#define VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x14DC
#define WAIT_UNTIL 0x8040
@@ -685,8 +742,9 @@
#define SOFT_RESET_ROM (1 << 14)
#define SOFT_RESET_SEM (1 << 15)
#define SOFT_RESET_VMC (1 << 17)
+#define SOFT_RESET_DMA (1 << 20)
#define SOFT_RESET_TST (1 << 21)
-#define SOFT_RESET_REGBB (1 << 22)
+#define SOFT_RESET_REGBB (1 << 22)
#define SOFT_RESET_ORB (1 << 23)
/* display watermarks */
@@ -850,6 +908,37 @@
# define DC_HPDx_RX_INT_TIMER(x) ((x) << 16)
# define DC_HPDx_EN (1 << 28)
+/* ASYNC DMA */
+#define DMA_RB_RPTR 0xd008
+#define DMA_RB_WPTR 0xd00c
+
+#define DMA_CNTL 0xd02c
+# define TRAP_ENABLE (1 << 0)
+# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
+# define SEM_WAIT_INT_ENABLE (1 << 2)
+# define DATA_SWAP_ENABLE (1 << 3)
+# define FENCE_SWAP_ENABLE (1 << 4)
+# define CTXEMPTY_INT_ENABLE (1 << 28)
+#define DMA_TILING_CONFIG 0xD0B8
+
+#define CAYMAN_DMA1_CNTL 0xd82c
+
+/* async DMA packets */
+#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
+ (((t) & 0x1) << 23) | \
+ (((s) & 0x1) << 22) | \
+ (((n) & 0xFFFFF) << 0))
+/* async DMA Packet types */
+#define DMA_PACKET_WRITE 0x2
+#define DMA_PACKET_COPY 0x3
+#define DMA_PACKET_INDIRECT_BUFFER 0x4
+#define DMA_PACKET_SEMAPHORE 0x5
+#define DMA_PACKET_FENCE 0x6
+#define DMA_PACKET_TRAP 0x7
+#define DMA_PACKET_SRBM_WRITE 0x9
+#define DMA_PACKET_CONSTANT_FILL 0xd
+#define DMA_PACKET_NOP 0xf
+
/* PCIE link stuff */
#define PCIE_LC_TRAINING_CNTL 0xa1 /* PCIE_P */
#define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */
@@ -947,6 +1036,53 @@
#define PACKET3_WAIT_REG_MEM 0x3C
#define PACKET3_MEM_WRITE 0x3D
#define PACKET3_INDIRECT_BUFFER 0x32
+#define PACKET3_CP_DMA 0x41
+/* 1. header
+ * 2. SRC_ADDR_LO or DATA [31:0]
+ * 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] |
+ * SRC_ADDR_HI [7:0]
+ * 4. DST_ADDR_LO [31:0]
+ * 5. DST_ADDR_HI [7:0]
+ * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
+ */
+# define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20)
+ /* 0 - SRC_ADDR
+ * 1 - GDS
+ */
+# define PACKET3_CP_DMA_ENGINE(x) ((x) << 27)
+ /* 0 - ME
+ * 1 - PFP
+ */
+# define PACKET3_CP_DMA_SRC_SEL(x) ((x) << 29)
+ /* 0 - SRC_ADDR
+ * 1 - GDS
+ * 2 - DATA
+ */
+# define PACKET3_CP_DMA_CP_SYNC (1 << 31)
+/* COMMAND */
+# define PACKET3_CP_DMA_DIS_WC (1 << 21)
+# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
+ /* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
+ /* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+# define PACKET3_CP_DMA_CMD_SAS (1 << 26)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_CP_DMA_CMD_DAS (1 << 27)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_CP_DMA_CMD_SAIC (1 << 28)
+# define PACKET3_CP_DMA_CMD_DAIC (1 << 29)
#define PACKET3_SURFACE_SYNC 0x43
# define PACKET3_CB0_DEST_BASE_ENA (1 << 6)
# define PACKET3_CB1_DEST_BASE_ENA (1 << 7)
@@ -1892,4 +2028,15 @@
/* cayman packet3 addition */
#define CAYMAN_PACKET3_DEALLOC_STATE 0x14
+/* DMA regs common on r6xx/r7xx/evergreen/ni */
+#define DMA_RB_CNTL 0xd000
+# define DMA_RB_ENABLE (1 << 0)
+# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
+# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
+# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
+# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
+# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
+#define DMA_STATUS_REG 0xd034
+# define DMA_IDLE (1 << 0)
+
#endif
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index cda01f808f12..59acabb45c9b 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -611,6 +611,8 @@ static void cayman_gpu_init(struct radeon_device *rdev)
WREG32(GB_ADDR_CONFIG, gb_addr_config);
WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
WREG32(HDP_ADDR_CONFIG, gb_addr_config);
+ WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
+ WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
tmp = gb_addr_config & NUM_PIPES_MASK;
tmp = r6xx_remap_render_backend(rdev, tmp,
@@ -784,10 +786,20 @@ static int cayman_pcie_gart_enable(struct radeon_device *rdev)
/* enable context1-7 */
WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
(u32)(rdev->dummy_page.addr >> 12));
- WREG32(VM_CONTEXT1_CNTL2, 0);
- WREG32(VM_CONTEXT1_CNTL, 0);
+ WREG32(VM_CONTEXT1_CNTL2, 4);
WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
- RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
+ RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
+ PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
+ VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
+ READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT |
+ WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
cayman_pcie_gart_tlb_flush(rdev);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
@@ -905,6 +917,7 @@ static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
WREG32(SCRATCH_UMSK, 0);
+ rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
}
}
@@ -1118,22 +1131,195 @@ static int cayman_cp_resume(struct radeon_device *rdev)
return 0;
}
-static int cayman_gpu_soft_reset(struct radeon_device *rdev)
+/*
+ * DMA
+ * Starting with R600, the GPU has an asynchronous
+ * DMA engine. The programming model is very similar
+ * to the 3D engine (ring buffer, IBs, etc.), but the
+ * DMA controller has it's own packet format that is
+ * different form the PM4 format used by the 3D engine.
+ * It supports copying data, writing embedded data,
+ * solid fills, and a number of other things. It also
+ * has support for tiling/detiling of buffers.
+ * Cayman and newer support two asynchronous DMA engines.
+ */
+/**
+ * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring (cayman-SI).
+ */
+void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
+ struct radeon_ib *ib)
+{
+ struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+ if (rdev->wb.enabled) {
+ u32 next_rptr = ring->wptr + 4;
+ while ((next_rptr & 7) != 5)
+ next_rptr++;
+ next_rptr += 3;
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+ radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+ radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+ radeon_ring_write(ring, next_rptr);
+ }
+
+ /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+ * Pad as necessary with NOPs.
+ */
+ while ((ring->wptr & 7) != 5)
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+ radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
+ radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+ radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+
+}
+
+/**
+ * cayman_dma_stop - stop the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engines (cayman-SI).
+ */
+void cayman_dma_stop(struct radeon_device *rdev)
+{
+ u32 rb_cntl;
+
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+
+ /* dma0 */
+ rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
+ rb_cntl &= ~DMA_RB_ENABLE;
+ WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
+
+ /* dma1 */
+ rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
+ rb_cntl &= ~DMA_RB_ENABLE;
+ WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
+
+ rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
+ rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
+}
+
+/**
+ * cayman_dma_resume - setup and start the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Set up the DMA ring buffers and enable them. (cayman-SI).
+ * Returns 0 for success, error for failure.
+ */
+int cayman_dma_resume(struct radeon_device *rdev)
+{
+ struct radeon_ring *ring;
+ u32 rb_cntl, dma_cntl;
+ u32 rb_bufsz;
+ u32 reg_offset, wb_offset;
+ int i, r;
+
+ /* Reset dma */
+ WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
+ RREG32(SRBM_SOFT_RESET);
+ udelay(50);
+ WREG32(SRBM_SOFT_RESET, 0);
+
+ for (i = 0; i < 2; i++) {
+ if (i == 0) {
+ ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+ reg_offset = DMA0_REGISTER_OFFSET;
+ wb_offset = R600_WB_DMA_RPTR_OFFSET;
+ } else {
+ ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+ reg_offset = DMA1_REGISTER_OFFSET;
+ wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
+ }
+
+ WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
+ WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
+
+ /* Set ring buffer size in dwords */
+ rb_bufsz = drm_order(ring->ring_size / 4);
+ rb_cntl = rb_bufsz << 1;
+#ifdef __BIG_ENDIAN
+ rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
+#endif
+ WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32(DMA_RB_RPTR + reg_offset, 0);
+ WREG32(DMA_RB_WPTR + reg_offset, 0);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
+ upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
+ WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
+ ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
+
+ if (rdev->wb.enabled)
+ rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
+
+ WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
+
+ /* enable DMA IBs */
+ WREG32(DMA_IB_CNTL + reg_offset, DMA_IB_ENABLE | CMD_VMID_FORCE);
+
+ dma_cntl = RREG32(DMA_CNTL + reg_offset);
+ dma_cntl &= ~CTXEMPTY_INT_ENABLE;
+ WREG32(DMA_CNTL + reg_offset, dma_cntl);
+
+ ring->wptr = 0;
+ WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
+
+ ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;
+
+ WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);
+
+ ring->ready = true;
+
+ r = radeon_ring_test(rdev, ring->idx, ring);
+ if (r) {
+ ring->ready = false;
+ return r;
+ }
+ }
+
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+
+ return 0;
+}
+
+/**
+ * cayman_dma_fini - tear down the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engines and free the rings (cayman-SI).
+ */
+void cayman_dma_fini(struct radeon_device *rdev)
+{
+ cayman_dma_stop(rdev);
+ radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
+ radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
+}
+
+static void cayman_gpu_soft_reset_gfx(struct radeon_device *rdev)
{
- struct evergreen_mc_save save;
u32 grbm_reset = 0;
if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
- return 0;
+ return;
- dev_info(rdev->dev, "GPU softreset \n");
- dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS = 0x%08X\n",
RREG32(GRBM_STATUS));
- dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS_SE0 = 0x%08X\n",
RREG32(GRBM_STATUS_SE0));
- dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS_SE1 = 0x%08X\n",
RREG32(GRBM_STATUS_SE1));
- dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
+ dev_info(rdev->dev, " SRBM_STATUS = 0x%08X\n",
RREG32(SRBM_STATUS));
dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n",
RREG32(CP_STALLED_STAT1));
@@ -1143,19 +1329,7 @@ static int cayman_gpu_soft_reset(struct radeon_device *rdev)
RREG32(CP_BUSY_STAT));
dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n",
RREG32(CP_STAT));
- dev_info(rdev->dev, " VM_CONTEXT0_PROTECTION_FAULT_ADDR 0x%08X\n",
- RREG32(0x14F8));
- dev_info(rdev->dev, " VM_CONTEXT0_PROTECTION_FAULT_STATUS 0x%08X\n",
- RREG32(0x14D8));
- dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
- RREG32(0x14FC));
- dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
- RREG32(0x14DC));
- evergreen_mc_stop(rdev, &save);
- if (evergreen_mc_wait_for_idle(rdev)) {
- dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
- }
/* Disable CP parsing/prefetching */
WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT);
@@ -1180,16 +1354,14 @@ static int cayman_gpu_soft_reset(struct radeon_device *rdev)
udelay(50);
WREG32(GRBM_SOFT_RESET, 0);
(void)RREG32(GRBM_SOFT_RESET);
- /* Wait a little for things to settle down */
- udelay(50);
- dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS = 0x%08X\n",
RREG32(GRBM_STATUS));
- dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS_SE0 = 0x%08X\n",
RREG32(GRBM_STATUS_SE0));
- dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
+ dev_info(rdev->dev, " GRBM_STATUS_SE1 = 0x%08X\n",
RREG32(GRBM_STATUS_SE1));
- dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
+ dev_info(rdev->dev, " SRBM_STATUS = 0x%08X\n",
RREG32(SRBM_STATUS));
dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n",
RREG32(CP_STALLED_STAT1));
@@ -1199,13 +1371,113 @@ static int cayman_gpu_soft_reset(struct radeon_device *rdev)
RREG32(CP_BUSY_STAT));
dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n",
RREG32(CP_STAT));
+
+}
+
+static void cayman_gpu_soft_reset_dma(struct radeon_device *rdev)
+{
+ u32 tmp;
+
+ if (RREG32(DMA_STATUS_REG) & DMA_IDLE)
+ return;
+
+ dev_info(rdev->dev, " R_00D034_DMA_STATUS_REG = 0x%08X\n",
+ RREG32(DMA_STATUS_REG));
+
+ /* dma0 */
+ tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
+ tmp &= ~DMA_RB_ENABLE;
+ WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
+
+ /* dma1 */
+ tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
+ tmp &= ~DMA_RB_ENABLE;
+ WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
+
+ /* Reset dma */
+ WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
+ RREG32(SRBM_SOFT_RESET);
+ udelay(50);
+ WREG32(SRBM_SOFT_RESET, 0);
+
+ dev_info(rdev->dev, " R_00D034_DMA_STATUS_REG = 0x%08X\n",
+ RREG32(DMA_STATUS_REG));
+
+}
+
+static int cayman_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
+{
+ struct evergreen_mc_save save;
+
+ if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
+ reset_mask &= ~(RADEON_RESET_GFX | RADEON_RESET_COMPUTE);
+
+ if (RREG32(DMA_STATUS_REG) & DMA_IDLE)
+ reset_mask &= ~RADEON_RESET_DMA;
+
+ if (reset_mask == 0)
+ return 0;
+
+ dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
+
+ dev_info(rdev->dev, " VM_CONTEXT0_PROTECTION_FAULT_ADDR 0x%08X\n",
+ RREG32(0x14F8));
+ dev_info(rdev->dev, " VM_CONTEXT0_PROTECTION_FAULT_STATUS 0x%08X\n",
+ RREG32(0x14D8));
+ dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
+ RREG32(0x14FC));
+ dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
+ RREG32(0x14DC));
+
+ evergreen_mc_stop(rdev, &save);
+ if (evergreen_mc_wait_for_idle(rdev)) {
+ dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
+ }
+
+ if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE))
+ cayman_gpu_soft_reset_gfx(rdev);
+
+ if (reset_mask & RADEON_RESET_DMA)
+ cayman_gpu_soft_reset_dma(rdev);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+
evergreen_mc_resume(rdev, &save);
return 0;
}
int cayman_asic_reset(struct radeon_device *rdev)
{
- return cayman_gpu_soft_reset(rdev);
+ return cayman_gpu_soft_reset(rdev, (RADEON_RESET_GFX |
+ RADEON_RESET_COMPUTE |
+ RADEON_RESET_DMA));
+}
+
+/**
+ * cayman_dma_is_lockup - Check if the DMA engine is locked up
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Check if the async DMA engine is locked up (cayman-SI).
+ * Returns true if the engine appears to be locked up, false if not.
+ */
+bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+ u32 dma_status_reg;
+
+ if (ring->idx == R600_RING_TYPE_DMA_INDEX)
+ dma_status_reg = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
+ else
+ dma_status_reg = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
+ if (dma_status_reg & DMA_IDLE) {
+ radeon_ring_lockup_update(ring);
+ return false;
+ }
+ /* force ring activities */
+ radeon_ring_force_activity(rdev, ring);
+ return radeon_ring_test_lockup(rdev, ring);
}
static int cayman_startup(struct radeon_device *rdev)
@@ -1289,6 +1561,18 @@ static int cayman_startup(struct radeon_device *rdev)
return r;
}
+ r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+ if (r) {
+ dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+ return r;
+ }
+
+ r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
+ if (r) {
+ dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+ return r;
+ }
+
/* Enable IRQ */
r = r600_irq_init(rdev);
if (r) {
@@ -1303,6 +1587,23 @@ static int cayman_startup(struct radeon_device *rdev)
0, 0xfffff, RADEON_CP_PACKET2);
if (r)
return r;
+
+ ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+ r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+ DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
+ DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
+ 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+ if (r)
+ return r;
+
+ ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+ r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
+ DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
+ DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
+ 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+ if (r)
+ return r;
+
r = cayman_cp_load_microcode(rdev);
if (r)
return r;
@@ -1310,6 +1611,10 @@ static int cayman_startup(struct radeon_device *rdev)
if (r)
return r;
+ r = cayman_dma_resume(rdev);
+ if (r)
+ return r;
+
r = radeon_ib_pool_init(rdev);
if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -1354,7 +1659,7 @@ int cayman_suspend(struct radeon_device *rdev)
{
r600_audio_fini(rdev);
cayman_cp_enable(rdev, false);
- rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+ cayman_dma_stop(rdev);
evergreen_irq_suspend(rdev);
radeon_wb_disable(rdev);
cayman_pcie_gart_disable(rdev);
@@ -1421,6 +1726,14 @@ int cayman_init(struct radeon_device *rdev)
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 1024 * 1024);
+ ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+ ring->ring_obj = NULL;
+ r600_ring_init(rdev, ring, 64 * 1024);
+
+ ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+ ring->ring_obj = NULL;
+ r600_ring_init(rdev, ring, 64 * 1024);
+
rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024);
@@ -1433,6 +1746,7 @@ int cayman_init(struct radeon_device *rdev)
if (r) {
dev_err(rdev->dev, "disabling GPU acceleration\n");
cayman_cp_fini(rdev);
+ cayman_dma_fini(rdev);
r600_irq_fini(rdev);
if (rdev->flags & RADEON_IS_IGP)
si_rlc_fini(rdev);
@@ -1463,6 +1777,7 @@ void cayman_fini(struct radeon_device *rdev)
{
r600_blit_fini(rdev);
cayman_cp_fini(rdev);
+ cayman_dma_fini(rdev);
r600_irq_fini(rdev);
if (rdev->flags & RADEON_IS_IGP)
si_rlc_fini(rdev);
@@ -1538,30 +1853,57 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
{
struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
-
- while (count) {
- unsigned ndw = 1 + count * 2;
- if (ndw > 0x3FFF)
- ndw = 0x3FFF;
-
- radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw));
- radeon_ring_write(ring, pe);
- radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
- for (; ndw > 1; ndw -= 2, --count, pe += 8) {
- uint64_t value = 0;
- if (flags & RADEON_VM_PAGE_SYSTEM) {
- value = radeon_vm_map_gart(rdev, addr);
- value &= 0xFFFFFFFFFFFFF000ULL;
+ uint64_t value;
+ unsigned ndw;
+
+ if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
+ while (count) {
+ ndw = 1 + count * 2;
+ if (ndw > 0x3FFF)
+ ndw = 0x3FFF;
+
+ radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw));
+ radeon_ring_write(ring, pe);
+ radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
+ for (; ndw > 1; ndw -= 2, --count, pe += 8) {
+ if (flags & RADEON_VM_PAGE_SYSTEM) {
+ value = radeon_vm_map_gart(rdev, addr);
+ value &= 0xFFFFFFFFFFFFF000ULL;
+ } else if (flags & RADEON_VM_PAGE_VALID) {
+ value = addr;
+ } else {
+ value = 0;
+ }
addr += incr;
-
- } else if (flags & RADEON_VM_PAGE_VALID) {
- value = addr;
+ value |= r600_flags;
+ radeon_ring_write(ring, value);
+ radeon_ring_write(ring, upper_32_bits(value));
+ }
+ }
+ } else {
+ while (count) {
+ ndw = count * 2;
+ if (ndw > 0xFFFFE)
+ ndw = 0xFFFFE;
+
+ /* for non-physically contiguous pages (system) */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw));
+ radeon_ring_write(ring, pe);
+ radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
+ for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+ if (flags & RADEON_VM_PAGE_SYSTEM) {
+ value = radeon_vm_map_gart(rdev, addr);
+ value &= 0xFFFFFFFFFFFFF000ULL;
+ } else if (flags & RADEON_VM_PAGE_VALID) {
+ value = addr;
+ } else {
+ value = 0;
+ }
addr += incr;
+ value |= r600_flags;
+ radeon_ring_write(ring, value);
+ radeon_ring_write(ring, upper_32_bits(value));
}
-
- value |= r600_flags;
- radeon_ring_write(ring, value);
- radeon_ring_write(ring, upper_32_bits(value));
}
}
}
@@ -1596,3 +1938,26 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
radeon_ring_write(ring, 0x0);
}
+
+void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+{
+ struct radeon_ring *ring = &rdev->ring[ridx];
+
+ if (vm == NULL)
+ return;
+
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+ radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
+ radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+
+ /* flush hdp cache */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+ radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
+ radeon_ring_write(ring, 1);
+
+ /* bits 0-7 are the VM contexts0-7 */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+ radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
+ radeon_ring_write(ring, 1 << vm->id);
+}
+
diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
index cbef6815907a..48e5022ee921 100644
--- a/drivers/gpu/drm/radeon/nid.h
+++ b/drivers/gpu/drm/radeon/nid.h
@@ -50,6 +50,24 @@
#define VMID(x) (((x) & 0x7) << 0)
#define SRBM_STATUS 0x0E50
+#define SRBM_SOFT_RESET 0x0E60
+#define SOFT_RESET_BIF (1 << 1)
+#define SOFT_RESET_CG (1 << 2)
+#define SOFT_RESET_DC (1 << 5)
+#define SOFT_RESET_DMA1 (1 << 6)
+#define SOFT_RESET_GRBM (1 << 8)
+#define SOFT_RESET_HDP (1 << 9)
+#define SOFT_RESET_IH (1 << 10)
+#define SOFT_RESET_MC (1 << 11)
+#define SOFT_RESET_RLC (1 << 13)
+#define SOFT_RESET_ROM (1 << 14)
+#define SOFT_RESET_SEM (1 << 15)
+#define SOFT_RESET_VMC (1 << 17)
+#define SOFT_RESET_DMA (1 << 20)
+#define SOFT_RESET_TST (1 << 21)
+#define SOFT_RESET_REGBB (1 << 22)
+#define SOFT_RESET_ORB (1 << 23)
+
#define VM_CONTEXT0_REQUEST_RESPONSE 0x1470
#define REQUEST_TYPE(x) (((x) & 0xf) << 0)
#define RESPONSE_TYPE_MASK 0x000000F0
@@ -80,7 +98,18 @@
#define VM_CONTEXT0_CNTL 0x1410
#define ENABLE_CONTEXT (1 << 0)
#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1)
+#define RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 3)
#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4)
+#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 6)
+#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 7)
+#define PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 9)
+#define PDE0_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 10)
+#define VALID_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 12)
+#define VALID_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 13)
+#define READ_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 15)
+#define READ_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 16)
+#define WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 18)
+#define WRITE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 19)
#define VM_CONTEXT1_CNTL 0x1414
#define VM_CONTEXT0_CNTL2 0x1430
#define VM_CONTEXT1_CNTL2 0x1434
@@ -588,5 +617,61 @@
#define PACKET3_SET_APPEND_CNT 0x75
#define PACKET3_ME_WRITE 0x7A
-#endif
+/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */
+#define DMA0_REGISTER_OFFSET 0x0 /* not a register */
+#define DMA1_REGISTER_OFFSET 0x800 /* not a register */
+
+#define DMA_RB_CNTL 0xd000
+# define DMA_RB_ENABLE (1 << 0)
+# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
+# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
+# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
+# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
+# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
+#define DMA_RB_BASE 0xd004
+#define DMA_RB_RPTR 0xd008
+#define DMA_RB_WPTR 0xd00c
+
+#define DMA_RB_RPTR_ADDR_HI 0xd01c
+#define DMA_RB_RPTR_ADDR_LO 0xd020
+
+#define DMA_IB_CNTL 0xd024
+# define DMA_IB_ENABLE (1 << 0)
+# define DMA_IB_SWAP_ENABLE (1 << 4)
+# define CMD_VMID_FORCE (1 << 31)
+#define DMA_IB_RPTR 0xd028
+#define DMA_CNTL 0xd02c
+# define TRAP_ENABLE (1 << 0)
+# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
+# define SEM_WAIT_INT_ENABLE (1 << 2)
+# define DATA_SWAP_ENABLE (1 << 3)
+# define FENCE_SWAP_ENABLE (1 << 4)
+# define CTXEMPTY_INT_ENABLE (1 << 28)
+#define DMA_STATUS_REG 0xd034
+# define DMA_IDLE (1 << 0)
+#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0xd044
+#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0xd048
+#define DMA_TILING_CONFIG 0xd0b8
+#define DMA_MODE 0xd0bc
+
+#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
+ (((t) & 0x1) << 23) | \
+ (((s) & 0x1) << 22) | \
+ (((n) & 0xFFFFF) << 0))
+
+#define DMA_IB_PACKET(cmd, vmid, n) ((((cmd) & 0xF) << 28) | \
+ (((vmid) & 0xF) << 20) | \
+ (((n) & 0xFFFFF) << 0))
+
+/* async DMA Packet types */
+#define DMA_PACKET_WRITE 0x2
+#define DMA_PACKET_COPY 0x3
+#define DMA_PACKET_INDIRECT_BUFFER 0x4
+#define DMA_PACKET_SEMAPHORE 0x5
+#define DMA_PACKET_FENCE 0x6
+#define DMA_PACKET_TRAP 0x7
+#define DMA_PACKET_SRBM_WRITE 0x9
+#define DMA_PACKET_CONSTANT_FILL 0xd
+#define DMA_PACKET_NOP 0xf
+#endif
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 376884f1bcd2..8ff7cac222dc 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -4135,23 +4135,36 @@ int r100_init(struct radeon_device *rdev)
return 0;
}
-uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
+ bool always_indirect)
{
- if (reg < rdev->rmmio_size)
+ if (reg < rdev->rmmio_size && !always_indirect)
return readl(((void __iomem *)rdev->rmmio) + reg);
else {
+ unsigned long flags;
+ uint32_t ret;
+
+ spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
- return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+ ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+ spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
+
+ return ret;
}
}
-void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
+ bool always_indirect)
{
- if (reg < rdev->rmmio_size)
+ if (reg < rdev->rmmio_size && !always_indirect)
writel(v, ((void __iomem *)rdev->rmmio) + reg);
else {
+ unsigned long flags;
+
+ spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+ spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
}
}
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index cda280d157da..3cb9d6089373 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1258,9 +1258,8 @@ void r600_vram_scratch_fini(struct radeon_device *rdev)
* reset, it's up to the caller to determine if the GPU needs one. We
* might add an helper function to check that.
*/
-static int r600_gpu_soft_reset(struct radeon_device *rdev)
+static void r600_gpu_soft_reset_gfx(struct radeon_device *rdev)
{
- struct rv515_mc_save save;
u32 grbm_busy_mask = S_008010_VC_BUSY(1) | S_008010_VGT_BUSY_NO_DMA(1) |
S_008010_VGT_BUSY(1) | S_008010_TA03_BUSY(1) |
S_008010_TC_BUSY(1) | S_008010_SX_BUSY(1) |
@@ -1280,14 +1279,13 @@ static int r600_gpu_soft_reset(struct radeon_device *rdev)
u32 tmp;
if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
- return 0;
+ return;
- dev_info(rdev->dev, "GPU softreset \n");
- dev_info(rdev->dev, " R_008010_GRBM_STATUS=0x%08X\n",
+ dev_info(rdev->dev, " R_008010_GRBM_STATUS = 0x%08X\n",
RREG32(R_008010_GRBM_STATUS));
- dev_info(rdev->dev, " R_008014_GRBM_STATUS2=0x%08X\n",
+ dev_info(rdev->dev, " R_008014_GRBM_STATUS2 = 0x%08X\n",
RREG32(R_008014_GRBM_STATUS2));
- dev_info(rdev->dev, " R_000E50_SRBM_STATUS=0x%08X\n",
+ dev_info(rdev->dev, " R_000E50_SRBM_STATUS = 0x%08X\n",
RREG32(R_000E50_SRBM_STATUS));
dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n",
RREG32(CP_STALLED_STAT1));
@@ -1297,12 +1295,10 @@ static int r600_gpu_soft_reset(struct radeon_device *rdev)
RREG32(CP_BUSY_STAT));
dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n",
RREG32(CP_STAT));
- rv515_mc_stop(rdev, &save);
- if (r600_mc_wait_for_idle(rdev)) {
- dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
- }
+
/* Disable CP parsing/prefetching */
WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1));
+
/* Check if any of the rendering block is busy and reset it */
if ((RREG32(R_008010_GRBM_STATUS) & grbm_busy_mask) ||
(RREG32(R_008014_GRBM_STATUS2) & grbm2_busy_mask)) {
@@ -1332,13 +1328,12 @@ static int r600_gpu_soft_reset(struct radeon_device *rdev)
RREG32(R_008020_GRBM_SOFT_RESET);
mdelay(15);
WREG32(R_008020_GRBM_SOFT_RESET, 0);
- /* Wait a little for things to settle down */
- mdelay(1);
- dev_info(rdev->dev, " R_008010_GRBM_STATUS=0x%08X\n",
+
+ dev_info(rdev->dev, " R_008010_GRBM_STATUS = 0x%08X\n",
RREG32(R_008010_GRBM_STATUS));
- dev_info(rdev->dev, " R_008014_GRBM_STATUS2=0x%08X\n",
+ dev_info(rdev->dev, " R_008014_GRBM_STATUS2 = 0x%08X\n",
RREG32(R_008014_GRBM_STATUS2));
- dev_info(rdev->dev, " R_000E50_SRBM_STATUS=0x%08X\n",
+ dev_info(rdev->dev, " R_000E50_SRBM_STATUS = 0x%08X\n",
RREG32(R_000E50_SRBM_STATUS));
dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n",
RREG32(CP_STALLED_STAT1));
@@ -1348,6 +1343,66 @@ static int r600_gpu_soft_reset(struct radeon_device *rdev)
RREG32(CP_BUSY_STAT));
dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n",
RREG32(CP_STAT));
+
+}
+
+static void r600_gpu_soft_reset_dma(struct radeon_device *rdev)
+{
+ u32 tmp;
+
+ if (RREG32(DMA_STATUS_REG) & DMA_IDLE)
+ return;
+
+ dev_info(rdev->dev, " R_00D034_DMA_STATUS_REG = 0x%08X\n",
+ RREG32(DMA_STATUS_REG));
+
+ /* Disable DMA */
+ tmp = RREG32(DMA_RB_CNTL);
+ tmp &= ~DMA_RB_ENABLE;
+ WREG32(DMA_RB_CNTL, tmp);
+
+ /* Reset dma */
+ if (rdev->family >= CHIP_RV770)
+ WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA);
+ else
+ WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
+ RREG32(SRBM_SOFT_RESET);
+ udelay(50);
+ WREG32(SRBM_SOFT_RESET, 0);
+
+ dev_info(rdev->dev, " R_00D034_DMA_STATUS_REG = 0x%08X\n",
+ RREG32(DMA_STATUS_REG));
+}
+
+static int r600_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
+{
+ struct rv515_mc_save save;
+
+ if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
+ reset_mask &= ~(RADEON_RESET_GFX | RADEON_RESET_COMPUTE);
+
+ if (RREG32(DMA_STATUS_REG) & DMA_IDLE)
+ reset_mask &= ~RADEON_RESET_DMA;
+
+ if (reset_mask == 0)
+ return 0;
+
+ dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
+
+ rv515_mc_stop(rdev, &save);
+ if (r600_mc_wait_for_idle(rdev)) {
+ dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
+ }
+
+ if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE))
+ r600_gpu_soft_reset_gfx(rdev);
+
+ if (reset_mask & RADEON_RESET_DMA)
+ r600_gpu_soft_reset_dma(rdev);
+
+ /* Wait a little for things to settle down */
+ mdelay(1);
+
rv515_mc_resume(rdev, &save);
return 0;
}
@@ -1370,9 +1425,34 @@ bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
return radeon_ring_test_lockup(rdev, ring);
}
+/**
+ * r600_dma_is_lockup - Check if the DMA engine is locked up
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Check if the async DMA engine is locked up (r6xx-evergreen).
+ * Returns true if the engine appears to be locked up, false if not.
+ */
+bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+ u32 dma_status_reg;
+
+ dma_status_reg = RREG32(DMA_STATUS_REG);
+ if (dma_status_reg & DMA_IDLE) {
+ radeon_ring_lockup_update(ring);
+ return false;
+ }
+ /* force ring activities */
+ radeon_ring_force_activity(rdev, ring);
+ return radeon_ring_test_lockup(rdev, ring);
+}
+
int r600_asic_reset(struct radeon_device *rdev)
{
- return r600_gpu_soft_reset(rdev);
+ return r600_gpu_soft_reset(rdev, (RADEON_RESET_GFX |
+ RADEON_RESET_COMPUTE |
+ RADEON_RESET_DMA));
}
u32 r6xx_remap_render_backend(struct radeon_device *rdev,
@@ -1424,13 +1504,7 @@ u32 r6xx_remap_render_backend(struct radeon_device *rdev,
int r600_count_pipe_bits(uint32_t val)
{
- int i, ret = 0;
-
- for (i = 0; i < 32; i++) {
- ret += val & 1;
- val >>= 1;
- }
- return ret;
+ return hweight32(val);
}
static void r600_gpu_init(struct radeon_device *rdev)
@@ -1594,6 +1668,7 @@ static void r600_gpu_init(struct radeon_device *rdev)
WREG32(GB_TILING_CONFIG, tiling_config);
WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff);
WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff);
+ WREG32(DMA_TILING_CONFIG, tiling_config & 0xffff);
tmp = R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8);
WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK);
@@ -1871,6 +1946,7 @@ void r600_cp_stop(struct radeon_device *rdev)
radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1));
WREG32(SCRATCH_UMSK, 0);
+ rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
}
int r600_init_microcode(struct radeon_device *rdev)
@@ -2196,6 +2272,128 @@ void r600_cp_fini(struct radeon_device *rdev)
radeon_scratch_free(rdev, ring->rptr_save_reg);
}
+/*
+ * DMA
+ * Starting with R600, the GPU has an asynchronous
+ * DMA engine. The programming model is very similar
+ * to the 3D engine (ring buffer, IBs, etc.), but the
+ * DMA controller has it's own packet format that is
+ * different form the PM4 format used by the 3D engine.
+ * It supports copying data, writing embedded data,
+ * solid fills, and a number of other things. It also
+ * has support for tiling/detiling of buffers.
+ */
+/**
+ * r600_dma_stop - stop the async dma engine
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engine (r6xx-evergreen).
+ */
+void r600_dma_stop(struct radeon_device *rdev)
+{
+ u32 rb_cntl = RREG32(DMA_RB_CNTL);
+
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+
+ rb_cntl &= ~DMA_RB_ENABLE;
+ WREG32(DMA_RB_CNTL, rb_cntl);
+
+ rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
+}
+
+/**
+ * r600_dma_resume - setup and start the async dma engine
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Set up the DMA ring buffer and enable it. (r6xx-evergreen).
+ * Returns 0 for success, error for failure.
+ */
+int r600_dma_resume(struct radeon_device *rdev)
+{
+ struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+ u32 rb_cntl, dma_cntl;
+ u32 rb_bufsz;
+ int r;
+
+ /* Reset dma */
+ if (rdev->family >= CHIP_RV770)
+ WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA);
+ else
+ WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
+ RREG32(SRBM_SOFT_RESET);
+ udelay(50);
+ WREG32(SRBM_SOFT_RESET, 0);
+
+ WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0);
+ WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
+
+ /* Set ring buffer size in dwords */
+ rb_bufsz = drm_order(ring->ring_size / 4);
+ rb_cntl = rb_bufsz << 1;
+#ifdef __BIG_ENDIAN
+ rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
+#endif
+ WREG32(DMA_RB_CNTL, rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32(DMA_RB_RPTR, 0);
+ WREG32(DMA_RB_WPTR, 0);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32(DMA_RB_RPTR_ADDR_HI,
+ upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF);
+ WREG32(DMA_RB_RPTR_ADDR_LO,
+ ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC));
+
+ if (rdev->wb.enabled)
+ rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
+
+ WREG32(DMA_RB_BASE, ring->gpu_addr >> 8);
+
+ /* enable DMA IBs */
+ WREG32(DMA_IB_CNTL, DMA_IB_ENABLE);
+
+ dma_cntl = RREG32(DMA_CNTL);
+ dma_cntl &= ~CTXEMPTY_INT_ENABLE;
+ WREG32(DMA_CNTL, dma_cntl);
+
+ if (rdev->family >= CHIP_RV770)
+ WREG32(DMA_MODE, 1);
+
+ ring->wptr = 0;
+ WREG32(DMA_RB_WPTR, ring->wptr << 2);
+
+ ring->rptr = RREG32(DMA_RB_RPTR) >> 2;
+
+ WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE);
+
+ ring->ready = true;
+
+ r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring);
+ if (r) {
+ ring->ready = false;
+ return r;
+ }
+
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+
+ return 0;
+}
+
+/**
+ * r600_dma_fini - tear down the async dma engine
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engine and free the ring (r6xx-evergreen).
+ */
+void r600_dma_fini(struct radeon_device *rdev)
+{
+ r600_dma_stop(rdev);
+ radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
+}
/*
* GPU scratch registers helpers function.
@@ -2252,6 +2450,64 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
return r;
}
+/**
+ * r600_dma_ring_test - simple async dma engine test
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Test the DMA engine by writing using it to write an
+ * value to memory. (r6xx-SI).
+ * Returns 0 for success, error for failure.
+ */
+int r600_dma_ring_test(struct radeon_device *rdev,
+ struct radeon_ring *ring)
+{
+ unsigned i;
+ int r;
+ void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+ u32 tmp;
+
+ if (!ptr) {
+ DRM_ERROR("invalid vram scratch pointer\n");
+ return -EINVAL;
+ }
+
+ tmp = 0xCAFEDEAD;
+ writel(tmp, ptr);
+
+ r = radeon_ring_lock(rdev, ring, 4);
+ if (r) {
+ DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
+ return r;
+ }
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+ radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
+ radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
+ radeon_ring_write(ring, 0xDEADBEEF);
+ radeon_ring_unlock_commit(rdev, ring);
+
+ for (i = 0; i < rdev->usec_timeout; i++) {
+ tmp = readl(ptr);
+ if (tmp == 0xDEADBEEF)
+ break;
+ DRM_UDELAY(1);
+ }
+
+ if (i < rdev->usec_timeout) {
+ DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
+ } else {
+ DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
+ ring->idx, tmp);
+ r = -EINVAL;
+ }
+ return r;
+}
+
+/*
+ * CP fences/semaphores
+ */
+
void r600_fence_ring_emit(struct radeon_device *rdev,
struct radeon_fence *fence)
{
@@ -2315,6 +2571,59 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev,
radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel);
}
+/*
+ * DMA fences/semaphores
+ */
+
+/**
+ * r600_dma_fence_ring_emit - emit a fence on the DMA ring
+ *
+ * @rdev: radeon_device pointer
+ * @fence: radeon fence object
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed (r6xx-r7xx).
+ */
+void r600_dma_fence_ring_emit(struct radeon_device *rdev,
+ struct radeon_fence *fence)
+{
+ struct radeon_ring *ring = &rdev->ring[fence->ring];
+ u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
+
+ /* write the fence */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
+ radeon_ring_write(ring, addr & 0xfffffffc);
+ radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
+ radeon_ring_write(ring, lower_32_bits(fence->seq));
+ /* generate an interrupt */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
+}
+
+/**
+ * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ * @semaphore: radeon semaphore object
+ * @emit_wait: wait or signal semaphore
+ *
+ * Add a DMA semaphore packet to the ring wait on or signal
+ * other rings (r6xx-SI).
+ */
+void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
+ struct radeon_ring *ring,
+ struct radeon_semaphore *semaphore,
+ bool emit_wait)
+{
+ u64 addr = semaphore->gpu_addr;
+ u32 s = emit_wait ? 0 : 1;
+
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0));
+ radeon_ring_write(ring, addr & 0xfffffffc);
+ radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
+}
+
int r600_copy_blit(struct radeon_device *rdev,
uint64_t src_offset,
uint64_t dst_offset,
@@ -2334,6 +2643,80 @@ int r600_copy_blit(struct radeon_device *rdev,
return 0;
}
+/**
+ * r600_copy_dma - copy pages using the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU paging using the DMA engine (r6xx).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+int r600_copy_dma(struct radeon_device *rdev,
+ uint64_t src_offset, uint64_t dst_offset,
+ unsigned num_gpu_pages,
+ struct radeon_fence **fence)
+{
+ struct radeon_semaphore *sem = NULL;
+ int ring_index = rdev->asic->copy.dma_ring_index;
+ struct radeon_ring *ring = &rdev->ring[ring_index];
+ u32 size_in_dw, cur_size_in_dw;
+ int i, num_loops;
+ int r = 0;
+
+ r = radeon_semaphore_create(rdev, &sem);
+ if (r) {
+ DRM_ERROR("radeon: moving bo (%d).\n", r);
+ return r;
+ }
+
+ size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
+ num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
+ r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
+ if (r) {
+ DRM_ERROR("radeon: moving bo (%d).\n", r);
+ radeon_semaphore_free(rdev, &sem, NULL);
+ return r;
+ }
+
+ if (radeon_fence_need_sync(*fence, ring->idx)) {
+ radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+ ring->idx);
+ radeon_fence_note_sync(*fence, ring->idx);
+ } else {
+ radeon_semaphore_free(rdev, &sem, NULL);
+ }
+
+ for (i = 0; i < num_loops; i++) {
+ cur_size_in_dw = size_in_dw;
+ if (cur_size_in_dw > 0xFFFE)
+ cur_size_in_dw = 0xFFFE;
+ size_in_dw -= cur_size_in_dw;
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
+ radeon_ring_write(ring, dst_offset & 0xfffffffc);
+ radeon_ring_write(ring, src_offset & 0xfffffffc);
+ radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) |
+ (upper_32_bits(src_offset) & 0xff)));
+ src_offset += cur_size_in_dw * 4;
+ dst_offset += cur_size_in_dw * 4;
+ }
+
+ r = radeon_fence_emit(rdev, fence, ring->idx);
+ if (r) {
+ radeon_ring_unlock_undo(rdev, ring);
+ return r;
+ }
+
+ radeon_ring_unlock_commit(rdev, ring);
+ radeon_semaphore_free(rdev, &sem, *fence);
+
+ return r;
+}
+
int r600_set_surface_reg(struct radeon_device *rdev, int reg,
uint32_t tiling_flags, uint32_t pitch,
uint32_t offset, uint32_t obj_size)
@@ -2349,7 +2732,7 @@ void r600_clear_surface_reg(struct radeon_device *rdev, int reg)
static int r600_startup(struct radeon_device *rdev)
{
- struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+ struct radeon_ring *ring;
int r;
/* enable pcie gen2 link */
@@ -2394,6 +2777,12 @@ static int r600_startup(struct radeon_device *rdev)
return r;
}
+ r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+ if (r) {
+ dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+ return r;
+ }
+
/* Enable IRQ */
r = r600_irq_init(rdev);
if (r) {
@@ -2403,12 +2792,20 @@ static int r600_startup(struct radeon_device *rdev)
}
r600_irq_set(rdev);
+ ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
R600_CP_RB_RPTR, R600_CP_RB_WPTR,
0, 0xfffff, RADEON_CP_PACKET2);
+ if (r)
+ return r;
+ ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+ r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+ DMA_RB_RPTR, DMA_RB_WPTR,
+ 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
if (r)
return r;
+
r = r600_cp_load_microcode(rdev);
if (r)
return r;
@@ -2416,6 +2813,10 @@ static int r600_startup(struct radeon_device *rdev)
if (r)
return r;
+ r = r600_dma_resume(rdev);
+ if (r)
+ return r;
+
r = radeon_ib_pool_init(rdev);
if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -2471,7 +2872,7 @@ int r600_suspend(struct radeon_device *rdev)
{
r600_audio_fini(rdev);
r600_cp_stop(rdev);
- rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+ r600_dma_stop(rdev);
r600_irq_suspend(rdev);
radeon_wb_disable(rdev);
r600_pcie_gart_disable(rdev);
@@ -2544,6 +2945,9 @@ int r600_init(struct radeon_device *rdev)
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
+ rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
+ r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
+
rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024);
@@ -2556,6 +2960,7 @@ int r600_init(struct radeon_device *rdev)
if (r) {
dev_err(rdev->dev, "disabling GPU acceleration\n");
r600_cp_fini(rdev);
+ r600_dma_fini(rdev);
r600_irq_fini(rdev);
radeon_wb_fini(rdev);
radeon_ib_pool_fini(rdev);
@@ -2572,6 +2977,7 @@ void r600_fini(struct radeon_device *rdev)
r600_audio_fini(rdev);
r600_blit_fini(rdev);
r600_cp_fini(rdev);
+ r600_dma_fini(rdev);
r600_irq_fini(rdev);
radeon_wb_fini(rdev);
radeon_ib_pool_fini(rdev);
@@ -2674,6 +3080,104 @@ free_scratch:
return r;
}
+/**
+ * r600_dma_ib_test - test an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Test a simple IB in the DMA ring (r6xx-SI).
+ * Returns 0 on success, error on failure.
+ */
+int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+ struct radeon_ib ib;
+ unsigned i;
+ int r;
+ void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+ u32 tmp = 0;
+
+ if (!ptr) {
+ DRM_ERROR("invalid vram scratch pointer\n");
+ return -EINVAL;
+ }
+
+ tmp = 0xCAFEDEAD;
+ writel(tmp, ptr);
+
+ r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
+ if (r) {
+ DRM_ERROR("radeon: failed to get ib (%d).\n", r);
+ return r;
+ }
+
+ ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);
+ ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
+ ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff;
+ ib.ptr[3] = 0xDEADBEEF;
+ ib.length_dw = 4;
+
+ r = radeon_ib_schedule(rdev, &ib, NULL);
+ if (r) {
+ radeon_ib_free(rdev, &ib);
+ DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+ return r;
+ }
+ r = radeon_fence_wait(ib.fence, false);
+ if (r) {
+ DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+ return r;
+ }
+ for (i = 0; i < rdev->usec_timeout; i++) {
+ tmp = readl(ptr);
+ if (tmp == 0xDEADBEEF)
+ break;
+ DRM_UDELAY(1);
+ }
+ if (i < rdev->usec_timeout) {
+ DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
+ } else {
+ DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
+ r = -EINVAL;
+ }
+ radeon_ib_free(rdev, &ib);
+ return r;
+}
+
+/**
+ * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring (r6xx-r7xx).
+ */
+void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+ struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+ if (rdev->wb.enabled) {
+ u32 next_rptr = ring->wptr + 4;
+ while ((next_rptr & 7) != 5)
+ next_rptr++;
+ next_rptr += 3;
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+ radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+ radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+ radeon_ring_write(ring, next_rptr);
+ }
+
+ /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+ * Pad as necessary with NOPs.
+ */
+ while ((ring->wptr & 7) != 5)
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
+ radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+ radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+
+}
+
/*
* Interrupts
*
@@ -2865,6 +3369,8 @@ static void r600_disable_interrupt_state(struct radeon_device *rdev)
u32 tmp;
WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+ tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
+ WREG32(DMA_CNTL, tmp);
WREG32(GRBM_INT_CNTL, 0);
WREG32(DxMODE_INT_MASK, 0);
WREG32(D1GRPH_INTERRUPT_CONTROL, 0);
@@ -3006,6 +3512,7 @@ int r600_irq_set(struct radeon_device *rdev)
u32 grbm_int_cntl = 0;
u32 hdmi0, hdmi1;
u32 d1grph = 0, d2grph = 0;
+ u32 dma_cntl;
if (!rdev->irq.installed) {
WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -3040,12 +3547,19 @@ int r600_irq_set(struct radeon_device *rdev)
hdmi0 = RREG32(HDMI0_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK;
hdmi1 = RREG32(HDMI1_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK;
}
+ dma_cntl = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
DRM_DEBUG("r600_irq_set: sw int\n");
cp_int_cntl |= RB_INT_ENABLE;
cp_int_cntl |= TIME_STAMP_INT_ENABLE;
}
+
+ if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
+ DRM_DEBUG("r600_irq_set: sw int dma\n");
+ dma_cntl |= TRAP_ENABLE;
+ }
+
if (rdev->irq.crtc_vblank_int[0] ||
atomic_read(&rdev->irq.pflip[0])) {
DRM_DEBUG("r600_irq_set: vblank 0\n");
@@ -3090,6 +3604,7 @@ int r600_irq_set(struct radeon_device *rdev)
}
WREG32(CP_INT_CNTL, cp_int_cntl);
+ WREG32(DMA_CNTL, dma_cntl);
WREG32(DxMODE_INT_MASK, mode_int);
WREG32(D1GRPH_INTERRUPT_CONTROL, d1grph);
WREG32(D2GRPH_INTERRUPT_CONTROL, d2grph);
@@ -3469,6 +3984,10 @@ restart_ih:
DRM_DEBUG("IH: CP EOP\n");
radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
break;
+ case 224: /* DMA trap event */
+ DRM_DEBUG("IH: DMA trap\n");
+ radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
+ break;
case 233: /* GUI IDLE */
DRM_DEBUG("IH: GUI idle\n");
break;
diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c
index 2514123d2d00..be85f75aedda 100644
--- a/drivers/gpu/drm/radeon/r600_cp.c
+++ b/drivers/gpu/drm/radeon/r600_cp.c
@@ -721,12 +721,7 @@ static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
static int r600_count_pipe_bits(uint32_t val)
{
- int i, ret = 0;
- for (i = 0; i < 32; i++) {
- ret += val & 1;
- val >>= 1;
- }
- return ret;
+ return hweight32(val);
}
static void r600_gfx_init(struct drm_device *dev,
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 211c40252fe0..69ec24ab8d63 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -657,87 +657,30 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p)
/* nby is npipes htiles aligned == npipes * 8 pixel aligned */
nby = round_up(nby, track->npipes * 8);
} else {
- /* htile widht & nby (8 or 4) make 2 bits number */
- tmp = track->htile_surface & 3;
+ /* always assume 8x8 htile */
/* align is htile align * 8, htile align vary according to
* number of pipe and tile width and nby
*/
switch (track->npipes) {
case 8:
- switch (tmp) {
- case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
- nbx = round_up(nbx, 64 * 8);
- nby = round_up(nby, 64 * 8);
- break;
- case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
- case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 64 * 8);
- nby = round_up(nby, 32 * 8);
- break;
- case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 32 * 8);
- nby = round_up(nby, 32 * 8);
- break;
- default:
- return -EINVAL;
- }
+ /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+ nbx = round_up(nbx, 64 * 8);
+ nby = round_up(nby, 64 * 8);
break;
case 4:
- switch (tmp) {
- case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
- nbx = round_up(nbx, 64 * 8);
- nby = round_up(nby, 32 * 8);
- break;
- case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
- case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 32 * 8);
- nby = round_up(nby, 32 * 8);
- break;
- case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 32 * 8);
- nby = round_up(nby, 16 * 8);
- break;
- default:
- return -EINVAL;
- }
+ /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+ nbx = round_up(nbx, 64 * 8);
+ nby = round_up(nby, 32 * 8);
break;
case 2:
- switch (tmp) {
- case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
- nbx = round_up(nbx, 32 * 8);
- nby = round_up(nby, 32 * 8);
- break;
- case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
- case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 32 * 8);
- nby = round_up(nby, 16 * 8);
- break;
- case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 16 * 8);
- nby = round_up(nby, 16 * 8);
- break;
- default:
- return -EINVAL;
- }
+ /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+ nbx = round_up(nbx, 32 * 8);
+ nby = round_up(nby, 32 * 8);
break;
case 1:
- switch (tmp) {
- case 3: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
- nbx = round_up(nbx, 32 * 8);
- nby = round_up(nby, 16 * 8);
- break;
- case 2: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
- case 1: /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 16 * 8);
- nby = round_up(nby, 16 * 8);
- break;
- case 0: /* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
- nbx = round_up(nbx, 16 * 8);
- nby = round_up(nby, 8 * 8);
- break;
- default:
- return -EINVAL;
- }
+ /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+ nbx = round_up(nbx, 32 * 8);
+ nby = round_up(nby, 16 * 8);
break;
default:
dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
@@ -746,9 +689,10 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p)
}
}
/* compute number of htile */
- nbx = G_028D24_HTILE_WIDTH(track->htile_surface) ? nbx / 8 : nbx / 4;
- nby = G_028D24_HTILE_HEIGHT(track->htile_surface) ? nby / 8 : nby / 4;
- size = nbx * nby * 4;
+ nbx = nbx >> 3;
+ nby = nby >> 3;
+ /* size must be aligned on npipes * 2K boundary */
+ size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
size += track->htile_offset;
if (size > radeon_bo_size(track->htile_bo)) {
@@ -1492,6 +1436,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
break;
case DB_HTILE_SURFACE:
track->htile_surface = radeon_get_ib_value(p, idx);
+ /* force 8x8 htile width and height */
+ ib[idx] |= 3;
track->db_dirty = true;
break;
case SQ_PGM_START_FS:
@@ -1949,6 +1895,78 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
ib[idx+2] = upper_32_bits(offset) & 0xff;
}
break;
+ case PACKET3_CP_DMA:
+ {
+ u32 command, size;
+ u64 offset, tmp;
+ if (pkt->count != 4) {
+ DRM_ERROR("bad CP DMA\n");
+ return -EINVAL;
+ }
+ command = radeon_get_ib_value(p, idx+4);
+ size = command & 0x1fffff;
+ if (command & PACKET3_CP_DMA_CMD_SAS) {
+ /* src address space is register */
+ DRM_ERROR("CP DMA SAS not supported\n");
+ return -EINVAL;
+ } else {
+ if (command & PACKET3_CP_DMA_CMD_SAIC) {
+ DRM_ERROR("CP DMA SAIC only supported for registers\n");
+ return -EINVAL;
+ }
+ /* src address space is memory */
+ r = r600_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("bad CP DMA SRC\n");
+ return -EINVAL;
+ }
+
+ tmp = radeon_get_ib_value(p, idx) +
+ ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
+
+ offset = reloc->lobj.gpu_offset + tmp;
+
+ if ((tmp + size) > radeon_bo_size(reloc->robj)) {
+ dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
+ tmp + size, radeon_bo_size(reloc->robj));
+ return -EINVAL;
+ }
+
+ ib[idx] = offset;
+ ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
+ }
+ if (command & PACKET3_CP_DMA_CMD_DAS) {
+ /* dst address space is register */
+ DRM_ERROR("CP DMA DAS not supported\n");
+ return -EINVAL;
+ } else {
+ /* dst address space is memory */
+ if (command & PACKET3_CP_DMA_CMD_DAIC) {
+ DRM_ERROR("CP DMA DAIC only supported for registers\n");
+ return -EINVAL;
+ }
+ r = r600_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("bad CP DMA DST\n");
+ return -EINVAL;
+ }
+
+ tmp = radeon_get_ib_value(p, idx+2) +
+ ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
+
+ offset = reloc->lobj.gpu_offset + tmp;
+
+ if ((tmp + size) > radeon_bo_size(reloc->robj)) {
+ dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
+ tmp + size, radeon_bo_size(reloc->robj));
+ return -EINVAL;
+ }
+
+ ib[idx+2] = offset;
+ ib[idx+3] = upper_32_bits(offset) & 0xff;
+ }
+ break;
+ }
case PACKET3_SURFACE_SYNC:
if (pkt->count != 3) {
DRM_ERROR("bad SURFACE_SYNC\n");
@@ -2276,6 +2294,35 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
ib[idx+4] = upper_32_bits(offset) & 0xff;
}
break;
+ case PACKET3_MEM_WRITE:
+ {
+ u64 offset;
+
+ if (pkt->count != 3) {
+ DRM_ERROR("bad MEM_WRITE (invalid count)\n");
+ return -EINVAL;
+ }
+ r = r600_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
+ return -EINVAL;
+ }
+ offset = radeon_get_ib_value(p, idx+0);
+ offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
+ if (offset & 0x7) {
+ DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
+ return -EINVAL;
+ }
+ if ((offset + 8) > radeon_bo_size(reloc->robj)) {
+ DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
+ offset + 8, radeon_bo_size(reloc->robj));
+ return -EINVAL;
+ }
+ offset += reloc->lobj.gpu_offset;
+ ib[idx+0] = offset;
+ ib[idx+1] = upper_32_bits(offset) & 0xff;
+ break;
+ }
case PACKET3_COPY_DW:
if (pkt->count != 4) {
DRM_ERROR("bad COPY_DW (invalid count)\n");
@@ -2429,8 +2476,10 @@ static void r600_cs_parser_fini(struct radeon_cs_parser *parser, int error)
kfree(parser->relocs);
for (i = 0; i < parser->nchunks; i++) {
kfree(parser->chunks[i].kdata);
- kfree(parser->chunks[i].kpage[0]);
- kfree(parser->chunks[i].kpage[1]);
+ if (parser->rdev && (parser->rdev->flags & RADEON_IS_AGP)) {
+ kfree(parser->chunks[i].kpage[0]);
+ kfree(parser->chunks[i].kpage[1]);
+ }
}
kfree(parser->chunks);
kfree(parser->chunks_array);
@@ -2496,3 +2545,209 @@ void r600_cs_legacy_init(void)
{
r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm;
}
+
+/*
+ * DMA
+ */
+/**
+ * r600_dma_cs_next_reloc() - parse next reloc
+ * @p: parser structure holding parsing context.
+ * @cs_reloc: reloc informations
+ *
+ * Return the next reloc, do bo validation and compute
+ * GPU offset using the provided start.
+ **/
+int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
+ struct radeon_cs_reloc **cs_reloc)
+{
+ struct radeon_cs_chunk *relocs_chunk;
+ unsigned idx;
+
+ *cs_reloc = NULL;
+ if (p->chunk_relocs_idx == -1) {
+ DRM_ERROR("No relocation chunk !\n");
+ return -EINVAL;
+ }
+ relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+ idx = p->dma_reloc_idx;
+ if (idx >= p->nrelocs) {
+ DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
+ idx, p->nrelocs);
+ return -EINVAL;
+ }
+ *cs_reloc = p->relocs_ptr[idx];
+ p->dma_reloc_idx++;
+ return 0;
+}
+
+#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
+#define GET_DMA_COUNT(h) ((h) & 0x0000ffff)
+#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
+
+/**
+ * r600_dma_cs_parse() - parse the DMA IB
+ * @p: parser structure holding parsing context.
+ *
+ * Parses the DMA IB from the CS ioctl and updates
+ * the GPU addresses based on the reloc information and
+ * checks for errors. (R6xx-R7xx)
+ * Returns 0 for success and an error on failure.
+ **/
+int r600_dma_cs_parse(struct radeon_cs_parser *p)
+{
+ struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
+ struct radeon_cs_reloc *src_reloc, *dst_reloc;
+ u32 header, cmd, count, tiled;
+ volatile u32 *ib = p->ib.ptr;
+ u32 idx, idx_value;
+ u64 src_offset, dst_offset;
+ int r;
+
+ do {
+ if (p->idx >= ib_chunk->length_dw) {
+ DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
+ p->idx, ib_chunk->length_dw);
+ return -EINVAL;
+ }
+ idx = p->idx;
+ header = radeon_get_ib_value(p, idx);
+ cmd = GET_DMA_CMD(header);
+ count = GET_DMA_COUNT(header);
+ tiled = GET_DMA_T(header);
+
+ switch (cmd) {
+ case DMA_PACKET_WRITE:
+ r = r600_dma_cs_next_reloc(p, &dst_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_WRITE\n");
+ return -EINVAL;
+ }
+ if (tiled) {
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ p->idx += count + 5;
+ } else {
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += count + 3;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ break;
+ case DMA_PACKET_COPY:
+ r = r600_dma_cs_next_reloc(p, &src_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_COPY\n");
+ return -EINVAL;
+ }
+ if (tiled) {
+ idx_value = radeon_get_ib_value(p, idx + 2);
+ /* detile bit */
+ if (idx_value & (1 << 31)) {
+ /* tiled src, linear dst */
+ src_offset = ib[idx+1];
+ src_offset <<= 8;
+ ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+ dst_offset = ib[idx+5];
+ dst_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+ ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ } else {
+ /* linear src, tiled dst */
+ src_offset = ib[idx+5];
+ src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+ ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+ dst_offset = ib[idx+1];
+ dst_offset <<= 8;
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+ }
+ p->idx += 7;
+ } else {
+ if (p->family >= CHIP_RV770) {
+ src_offset = ib[idx+2];
+ src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ p->idx += 5;
+ } else {
+ src_offset = ib[idx+2];
+ src_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+3] & 0xff0000)) << 16;
+
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+3] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+ ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff) << 16;
+ p->idx += 4;
+ }
+ }
+ if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+ dev_warn(p->dev, "DMA copy src buffer too small (%llu %lu)\n",
+ src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+ return -EINVAL;
+ }
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA write dst buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ break;
+ case DMA_PACKET_CONSTANT_FILL:
+ if (p->family < CHIP_RV770) {
+ DRM_ERROR("Constant Fill is 7xx only !\n");
+ return -EINVAL;
+ }
+ r = r600_dma_cs_next_reloc(p, &dst_reloc);
+ if (r) {
+ DRM_ERROR("bad DMA_PACKET_WRITE\n");
+ return -EINVAL;
+ }
+ dst_offset = ib[idx+1];
+ dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
+ if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+ dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
+ dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+ return -EINVAL;
+ }
+ ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+ ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
+ p->idx += 4;
+ break;
+ case DMA_PACKET_NOP:
+ p->idx += 1;
+ break;
+ default:
+ DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
+ return -EINVAL;
+ }
+ } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+#if 0
+ for (r = 0; r < p->ib->length_dw; r++) {
+ printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
+ mdelay(1);
+ }
+#endif
+ return 0;
+}
diff --git a/drivers/gpu/drm/radeon/r600_reg.h b/drivers/gpu/drm/radeon/r600_reg.h
index 2b960cb5c18a..909219b1bf80 100644
--- a/drivers/gpu/drm/radeon/r600_reg.h
+++ b/drivers/gpu/drm/radeon/r600_reg.h
@@ -96,6 +96,15 @@
#define R600_CONFIG_F0_BASE 0x542C
#define R600_CONFIG_APER_SIZE 0x5430
+#define R600_BIF_FB_EN 0x5490
+#define R600_FB_READ_EN (1 << 0)
+#define R600_FB_WRITE_EN (1 << 1)
+
+#define R600_CITF_CNTL 0x200c
+#define R600_BLACKOUT_MASK 0x00000003
+
+#define R700_MC_CITF_CNTL 0x25c0
+
#define R600_ROM_CNTL 0x1600
# define R600_SCK_OVERWRITE (1 << 1)
# define R600_SCK_PRESCALE_CRYSTAL_CLK_SHIFT 28
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index fa6f37099ba9..4a53402b1852 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -590,9 +590,59 @@
#define WAIT_2D_IDLECLEAN_bit (1 << 16)
#define WAIT_3D_IDLECLEAN_bit (1 << 17)
+/* async DMA */
+#define DMA_TILING_CONFIG 0x3ec4
+#define DMA_CONFIG 0x3e4c
+
+#define DMA_RB_CNTL 0xd000
+# define DMA_RB_ENABLE (1 << 0)
+# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
+# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
+# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
+# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
+# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
+#define DMA_RB_BASE 0xd004
+#define DMA_RB_RPTR 0xd008
+#define DMA_RB_WPTR 0xd00c
+
+#define DMA_RB_RPTR_ADDR_HI 0xd01c
+#define DMA_RB_RPTR_ADDR_LO 0xd020
+
+#define DMA_IB_CNTL 0xd024
+# define DMA_IB_ENABLE (1 << 0)
+# define DMA_IB_SWAP_ENABLE (1 << 4)
+#define DMA_IB_RPTR 0xd028
+#define DMA_CNTL 0xd02c
+# define TRAP_ENABLE (1 << 0)
+# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
+# define SEM_WAIT_INT_ENABLE (1 << 2)
+# define DATA_SWAP_ENABLE (1 << 3)
+# define FENCE_SWAP_ENABLE (1 << 4)
+# define CTXEMPTY_INT_ENABLE (1 << 28)
+#define DMA_STATUS_REG 0xd034
+# define DMA_IDLE (1 << 0)
+#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0xd044
+#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0xd048
+#define DMA_MODE 0xd0bc
+
+/* async DMA packets */
+#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
+ (((t) & 0x1) << 23) | \
+ (((s) & 0x1) << 22) | \
+ (((n) & 0xFFFF) << 0))
+/* async DMA Packet types */
+#define DMA_PACKET_WRITE 0x2
+#define DMA_PACKET_COPY 0x3
+#define DMA_PACKET_INDIRECT_BUFFER 0x4
+#define DMA_PACKET_SEMAPHORE 0x5
+#define DMA_PACKET_FENCE 0x6
+#define DMA_PACKET_TRAP 0x7
+#define DMA_PACKET_CONSTANT_FILL 0xd /* 7xx only */
+#define DMA_PACKET_NOP 0xf
+
#define IH_RB_CNTL 0x3e00
# define IH_RB_ENABLE (1 << 0)
-# define IH_IB_SIZE(x) ((x) << 1) /* log2 */
+# define IH_RB_SIZE(x) ((x) << 1) /* log2 */
# define IH_RB_FULL_DRAIN_ENABLE (1 << 6)
# define IH_WPTR_WRITEBACK_ENABLE (1 << 8)
# define IH_WPTR_WRITEBACK_TIMER(x) ((x) << 9) /* log2 */
@@ -637,7 +687,9 @@
#define TN_RLC_CLEAR_STATE_RESTORE_BASE 0x3f20
#define SRBM_SOFT_RESET 0xe60
+# define SOFT_RESET_DMA (1 << 12)
# define SOFT_RESET_RLC (1 << 13)
+# define RV770_SOFT_RESET_DMA (1 << 20)
#define CP_INT_CNTL 0xc124
# define CNTX_BUSY_INT_ENABLE (1 << 19)
@@ -1134,6 +1186,38 @@
#define PACKET3_WAIT_REG_MEM 0x3C
#define PACKET3_MEM_WRITE 0x3D
#define PACKET3_INDIRECT_BUFFER 0x32
+#define PACKET3_CP_DMA 0x41
+/* 1. header
+ * 2. SRC_ADDR_LO [31:0]
+ * 3. CP_SYNC [31] | SRC_ADDR_HI [7:0]
+ * 4. DST_ADDR_LO [31:0]
+ * 5. DST_ADDR_HI [7:0]
+ * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
+ */
+# define PACKET3_CP_DMA_CP_SYNC (1 << 31)
+/* COMMAND */
+# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
+ /* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
+ /* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+# define PACKET3_CP_DMA_CMD_SAS (1 << 26)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_CP_DMA_CMD_DAS (1 << 27)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_CP_DMA_CMD_SAIC (1 << 28)
+# define PACKET3_CP_DMA_CMD_DAIC (1 << 29)
#define PACKET3_SURFACE_SYNC 0x43
# define PACKET3_CB0_DEST_BASE_ENA (1 << 6)
# define PACKET3_TC_ACTION_ENA (1 << 23)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8c42d54c2e26..a08f657329a0 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -109,7 +109,7 @@ extern int radeon_lockup_timeout;
#define RADEON_BIOS_NUM_SCRATCH 8
/* max number of rings */
-#define RADEON_NUM_RINGS 3
+#define RADEON_NUM_RINGS 5
/* fence seq are set to this number when signaled */
#define RADEON_FENCE_SIGNALED_SEQ 0LL
@@ -122,11 +122,21 @@ extern int radeon_lockup_timeout;
#define CAYMAN_RING_TYPE_CP1_INDEX 1
#define CAYMAN_RING_TYPE_CP2_INDEX 2
+/* R600+ has an async dma ring */
+#define R600_RING_TYPE_DMA_INDEX 3
+/* cayman add a second async dma ring */
+#define CAYMAN_RING_TYPE_DMA1_INDEX 4
+
/* hardcode those limit for now */
#define RADEON_VA_IB_OFFSET (1 << 20)
#define RADEON_VA_RESERVED_SIZE (8 << 20)
#define RADEON_IB_VM_MAX_SIZE (64 << 10)
+/* reset flags */
+#define RADEON_RESET_GFX (1 << 0)
+#define RADEON_RESET_COMPUTE (1 << 1)
+#define RADEON_RESET_DMA (1 << 2)
+
/*
* Errata workarounds.
*/
@@ -220,12 +230,13 @@ struct radeon_fence {
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
int radeon_fence_driver_init(struct radeon_device *rdev);
void radeon_fence_driver_fini(struct radeon_device *rdev);
+void radeon_fence_driver_force_completion(struct radeon_device *rdev);
int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring);
void radeon_fence_process(struct radeon_device *rdev, int ring);
bool radeon_fence_signaled(struct radeon_fence *fence);
int radeon_fence_wait(struct radeon_fence *fence, bool interruptible);
int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring);
-void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring);
+int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring);
int radeon_fence_wait_any(struct radeon_device *rdev,
struct radeon_fence **fences,
bool intr);
@@ -642,6 +653,8 @@ struct radeon_ring {
u32 ptr_reg_mask;
u32 nop;
u32 idx;
+ u64 last_semaphore_signal_addr;
+ u64 last_semaphore_wait_addr;
};
/*
@@ -787,6 +800,15 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigne
void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp);
+/* r600 async dma */
+void r600_dma_stop(struct radeon_device *rdev);
+int r600_dma_resume(struct radeon_device *rdev);
+void r600_dma_fini(struct radeon_device *rdev);
+
+void cayman_dma_stop(struct radeon_device *rdev);
+int cayman_dma_resume(struct radeon_device *rdev);
+void cayman_dma_fini(struct radeon_device *rdev);
+
/*
* CS.
*/
@@ -824,6 +846,7 @@ struct radeon_cs_parser {
struct radeon_cs_reloc *relocs;
struct radeon_cs_reloc **relocs_ptr;
struct list_head validated;
+ unsigned dma_reloc_idx;
/* indices of various chunks */
int chunk_ib_idx;
int chunk_relocs_idx;
@@ -883,7 +906,9 @@ struct radeon_wb {
#define RADEON_WB_CP_RPTR_OFFSET 1024
#define RADEON_WB_CP1_RPTR_OFFSET 1280
#define RADEON_WB_CP2_RPTR_OFFSET 1536
+#define R600_WB_DMA_RPTR_OFFSET 1792
#define R600_WB_IH_WPTR_OFFSET 2048
+#define CAYMAN_WB_DMA1_RPTR_OFFSET 2304
#define R600_WB_EVENT_OFFSET 3072
/**
@@ -1539,6 +1564,8 @@ struct radeon_device {
/* Register mmio */
resource_size_t rmmio_base;
resource_size_t rmmio_size;
+ /* protects concurrent MM_INDEX/DATA based register access */
+ spinlock_t mmio_idx_lock;
void __iomem *rmmio;
radeon_rreg_t mc_rreg;
radeon_wreg_t mc_wreg;
@@ -1614,8 +1641,10 @@ int radeon_device_init(struct radeon_device *rdev,
void radeon_device_fini(struct radeon_device *rdev);
int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
-uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg);
-void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
+ bool always_indirect);
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
+ bool always_indirect);
u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
@@ -1631,9 +1660,11 @@ void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
#define WREG8(reg, v) writeb(v, (rdev->rmmio) + (reg))
#define RREG16(reg) readw((rdev->rmmio) + (reg))
#define WREG16(reg, v) writew(v, (rdev->rmmio) + (reg))
-#define RREG32(reg) r100_mm_rreg(rdev, (reg))
-#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg)))
-#define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v))
+#define RREG32(reg) r100_mm_rreg(rdev, (reg), false)
+#define RREG32_IDX(reg) r100_mm_rreg(rdev, (reg), true)
+#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg), false))
+#define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v), false)
+#define WREG32_IDX(reg, v) r100_mm_wreg(rdev, (reg), (v), true)
#define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
#define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
#define RREG32_PLL(reg) rdev->pll_rreg(rdev, (reg))
@@ -1658,7 +1689,7 @@ void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
tmp_ |= ((val) & ~(mask)); \
WREG32_PLL(reg, tmp_); \
} while (0)
-#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg)))
+#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg), false))
#define RREG32_IO(reg) r100_io_rreg(rdev, (reg))
#define WREG32_IO(reg, v) r100_io_wreg(rdev, (reg), (v))
diff --git a/drivers/gpu/drm/radeon/radeon_agp.c b/drivers/gpu/drm/radeon/radeon_agp.c
index 10ea17a6b2a6..42433344cb1b 100644
--- a/drivers/gpu/drm/radeon/radeon_agp.c
+++ b/drivers/gpu/drm/radeon/radeon_agp.c
@@ -69,9 +69,12 @@ static struct radeon_agpmode_quirk radeon_agpmode_quirk_list[] = {
/* Intel 82830 830 Chipset Host Bridge / Mobility M6 LY Needs AGPMode 2 (fdo #17360)*/
{ PCI_VENDOR_ID_INTEL, 0x3575, PCI_VENDOR_ID_ATI, 0x4c59,
PCI_VENDOR_ID_DELL, 0x00e3, 2},
- /* Intel 82852/82855 host bridge / Mobility FireGL 9000 R250 Needs AGPMode 1 (lp #296617) */
+ /* Intel 82852/82855 host bridge / Mobility FireGL 9000 RV250 Needs AGPMode 1 (lp #296617) */
{ PCI_VENDOR_ID_INTEL, 0x3580, PCI_VENDOR_ID_ATI, 0x4c66,
PCI_VENDOR_ID_DELL, 0x0149, 1},
+ /* Intel 82855PM host bridge / Mobility FireGL 9000 RV250 Needs AGPMode 1 for suspend/resume */
+ { PCI_VENDOR_ID_INTEL, 0x3340, PCI_VENDOR_ID_ATI, 0x4c66,
+ PCI_VENDOR_ID_IBM, 0x0531, 1},
/* Intel 82852/82855 host bridge / Mobility 9600 M10 RV350 Needs AGPMode 1 (deb #467460) */
{ PCI_VENDOR_ID_INTEL, 0x3580, PCI_VENDOR_ID_ATI, 0x4e50,
0x1025, 0x0061, 1},
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 654520b95ab7..9056fafb00ea 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -947,6 +947,15 @@ static struct radeon_asic r600_asic = {
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
.is_lockup = &r600_gpu_is_lockup,
+ },
+ [R600_RING_TYPE_DMA_INDEX] = {
+ .ib_execute = &r600_dma_ring_ib_execute,
+ .emit_fence = &r600_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = &r600_dma_cs_parse,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &r600_dma_is_lockup,
}
},
.irq = {
@@ -963,10 +972,10 @@ static struct radeon_asic r600_asic = {
.copy = {
.blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .dma = NULL,
- .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .copy = &r600_copy_blit,
- .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .dma = &r600_copy_dma,
+ .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+ .copy = &r600_copy_dma,
+ .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
},
.surface = {
.set_reg = r600_set_surface_reg,
@@ -1022,6 +1031,15 @@ static struct radeon_asic rs780_asic = {
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
.is_lockup = &r600_gpu_is_lockup,
+ },
+ [R600_RING_TYPE_DMA_INDEX] = {
+ .ib_execute = &r600_dma_ring_ib_execute,
+ .emit_fence = &r600_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = &r600_dma_cs_parse,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &r600_dma_is_lockup,
}
},
.irq = {
@@ -1038,10 +1056,10 @@ static struct radeon_asic rs780_asic = {
.copy = {
.blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .dma = NULL,
- .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .copy = &r600_copy_blit,
- .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .dma = &r600_copy_dma,
+ .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+ .copy = &r600_copy_dma,
+ .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
},
.surface = {
.set_reg = r600_set_surface_reg,
@@ -1097,6 +1115,15 @@ static struct radeon_asic rv770_asic = {
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
.is_lockup = &r600_gpu_is_lockup,
+ },
+ [R600_RING_TYPE_DMA_INDEX] = {
+ .ib_execute = &r600_dma_ring_ib_execute,
+ .emit_fence = &r600_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = &r600_dma_cs_parse,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &r600_dma_is_lockup,
}
},
.irq = {
@@ -1113,10 +1140,10 @@ static struct radeon_asic rv770_asic = {
.copy = {
.blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .dma = NULL,
- .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .copy = &r600_copy_blit,
- .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .dma = &rv770_copy_dma,
+ .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+ .copy = &rv770_copy_dma,
+ .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
},
.surface = {
.set_reg = r600_set_surface_reg,
@@ -1172,6 +1199,15 @@ static struct radeon_asic evergreen_asic = {
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
.is_lockup = &evergreen_gpu_is_lockup,
+ },
+ [R600_RING_TYPE_DMA_INDEX] = {
+ .ib_execute = &evergreen_dma_ring_ib_execute,
+ .emit_fence = &evergreen_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = &evergreen_dma_cs_parse,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &r600_dma_is_lockup,
}
},
.irq = {
@@ -1188,10 +1224,10 @@ static struct radeon_asic evergreen_asic = {
.copy = {
.blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .dma = NULL,
- .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .copy = &r600_copy_blit,
- .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .dma = &evergreen_copy_dma,
+ .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+ .copy = &evergreen_copy_dma,
+ .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
},
.surface = {
.set_reg = r600_set_surface_reg,
@@ -1248,6 +1284,15 @@ static struct radeon_asic sumo_asic = {
.ib_test = &r600_ib_test,
.is_lockup = &evergreen_gpu_is_lockup,
},
+ [R600_RING_TYPE_DMA_INDEX] = {
+ .ib_execute = &evergreen_dma_ring_ib_execute,
+ .emit_fence = &evergreen_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = &evergreen_dma_cs_parse,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &r600_dma_is_lockup,
+ }
},
.irq = {
.set = &evergreen_irq_set,
@@ -1263,10 +1308,10 @@ static struct radeon_asic sumo_asic = {
.copy = {
.blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .dma = NULL,
- .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .copy = &r600_copy_blit,
- .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .dma = &evergreen_copy_dma,
+ .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+ .copy = &evergreen_copy_dma,
+ .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
},
.surface = {
.set_reg = r600_set_surface_reg,
@@ -1322,6 +1367,15 @@ static struct radeon_asic btc_asic = {
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
.is_lockup = &evergreen_gpu_is_lockup,
+ },
+ [R600_RING_TYPE_DMA_INDEX] = {
+ .ib_execute = &evergreen_dma_ring_ib_execute,
+ .emit_fence = &evergreen_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = &evergreen_dma_cs_parse,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &r600_dma_is_lockup,
}
},
.irq = {
@@ -1338,10 +1392,10 @@ static struct radeon_asic btc_asic = {
.copy = {
.blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .dma = NULL,
- .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .copy = &r600_copy_blit,
- .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .dma = &evergreen_copy_dma,
+ .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+ .copy = &evergreen_copy_dma,
+ .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
},
.surface = {
.set_reg = r600_set_surface_reg,
@@ -1391,7 +1445,7 @@ static struct radeon_asic cayman_asic = {
.vm = {
.init = &cayman_vm_init,
.fini = &cayman_vm_fini,
- .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .pt_ring_index = R600_RING_TYPE_DMA_INDEX,
.set_page = &cayman_vm_set_page,
},
.ring = {
@@ -1427,6 +1481,28 @@ static struct radeon_asic cayman_asic = {
.ib_test = &r600_ib_test,
.is_lockup = &evergreen_gpu_is_lockup,
.vm_flush = &cayman_vm_flush,
+ },
+ [R600_RING_TYPE_DMA_INDEX] = {
+ .ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
+ .emit_fence = &evergreen_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = &evergreen_dma_cs_parse,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &cayman_dma_is_lockup,
+ .vm_flush = &cayman_dma_vm_flush,
+ },
+ [CAYMAN_RING_TYPE_DMA1_INDEX] = {
+ .ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
+ .emit_fence = &evergreen_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = &evergreen_dma_cs_parse,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &cayman_dma_is_lockup,
+ .vm_flush = &cayman_dma_vm_flush,
}
},
.irq = {
@@ -1443,10 +1519,10 @@ static struct radeon_asic cayman_asic = {
.copy = {
.blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .dma = NULL,
- .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .copy = &r600_copy_blit,
- .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .dma = &evergreen_copy_dma,
+ .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+ .copy = &evergreen_copy_dma,
+ .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
},
.surface = {
.set_reg = r600_set_surface_reg,
@@ -1496,7 +1572,7 @@ static struct radeon_asic trinity_asic = {
.vm = {
.init = &cayman_vm_init,
.fini = &cayman_vm_fini,
- .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .pt_ring_index = R600_RING_TYPE_DMA_INDEX,
.set_page = &cayman_vm_set_page,
},
.ring = {
@@ -1532,6 +1608,28 @@ static struct radeon_asic trinity_asic = {
.ib_test = &r600_ib_test,
.is_lockup = &evergreen_gpu_is_lockup,
.vm_flush = &cayman_vm_flush,
+ },
+ [R600_RING_TYPE_DMA_INDEX] = {
+ .ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
+ .emit_fence = &evergreen_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = &evergreen_dma_cs_parse,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &cayman_dma_is_lockup,
+ .vm_flush = &cayman_dma_vm_flush,
+ },
+ [CAYMAN_RING_TYPE_DMA1_INDEX] = {
+ .ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
+ .emit_fence = &evergreen_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = &evergreen_dma_cs_parse,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &cayman_dma_is_lockup,
+ .vm_flush = &cayman_dma_vm_flush,
}
},
.irq = {
@@ -1548,10 +1646,10 @@ static struct radeon_asic trinity_asic = {
.copy = {
.blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .dma = NULL,
- .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .copy = &r600_copy_blit,
- .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .dma = &evergreen_copy_dma,
+ .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+ .copy = &evergreen_copy_dma,
+ .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
},
.surface = {
.set_reg = r600_set_surface_reg,
@@ -1601,7 +1699,7 @@ static struct radeon_asic si_asic = {
.vm = {
.init = &si_vm_init,
.fini = &si_vm_fini,
- .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .pt_ring_index = R600_RING_TYPE_DMA_INDEX,
.set_page = &si_vm_set_page,
},
.ring = {
@@ -1637,6 +1735,28 @@ static struct radeon_asic si_asic = {
.ib_test = &r600_ib_test,
.is_lockup = &si_gpu_is_lockup,
.vm_flush = &si_vm_flush,
+ },
+ [R600_RING_TYPE_DMA_INDEX] = {
+ .ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
+ .emit_fence = &evergreen_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = NULL,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &cayman_dma_is_lockup,
+ .vm_flush = &si_dma_vm_flush,
+ },
+ [CAYMAN_RING_TYPE_DMA1_INDEX] = {
+ .ib_execute = &cayman_dma_ring_ib_execute,
+ .ib_parse = &evergreen_dma_ib_parse,
+ .emit_fence = &evergreen_dma_fence_ring_emit,
+ .emit_semaphore = &r600_dma_semaphore_ring_emit,
+ .cs_parse = NULL,
+ .ring_test = &r600_dma_ring_test,
+ .ib_test = &r600_dma_ib_test,
+ .is_lockup = &cayman_dma_is_lockup,
+ .vm_flush = &si_dma_vm_flush,
}
},
.irq = {
@@ -1653,10 +1773,10 @@ static struct radeon_asic si_asic = {
.copy = {
.blit = NULL,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .dma = NULL,
- .dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
- .copy = NULL,
- .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+ .dma = &si_copy_dma,
+ .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+ .copy = &si_copy_dma,
+ .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
},
.surface = {
.set_reg = r600_set_surface_reg,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 5e3a0e5c6be1..15d70e613076 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -263,6 +263,7 @@ extern int rs690_mc_wait_for_idle(struct radeon_device *rdev);
struct rv515_mc_save {
u32 vga_render_control;
u32 vga_hdp_control;
+ bool crtc_enabled[2];
};
int rv515_init(struct radeon_device *rdev);
@@ -303,12 +304,21 @@ void r600_pcie_gart_tlb_flush(struct radeon_device *rdev);
uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg);
void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
int r600_cs_parse(struct radeon_cs_parser *p);
+int r600_dma_cs_parse(struct radeon_cs_parser *p);
void r600_fence_ring_emit(struct radeon_device *rdev,
struct radeon_fence *fence);
void r600_semaphore_ring_emit(struct radeon_device *rdev,
struct radeon_ring *cp,
struct radeon_semaphore *semaphore,
bool emit_wait);
+void r600_dma_fence_ring_emit(struct radeon_device *rdev,
+ struct radeon_fence *fence);
+void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
+ struct radeon_ring *ring,
+ struct radeon_semaphore *semaphore,
+ bool emit_wait);
+void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
+bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp);
int r600_asic_reset(struct radeon_device *rdev);
int r600_set_surface_reg(struct radeon_device *rdev, int reg,
@@ -316,11 +326,16 @@ int r600_set_surface_reg(struct radeon_device *rdev, int reg,
uint32_t offset, uint32_t obj_size);
void r600_clear_surface_reg(struct radeon_device *rdev, int reg);
int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
+int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
+int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
int r600_copy_blit(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages, struct radeon_fence **fence);
+int r600_copy_dma(struct radeon_device *rdev,
+ uint64_t src_offset, uint64_t dst_offset,
+ unsigned num_gpu_pages, struct radeon_fence **fence);
void r600_hpd_init(struct radeon_device *rdev);
void r600_hpd_fini(struct radeon_device *rdev);
bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
@@ -388,6 +403,10 @@ u32 rv770_page_flip(struct radeon_device *rdev, int crtc, u64 crtc_base);
void r700_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
void r700_cp_stop(struct radeon_device *rdev);
void r700_cp_fini(struct radeon_device *rdev);
+int rv770_copy_dma(struct radeon_device *rdev,
+ uint64_t src_offset, uint64_t dst_offset,
+ unsigned num_gpu_pages,
+ struct radeon_fence **fence);
/*
* evergreen
@@ -416,6 +435,7 @@ u32 evergreen_get_vblank_counter(struct radeon_device *rdev, int crtc);
int evergreen_irq_set(struct radeon_device *rdev);
int evergreen_irq_process(struct radeon_device *rdev);
extern int evergreen_cs_parse(struct radeon_cs_parser *p);
+extern int evergreen_dma_cs_parse(struct radeon_cs_parser *p);
extern void evergreen_pm_misc(struct radeon_device *rdev);
extern void evergreen_pm_prepare(struct radeon_device *rdev);
extern void evergreen_pm_finish(struct radeon_device *rdev);
@@ -428,6 +448,14 @@ extern void dce4_wait_for_vblank(struct radeon_device *rdev, int crtc);
void evergreen_disable_interrupt_state(struct radeon_device *rdev);
int evergreen_blit_init(struct radeon_device *rdev);
int evergreen_mc_wait_for_idle(struct radeon_device *rdev);
+void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
+ struct radeon_fence *fence);
+void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
+ struct radeon_ib *ib);
+int evergreen_copy_dma(struct radeon_device *rdev,
+ uint64_t src_offset, uint64_t dst_offset,
+ unsigned num_gpu_pages,
+ struct radeon_fence **fence);
/*
* cayman
@@ -449,6 +477,11 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags);
int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
+int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
+void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
+ struct radeon_ib *ib);
+bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
+void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
/* DCE6 - SI */
void dce6_bandwidth_update(struct radeon_device *rdev);
@@ -476,5 +509,10 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
uint64_t si_get_gpu_clock(struct radeon_device *rdev);
+int si_copy_dma(struct radeon_device *rdev,
+ uint64_t src_offset, uint64_t dst_offset,
+ unsigned num_gpu_pages,
+ struct radeon_fence **fence);
+void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
#endif
diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
index 37f6a907aea4..15f5ded65e0c 100644
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -352,9 +352,9 @@ static int radeon_atpx_switchto(enum vga_switcheroo_client_id id)
}
/**
- * radeon_atpx_switchto - switch to the requested GPU
+ * radeon_atpx_power_state - power down/up the requested GPU
*
- * @id: GPU to switch to
+ * @id: GPU to power down/up
* @state: requested power state (0 = off, 1 = on)
*
* Execute the necessary ATPX function to power down/up the discrete GPU
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 45b660b27cfc..33a56a09ff10 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -1548,6 +1548,9 @@ bool radeon_get_legacy_connector_info_from_table(struct drm_device *dev)
of_machine_is_compatible("PowerBook6,7")) {
/* ibook */
rdev->mode_info.connector_table = CT_IBOOK;
+ } else if (of_machine_is_compatible("PowerMac3,5")) {
+ /* PowerMac G4 Silver radeon 7500 */
+ rdev->mode_info.connector_table = CT_MAC_G4_SILVER;
} else if (of_machine_is_compatible("PowerMac4,4")) {
/* emac */
rdev->mode_info.connector_table = CT_EMAC;
@@ -2212,6 +2215,54 @@ bool radeon_get_legacy_connector_info_from_table(struct drm_device *dev)
CONNECTOR_OBJECT_ID_SVIDEO,
&hpd);
break;
+ case CT_MAC_G4_SILVER:
+ DRM_INFO("Connector Table: %d (mac g4 silver)\n",
+ rdev->mode_info.connector_table);
+ /* DVI-I - tv dac, int tmds */
+ ddc_i2c = combios_setup_i2c_bus(rdev, DDC_DVI, 0, 0);
+ hpd.hpd = RADEON_HPD_1; /* ??? */
+ radeon_add_legacy_encoder(dev,
+ radeon_get_encoder_enum(dev,
+ ATOM_DEVICE_DFP1_SUPPORT,
+ 0),
+ ATOM_DEVICE_DFP1_SUPPORT);
+ radeon_add_legacy_encoder(dev,
+ radeon_get_encoder_enum(dev,
+ ATOM_DEVICE_CRT2_SUPPORT,
+ 2),
+ ATOM_DEVICE_CRT2_SUPPORT);
+ radeon_add_legacy_connector(dev, 0,
+ ATOM_DEVICE_DFP1_SUPPORT |
+ ATOM_DEVICE_CRT2_SUPPORT,
+ DRM_MODE_CONNECTOR_DVII, &ddc_i2c,
+ CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_I,
+ &hpd);
+ /* VGA - primary dac */
+ ddc_i2c = combios_setup_i2c_bus(rdev, DDC_VGA, 0, 0);
+ hpd.hpd = RADEON_HPD_NONE;
+ radeon_add_legacy_encoder(dev,
+ radeon_get_encoder_enum(dev,
+ ATOM_DEVICE_CRT1_SUPPORT,
+ 1),
+ ATOM_DEVICE_CRT1_SUPPORT);
+ radeon_add_legacy_connector(dev, 1, ATOM_DEVICE_CRT1_SUPPORT,
+ DRM_MODE_CONNECTOR_VGA, &ddc_i2c,
+ CONNECTOR_OBJECT_ID_VGA,
+ &hpd);
+ /* TV - TV DAC */
+ ddc_i2c.valid = false;
+ hpd.hpd = RADEON_HPD_NONE;
+ radeon_add_legacy_encoder(dev,
+ radeon_get_encoder_enum(dev,
+ ATOM_DEVICE_TV1_SUPPORT,
+ 2),
+ ATOM_DEVICE_TV1_SUPPORT);
+ radeon_add_legacy_connector(dev, 2, ATOM_DEVICE_TV1_SUPPORT,
+ DRM_MODE_CONNECTOR_SVIDEO,
+ &ddc_i2c,
+ CONNECTOR_OBJECT_ID_SVIDEO,
+ &hpd);
+ break;
default:
DRM_INFO("Connector table: %d (invalid)\n",
rdev->mode_info.connector_table);
@@ -3246,11 +3297,9 @@ static uint32_t combios_detect_ram(struct drm_device *dev, int ram,
while (ram--) {
addr = ram * 1024 * 1024;
/* write to each page */
- WREG32(RADEON_MM_INDEX, (addr) | RADEON_MM_APER);
- WREG32(RADEON_MM_DATA, 0xdeadbeef);
+ WREG32_IDX((addr) | RADEON_MM_APER, 0xdeadbeef);
/* read back and verify */
- WREG32(RADEON_MM_INDEX, (addr) | RADEON_MM_APER);
- if (RREG32(RADEON_MM_DATA) != 0xdeadbeef)
+ if (RREG32_IDX((addr) | RADEON_MM_APER) != 0xdeadbeef)
return 0;
}
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 67cfc1795ecd..2399f25ec037 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -741,7 +741,7 @@ radeon_vga_detect(struct drm_connector *connector, bool force)
ret = connector_status_disconnected;
if (radeon_connector->ddc_bus)
- dret = radeon_ddc_probe(radeon_connector);
+ dret = radeon_ddc_probe(radeon_connector, false);
if (dret) {
radeon_connector->detected_by_load = false;
if (radeon_connector->edid) {
@@ -941,13 +941,13 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
struct drm_mode_object *obj;
int i;
enum drm_connector_status ret = connector_status_disconnected;
- bool dret = false;
+ bool dret = false, broken_edid = false;
if (!force && radeon_check_hpd_status_unchanged(connector))
return connector->status;
if (radeon_connector->ddc_bus)
- dret = radeon_ddc_probe(radeon_connector);
+ dret = radeon_ddc_probe(radeon_connector, false);
if (dret) {
radeon_connector->detected_by_load = false;
if (radeon_connector->edid) {
@@ -965,6 +965,9 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
ret = connector_status_disconnected;
DRM_ERROR("%s: detected RS690 floating bus bug, stopping ddc detect\n", drm_get_connector_name(connector));
radeon_connector->ddc_bus = NULL;
+ } else {
+ ret = connector_status_connected;
+ broken_edid = true; /* defer use_digital to later */
}
} else {
radeon_connector->use_digital = !!(radeon_connector->edid->input & DRM_EDID_INPUT_DIGITAL);
@@ -1047,13 +1050,24 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
encoder_funcs = encoder->helper_private;
if (encoder_funcs->detect) {
- if (ret != connector_status_connected) {
- ret = encoder_funcs->detect(encoder, connector);
- if (ret == connector_status_connected) {
- radeon_connector->use_digital = false;
+ if (!broken_edid) {
+ if (ret != connector_status_connected) {
+ /* deal with analog monitors without DDC */
+ ret = encoder_funcs->detect(encoder, connector);
+ if (ret == connector_status_connected) {
+ radeon_connector->use_digital = false;
+ }
+ if (ret != connector_status_disconnected)
+ radeon_connector->detected_by_load = true;
}
- if (ret != connector_status_disconnected)
- radeon_connector->detected_by_load = true;
+ } else {
+ enum drm_connector_status lret;
+ /* assume digital unless load detected otherwise */
+ radeon_connector->use_digital = true;
+ lret = encoder_funcs->detect(encoder, connector);
+ DRM_DEBUG_KMS("load_detect %x returned: %x\n",encoder->encoder_type,lret);
+ if (lret == connector_status_connected)
+ radeon_connector->use_digital = false;
}
break;
}
@@ -1387,7 +1401,8 @@ radeon_dp_detect(struct drm_connector *connector, bool force)
if (encoder) {
/* setup ddc on the bridge */
radeon_atom_ext_encoder_setup_ddc(encoder);
- if (radeon_ddc_probe(radeon_connector)) /* try DDC */
+ /* bridge chips are always aux */
+ if (radeon_ddc_probe(radeon_connector, true)) /* try DDC */
ret = connector_status_connected;
else if (radeon_connector->dac_load_detect) { /* try load detection */
struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
@@ -1405,7 +1420,8 @@ radeon_dp_detect(struct drm_connector *connector, bool force)
if (radeon_dp_getdpcd(radeon_connector))
ret = connector_status_connected;
} else {
- if (radeon_ddc_probe(radeon_connector))
+ /* try non-aux ddc (DP to DVI/HMDI/etc. adapter) */
+ if (radeon_ddc_probe(radeon_connector, false))
ret = connector_status_connected;
}
}
@@ -1585,7 +1601,7 @@ radeon_add_atom_connector(struct drm_device *dev,
connector->interlace_allowed = true;
connector->doublescan_allowed = true;
radeon_connector->dac_load_detect = true;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.load_detect_property,
1);
break;
@@ -1594,13 +1610,13 @@ radeon_add_atom_connector(struct drm_device *dev,
case DRM_MODE_CONNECTOR_HDMIA:
case DRM_MODE_CONNECTOR_HDMIB:
case DRM_MODE_CONNECTOR_DisplayPort:
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_property,
UNDERSCAN_OFF);
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_hborder_property,
0);
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_vborder_property,
0);
subpixel_order = SubPixelHorizontalRGB;
@@ -1611,14 +1627,14 @@ radeon_add_atom_connector(struct drm_device *dev,
connector->doublescan_allowed = false;
if (connector_type == DRM_MODE_CONNECTOR_DVII) {
radeon_connector->dac_load_detect = true;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.load_detect_property,
1);
}
break;
case DRM_MODE_CONNECTOR_LVDS:
case DRM_MODE_CONNECTOR_eDP:
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
dev->mode_config.scaling_mode_property,
DRM_MODE_SCALE_FULLSCREEN);
subpixel_order = SubPixelHorizontalRGB;
@@ -1637,7 +1653,7 @@ radeon_add_atom_connector(struct drm_device *dev,
DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
radeon_connector->dac_load_detect = true;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.load_detect_property,
1);
/* no HPD on analog connectors */
@@ -1655,7 +1671,7 @@ radeon_add_atom_connector(struct drm_device *dev,
DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
radeon_connector->dac_load_detect = true;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.load_detect_property,
1);
/* no HPD on analog connectors */
@@ -1678,23 +1694,23 @@ radeon_add_atom_connector(struct drm_device *dev,
DRM_ERROR("DVI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
subpixel_order = SubPixelHorizontalRGB;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.coherent_mode_property,
1);
if (ASIC_IS_AVIVO(rdev)) {
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_property,
UNDERSCAN_OFF);
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_hborder_property,
0);
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_vborder_property,
0);
}
if (connector_type == DRM_MODE_CONNECTOR_DVII) {
radeon_connector->dac_load_detect = true;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.load_detect_property,
1);
}
@@ -1718,17 +1734,17 @@ radeon_add_atom_connector(struct drm_device *dev,
if (!radeon_connector->ddc_bus)
DRM_ERROR("HDMI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.coherent_mode_property,
1);
if (ASIC_IS_AVIVO(rdev)) {
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_property,
UNDERSCAN_OFF);
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_hborder_property,
0);
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_vborder_property,
0);
}
@@ -1757,17 +1773,17 @@ radeon_add_atom_connector(struct drm_device *dev,
DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
subpixel_order = SubPixelHorizontalRGB;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.coherent_mode_property,
1);
if (ASIC_IS_AVIVO(rdev)) {
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_property,
UNDERSCAN_OFF);
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_hborder_property,
0);
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.underscan_vborder_property,
0);
}
@@ -1792,7 +1808,7 @@ radeon_add_atom_connector(struct drm_device *dev,
if (!radeon_connector->ddc_bus)
DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
dev->mode_config.scaling_mode_property,
DRM_MODE_SCALE_FULLSCREEN);
subpixel_order = SubPixelHorizontalRGB;
@@ -1805,10 +1821,10 @@ radeon_add_atom_connector(struct drm_device *dev,
drm_connector_init(dev, &radeon_connector->base, &radeon_tv_connector_funcs, connector_type);
drm_connector_helper_add(&radeon_connector->base, &radeon_tv_connector_helper_funcs);
radeon_connector->dac_load_detect = true;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.load_detect_property,
1);
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.tv_std_property,
radeon_atombios_get_tv_info(rdev));
/* no HPD on analog connectors */
@@ -1829,7 +1845,7 @@ radeon_add_atom_connector(struct drm_device *dev,
if (!radeon_connector->ddc_bus)
DRM_ERROR("LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
dev->mode_config.scaling_mode_property,
DRM_MODE_SCALE_FULLSCREEN);
subpixel_order = SubPixelHorizontalRGB;
@@ -1908,7 +1924,7 @@ radeon_add_legacy_connector(struct drm_device *dev,
DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
radeon_connector->dac_load_detect = true;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.load_detect_property,
1);
/* no HPD on analog connectors */
@@ -1926,7 +1942,7 @@ radeon_add_legacy_connector(struct drm_device *dev,
DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
radeon_connector->dac_load_detect = true;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.load_detect_property,
1);
/* no HPD on analog connectors */
@@ -1945,7 +1961,7 @@ radeon_add_legacy_connector(struct drm_device *dev,
}
if (connector_type == DRM_MODE_CONNECTOR_DVII) {
radeon_connector->dac_load_detect = true;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.load_detect_property,
1);
}
@@ -1969,10 +1985,10 @@ radeon_add_legacy_connector(struct drm_device *dev,
*/
if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480)
radeon_connector->dac_load_detect = false;
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.load_detect_property,
radeon_connector->dac_load_detect);
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
rdev->mode_info.tv_std_property,
radeon_combios_get_tv_info(rdev));
/* no HPD on analog connectors */
@@ -1988,7 +2004,7 @@ radeon_add_legacy_connector(struct drm_device *dev,
if (!radeon_connector->ddc_bus)
DRM_ERROR("LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
}
- drm_connector_attach_property(&radeon_connector->base,
+ drm_object_attach_property(&radeon_connector->base.base,
dev->mode_config.scaling_mode_property,
DRM_MODE_SCALE_FULLSCREEN);
subpixel_order = SubPixelHorizontalRGB;
diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c
index 8b2797dc7b64..9143fc45e35b 100644
--- a/drivers/gpu/drm/radeon/radeon_cp.c
+++ b/drivers/gpu/drm/radeon/radeon_cp.c
@@ -116,20 +116,6 @@ u32 radeon_get_scratch(drm_radeon_private_t *dev_priv, int index)
}
}
-u32 RADEON_READ_MM(drm_radeon_private_t *dev_priv, int addr)
-{
- u32 ret;
-
- if (addr < 0x10000)
- ret = DRM_READ32(dev_priv->mmio, addr);
- else {
- DRM_WRITE32(dev_priv->mmio, RADEON_MM_INDEX, addr);
- ret = DRM_READ32(dev_priv->mmio, RADEON_MM_DATA);
- }
-
- return ret;
-}
-
static u32 R500_READ_MCIND(drm_radeon_private_t *dev_priv, int addr)
{
u32 ret;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 41672cc563fb..469661fd1903 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -43,6 +43,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
return 0;
}
chunk = &p->chunks[p->chunk_relocs_idx];
+ p->dma_reloc_idx = 0;
/* FIXME: we assume that each relocs use 4 dwords */
p->nrelocs = chunk->length_dw / 4;
p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
@@ -111,6 +112,18 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
} else
p->ring = RADEON_RING_TYPE_GFX_INDEX;
break;
+ case RADEON_CS_RING_DMA:
+ if (p->rdev->family >= CHIP_CAYMAN) {
+ if (p->priority > 0)
+ p->ring = R600_RING_TYPE_DMA_INDEX;
+ else
+ p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
+ } else if (p->rdev->family >= CHIP_R600) {
+ p->ring = R600_RING_TYPE_DMA_INDEX;
+ } else {
+ return -EINVAL;
+ }
+ break;
}
return 0;
}
@@ -266,13 +279,13 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
p->chunks[p->chunk_ib_idx].length_dw);
return -EINVAL;
}
- if ((p->rdev->flags & RADEON_IS_AGP)) {
+ if (p->rdev && (p->rdev->flags & RADEON_IS_AGP)) {
p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL ||
p->chunks[p->chunk_ib_idx].kpage[1] == NULL) {
- kfree(p->chunks[i].kpage[0]);
- kfree(p->chunks[i].kpage[1]);
+ kfree(p->chunks[p->chunk_ib_idx].kpage[0]);
+ kfree(p->chunks[p->chunk_ib_idx].kpage[1]);
return -ENOMEM;
}
}
@@ -570,7 +583,8 @@ static int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx)
struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
int i;
int size = PAGE_SIZE;
- bool copy1 = (p->rdev->flags & RADEON_IS_AGP) ? false : true;
+ bool copy1 = (p->rdev && (p->rdev->flags & RADEON_IS_AGP)) ?
+ false : true;
for (i = ibc->last_copied_page + 1; i < pg_idx; i++) {
if (DRM_COPY_FROM_USER(p->ib.ptr + (i * (PAGE_SIZE/4)),
diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c
index 0fe56c9f64bd..ad6df625e8b8 100644
--- a/drivers/gpu/drm/radeon/radeon_cursor.c
+++ b/drivers/gpu/drm/radeon/radeon_cursor.c
@@ -66,24 +66,25 @@ static void radeon_hide_cursor(struct drm_crtc *crtc)
struct radeon_device *rdev = crtc->dev->dev_private;
if (ASIC_IS_DCE4(rdev)) {
- WREG32(RADEON_MM_INDEX, EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset);
- WREG32(RADEON_MM_DATA, EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT) |
- EVERGREEN_CURSOR_URGENT_CONTROL(EVERGREEN_CURSOR_URGENT_1_2));
+ WREG32_IDX(EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset,
+ EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT) |
+ EVERGREEN_CURSOR_URGENT_CONTROL(EVERGREEN_CURSOR_URGENT_1_2));
} else if (ASIC_IS_AVIVO(rdev)) {
- WREG32(RADEON_MM_INDEX, AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset);
- WREG32(RADEON_MM_DATA, (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT));
+ WREG32_IDX(AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset,
+ (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT));
} else {
+ u32 reg;
switch (radeon_crtc->crtc_id) {
case 0:
- WREG32(RADEON_MM_INDEX, RADEON_CRTC_GEN_CNTL);
+ reg = RADEON_CRTC_GEN_CNTL;
break;
case 1:
- WREG32(RADEON_MM_INDEX, RADEON_CRTC2_GEN_CNTL);
+ reg = RADEON_CRTC2_GEN_CNTL;
break;
default:
return;
}
- WREG32_P(RADEON_MM_DATA, 0, ~RADEON_CRTC_CUR_EN);
+ WREG32_IDX(reg, RREG32_IDX(reg) & ~RADEON_CRTC_CUR_EN);
}
}
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index e2f5f888c374..edfc54e41842 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -897,6 +897,25 @@ static void radeon_check_arguments(struct radeon_device *rdev)
}
/**
+ * radeon_switcheroo_quirk_long_wakeup - return true if longer d3 delay is
+ * needed for waking up.
+ *
+ * @pdev: pci dev pointer
+ */
+static bool radeon_switcheroo_quirk_long_wakeup(struct pci_dev *pdev)
+{
+
+ /* 6600m in a macbook pro */
+ if (pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE &&
+ pdev->subsystem_device == 0x00e2) {
+ printk(KERN_INFO "radeon: quirking longer d3 wakeup delay\n");
+ return true;
+ }
+
+ return false;
+}
+
+/**
* radeon_switcheroo_set_state - set switcheroo state
*
* @pdev: pci dev pointer
@@ -910,10 +929,19 @@ static void radeon_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
struct drm_device *dev = pci_get_drvdata(pdev);
pm_message_t pmm = { .event = PM_EVENT_SUSPEND };
if (state == VGA_SWITCHEROO_ON) {
+ unsigned d3_delay = dev->pdev->d3_delay;
+
printk(KERN_INFO "radeon: switched on\n");
/* don't suspend or resume card normally */
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
+
+ if (d3_delay < 20 && radeon_switcheroo_quirk_long_wakeup(pdev))
+ dev->pdev->d3_delay = 20;
+
radeon_resume_kms(dev);
+
+ dev->pdev->d3_delay = d3_delay;
+
dev->switch_power_state = DRM_SWITCH_POWER_ON;
drm_kms_helper_poll_enable(dev);
} else {
@@ -1059,6 +1087,7 @@ int radeon_device_init(struct radeon_device *rdev,
/* Registers mapping */
/* TODO: block userspace mapping of io register */
+ spin_lock_init(&rdev->mmio_idx_lock);
rdev->rmmio_base = pci_resource_start(rdev->pdev, 2);
rdev->rmmio_size = pci_resource_len(rdev->pdev, 2);
rdev->rmmio = ioremap(rdev->rmmio_base, rdev->rmmio_size);
@@ -1163,6 +1192,7 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state)
struct drm_crtc *crtc;
struct drm_connector *connector;
int i, r;
+ bool force_completion = false;
if (dev == NULL || dev->dev_private == NULL) {
return -ENODEV;
@@ -1205,8 +1235,16 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state)
mutex_lock(&rdev->ring_lock);
/* wait for gpu to finish processing current batch */
- for (i = 0; i < RADEON_NUM_RINGS; i++)
- radeon_fence_wait_empty_locked(rdev, i);
+ for (i = 0; i < RADEON_NUM_RINGS; i++) {
+ r = radeon_fence_wait_empty_locked(rdev, i);
+ if (r) {
+ /* delay GPU reset to resume */
+ force_completion = true;
+ }
+ }
+ if (force_completion) {
+ radeon_fence_driver_force_completion(rdev);
+ }
mutex_unlock(&rdev->ring_lock);
radeon_save_bios_scratch_regs(rdev);
@@ -1337,7 +1375,6 @@ retry:
}
radeon_restore_bios_scratch_regs(rdev);
- drm_helper_resume_force_mode(rdev->ddev);
if (!r) {
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
@@ -1357,11 +1394,14 @@ retry:
}
}
} else {
+ radeon_fence_driver_force_completion(rdev);
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
kfree(ring_data[i]);
}
}
+ drm_helper_resume_force_mode(rdev->ddev);
+
ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
if (r) {
/* bad news, how to tell it to userspace ? */
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index bfa2a6015727..1da2386d7cf7 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -378,8 +378,12 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc,
work->old_rbo = rbo;
obj = new_radeon_fb->obj;
rbo = gem_to_radeon_bo(obj);
+
+ spin_lock(&rbo->tbo.bdev->fence_lock);
if (rbo->tbo.sync_obj)
work->fence = radeon_fence_ref(rbo->tbo.sync_obj);
+ spin_unlock(&rbo->tbo.bdev->fence_lock);
+
INIT_WORK(&work->work, radeon_unpin_work_func);
/* We borrow the event spin lock for protecting unpin_work */
@@ -695,10 +699,15 @@ int radeon_ddc_get_modes(struct radeon_connector *radeon_connector)
if (radeon_connector->router.ddc_valid)
radeon_router_select_ddc_port(radeon_connector);
- if ((radeon_connector->base.connector_type == DRM_MODE_CONNECTOR_DisplayPort) ||
- (radeon_connector->base.connector_type == DRM_MODE_CONNECTOR_eDP) ||
- (radeon_connector_encoder_get_dp_bridge_encoder_id(&radeon_connector->base) !=
- ENCODER_OBJECT_ID_NONE)) {
+ if (radeon_connector_encoder_get_dp_bridge_encoder_id(&radeon_connector->base) !=
+ ENCODER_OBJECT_ID_NONE) {
+ struct radeon_connector_atom_dig *dig = radeon_connector->con_priv;
+
+ if (dig->dp_i2c_bus)
+ radeon_connector->edid = drm_get_edid(&radeon_connector->base,
+ &dig->dp_i2c_bus->adapter);
+ } else if ((radeon_connector->base.connector_type == DRM_MODE_CONNECTOR_DisplayPort) ||
+ (radeon_connector->base.connector_type == DRM_MODE_CONNECTOR_eDP)) {
struct radeon_connector_atom_dig *dig = radeon_connector->con_priv;
if ((dig->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT ||
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 07eb84e8a8a4..d9bf96ee299a 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -65,9 +65,14 @@
* 2.22.0 - r600 only: RESOLVE_BOX allowed
* 2.23.0 - allow STRMOUT_BASE_UPDATE on RS780 and RS880
* 2.24.0 - eg only: allow MIP_ADDRESS=0 for MSAA textures
+ * 2.25.0 - eg+: new info request for num SE and num SH
+ * 2.26.0 - r600-eg: fix htile size computation
+ * 2.27.0 - r600-SI: Add CS ioctl support for async DMA
+ * 2.28.0 - r600-eg: Add MEM_WRITE packet support
+ * 2.29.0 - R500 FP16 color clear registers
*/
#define KMS_DRIVER_MAJOR 2
-#define KMS_DRIVER_MINOR 24
+#define KMS_DRIVER_MINOR 29
#define KMS_DRIVER_PATCHLEVEL 0
int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
int radeon_driver_unload_kms(struct drm_device *dev);
@@ -281,12 +286,15 @@ static struct drm_driver driver_old = {
static struct drm_driver kms_driver;
-static void radeon_kick_out_firmware_fb(struct pci_dev *pdev)
+static int radeon_kick_out_firmware_fb(struct pci_dev *pdev)
{
struct apertures_struct *ap;
bool primary = false;
ap = alloc_apertures(1);
+ if (!ap)
+ return -ENOMEM;
+
ap->ranges[0].base = pci_resource_start(pdev, 0);
ap->ranges[0].size = pci_resource_len(pdev, 0);
@@ -295,13 +303,19 @@ static void radeon_kick_out_firmware_fb(struct pci_dev *pdev)
#endif
remove_conflicting_framebuffers(ap, "radeondrmfb", primary);
kfree(ap);
+
+ return 0;
}
-static int __devinit
-radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+static int radeon_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
{
+ int ret;
+
/* Get rid of things like offb */
- radeon_kick_out_firmware_fb(pdev);
+ ret = radeon_kick_out_firmware_fb(pdev);
+ if (ret)
+ return ret;
return drm_get_pci_dev(pdev, ent, &kms_driver);
}
diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index a1b59ca96d01..e7fdf163a8ca 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -366,7 +366,6 @@ extern int radeon_cp_buffers(struct drm_device *dev, void *data, struct drm_file
extern u32 radeon_read_fb_location(drm_radeon_private_t *dev_priv);
extern void radeon_write_agp_location(drm_radeon_private_t *dev_priv, u32 agp_loc);
extern void radeon_write_agp_base(drm_radeon_private_t *dev_priv, u64 agp_base);
-extern u32 RADEON_READ_MM(drm_radeon_private_t *dev_priv, int addr);
extern void radeon_freelist_reset(struct drm_device * dev);
extern struct drm_buf *radeon_freelist_get(struct drm_device * dev);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 22bd6c2c2740..34356252567a 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -609,26 +609,20 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
* Returns 0 if the fences have passed, error for all other cases.
* Caller must hold ring lock.
*/
-void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
+int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
{
uint64_t seq = rdev->fence_drv[ring].sync_seq[ring];
+ int r;
- while(1) {
- int r;
- r = radeon_fence_wait_seq(rdev, seq, ring, false, false);
+ r = radeon_fence_wait_seq(rdev, seq, ring, false, false);
+ if (r) {
if (r == -EDEADLK) {
- mutex_unlock(&rdev->ring_lock);
- r = radeon_gpu_reset(rdev);
- mutex_lock(&rdev->ring_lock);
- if (!r)
- continue;
- }
- if (r) {
- dev_err(rdev->dev, "error waiting for ring to become"
- " idle (%d)\n", r);
+ return -EDEADLK;
}
- return;
+ dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%d)\n",
+ ring, r);
}
+ return 0;
}
/**
@@ -772,7 +766,7 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
int r;
radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
- if (rdev->wb.use_event) {
+ if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
rdev->fence_drv[ring].scratch_reg = 0;
index = R600_WB_EVENT_OFFSET + ring * 4;
} else {
@@ -854,13 +848,17 @@ int radeon_fence_driver_init(struct radeon_device *rdev)
*/
void radeon_fence_driver_fini(struct radeon_device *rdev)
{
- int ring;
+ int ring, r;
mutex_lock(&rdev->ring_lock);
for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
if (!rdev->fence_drv[ring].initialized)
continue;
- radeon_fence_wait_empty_locked(rdev, ring);
+ r = radeon_fence_wait_empty_locked(rdev, ring);
+ if (r) {
+ /* no need to trigger GPU reset as we are unloading */
+ radeon_fence_driver_force_completion(rdev);
+ }
wake_up_all(&rdev->fence_queue);
radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
rdev->fence_drv[ring].initialized = false;
@@ -868,6 +866,25 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
mutex_unlock(&rdev->ring_lock);
}
+/**
+ * radeon_fence_driver_force_completion - force all fence waiter to complete
+ *
+ * @rdev: radeon device pointer
+ *
+ * In case of GPU reset failure make sure no process keep waiting on fence
+ * that will never complete.
+ */
+void radeon_fence_driver_force_completion(struct radeon_device *rdev)
+{
+ int ring;
+
+ for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
+ if (!rdev->fence_drv[ring].initialized)
+ continue;
+ radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
+ }
+}
+
/*
* Fence debugfs
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 4debd60e5aa6..6e24f84755b5 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -1237,7 +1237,6 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev,
{
struct radeon_bo_va *bo_va;
- BUG_ON(!atomic_read(&bo->tbo.reserved));
list_for_each_entry(bo_va, &bo->va, bo_list) {
bo_va->valid = false;
}
diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c
index c5bddd630eb9..fc60b74ee304 100644
--- a/drivers/gpu/drm/radeon/radeon_i2c.c
+++ b/drivers/gpu/drm/radeon/radeon_i2c.c
@@ -39,7 +39,7 @@ extern u32 radeon_atom_hw_i2c_func(struct i2c_adapter *adap);
* radeon_ddc_probe
*
*/
-bool radeon_ddc_probe(struct radeon_connector *radeon_connector)
+bool radeon_ddc_probe(struct radeon_connector *radeon_connector, bool use_aux)
{
u8 out = 0x0;
u8 buf[8];
@@ -63,7 +63,13 @@ bool radeon_ddc_probe(struct radeon_connector *radeon_connector)
if (radeon_connector->router.ddc_valid)
radeon_router_select_ddc_port(radeon_connector);
- ret = i2c_transfer(&radeon_connector->ddc_bus->adapter, msgs, 2);
+ if (use_aux) {
+ struct radeon_connector_atom_dig *dig = radeon_connector->con_priv;
+ ret = i2c_transfer(&dig->dp_i2c_bus->adapter, msgs, 2);
+ } else {
+ ret = i2c_transfer(&radeon_connector->ddc_bus->adapter, msgs, 2);
+ }
+
if (ret != 2)
/* Couldn't find an accessible DDC on this connector */
return false;
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index dc781c49b96b..9c312f9afb68 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -361,6 +361,22 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return -EINVAL;
}
break;
+ case RADEON_INFO_MAX_SE:
+ if (rdev->family >= CHIP_TAHITI)
+ value = rdev->config.si.max_shader_engines;
+ else if (rdev->family >= CHIP_CAYMAN)
+ value = rdev->config.cayman.max_shader_engines;
+ else if (rdev->family >= CHIP_CEDAR)
+ value = rdev->config.evergreen.num_ses;
+ else
+ value = 1;
+ break;
+ case RADEON_INFO_MAX_SH_PER_SE:
+ if (rdev->family >= CHIP_TAHITI)
+ value = rdev->config.si.max_sh_per_se;
+ else
+ return -EINVAL;
+ break;
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->request);
return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
index 5677a424b585..6857cb4efb76 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
@@ -295,6 +295,7 @@ static void radeon_crtc_dpms(struct drm_crtc *crtc, int mode)
struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct radeon_device *rdev = dev->dev_private;
+ uint32_t crtc_ext_cntl = 0;
uint32_t mask;
if (radeon_crtc->crtc_id)
@@ -307,6 +308,16 @@ static void radeon_crtc_dpms(struct drm_crtc *crtc, int mode)
RADEON_CRTC_VSYNC_DIS |
RADEON_CRTC_HSYNC_DIS);
+ /*
+ * On all dual CRTC GPUs this bit controls the CRTC of the primary DAC.
+ * Therefore it is set in the DAC DMPS function.
+ * This is different for GPU's with a single CRTC but a primary and a
+ * TV DAC: here it controls the single CRTC no matter where it is
+ * routed. Therefore we set it here.
+ */
+ if (rdev->flags & RADEON_SINGLE_CRTC)
+ crtc_ext_cntl = RADEON_CRTC_CRT_ON;
+
switch (mode) {
case DRM_MODE_DPMS_ON:
radeon_crtc->enabled = true;
@@ -317,7 +328,7 @@ static void radeon_crtc_dpms(struct drm_crtc *crtc, int mode)
else {
WREG32_P(RADEON_CRTC_GEN_CNTL, RADEON_CRTC_EN, ~(RADEON_CRTC_EN |
RADEON_CRTC_DISP_REQ_EN_B));
- WREG32_P(RADEON_CRTC_EXT_CNTL, 0, ~mask);
+ WREG32_P(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl, ~(mask | crtc_ext_cntl));
}
drm_vblank_post_modeset(dev, radeon_crtc->crtc_id);
radeon_crtc_load_lut(crtc);
@@ -331,7 +342,7 @@ static void radeon_crtc_dpms(struct drm_crtc *crtc, int mode)
else {
WREG32_P(RADEON_CRTC_GEN_CNTL, RADEON_CRTC_DISP_REQ_EN_B, ~(RADEON_CRTC_EN |
RADEON_CRTC_DISP_REQ_EN_B));
- WREG32_P(RADEON_CRTC_EXT_CNTL, mask, ~mask);
+ WREG32_P(RADEON_CRTC_EXT_CNTL, mask, ~(mask | crtc_ext_cntl));
}
radeon_crtc->enabled = false;
/* adjust pm to dpms changes AFTER disabling crtcs */
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
index 0063df9d166d..62cd512f5c8d 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
@@ -537,7 +537,9 @@ static void radeon_legacy_primary_dac_dpms(struct drm_encoder *encoder, int mode
break;
}
- WREG32(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl);
+ /* handled in radeon_crtc_dpms() */
+ if (!(rdev->flags & RADEON_SINGLE_CRTC))
+ WREG32(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl);
WREG32(RADEON_DAC_CNTL, dac_cntl);
WREG32(RADEON_DAC_MACRO_CNTL, dac_macro_cntl);
@@ -638,6 +640,14 @@ static enum drm_connector_status radeon_legacy_primary_dac_detect(struct drm_enc
enum drm_connector_status found = connector_status_disconnected;
bool color = true;
+ /* just don't bother on RN50 those chip are often connected to remoting
+ * console hw and often we get failure to load detect those. So to make
+ * everyone happy report the encoder as always connected.
+ */
+ if (ASIC_IS_RN50(rdev)) {
+ return connector_status_connected;
+ }
+
/* save the regs we need */
vclk_ecp_cntl = RREG32_PLL(RADEON_VCLK_ECP_CNTL);
crtc_ext_cntl = RREG32(RADEON_CRTC_EXT_CNTL);
@@ -662,6 +672,8 @@ static enum drm_connector_status radeon_legacy_primary_dac_detect(struct drm_enc
if (ASIC_IS_R300(rdev))
tmp |= (0x1b6 << RADEON_DAC_FORCE_DATA_SHIFT);
+ else if (ASIC_IS_RV100(rdev))
+ tmp |= (0x1ac << RADEON_DAC_FORCE_DATA_SHIFT);
else
tmp |= (0x180 << RADEON_DAC_FORCE_DATA_SHIFT);
@@ -671,6 +683,7 @@ static enum drm_connector_status radeon_legacy_primary_dac_detect(struct drm_enc
tmp |= RADEON_DAC_RANGE_CNTL_PS2 | RADEON_DAC_CMP_EN;
WREG32(RADEON_DAC_CNTL, tmp);
+ tmp = dac_macro_cntl;
tmp &= ~(RADEON_DAC_PDWN_R |
RADEON_DAC_PDWN_G |
RADEON_DAC_PDWN_B);
@@ -1092,7 +1105,8 @@ static void radeon_legacy_tv_dac_dpms(struct drm_encoder *encoder, int mode)
} else {
if (is_tv)
WREG32(RADEON_TV_MASTER_CNTL, tv_master_cntl);
- else
+ /* handled in radeon_crtc_dpms() */
+ else if (!(rdev->flags & RADEON_SINGLE_CRTC))
WREG32(RADEON_CRTC2_GEN_CNTL, crtc2_gen_cntl);
WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl);
}
@@ -1416,13 +1430,104 @@ static bool radeon_legacy_tv_detect(struct drm_encoder *encoder,
return found;
}
+static bool radeon_legacy_ext_dac_detect(struct drm_encoder *encoder,
+ struct drm_connector *connector)
+{
+ struct drm_device *dev = encoder->dev;
+ struct radeon_device *rdev = dev->dev_private;
+ uint32_t gpio_monid, fp2_gen_cntl, disp_output_cntl, crtc2_gen_cntl;
+ uint32_t disp_lin_trans_grph_a, disp_lin_trans_grph_b, disp_lin_trans_grph_c;
+ uint32_t disp_lin_trans_grph_d, disp_lin_trans_grph_e, disp_lin_trans_grph_f;
+ uint32_t tmp, crtc2_h_total_disp, crtc2_v_total_disp;
+ uint32_t crtc2_h_sync_strt_wid, crtc2_v_sync_strt_wid;
+ bool found = false;
+ int i;
+
+ /* save the regs we need */
+ gpio_monid = RREG32(RADEON_GPIO_MONID);
+ fp2_gen_cntl = RREG32(RADEON_FP2_GEN_CNTL);
+ disp_output_cntl = RREG32(RADEON_DISP_OUTPUT_CNTL);
+ crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
+ disp_lin_trans_grph_a = RREG32(RADEON_DISP_LIN_TRANS_GRPH_A);
+ disp_lin_trans_grph_b = RREG32(RADEON_DISP_LIN_TRANS_GRPH_B);
+ disp_lin_trans_grph_c = RREG32(RADEON_DISP_LIN_TRANS_GRPH_C);
+ disp_lin_trans_grph_d = RREG32(RADEON_DISP_LIN_TRANS_GRPH_D);
+ disp_lin_trans_grph_e = RREG32(RADEON_DISP_LIN_TRANS_GRPH_E);
+ disp_lin_trans_grph_f = RREG32(RADEON_DISP_LIN_TRANS_GRPH_F);
+ crtc2_h_total_disp = RREG32(RADEON_CRTC2_H_TOTAL_DISP);
+ crtc2_v_total_disp = RREG32(RADEON_CRTC2_V_TOTAL_DISP);
+ crtc2_h_sync_strt_wid = RREG32(RADEON_CRTC2_H_SYNC_STRT_WID);
+ crtc2_v_sync_strt_wid = RREG32(RADEON_CRTC2_V_SYNC_STRT_WID);
+
+ tmp = RREG32(RADEON_GPIO_MONID);
+ tmp &= ~RADEON_GPIO_A_0;
+ WREG32(RADEON_GPIO_MONID, tmp);
+
+ WREG32(RADEON_FP2_GEN_CNTL, (RADEON_FP2_ON |
+ RADEON_FP2_PANEL_FORMAT |
+ R200_FP2_SOURCE_SEL_TRANS_UNIT |
+ RADEON_FP2_DVO_EN |
+ R200_FP2_DVO_RATE_SEL_SDR));
+
+ WREG32(RADEON_DISP_OUTPUT_CNTL, (RADEON_DISP_DAC_SOURCE_RMX |
+ RADEON_DISP_TRANS_MATRIX_GRAPHICS));
+
+ WREG32(RADEON_CRTC2_GEN_CNTL, (RADEON_CRTC2_EN |
+ RADEON_CRTC2_DISP_REQ_EN_B));
+
+ WREG32(RADEON_DISP_LIN_TRANS_GRPH_A, 0x00000000);
+ WREG32(RADEON_DISP_LIN_TRANS_GRPH_B, 0x000003f0);
+ WREG32(RADEON_DISP_LIN_TRANS_GRPH_C, 0x00000000);
+ WREG32(RADEON_DISP_LIN_TRANS_GRPH_D, 0x000003f0);
+ WREG32(RADEON_DISP_LIN_TRANS_GRPH_E, 0x00000000);
+ WREG32(RADEON_DISP_LIN_TRANS_GRPH_F, 0x000003f0);
+
+ WREG32(RADEON_CRTC2_H_TOTAL_DISP, 0x01000008);
+ WREG32(RADEON_CRTC2_H_SYNC_STRT_WID, 0x00000800);
+ WREG32(RADEON_CRTC2_V_TOTAL_DISP, 0x00080001);
+ WREG32(RADEON_CRTC2_V_SYNC_STRT_WID, 0x00000080);
+
+ for (i = 0; i < 200; i++) {
+ tmp = RREG32(RADEON_GPIO_MONID);
+ if (tmp & RADEON_GPIO_Y_0)
+ found = true;
+
+ if (found)
+ break;
+
+ if (!drm_can_sleep())
+ mdelay(1);
+ else
+ msleep(1);
+ }
+
+ /* restore the regs we used */
+ WREG32(RADEON_DISP_LIN_TRANS_GRPH_A, disp_lin_trans_grph_a);
+ WREG32(RADEON_DISP_LIN_TRANS_GRPH_B, disp_lin_trans_grph_b);
+ WREG32(RADEON_DISP_LIN_TRANS_GRPH_C, disp_lin_trans_grph_c);
+ WREG32(RADEON_DISP_LIN_TRANS_GRPH_D, disp_lin_trans_grph_d);
+ WREG32(RADEON_DISP_LIN_TRANS_GRPH_E, disp_lin_trans_grph_e);
+ WREG32(RADEON_DISP_LIN_TRANS_GRPH_F, disp_lin_trans_grph_f);
+ WREG32(RADEON_CRTC2_H_TOTAL_DISP, crtc2_h_total_disp);
+ WREG32(RADEON_CRTC2_V_TOTAL_DISP, crtc2_v_total_disp);
+ WREG32(RADEON_CRTC2_H_SYNC_STRT_WID, crtc2_h_sync_strt_wid);
+ WREG32(RADEON_CRTC2_V_SYNC_STRT_WID, crtc2_v_sync_strt_wid);
+ WREG32(RADEON_CRTC2_GEN_CNTL, crtc2_gen_cntl);
+ WREG32(RADEON_DISP_OUTPUT_CNTL, disp_output_cntl);
+ WREG32(RADEON_FP2_GEN_CNTL, fp2_gen_cntl);
+ WREG32(RADEON_GPIO_MONID, gpio_monid);
+
+ return found;
+}
+
static enum drm_connector_status radeon_legacy_tv_dac_detect(struct drm_encoder *encoder,
struct drm_connector *connector)
{
struct drm_device *dev = encoder->dev;
struct radeon_device *rdev = dev->dev_private;
- uint32_t crtc2_gen_cntl, tv_dac_cntl, dac_cntl2, dac_ext_cntl;
- uint32_t disp_hw_debug, disp_output_cntl, gpiopad_a, pixclks_cntl, tmp;
+ uint32_t crtc2_gen_cntl = 0, tv_dac_cntl, dac_cntl2, dac_ext_cntl;
+ uint32_t gpiopad_a = 0, pixclks_cntl, tmp;
+ uint32_t disp_output_cntl = 0, disp_hw_debug = 0, crtc_ext_cntl = 0;
enum drm_connector_status found = connector_status_disconnected;
struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
struct radeon_encoder_tv_dac *tv_dac = radeon_encoder->enc_priv;
@@ -1459,12 +1564,27 @@ static enum drm_connector_status radeon_legacy_tv_dac_detect(struct drm_encoder
return connector_status_disconnected;
}
+ /* R200 uses an external DAC for secondary DAC */
+ if (rdev->family == CHIP_R200) {
+ if (radeon_legacy_ext_dac_detect(encoder, connector))
+ found = connector_status_connected;
+ return found;
+ }
+
/* save the regs we need */
pixclks_cntl = RREG32_PLL(RADEON_PIXCLKS_CNTL);
- gpiopad_a = ASIC_IS_R300(rdev) ? RREG32(RADEON_GPIOPAD_A) : 0;
- disp_output_cntl = ASIC_IS_R300(rdev) ? RREG32(RADEON_DISP_OUTPUT_CNTL) : 0;
- disp_hw_debug = ASIC_IS_R300(rdev) ? 0 : RREG32(RADEON_DISP_HW_DEBUG);
- crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
+
+ if (rdev->flags & RADEON_SINGLE_CRTC) {
+ crtc_ext_cntl = RREG32(RADEON_CRTC_EXT_CNTL);
+ } else {
+ if (ASIC_IS_R300(rdev)) {
+ gpiopad_a = RREG32(RADEON_GPIOPAD_A);
+ disp_output_cntl = RREG32(RADEON_DISP_OUTPUT_CNTL);
+ } else {
+ disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG);
+ }
+ crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
+ }
tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
dac_ext_cntl = RREG32(RADEON_DAC_EXT_CNTL);
dac_cntl2 = RREG32(RADEON_DAC_CNTL2);
@@ -1473,22 +1593,24 @@ static enum drm_connector_status radeon_legacy_tv_dac_detect(struct drm_encoder
| RADEON_PIX2CLK_DAC_ALWAYS_ONb);
WREG32_PLL(RADEON_PIXCLKS_CNTL, tmp);
- if (ASIC_IS_R300(rdev))
- WREG32_P(RADEON_GPIOPAD_A, 1, ~1);
-
- tmp = crtc2_gen_cntl & ~RADEON_CRTC2_PIX_WIDTH_MASK;
- tmp |= RADEON_CRTC2_CRT2_ON |
- (2 << RADEON_CRTC2_PIX_WIDTH_SHIFT);
-
- WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
-
- if (ASIC_IS_R300(rdev)) {
- tmp = disp_output_cntl & ~RADEON_DISP_TVDAC_SOURCE_MASK;
- tmp |= RADEON_DISP_TVDAC_SOURCE_CRTC2;
- WREG32(RADEON_DISP_OUTPUT_CNTL, tmp);
+ if (rdev->flags & RADEON_SINGLE_CRTC) {
+ tmp = crtc_ext_cntl | RADEON_CRTC_CRT_ON;
+ WREG32(RADEON_CRTC_EXT_CNTL, tmp);
} else {
- tmp = disp_hw_debug & ~RADEON_CRT2_DISP1_SEL;
- WREG32(RADEON_DISP_HW_DEBUG, tmp);
+ tmp = crtc2_gen_cntl & ~RADEON_CRTC2_PIX_WIDTH_MASK;
+ tmp |= RADEON_CRTC2_CRT2_ON |
+ (2 << RADEON_CRTC2_PIX_WIDTH_SHIFT);
+ WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
+
+ if (ASIC_IS_R300(rdev)) {
+ WREG32_P(RADEON_GPIOPAD_A, 1, ~1);
+ tmp = disp_output_cntl & ~RADEON_DISP_TVDAC_SOURCE_MASK;
+ tmp |= RADEON_DISP_TVDAC_SOURCE_CRTC2;
+ WREG32(RADEON_DISP_OUTPUT_CNTL, tmp);
+ } else {
+ tmp = disp_hw_debug & ~RADEON_CRT2_DISP1_SEL;
+ WREG32(RADEON_DISP_HW_DEBUG, tmp);
+ }
}
tmp = RADEON_TV_DAC_NBLANK |
@@ -1530,14 +1652,19 @@ static enum drm_connector_status radeon_legacy_tv_dac_detect(struct drm_encoder
WREG32(RADEON_DAC_CNTL2, dac_cntl2);
WREG32(RADEON_DAC_EXT_CNTL, dac_ext_cntl);
WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl);
- WREG32(RADEON_CRTC2_GEN_CNTL, crtc2_gen_cntl);
- if (ASIC_IS_R300(rdev)) {
- WREG32(RADEON_DISP_OUTPUT_CNTL, disp_output_cntl);
- WREG32_P(RADEON_GPIOPAD_A, gpiopad_a, ~1);
+ if (rdev->flags & RADEON_SINGLE_CRTC) {
+ WREG32(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl);
} else {
- WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug);
+ WREG32(RADEON_CRTC2_GEN_CNTL, crtc2_gen_cntl);
+ if (ASIC_IS_R300(rdev)) {
+ WREG32(RADEON_DISP_OUTPUT_CNTL, disp_output_cntl);
+ WREG32_P(RADEON_GPIOPAD_A, gpiopad_a, ~1);
+ } else {
+ WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug);
+ }
}
+
WREG32_PLL(RADEON_PIXCLKS_CNTL, pixclks_cntl);
return found;
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 92c5f473cf08..4003f5a68c09 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -209,7 +209,8 @@ enum radeon_connector_table {
CT_RN50_POWER,
CT_MAC_X800,
CT_MAC_G5_9600,
- CT_SAM440EP
+ CT_SAM440EP,
+ CT_MAC_G4_SILVER
};
enum radeon_dvo_chip {
@@ -427,7 +428,7 @@ struct radeon_connector_atom_dig {
uint32_t igp_lane_info;
/* displayport */
struct radeon_i2c_chan *dp_i2c_bus;
- u8 dpcd[8];
+ u8 dpcd[DP_RECEIVER_CAP_SIZE];
u8 dp_sink_type;
int dp_clock;
int dp_lane_count;
@@ -558,7 +559,7 @@ extern void radeon_i2c_put_byte(struct radeon_i2c_chan *i2c,
u8 val);
extern void radeon_router_select_ddc_port(struct radeon_connector *radeon_connector);
extern void radeon_router_select_cd_port(struct radeon_connector *radeon_connector);
-extern bool radeon_ddc_probe(struct radeon_connector *radeon_connector);
+extern bool radeon_ddc_probe(struct radeon_connector *radeon_connector, bool use_aux);
extern int radeon_ddc_get_modes(struct radeon_connector *radeon_connector);
extern struct drm_encoder *radeon_best_encoder(struct drm_connector *connector);
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index b91118ccef86..d3aface2d12d 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -88,10 +88,20 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
if (domain & RADEON_GEM_DOMAIN_VRAM)
rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
TTM_PL_FLAG_VRAM;
- if (domain & RADEON_GEM_DOMAIN_GTT)
- rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
- if (domain & RADEON_GEM_DOMAIN_CPU)
- rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
+ if (domain & RADEON_GEM_DOMAIN_GTT) {
+ if (rbo->rdev->flags & RADEON_IS_AGP) {
+ rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT;
+ } else {
+ rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT;
+ }
+ }
+ if (domain & RADEON_GEM_DOMAIN_CPU) {
+ if (rbo->rdev->flags & RADEON_IS_AGP) {
+ rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_SYSTEM;
+ } else {
+ rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM;
+ }
+ }
if (!c)
rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
rbo->placement.num_placement = c;
@@ -140,7 +150,7 @@ int radeon_bo_create(struct radeon_device *rdev,
/* Kernel allocation are uninterruptible */
down_read(&rdev->pm.mclk_lock);
r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
- &bo->placement, page_align, 0, !kernel, NULL,
+ &bo->placement, page_align, !kernel, NULL,
acc_size, sg, &radeon_ttm_bo_destroy);
up_read(&rdev->pm.mclk_lock);
if (unlikely(r != 0)) {
@@ -240,7 +250,7 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
}
for (i = 0; i < bo->placement.num_placement; i++)
bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
- r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false, false);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
if (likely(r == 0)) {
bo->pin_count = 1;
if (gpu_addr != NULL)
@@ -269,7 +279,7 @@ int radeon_bo_unpin(struct radeon_bo *bo)
return 0;
for (i = 0; i < bo->placement.num_placement; i++)
bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT;
- r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false, false);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
if (unlikely(r != 0))
dev_err(bo->rdev->dev, "%p validate failed for unpin\n", bo);
return r;
@@ -355,7 +365,7 @@ int radeon_bo_list_validate(struct list_head *head)
retry:
radeon_ttm_placement_from_domain(bo, domain);
r = ttm_bo_validate(&bo->tbo, &bo->placement,
- true, false, false);
+ true, false);
if (unlikely(r)) {
if (r != -ERESTARTSYS && domain == RADEON_GEM_DOMAIN_VRAM) {
domain |= RADEON_GEM_DOMAIN_GTT;
@@ -384,7 +394,7 @@ int radeon_bo_get_surface_reg(struct radeon_bo *bo)
int steal;
int i;
- BUG_ON(!atomic_read(&bo->tbo.reserved));
+ BUG_ON(!radeon_bo_is_reserved(bo));
if (!bo->tiling_flags)
return 0;
@@ -510,7 +520,7 @@ void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
uint32_t *tiling_flags,
uint32_t *pitch)
{
- BUG_ON(!atomic_read(&bo->tbo.reserved));
+ BUG_ON(!radeon_bo_is_reserved(bo));
if (tiling_flags)
*tiling_flags = bo->tiling_flags;
if (pitch)
@@ -520,7 +530,7 @@ void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
bool force_drop)
{
- BUG_ON(!atomic_read(&bo->tbo.reserved));
+ BUG_ON(!radeon_bo_is_reserved(bo) && !force_drop);
if (!(bo->tiling_flags & RADEON_TILING_SURFACE))
return 0;
@@ -575,7 +585,7 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
/* hurrah the memory is not visible ! */
radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
rbo->placement.lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT;
- r = ttm_bo_validate(bo, &rbo->placement, false, true, false);
+ r = ttm_bo_validate(bo, &rbo->placement, false, false);
if (unlikely(r != 0))
return r;
offset = bo->mem.start << PAGE_SHIFT;
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 93cd491fff2e..5fc86b03043b 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -80,7 +80,7 @@ static inline unsigned long radeon_bo_size(struct radeon_bo *bo)
static inline bool radeon_bo_is_reserved(struct radeon_bo *bo)
{
- return !!atomic_read(&bo->tbo.reserved);
+ return ttm_bo_is_reserved(&bo->tbo);
}
static inline unsigned radeon_bo_ngpu_pages(struct radeon_bo *bo)
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index aa14dbb7e4fb..0bfa656aa87d 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -234,7 +234,7 @@ static void radeon_set_power_state(struct radeon_device *rdev)
static void radeon_pm_set_clocks(struct radeon_device *rdev)
{
- int i;
+ int i, r;
/* no need to take locks, etc. if nothing's going to change */
if ((rdev->pm.requested_clock_mode_index == rdev->pm.current_clock_mode_index) &&
@@ -248,8 +248,17 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev)
/* wait for the rings to drain */
for (i = 0; i < RADEON_NUM_RINGS; i++) {
struct radeon_ring *ring = &rdev->ring[i];
- if (ring->ready)
- radeon_fence_wait_empty_locked(rdev, i);
+ if (!ring->ready) {
+ continue;
+ }
+ r = radeon_fence_wait_empty_locked(rdev, i);
+ if (r) {
+ /* needs a GPU reset dont reset here */
+ mutex_unlock(&rdev->ring_lock);
+ up_write(&rdev->pm.mclk_lock);
+ mutex_unlock(&rdev->ddev->struct_mutex);
+ return;
+ }
}
radeon_unmap_vram_bos(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c
index e09521858f64..26c23bb651c6 100644
--- a/drivers/gpu/drm/radeon/radeon_prime.c
+++ b/drivers/gpu/drm/radeon/radeon_prime.c
@@ -194,6 +194,7 @@ struct drm_gem_object *radeon_gem_prime_import(struct drm_device *dev,
bo = dma_buf->priv;
if (bo->gem_base.dev == dev) {
drm_gem_object_reference(&bo->gem_base);
+ dma_buf_put(dma_buf);
return &bo->gem_base;
}
}
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index ebd69562ef6c..2430d80b1871 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -770,22 +770,30 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data)
int ridx = *(int*)node->info_ent->data;
struct radeon_ring *ring = &rdev->ring[ridx];
unsigned count, i, j;
+ u32 tmp;
radeon_ring_free_size(rdev, ring);
count = (ring->ring_size / 4) - ring->ring_free_dw;
- seq_printf(m, "wptr(0x%04x): 0x%08x\n", ring->wptr_reg, RREG32(ring->wptr_reg));
- seq_printf(m, "rptr(0x%04x): 0x%08x\n", ring->rptr_reg, RREG32(ring->rptr_reg));
+ tmp = RREG32(ring->wptr_reg) >> ring->ptr_reg_shift;
+ seq_printf(m, "wptr(0x%04x): 0x%08x [%5d]\n", ring->wptr_reg, tmp, tmp);
+ tmp = RREG32(ring->rptr_reg) >> ring->ptr_reg_shift;
+ seq_printf(m, "rptr(0x%04x): 0x%08x [%5d]\n", ring->rptr_reg, tmp, tmp);
if (ring->rptr_save_reg) {
seq_printf(m, "rptr next(0x%04x): 0x%08x\n", ring->rptr_save_reg,
RREG32(ring->rptr_save_reg));
}
- seq_printf(m, "driver's copy of the wptr: 0x%08x\n", ring->wptr);
- seq_printf(m, "driver's copy of the rptr: 0x%08x\n", ring->rptr);
+ seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n", ring->wptr, ring->wptr);
+ seq_printf(m, "driver's copy of the rptr: 0x%08x [%5d]\n", ring->rptr, ring->rptr);
+ seq_printf(m, "last semaphore signal addr : 0x%016llx\n", ring->last_semaphore_signal_addr);
+ seq_printf(m, "last semaphore wait addr : 0x%016llx\n", ring->last_semaphore_wait_addr);
seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
seq_printf(m, "%u dwords in ring\n", count);
- i = ring->rptr;
- for (j = 0; j <= count; j++) {
- seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]);
+ /* print 8 dw before current rptr as often it's the last executed
+ * packet that is the root issue
+ */
+ i = (ring->rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask;
+ for (j = 0; j <= (count + 32); j++) {
+ seq_printf(m, "r[%5d]=0x%08x\n", i, ring->ring[i]);
i = (i + 1) & ring->ptr_mask;
}
return 0;
@@ -794,11 +802,15 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data)
static int radeon_ring_type_gfx_index = RADEON_RING_TYPE_GFX_INDEX;
static int cayman_ring_type_cp1_index = CAYMAN_RING_TYPE_CP1_INDEX;
static int cayman_ring_type_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX;
+static int radeon_ring_type_dma1_index = R600_RING_TYPE_DMA_INDEX;
+static int radeon_ring_type_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX;
static struct drm_info_list radeon_debugfs_ring_info_list[] = {
{"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_ring_type_gfx_index},
{"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp1_index},
{"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp2_index},
+ {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma1_index},
+ {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma2_index},
};
static int radeon_debugfs_sa_info(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c
index 97f3ece81cd2..8dcc20f53d73 100644
--- a/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -95,6 +95,10 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev,
/* we assume caller has already allocated space on waiters ring */
radeon_semaphore_emit_wait(rdev, waiter, semaphore);
+ /* for debugging lockup only, used by sysfs debug files */
+ rdev->ring[signaler].last_semaphore_signal_addr = semaphore->gpu_addr;
+ rdev->ring[waiter].last_semaphore_wait_addr = semaphore->gpu_addr;
+
return 0;
}
diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
index 587c09a00ba2..fda09c9ea689 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -26,16 +26,31 @@
#include "radeon_reg.h"
#include "radeon.h"
+#define RADEON_TEST_COPY_BLIT 1
+#define RADEON_TEST_COPY_DMA 0
+
/* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */
-void radeon_test_moves(struct radeon_device *rdev)
+static void radeon_do_test_moves(struct radeon_device *rdev, int flag)
{
struct radeon_bo *vram_obj = NULL;
struct radeon_bo **gtt_obj = NULL;
struct radeon_fence *fence = NULL;
uint64_t gtt_addr, vram_addr;
unsigned i, n, size;
- int r;
+ int r, ring;
+
+ switch (flag) {
+ case RADEON_TEST_COPY_DMA:
+ ring = radeon_copy_dma_ring_index(rdev);
+ break;
+ case RADEON_TEST_COPY_BLIT:
+ ring = radeon_copy_blit_ring_index(rdev);
+ break;
+ default:
+ DRM_ERROR("Unknown copy method\n");
+ return;
+ }
size = 1024 * 1024;
@@ -106,7 +121,10 @@ void radeon_test_moves(struct radeon_device *rdev)
radeon_bo_kunmap(gtt_obj[i]);
- r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
+ if (ring == R600_RING_TYPE_DMA_INDEX)
+ r = radeon_copy_dma(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
+ else
+ r = radeon_copy_blit(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
if (r) {
DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
goto out_cleanup;
@@ -149,7 +167,10 @@ void radeon_test_moves(struct radeon_device *rdev)
radeon_bo_kunmap(vram_obj);
- r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
+ if (ring == R600_RING_TYPE_DMA_INDEX)
+ r = radeon_copy_dma(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
+ else
+ r = radeon_copy_blit(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
if (r) {
DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
goto out_cleanup;
@@ -223,6 +244,14 @@ out_cleanup:
}
}
+void radeon_test_moves(struct radeon_device *rdev)
+{
+ if (rdev->asic->copy.dma)
+ radeon_do_test_moves(rdev, RADEON_TEST_COPY_DMA);
+ if (rdev->asic->copy.blit)
+ radeon_do_test_moves(rdev, RADEON_TEST_COPY_BLIT);
+}
+
void radeon_test_ring_sync(struct radeon_device *rdev,
struct radeon_ring *ringA,
struct radeon_ring *ringB)
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 5ebe1b3e5db2..1d8ff2f850ba 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -216,7 +216,7 @@ static void radeon_move_null(struct ttm_buffer_object *bo,
}
static int radeon_move_blit(struct ttm_buffer_object *bo,
- bool evict, int no_wait_reserve, bool no_wait_gpu,
+ bool evict, bool no_wait_gpu,
struct ttm_mem_reg *new_mem,
struct ttm_mem_reg *old_mem)
{
@@ -265,15 +265,15 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */
&fence);
/* FIXME: handle copy error */
- r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL,
- evict, no_wait_reserve, no_wait_gpu, new_mem);
+ r = ttm_bo_move_accel_cleanup(bo, (void *)fence,
+ evict, no_wait_gpu, new_mem);
radeon_fence_unref(&fence);
return r;
}
static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
bool evict, bool interruptible,
- bool no_wait_reserve, bool no_wait_gpu,
+ bool no_wait_gpu,
struct ttm_mem_reg *new_mem)
{
struct radeon_device *rdev;
@@ -294,7 +294,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
placement.busy_placement = &placements;
placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
- interruptible, no_wait_reserve, no_wait_gpu);
+ interruptible, no_wait_gpu);
if (unlikely(r)) {
return r;
}
@@ -308,11 +308,11 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
if (unlikely(r)) {
goto out_cleanup;
}
- r = radeon_move_blit(bo, true, no_wait_reserve, no_wait_gpu, &tmp_mem, old_mem);
+ r = radeon_move_blit(bo, true, no_wait_gpu, &tmp_mem, old_mem);
if (unlikely(r)) {
goto out_cleanup;
}
- r = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, new_mem);
+ r = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem);
out_cleanup:
ttm_bo_mem_put(bo, &tmp_mem);
return r;
@@ -320,7 +320,7 @@ out_cleanup:
static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
bool evict, bool interruptible,
- bool no_wait_reserve, bool no_wait_gpu,
+ bool no_wait_gpu,
struct ttm_mem_reg *new_mem)
{
struct radeon_device *rdev;
@@ -340,15 +340,16 @@ static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
placement.num_busy_placement = 1;
placement.busy_placement = &placements;
placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
- r = ttm_bo_mem_space(bo, &placement, &tmp_mem, interruptible, no_wait_reserve, no_wait_gpu);
+ r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
+ interruptible, no_wait_gpu);
if (unlikely(r)) {
return r;
}
- r = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, &tmp_mem);
+ r = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem);
if (unlikely(r)) {
goto out_cleanup;
}
- r = radeon_move_blit(bo, true, no_wait_reserve, no_wait_gpu, new_mem, old_mem);
+ r = radeon_move_blit(bo, true, no_wait_gpu, new_mem, old_mem);
if (unlikely(r)) {
goto out_cleanup;
}
@@ -359,7 +360,7 @@ out_cleanup:
static int radeon_bo_move(struct ttm_buffer_object *bo,
bool evict, bool interruptible,
- bool no_wait_reserve, bool no_wait_gpu,
+ bool no_wait_gpu,
struct ttm_mem_reg *new_mem)
{
struct radeon_device *rdev;
@@ -388,18 +389,18 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
if (old_mem->mem_type == TTM_PL_VRAM &&
new_mem->mem_type == TTM_PL_SYSTEM) {
r = radeon_move_vram_ram(bo, evict, interruptible,
- no_wait_reserve, no_wait_gpu, new_mem);
+ no_wait_gpu, new_mem);
} else if (old_mem->mem_type == TTM_PL_SYSTEM &&
new_mem->mem_type == TTM_PL_VRAM) {
r = radeon_move_ram_vram(bo, evict, interruptible,
- no_wait_reserve, no_wait_gpu, new_mem);
+ no_wait_gpu, new_mem);
} else {
- r = radeon_move_blit(bo, evict, no_wait_reserve, no_wait_gpu, new_mem, old_mem);
+ r = radeon_move_blit(bo, evict, no_wait_gpu, new_mem, old_mem);
}
if (r) {
memcpy:
- r = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
+ r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
}
return r;
}
@@ -471,13 +472,12 @@ static void radeon_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
{
}
-static int radeon_sync_obj_wait(void *sync_obj, void *sync_arg,
- bool lazy, bool interruptible)
+static int radeon_sync_obj_wait(void *sync_obj, bool lazy, bool interruptible)
{
return radeon_fence_wait((struct radeon_fence *)sync_obj, interruptible);
}
-static int radeon_sync_obj_flush(void *sync_obj, void *sync_arg)
+static int radeon_sync_obj_flush(void *sync_obj)
{
return 0;
}
@@ -492,7 +492,7 @@ static void *radeon_sync_obj_ref(void *sync_obj)
return radeon_fence_ref((struct radeon_fence *)sync_obj);
}
-static bool radeon_sync_obj_signaled(void *sync_obj, void *sync_arg)
+static bool radeon_sync_obj_signaled(void *sync_obj)
{
return radeon_fence_signaled((struct radeon_fence *)sync_obj);
}
diff --git a/drivers/gpu/drm/radeon/reg_srcs/rv515 b/drivers/gpu/drm/radeon/reg_srcs/rv515
index 911a8fbd32bb..78d5e99d759d 100644
--- a/drivers/gpu/drm/radeon/reg_srcs/rv515
+++ b/drivers/gpu/drm/radeon/reg_srcs/rv515
@@ -324,6 +324,8 @@ rv515 0x6d40
0x46AC US_OUT_FMT_2
0x46B0 US_OUT_FMT_3
0x46B4 US_W_FMT
+0x46C0 RB3D_COLOR_CLEAR_VALUE_AR
+0x46C4 RB3D_COLOR_CLEAR_VALUE_GB
0x4BC0 FG_FOG_BLEND
0x4BC4 FG_FOG_FACTOR
0x4BC8 FG_FOG_COLOR_R
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
index 785d09590b24..2bb6d0e84b3d 100644
--- a/drivers/gpu/drm/radeon/rv515.c
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -40,6 +40,12 @@ static int rv515_debugfs_ga_info_init(struct radeon_device *rdev);
static void rv515_gpu_init(struct radeon_device *rdev);
int rv515_mc_wait_for_idle(struct radeon_device *rdev);
+static const u32 crtc_offsets[2] =
+{
+ 0,
+ AVIVO_D2CRTC_H_TOTAL - AVIVO_D1CRTC_H_TOTAL
+};
+
void rv515_debugfs(struct radeon_device *rdev)
{
if (r100_debugfs_rbbm_init(rdev)) {
@@ -281,30 +287,114 @@ static int rv515_debugfs_ga_info_init(struct radeon_device *rdev)
void rv515_mc_stop(struct radeon_device *rdev, struct rv515_mc_save *save)
{
+ u32 crtc_enabled, tmp, frame_count, blackout;
+ int i, j;
+
save->vga_render_control = RREG32(R_000300_VGA_RENDER_CONTROL);
save->vga_hdp_control = RREG32(R_000328_VGA_HDP_CONTROL);
- /* Stop all video */
- WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 0);
+ /* disable VGA render */
WREG32(R_000300_VGA_RENDER_CONTROL, 0);
- WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 1);
- WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 1);
- WREG32(R_006080_D1CRTC_CONTROL, 0);
- WREG32(R_006880_D2CRTC_CONTROL, 0);
- WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 0);
- WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 0);
- WREG32(R_000330_D1VGA_CONTROL, 0);
- WREG32(R_000338_D2VGA_CONTROL, 0);
+ /* blank the display controllers */
+ for (i = 0; i < rdev->num_crtc; i++) {
+ crtc_enabled = RREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i]) & AVIVO_CRTC_EN;
+ if (crtc_enabled) {
+ save->crtc_enabled[i] = true;
+ tmp = RREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i]);
+ if (!(tmp & AVIVO_CRTC_DISP_READ_REQUEST_DISABLE)) {
+ radeon_wait_for_vblank(rdev, i);
+ tmp |= AVIVO_CRTC_DISP_READ_REQUEST_DISABLE;
+ WREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i], tmp);
+ }
+ /* wait for the next frame */
+ frame_count = radeon_get_vblank_counter(rdev, i);
+ for (j = 0; j < rdev->usec_timeout; j++) {
+ if (radeon_get_vblank_counter(rdev, i) != frame_count)
+ break;
+ udelay(1);
+ }
+ } else {
+ save->crtc_enabled[i] = false;
+ }
+ }
+
+ radeon_mc_wait_for_idle(rdev);
+
+ if (rdev->family >= CHIP_R600) {
+ if (rdev->family >= CHIP_RV770)
+ blackout = RREG32(R700_MC_CITF_CNTL);
+ else
+ blackout = RREG32(R600_CITF_CNTL);
+ if ((blackout & R600_BLACKOUT_MASK) != R600_BLACKOUT_MASK) {
+ /* Block CPU access */
+ WREG32(R600_BIF_FB_EN, 0);
+ /* blackout the MC */
+ blackout |= R600_BLACKOUT_MASK;
+ if (rdev->family >= CHIP_RV770)
+ WREG32(R700_MC_CITF_CNTL, blackout);
+ else
+ WREG32(R600_CITF_CNTL, blackout);
+ }
+ }
}
void rv515_mc_resume(struct radeon_device *rdev, struct rv515_mc_save *save)
{
- WREG32(R_006110_D1GRPH_PRIMARY_SURFACE_ADDRESS, rdev->mc.vram_start);
- WREG32(R_006118_D1GRPH_SECONDARY_SURFACE_ADDRESS, rdev->mc.vram_start);
- WREG32(R_006910_D2GRPH_PRIMARY_SURFACE_ADDRESS, rdev->mc.vram_start);
- WREG32(R_006918_D2GRPH_SECONDARY_SURFACE_ADDRESS, rdev->mc.vram_start);
- WREG32(R_000310_VGA_MEMORY_BASE_ADDRESS, rdev->mc.vram_start);
- /* Unlock host access */
+ u32 tmp, frame_count;
+ int i, j;
+
+ /* update crtc base addresses */
+ for (i = 0; i < rdev->num_crtc; i++) {
+ if (rdev->family >= CHIP_RV770) {
+ if (i == 1) {
+ WREG32(R700_D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH,
+ upper_32_bits(rdev->mc.vram_start));
+ WREG32(R700_D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH,
+ upper_32_bits(rdev->mc.vram_start));
+ } else {
+ WREG32(R700_D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH,
+ upper_32_bits(rdev->mc.vram_start));
+ WREG32(R700_D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH,
+ upper_32_bits(rdev->mc.vram_start));
+ }
+ }
+ WREG32(R_006110_D1GRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i],
+ (u32)rdev->mc.vram_start);
+ WREG32(R_006118_D1GRPH_SECONDARY_SURFACE_ADDRESS + crtc_offsets[i],
+ (u32)rdev->mc.vram_start);
+ }
+ WREG32(R_000310_VGA_MEMORY_BASE_ADDRESS, (u32)rdev->mc.vram_start);
+
+ if (rdev->family >= CHIP_R600) {
+ /* unblackout the MC */
+ if (rdev->family >= CHIP_RV770)
+ tmp = RREG32(R700_MC_CITF_CNTL);
+ else
+ tmp = RREG32(R600_CITF_CNTL);
+ tmp &= ~R600_BLACKOUT_MASK;
+ if (rdev->family >= CHIP_RV770)
+ WREG32(R700_MC_CITF_CNTL, tmp);
+ else
+ WREG32(R600_CITF_CNTL, tmp);
+ /* allow CPU access */
+ WREG32(R600_BIF_FB_EN, R600_FB_READ_EN | R600_FB_WRITE_EN);
+ }
+
+ for (i = 0; i < rdev->num_crtc; i++) {
+ if (save->crtc_enabled[i]) {
+ tmp = RREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i]);
+ tmp &= ~AVIVO_CRTC_DISP_READ_REQUEST_DISABLE;
+ WREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i], tmp);
+ /* wait for the next frame */
+ frame_count = radeon_get_vblank_counter(rdev, i);
+ for (j = 0; j < rdev->usec_timeout; j++) {
+ if (radeon_get_vblank_counter(rdev, i) != frame_count)
+ break;
+ udelay(1);
+ }
+ }
+ }
+ /* Unlock vga access */
WREG32(R_000328_VGA_HDP_CONTROL, save->vga_hdp_control);
mdelay(1);
WREG32(R_000300_VGA_RENDER_CONTROL, save->vga_render_control);
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 79814a08c8e5..1b2444f4d8f4 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -316,6 +316,7 @@ void r700_cp_stop(struct radeon_device *rdev)
radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
WREG32(SCRATCH_UMSK, 0);
+ rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
}
static int rv770_cp_load_microcode(struct radeon_device *rdev)
@@ -583,6 +584,8 @@ static void rv770_gpu_init(struct radeon_device *rdev)
WREG32(GB_TILING_CONFIG, gb_tiling_config);
WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
+ WREG32(DMA_TILING_CONFIG, (gb_tiling_config & 0xffff));
+ WREG32(DMA_TILING_CONFIG2, (gb_tiling_config & 0xffff));
WREG32(CGTS_SYS_TCC_DISABLE, 0);
WREG32(CGTS_TCC_DISABLE, 0);
@@ -884,9 +887,83 @@ static int rv770_mc_init(struct radeon_device *rdev)
return 0;
}
+/**
+ * rv770_copy_dma - copy pages using the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU paging using the DMA engine (r7xx).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+int rv770_copy_dma(struct radeon_device *rdev,
+ uint64_t src_offset, uint64_t dst_offset,
+ unsigned num_gpu_pages,
+ struct radeon_fence **fence)
+{
+ struct radeon_semaphore *sem = NULL;
+ int ring_index = rdev->asic->copy.dma_ring_index;
+ struct radeon_ring *ring = &rdev->ring[ring_index];
+ u32 size_in_dw, cur_size_in_dw;
+ int i, num_loops;
+ int r = 0;
+
+ r = radeon_semaphore_create(rdev, &sem);
+ if (r) {
+ DRM_ERROR("radeon: moving bo (%d).\n", r);
+ return r;
+ }
+
+ size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
+ num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF);
+ r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8);
+ if (r) {
+ DRM_ERROR("radeon: moving bo (%d).\n", r);
+ radeon_semaphore_free(rdev, &sem, NULL);
+ return r;
+ }
+
+ if (radeon_fence_need_sync(*fence, ring->idx)) {
+ radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+ ring->idx);
+ radeon_fence_note_sync(*fence, ring->idx);
+ } else {
+ radeon_semaphore_free(rdev, &sem, NULL);
+ }
+
+ for (i = 0; i < num_loops; i++) {
+ cur_size_in_dw = size_in_dw;
+ if (cur_size_in_dw > 0xFFFF)
+ cur_size_in_dw = 0xFFFF;
+ size_in_dw -= cur_size_in_dw;
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
+ radeon_ring_write(ring, dst_offset & 0xfffffffc);
+ radeon_ring_write(ring, src_offset & 0xfffffffc);
+ radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
+ radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
+ src_offset += cur_size_in_dw * 4;
+ dst_offset += cur_size_in_dw * 4;
+ }
+
+ r = radeon_fence_emit(rdev, fence, ring->idx);
+ if (r) {
+ radeon_ring_unlock_undo(rdev, ring);
+ return r;
+ }
+
+ radeon_ring_unlock_commit(rdev, ring);
+ radeon_semaphore_free(rdev, &sem, *fence);
+
+ return r;
+}
+
static int rv770_startup(struct radeon_device *rdev)
{
- struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+ struct radeon_ring *ring;
int r;
/* enable pcie gen2 link */
@@ -932,6 +1009,12 @@ static int rv770_startup(struct radeon_device *rdev)
return r;
}
+ r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+ if (r) {
+ dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+ return r;
+ }
+
/* Enable IRQ */
r = r600_irq_init(rdev);
if (r) {
@@ -941,11 +1024,20 @@ static int rv770_startup(struct radeon_device *rdev)
}
r600_irq_set(rdev);
+ ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
R600_CP_RB_RPTR, R600_CP_RB_WPTR,
0, 0xfffff, RADEON_CP_PACKET2);
if (r)
return r;
+
+ ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+ r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+ DMA_RB_RPTR, DMA_RB_WPTR,
+ 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+ if (r)
+ return r;
+
r = rv770_cp_load_microcode(rdev);
if (r)
return r;
@@ -953,6 +1045,10 @@ static int rv770_startup(struct radeon_device *rdev)
if (r)
return r;
+ r = r600_dma_resume(rdev);
+ if (r)
+ return r;
+
r = radeon_ib_pool_init(rdev);
if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -995,7 +1091,7 @@ int rv770_suspend(struct radeon_device *rdev)
{
r600_audio_fini(rdev);
r700_cp_stop(rdev);
- rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+ r600_dma_stop(rdev);
r600_irq_suspend(rdev);
radeon_wb_disable(rdev);
rv770_pcie_gart_disable(rdev);
@@ -1066,6 +1162,9 @@ int rv770_init(struct radeon_device *rdev)
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
+ rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
+ r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
+
rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024);
@@ -1078,6 +1177,7 @@ int rv770_init(struct radeon_device *rdev)
if (r) {
dev_err(rdev->dev, "disabling GPU acceleration\n");
r700_cp_fini(rdev);
+ r600_dma_fini(rdev);
r600_irq_fini(rdev);
radeon_wb_fini(rdev);
radeon_ib_pool_fini(rdev);
@@ -1093,6 +1193,7 @@ void rv770_fini(struct radeon_device *rdev)
{
r600_blit_fini(rdev);
r700_cp_fini(rdev);
+ r600_dma_fini(rdev);
r600_irq_fini(rdev);
radeon_wb_fini(rdev);
radeon_ib_pool_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h
index b0adfc595d75..20e29d23d348 100644
--- a/drivers/gpu/drm/radeon/rv770d.h
+++ b/drivers/gpu/drm/radeon/rv770d.h
@@ -109,6 +109,9 @@
#define PIPE_TILING__SHIFT 1
#define PIPE_TILING__MASK 0x0000000e
+#define DMA_TILING_CONFIG 0x3ec8
+#define DMA_TILING_CONFIG2 0xd0b8
+
#define GC_USER_SHADER_PIPE_CONFIG 0x8954
#define INACTIVE_QD_PIPES(x) ((x) << 8)
#define INACTIVE_QD_PIPES_MASK 0x0000FF00
@@ -358,6 +361,26 @@
#define WAIT_UNTIL 0x8040
+/* async DMA */
+#define DMA_RB_RPTR 0xd008
+#define DMA_RB_WPTR 0xd00c
+
+/* async DMA packets */
+#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
+ (((t) & 0x1) << 23) | \
+ (((s) & 0x1) << 22) | \
+ (((n) & 0xFFFF) << 0))
+/* async DMA Packet types */
+#define DMA_PACKET_WRITE 0x2
+#define DMA_PACKET_COPY 0x3
+#define DMA_PACKET_INDIRECT_BUFFER 0x4
+#define DMA_PACKET_SEMAPHORE 0x5
+#define DMA_PACKET_FENCE 0x6
+#define DMA_PACKET_TRAP 0x7
+#define DMA_PACKET_CONSTANT_FILL 0xd
+#define DMA_PACKET_NOP 0xf
+
+
#define SRBM_STATUS 0x0E50
/* DCE 3.2 HDMI */
@@ -551,6 +574,54 @@
#define HDMI_OFFSET0 (0x7400 - 0x7400)
#define HDMI_OFFSET1 (0x7800 - 0x7400)
+/* DCE3.2 ELD audio interface */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR0 0x71c8 /* LPCM */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR1 0x71cc /* AC3 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR2 0x71d0 /* MPEG1 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR3 0x71d4 /* MP3 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR4 0x71d8 /* MPEG2 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR5 0x71dc /* AAC */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR6 0x71e0 /* DTS */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR7 0x71e4 /* ATRAC */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR8 0x71e8 /* one bit audio - leave at 0 (default) */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR9 0x71ec /* Dolby Digital */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR10 0x71f0 /* DTS-HD */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR11 0x71f4 /* MAT-MLP */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR12 0x71f8 /* DTS */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR13 0x71fc /* WMA Pro */
+# define MAX_CHANNELS(x) (((x) & 0x7) << 0)
+/* max channels minus one. 7 = 8 channels */
+# define SUPPORTED_FREQUENCIES(x) (((x) & 0xff) << 8)
+# define DESCRIPTOR_BYTE_2(x) (((x) & 0xff) << 16)
+# define SUPPORTED_FREQUENCIES_STEREO(x) (((x) & 0xff) << 24) /* LPCM only */
+/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO
+ * bit0 = 32 kHz
+ * bit1 = 44.1 kHz
+ * bit2 = 48 kHz
+ * bit3 = 88.2 kHz
+ * bit4 = 96 kHz
+ * bit5 = 176.4 kHz
+ * bit6 = 192 kHz
+ */
+
+#define AZ_HOT_PLUG_CONTROL 0x7300
+# define AZ_FORCE_CODEC_WAKE (1 << 0)
+# define PIN0_JACK_DETECTION_ENABLE (1 << 4)
+# define PIN1_JACK_DETECTION_ENABLE (1 << 5)
+# define PIN2_JACK_DETECTION_ENABLE (1 << 6)
+# define PIN3_JACK_DETECTION_ENABLE (1 << 7)
+# define PIN0_UNSOLICITED_RESPONSE_ENABLE (1 << 8)
+# define PIN1_UNSOLICITED_RESPONSE_ENABLE (1 << 9)
+# define PIN2_UNSOLICITED_RESPONSE_ENABLE (1 << 10)
+# define PIN3_UNSOLICITED_RESPONSE_ENABLE (1 << 11)
+# define CODEC_HOT_PLUG_ENABLE (1 << 12)
+# define PIN0_AUDIO_ENABLED (1 << 24)
+# define PIN1_AUDIO_ENABLED (1 << 25)
+# define PIN2_AUDIO_ENABLED (1 << 26)
+# define PIN3_AUDIO_ENABLED (1 << 27)
+# define AUDIO_ENABLED (1 << 31)
+
+
#define D1GRPH_PRIMARY_SURFACE_ADDRESS 0x6110
#define D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x6914
#define D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x6114
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index ea4691f79ccd..ae8b48205a6c 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -1660,6 +1660,8 @@ static void si_gpu_init(struct radeon_device *rdev)
WREG32(GB_ADDR_CONFIG, gb_addr_config);
WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
WREG32(HDP_ADDR_CONFIG, gb_addr_config);
+ WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
+ WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
si_tiling_mode_table_init(rdev);
@@ -1836,6 +1838,9 @@ static void si_cp_enable(struct radeon_device *rdev, bool enable)
radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
WREG32(SCRATCH_UMSK, 0);
+ rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+ rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
+ rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
}
udelay(50);
}
@@ -2121,15 +2126,13 @@ bool si_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
return radeon_ring_test_lockup(rdev, ring);
}
-static int si_gpu_soft_reset(struct radeon_device *rdev)
+static void si_gpu_soft_reset_gfx(struct radeon_device *rdev)
{
- struct evergreen_mc_save save;
u32 grbm_reset = 0;
if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
- return 0;
+ return;
- dev_info(rdev->dev, "GPU softreset \n");
dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
RREG32(GRBM_STATUS));
dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
@@ -2140,10 +2143,7 @@ static int si_gpu_soft_reset(struct radeon_device *rdev)
RREG32(GRBM_STATUS_SE1));
dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
RREG32(SRBM_STATUS));
- evergreen_mc_stop(rdev, &save);
- if (radeon_mc_wait_for_idle(rdev)) {
- dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
- }
+
/* Disable CP parsing/prefetching */
WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
@@ -2168,8 +2168,7 @@ static int si_gpu_soft_reset(struct radeon_device *rdev)
udelay(50);
WREG32(GRBM_SOFT_RESET, 0);
(void)RREG32(GRBM_SOFT_RESET);
- /* Wait a little for things to settle down */
- udelay(50);
+
dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
RREG32(GRBM_STATUS));
dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
@@ -2180,13 +2179,81 @@ static int si_gpu_soft_reset(struct radeon_device *rdev)
RREG32(GRBM_STATUS_SE1));
dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
RREG32(SRBM_STATUS));
+}
+
+static void si_gpu_soft_reset_dma(struct radeon_device *rdev)
+{
+ u32 tmp;
+
+ if (RREG32(DMA_STATUS_REG) & DMA_IDLE)
+ return;
+
+ dev_info(rdev->dev, " DMA_STATUS_REG = 0x%08X\n",
+ RREG32(DMA_STATUS_REG));
+
+ /* dma0 */
+ tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
+ tmp &= ~DMA_RB_ENABLE;
+ WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
+
+ /* dma1 */
+ tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
+ tmp &= ~DMA_RB_ENABLE;
+ WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
+
+ /* Reset dma */
+ WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
+ RREG32(SRBM_SOFT_RESET);
+ udelay(50);
+ WREG32(SRBM_SOFT_RESET, 0);
+
+ dev_info(rdev->dev, " DMA_STATUS_REG = 0x%08X\n",
+ RREG32(DMA_STATUS_REG));
+}
+
+static int si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
+{
+ struct evergreen_mc_save save;
+
+ if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
+ reset_mask &= ~(RADEON_RESET_GFX | RADEON_RESET_COMPUTE);
+
+ if (RREG32(DMA_STATUS_REG) & DMA_IDLE)
+ reset_mask &= ~RADEON_RESET_DMA;
+
+ if (reset_mask == 0)
+ return 0;
+
+ dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
+
+ dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
+ RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
+ dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
+ RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
+
+ evergreen_mc_stop(rdev, &save);
+ if (radeon_mc_wait_for_idle(rdev)) {
+ dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
+ }
+
+ if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE))
+ si_gpu_soft_reset_gfx(rdev);
+
+ if (reset_mask & RADEON_RESET_DMA)
+ si_gpu_soft_reset_dma(rdev);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+
evergreen_mc_resume(rdev, &save);
return 0;
}
int si_asic_reset(struct radeon_device *rdev)
{
- return si_gpu_soft_reset(rdev);
+ return si_gpu_soft_reset(rdev, (RADEON_RESET_GFX |
+ RADEON_RESET_COMPUTE |
+ RADEON_RESET_DMA));
}
/* MC */
@@ -2426,9 +2493,20 @@ static int si_pcie_gart_enable(struct radeon_device *rdev)
/* enable context1-15 */
WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
(u32)(rdev->dummy_page.addr >> 12));
- WREG32(VM_CONTEXT1_CNTL2, 0);
+ WREG32(VM_CONTEXT1_CNTL2, 4);
WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
- RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
+ RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
+ PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
+ VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
+ READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT |
+ WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
si_pcie_gart_tlb_flush(rdev);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
@@ -2474,6 +2552,7 @@ static bool si_vm_reg_valid(u32 reg)
/* check config regs */
switch (reg) {
case GRBM_GFX_INDEX:
+ case CP_STRMOUT_CNTL:
case VGT_VTX_VECT_EJECT_REG:
case VGT_CACHE_INVALIDATION:
case VGT_ESGS_RING_SIZE:
@@ -2533,6 +2612,7 @@ static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
u32 idx = pkt->idx + 1;
u32 idx_value = ib[idx];
u32 start_reg, end_reg, reg, i;
+ u32 command, info;
switch (pkt->opcode) {
case PACKET3_NOP:
@@ -2632,6 +2712,52 @@ static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
return -EINVAL;
}
break;
+ case PACKET3_CP_DMA:
+ command = ib[idx + 4];
+ info = ib[idx + 1];
+ if (command & PACKET3_CP_DMA_CMD_SAS) {
+ /* src address space is register */
+ if (((info & 0x60000000) >> 29) == 0) {
+ start_reg = idx_value << 2;
+ if (command & PACKET3_CP_DMA_CMD_SAIC) {
+ reg = start_reg;
+ if (!si_vm_reg_valid(reg)) {
+ DRM_ERROR("CP DMA Bad SRC register\n");
+ return -EINVAL;
+ }
+ } else {
+ for (i = 0; i < (command & 0x1fffff); i++) {
+ reg = start_reg + (4 * i);
+ if (!si_vm_reg_valid(reg)) {
+ DRM_ERROR("CP DMA Bad SRC register\n");
+ return -EINVAL;
+ }
+ }
+ }
+ }
+ }
+ if (command & PACKET3_CP_DMA_CMD_DAS) {
+ /* dst address space is register */
+ if (((info & 0x00300000) >> 20) == 0) {
+ start_reg = ib[idx + 2];
+ if (command & PACKET3_CP_DMA_CMD_DAIC) {
+ reg = start_reg;
+ if (!si_vm_reg_valid(reg)) {
+ DRM_ERROR("CP DMA Bad DST register\n");
+ return -EINVAL;
+ }
+ } else {
+ for (i = 0; i < (command & 0x1fffff); i++) {
+ reg = start_reg + (4 * i);
+ if (!si_vm_reg_valid(reg)) {
+ DRM_ERROR("CP DMA Bad DST register\n");
+ return -EINVAL;
+ }
+ }
+ }
+ }
+ }
+ break;
default:
DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
return -EINVAL;
@@ -2808,30 +2934,86 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
{
struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
-
- while (count) {
- unsigned ndw = 2 + count * 2;
- if (ndw > 0x3FFE)
- ndw = 0x3FFE;
-
- radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw));
- radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
- WRITE_DATA_DST_SEL(1)));
- radeon_ring_write(ring, pe);
- radeon_ring_write(ring, upper_32_bits(pe));
- for (; ndw > 2; ndw -= 2, --count, pe += 8) {
- uint64_t value;
- if (flags & RADEON_VM_PAGE_SYSTEM) {
- value = radeon_vm_map_gart(rdev, addr);
- value &= 0xFFFFFFFFFFFFF000ULL;
- } else if (flags & RADEON_VM_PAGE_VALID)
- value = addr;
- else
- value = 0;
- addr += incr;
- value |= r600_flags;
- radeon_ring_write(ring, value);
- radeon_ring_write(ring, upper_32_bits(value));
+ uint64_t value;
+ unsigned ndw;
+
+ if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
+ while (count) {
+ ndw = 2 + count * 2;
+ if (ndw > 0x3FFE)
+ ndw = 0x3FFE;
+
+ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw));
+ radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(1)));
+ radeon_ring_write(ring, pe);
+ radeon_ring_write(ring, upper_32_bits(pe));
+ for (; ndw > 2; ndw -= 2, --count, pe += 8) {
+ if (flags & RADEON_VM_PAGE_SYSTEM) {
+ value = radeon_vm_map_gart(rdev, addr);
+ value &= 0xFFFFFFFFFFFFF000ULL;
+ } else if (flags & RADEON_VM_PAGE_VALID) {
+ value = addr;
+ } else {
+ value = 0;
+ }
+ addr += incr;
+ value |= r600_flags;
+ radeon_ring_write(ring, value);
+ radeon_ring_write(ring, upper_32_bits(value));
+ }
+ }
+ } else {
+ /* DMA */
+ if (flags & RADEON_VM_PAGE_SYSTEM) {
+ while (count) {
+ ndw = count * 2;
+ if (ndw > 0xFFFFE)
+ ndw = 0xFFFFE;
+
+ /* for non-physically contiguous pages (system) */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw));
+ radeon_ring_write(ring, pe);
+ radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
+ for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+ if (flags & RADEON_VM_PAGE_SYSTEM) {
+ value = radeon_vm_map_gart(rdev, addr);
+ value &= 0xFFFFFFFFFFFFF000ULL;
+ } else if (flags & RADEON_VM_PAGE_VALID) {
+ value = addr;
+ } else {
+ value = 0;
+ }
+ addr += incr;
+ value |= r600_flags;
+ radeon_ring_write(ring, value);
+ radeon_ring_write(ring, upper_32_bits(value));
+ }
+ }
+ } else {
+ while (count) {
+ ndw = count * 2;
+ if (ndw > 0xFFFFE)
+ ndw = 0xFFFFE;
+
+ if (flags & RADEON_VM_PAGE_VALID)
+ value = addr;
+ else
+ value = 0;
+ /* for physically contiguous pages (vram) */
+ radeon_ring_write(ring, DMA_PTE_PDE_PACKET(ndw));
+ radeon_ring_write(ring, pe); /* dst addr */
+ radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
+ radeon_ring_write(ring, r600_flags); /* mask */
+ radeon_ring_write(ring, 0);
+ radeon_ring_write(ring, value); /* value */
+ radeon_ring_write(ring, upper_32_bits(value));
+ radeon_ring_write(ring, incr); /* increment size */
+ radeon_ring_write(ring, 0);
+ pe += ndw * 4;
+ addr += (ndw / 2) * incr;
+ count -= ndw / 2;
+ }
}
}
}
@@ -2879,6 +3061,32 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
radeon_ring_write(ring, 0x0);
}
+void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+{
+ struct radeon_ring *ring = &rdev->ring[ridx];
+
+ if (vm == NULL)
+ return;
+
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
+ if (vm->id < 8) {
+ radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
+ } else {
+ radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
+ }
+ radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+
+ /* flush hdp cache */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
+ radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
+ radeon_ring_write(ring, 1);
+
+ /* bits 0-7 are the VM contexts0-7 */
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
+ radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
+ radeon_ring_write(ring, 1 << vm->id);
+}
+
/*
* RLC
*/
@@ -3047,6 +3255,10 @@ static void si_disable_interrupt_state(struct radeon_device *rdev)
WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
WREG32(CP_INT_CNTL_RING1, 0);
WREG32(CP_INT_CNTL_RING2, 0);
+ tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
+ WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
+ tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
+ WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
WREG32(GRBM_INT_CNTL, 0);
WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
@@ -3166,6 +3378,7 @@ int si_irq_set(struct radeon_device *rdev)
u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
u32 grbm_int_cntl = 0;
u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
+ u32 dma_cntl, dma_cntl1;
if (!rdev->irq.installed) {
WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -3186,6 +3399,9 @@ int si_irq_set(struct radeon_device *rdev)
hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
+ dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
+ dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
+
/* enable CP interrupts on all rings */
if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
DRM_DEBUG("si_irq_set: sw int gfx\n");
@@ -3199,6 +3415,15 @@ int si_irq_set(struct radeon_device *rdev)
DRM_DEBUG("si_irq_set: sw int cp2\n");
cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
}
+ if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
+ DRM_DEBUG("si_irq_set: sw int dma\n");
+ dma_cntl |= TRAP_ENABLE;
+ }
+
+ if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
+ DRM_DEBUG("si_irq_set: sw int dma1\n");
+ dma_cntl1 |= TRAP_ENABLE;
+ }
if (rdev->irq.crtc_vblank_int[0] ||
atomic_read(&rdev->irq.pflip[0])) {
DRM_DEBUG("si_irq_set: vblank 0\n");
@@ -3258,6 +3483,9 @@ int si_irq_set(struct radeon_device *rdev)
WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
+ WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
+ WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
+
WREG32(GRBM_INT_CNTL, grbm_int_cntl);
WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
@@ -3683,6 +3911,16 @@ restart_ih:
break;
}
break;
+ case 146:
+ case 147:
+ dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
+ dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
+ RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
+ dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
+ RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
+ /* reset addr and status */
+ WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
+ break;
case 176: /* RINGID0 CP_INT */
radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
break;
@@ -3706,9 +3944,17 @@ restart_ih:
break;
}
break;
+ case 224: /* DMA trap event */
+ DRM_DEBUG("IH: DMA trap\n");
+ radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
+ break;
case 233: /* GUI IDLE */
DRM_DEBUG("IH: GUI idle\n");
break;
+ case 244: /* DMA trap event */
+ DRM_DEBUG("IH: DMA1 trap\n");
+ radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
+ break;
default:
DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
break;
@@ -3732,6 +3978,80 @@ restart_ih:
return IRQ_HANDLED;
}
+/**
+ * si_copy_dma - copy pages using the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU paging using the DMA engine (SI).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+int si_copy_dma(struct radeon_device *rdev,
+ uint64_t src_offset, uint64_t dst_offset,
+ unsigned num_gpu_pages,
+ struct radeon_fence **fence)
+{
+ struct radeon_semaphore *sem = NULL;
+ int ring_index = rdev->asic->copy.dma_ring_index;
+ struct radeon_ring *ring = &rdev->ring[ring_index];
+ u32 size_in_bytes, cur_size_in_bytes;
+ int i, num_loops;
+ int r = 0;
+
+ r = radeon_semaphore_create(rdev, &sem);
+ if (r) {
+ DRM_ERROR("radeon: moving bo (%d).\n", r);
+ return r;
+ }
+
+ size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
+ num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
+ r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
+ if (r) {
+ DRM_ERROR("radeon: moving bo (%d).\n", r);
+ radeon_semaphore_free(rdev, &sem, NULL);
+ return r;
+ }
+
+ if (radeon_fence_need_sync(*fence, ring->idx)) {
+ radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+ ring->idx);
+ radeon_fence_note_sync(*fence, ring->idx);
+ } else {
+ radeon_semaphore_free(rdev, &sem, NULL);
+ }
+
+ for (i = 0; i < num_loops; i++) {
+ cur_size_in_bytes = size_in_bytes;
+ if (cur_size_in_bytes > 0xFFFFF)
+ cur_size_in_bytes = 0xFFFFF;
+ size_in_bytes -= cur_size_in_bytes;
+ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
+ radeon_ring_write(ring, dst_offset & 0xffffffff);
+ radeon_ring_write(ring, src_offset & 0xffffffff);
+ radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
+ radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
+ src_offset += cur_size_in_bytes;
+ dst_offset += cur_size_in_bytes;
+ }
+
+ r = radeon_fence_emit(rdev, fence, ring->idx);
+ if (r) {
+ radeon_ring_unlock_undo(rdev, ring);
+ return r;
+ }
+
+ radeon_ring_unlock_commit(rdev, ring);
+ radeon_semaphore_free(rdev, &sem, *fence);
+
+ return r;
+}
+
/*
* startup/shutdown callbacks
*/
@@ -3803,6 +4123,18 @@ static int si_startup(struct radeon_device *rdev)
return r;
}
+ r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+ if (r) {
+ dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+ return r;
+ }
+
+ r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
+ if (r) {
+ dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+ return r;
+ }
+
/* Enable IRQ */
r = si_irq_init(rdev);
if (r) {
@@ -3833,6 +4165,22 @@ static int si_startup(struct radeon_device *rdev)
if (r)
return r;
+ ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+ r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+ DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
+ DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
+ 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
+ if (r)
+ return r;
+
+ ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+ r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
+ DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
+ DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
+ 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
+ if (r)
+ return r;
+
r = si_cp_load_microcode(rdev);
if (r)
return r;
@@ -3840,6 +4188,10 @@ static int si_startup(struct radeon_device *rdev)
if (r)
return r;
+ r = cayman_dma_resume(rdev);
+ if (r)
+ return r;
+
r = radeon_ib_pool_init(rdev);
if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -3881,9 +4233,7 @@ int si_resume(struct radeon_device *rdev)
int si_suspend(struct radeon_device *rdev)
{
si_cp_enable(rdev, false);
- rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
- rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
- rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
+ cayman_dma_stop(rdev);
si_irq_suspend(rdev);
radeon_wb_disable(rdev);
si_pcie_gart_disable(rdev);
@@ -3961,6 +4311,14 @@ int si_init(struct radeon_device *rdev)
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 1024 * 1024);
+ ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+ ring->ring_obj = NULL;
+ r600_ring_init(rdev, ring, 64 * 1024);
+
+ ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+ ring->ring_obj = NULL;
+ r600_ring_init(rdev, ring, 64 * 1024);
+
rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024);
@@ -3973,6 +4331,7 @@ int si_init(struct radeon_device *rdev)
if (r) {
dev_err(rdev->dev, "disabling GPU acceleration\n");
si_cp_fini(rdev);
+ cayman_dma_fini(rdev);
si_irq_fini(rdev);
si_rlc_fini(rdev);
radeon_wb_fini(rdev);
@@ -4001,6 +4360,7 @@ void si_fini(struct radeon_device *rdev)
r600_blit_fini(rdev);
#endif
si_cp_fini(rdev);
+ cayman_dma_fini(rdev);
si_irq_fini(rdev);
si_rlc_fini(rdev);
radeon_wb_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
index 7d2a20e56577..c056aae814f0 100644
--- a/drivers/gpu/drm/radeon/sid.h
+++ b/drivers/gpu/drm/radeon/sid.h
@@ -62,6 +62,22 @@
#define SRBM_STATUS 0xE50
+#define SRBM_SOFT_RESET 0x0E60
+#define SOFT_RESET_BIF (1 << 1)
+#define SOFT_RESET_DC (1 << 5)
+#define SOFT_RESET_DMA1 (1 << 6)
+#define SOFT_RESET_GRBM (1 << 8)
+#define SOFT_RESET_HDP (1 << 9)
+#define SOFT_RESET_IH (1 << 10)
+#define SOFT_RESET_MC (1 << 11)
+#define SOFT_RESET_ROM (1 << 14)
+#define SOFT_RESET_SEM (1 << 15)
+#define SOFT_RESET_VMC (1 << 17)
+#define SOFT_RESET_DMA (1 << 20)
+#define SOFT_RESET_TST (1 << 21)
+#define SOFT_RESET_REGBB (1 << 22)
+#define SOFT_RESET_ORB (1 << 23)
+
#define CC_SYS_RB_BACKEND_DISABLE 0xe80
#define GC_USER_SYS_RB_BACKEND_DISABLE 0xe84
@@ -91,7 +107,18 @@
#define VM_CONTEXT0_CNTL 0x1410
#define ENABLE_CONTEXT (1 << 0)
#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1)
+#define RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 3)
#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4)
+#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 6)
+#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 7)
+#define PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 9)
+#define PDE0_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 10)
+#define VALID_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 12)
+#define VALID_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 13)
+#define READ_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 15)
+#define READ_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 16)
+#define WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 18)
+#define WRITE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 19)
#define VM_CONTEXT1_CNTL 0x1414
#define VM_CONTEXT0_CNTL2 0x1430
#define VM_CONTEXT1_CNTL2 0x1434
@@ -104,6 +131,9 @@
#define VM_CONTEXT14_PAGE_TABLE_BASE_ADDR 0x1450
#define VM_CONTEXT15_PAGE_TABLE_BASE_ADDR 0x1454
+#define VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x14FC
+#define VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x14DC
+
#define VM_INVALIDATE_REQUEST 0x1478
#define VM_INVALIDATE_RESPONSE 0x147c
@@ -424,6 +454,7 @@
# define RDERR_INT_ENABLE (1 << 0)
# define GUI_IDLE_INT_ENABLE (1 << 19)
+#define CP_STRMOUT_CNTL 0x84FC
#define SCRATCH_REG0 0x8500
#define SCRATCH_REG1 0x8504
#define SCRATCH_REG2 0x8508
@@ -834,6 +865,54 @@
#define PACKET3_WAIT_REG_MEM 0x3C
#define PACKET3_MEM_WRITE 0x3D
#define PACKET3_COPY_DATA 0x40
+#define PACKET3_CP_DMA 0x41
+/* 1. header
+ * 2. SRC_ADDR_LO or DATA [31:0]
+ * 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] |
+ * SRC_ADDR_HI [7:0]
+ * 4. DST_ADDR_LO [31:0]
+ * 5. DST_ADDR_HI [7:0]
+ * 6. COMMAND [30:21] | BYTE_COUNT [20:0]
+ */
+# define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20)
+ /* 0 - SRC_ADDR
+ * 1 - GDS
+ */
+# define PACKET3_CP_DMA_ENGINE(x) ((x) << 27)
+ /* 0 - ME
+ * 1 - PFP
+ */
+# define PACKET3_CP_DMA_SRC_SEL(x) ((x) << 29)
+ /* 0 - SRC_ADDR
+ * 1 - GDS
+ * 2 - DATA
+ */
+# define PACKET3_CP_DMA_CP_SYNC (1 << 31)
+/* COMMAND */
+# define PACKET3_CP_DMA_DIS_WC (1 << 21)
+# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
+ /* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
+ /* 0 - none
+ * 1 - 8 in 16
+ * 2 - 8 in 32
+ * 3 - 8 in 64
+ */
+# define PACKET3_CP_DMA_CMD_SAS (1 << 26)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_CP_DMA_CMD_DAS (1 << 27)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_CP_DMA_CMD_SAIC (1 << 28)
+# define PACKET3_CP_DMA_CMD_DAIC (1 << 29)
+# define PACKET3_CP_DMA_CMD_RAW_WAIT (1 << 30)
#define PACKET3_PFP_SYNC_ME 0x42
#define PACKET3_SURFACE_SYNC 0x43
# define PACKET3_DEST_BASE_0_ENA (1 << 0)
@@ -921,4 +1000,63 @@
#define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A
#define PACKET3_SWITCH_BUFFER 0x8B
+/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */
+#define DMA0_REGISTER_OFFSET 0x0 /* not a register */
+#define DMA1_REGISTER_OFFSET 0x800 /* not a register */
+
+#define DMA_RB_CNTL 0xd000
+# define DMA_RB_ENABLE (1 << 0)
+# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
+# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
+# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
+# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
+# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
+#define DMA_RB_BASE 0xd004
+#define DMA_RB_RPTR 0xd008
+#define DMA_RB_WPTR 0xd00c
+
+#define DMA_RB_RPTR_ADDR_HI 0xd01c
+#define DMA_RB_RPTR_ADDR_LO 0xd020
+
+#define DMA_IB_CNTL 0xd024
+# define DMA_IB_ENABLE (1 << 0)
+# define DMA_IB_SWAP_ENABLE (1 << 4)
+#define DMA_IB_RPTR 0xd028
+#define DMA_CNTL 0xd02c
+# define TRAP_ENABLE (1 << 0)
+# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
+# define SEM_WAIT_INT_ENABLE (1 << 2)
+# define DATA_SWAP_ENABLE (1 << 3)
+# define FENCE_SWAP_ENABLE (1 << 4)
+# define CTXEMPTY_INT_ENABLE (1 << 28)
+#define DMA_STATUS_REG 0xd034
+# define DMA_IDLE (1 << 0)
+#define DMA_TILING_CONFIG 0xd0b8
+
+#define DMA_PACKET(cmd, b, t, s, n) ((((cmd) & 0xF) << 28) | \
+ (((b) & 0x1) << 26) | \
+ (((t) & 0x1) << 23) | \
+ (((s) & 0x1) << 22) | \
+ (((n) & 0xFFFFF) << 0))
+
+#define DMA_IB_PACKET(cmd, vmid, n) ((((cmd) & 0xF) << 28) | \
+ (((vmid) & 0xF) << 20) | \
+ (((n) & 0xFFFFF) << 0))
+
+#define DMA_PTE_PDE_PACKET(n) ((2 << 28) | \
+ (1 << 26) | \
+ (1 << 21) | \
+ (((n) & 0xFFFFF) << 0))
+
+/* async DMA Packet types */
+#define DMA_PACKET_WRITE 0x2
+#define DMA_PACKET_COPY 0x3
+#define DMA_PACKET_INDIRECT_BUFFER 0x4
+#define DMA_PACKET_SEMAPHORE 0x5
+#define DMA_PACKET_FENCE 0x6
+#define DMA_PACKET_TRAP 0x7
+#define DMA_PACKET_SRBM_WRITE 0x9
+#define DMA_PACKET_CONSTANT_FILL 0xd
+#define DMA_PACKET_NOP 0xf
+
#endif