Merge tag 'v4.6-rc3' into drm-intel-next-queued

Linux 4.6-rc3

Backmerge requested by Chris Wilson to make his patches apply cleanly.
Tiny conflict in vmalloc.c with the (properly acked and all) patch in
drm-intel-next:

commit 4da56b99d99e5a7df2b7f11e87bfea935f909732
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Mon Apr 4 14:46:42 2016 +0100

    mm/vmap: Add a notifier for when we run out of vmap address space

and Linus' tree.

Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
author: Daniel Vetter <daniel.vetter@ffwll.ch> 2016-04-11 19:21:06 +0200
committer: Daniel Vetter <daniel.vetter@ffwll.ch> 2016-04-11 19:25:13 +0200
commit: 39702853197b191bda32315260255053aa3e57f7 (patch)
tree: 00185427bd7c5e6a335c9ea99ed7ee65b9ceaa9c /drivers/gpu/drm
parent: fb8621d3bee88badeb25dccce0fb59ad145dba9e (diff)
parent: bf16200689118d19de1b8d2a3c314fc21f5dc7bb (diff)
download: linux-39702853197b191bda32315260255053aa3e57f7.tar.gz
linux-39702853197b191bda32315260255053aa3e57f7.tar.bz2
linux-39702853197b191bda32315260255053aa3e57f7.zip
178 files changed, 6394 insertions, 3768 deletions
diff --git a/drivers/gpu/drm/amd/acp/Kconfig b/drivers/gpu/drm/amd/acp/Kconfig
index 0f734ee05274..ca77ec10147c 100644
--- a/drivers/gpu/drm/amd/acp/Kconfig
+++ b/drivers/gpu/drm/amd/acp/Kconfig
@@ -1,10 +1,14 @@
-menu "ACP Configuration"
+menu "ACP (Audio CoProcessor) Configuration"
 
 config DRM_AMD_ACP
-       bool "Enable ACP IP support"
+       bool "Enable AMD Audio CoProcessor IP support"
        select MFD_CORE
        select PM_GENERIC_DOMAINS if PM
        help
 	Choose this option to enable ACP IP support for AMD SOCs.
+	This adds the ACP (Audio CoProcessor) IP driver and wires
+	it up into the amdgpu driver.  The ACP block provides the DMA
+	engine for the i2s-based ALSA driver. It is required for audio
+	on APUs which utilize an i2s codec.
 
 endmenu
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d0489722fc7e..62a778012fe0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -141,7 +141,6 @@ extern unsigned amdgpu_pcie_lane_cap;
 #define CIK_CURSOR_HEIGHT 128
 
 struct amdgpu_device;
-struct amdgpu_fence;
 struct amdgpu_ib;
 struct amdgpu_vm;
 struct amdgpu_ring;
@@ -287,9 +286,11 @@ struct amdgpu_ring_funcs {
 			struct amdgpu_ib *ib);
 	void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
 			   uint64_t seq, unsigned flags);
+	void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
 	void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vm_id,
 			      uint64_t pd_addr);
 	void (*emit_hdp_flush)(struct amdgpu_ring *ring);
+	void (*emit_hdp_invalidate)(struct amdgpu_ring *ring);
 	void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid,
 				uint32_t gds_base, uint32_t gds_size,
 				uint32_t gws_base, uint32_t gws_size,
@@ -346,13 +347,15 @@ struct amdgpu_fence_driver {
 	uint64_t			gpu_addr;
 	volatile uint32_t		*cpu_addr;
 	/* sync_seq is protected by ring emission lock */
-	uint64_t			sync_seq;
-	atomic64_t			last_seq;
+	uint32_t			sync_seq;
+	atomic_t			last_seq;
 	bool				initialized;
 	struct amdgpu_irq_src		*irq_src;
 	unsigned			irq_type;
 	struct timer_list		fallback_timer;
-	wait_queue_head_t		fence_queue;
+	unsigned			num_fences_mask;
+	spinlock_t			lock;
+	struct fence			**fences;
 };
 
 /* some special values for the owner field */
@@ -362,19 +365,6 @@ struct amdgpu_fence_driver {
 #define AMDGPU_FENCE_FLAG_64BIT         (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT           (1 << 1)
 
-struct amdgpu_fence {
-	struct fence base;
-
-	/* RB, DMA, etc. */
-	struct amdgpu_ring		*ring;
-	uint64_t			seq;
-
-	/* filp or special value for fence creator */
-	void				*owner;
-
-	wait_queue_t			fence_wake;
-};
-
 struct amdgpu_user_fence {
 	/* write-back bo */
 	struct amdgpu_bo 	*bo;
@@ -386,16 +376,15 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev);
 void amdgpu_fence_driver_fini(struct amdgpu_device *adev);
 void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev);
 
-int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
+				  unsigned num_hw_submission);
 int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
 				   struct amdgpu_irq_src *irq_src,
 				   unsigned irq_type);
 void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
 void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
-int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
-		      struct amdgpu_fence **fence);
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **fence);
 void amdgpu_fence_process(struct amdgpu_ring *ring);
-int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
 unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
 
@@ -434,6 +423,8 @@ struct amdgpu_bo_list_entry {
 	struct ttm_validate_buffer	tv;
 	struct amdgpu_bo_va		*bo_va;
 	uint32_t			priority;
+	struct page			**user_pages;
+	int				user_invalidated;
 };
 
 struct amdgpu_bo_va_mapping {
@@ -445,7 +436,6 @@ struct amdgpu_bo_va_mapping {
 
 /* bo virtual addresses in a specific vm */
 struct amdgpu_bo_va {
-	struct mutex		        mutex;
 	/* protected by bo being reserved */
 	struct list_head		bo_list;
 	struct fence		        *last_pt_update;
@@ -540,11 +530,14 @@ int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
  * Assumption is that there won't be hole (all object on same
  * alignment).
  */
+
+#define AMDGPU_SA_NUM_FENCE_LISTS	32
+
 struct amdgpu_sa_manager {
 	wait_queue_head_t	wq;
 	struct amdgpu_bo	*bo;
 	struct list_head	*hole;
-	struct list_head	flist[AMDGPU_MAX_RINGS];
+	struct list_head	flist[AMDGPU_SA_NUM_FENCE_LISTS];
 	struct list_head	olist;
 	unsigned		size;
 	uint64_t		gpu_addr;
@@ -596,6 +589,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
 int amdgpu_sync_wait(struct amdgpu_sync *sync);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
+int amdgpu_sync_init(void);
+void amdgpu_sync_fini(void);
 
 /*
  * GART structures, functions & helpers
@@ -726,7 +721,6 @@ struct amdgpu_ib {
 	uint32_t			length_dw;
 	uint64_t			gpu_addr;
 	uint32_t			*ptr;
-	struct amdgpu_fence		*fence;
 	struct amdgpu_user_fence        *user;
 	struct amdgpu_vm		*vm;
 	unsigned			vm_id;
@@ -845,7 +839,6 @@ struct amdgpu_vm_id {
 
 struct amdgpu_vm {
 	/* tree of virtual addresses mapped */
-	spinlock_t		it_lock;
 	struct rb_root		va;
 
 	/* protecting invalidated */
@@ -882,6 +875,13 @@ struct amdgpu_vm_manager_id {
 	struct list_head	list;
 	struct fence		*active;
 	atomic_long_t		owner;
+
+	uint32_t		gds_base;
+	uint32_t		gds_size;
+	uint32_t		gws_base;
+	uint32_t		gws_size;
+	uint32_t		oa_base;
+	uint32_t		oa_size;
 };
 
 struct amdgpu_vm_manager {
@@ -917,8 +917,11 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		      struct amdgpu_sync *sync, struct fence *fence,
 		      unsigned *vm_id, uint64_t *vm_pd_addr);
 void amdgpu_vm_flush(struct amdgpu_ring *ring,
-		     unsigned vmid,
-		     uint64_t pd_addr);
+		     unsigned vm_id, uint64_t pd_addr,
+		     uint32_t gds_base, uint32_t gds_size,
+		     uint32_t gws_base, uint32_t gws_size,
+		     uint32_t oa_base, uint32_t oa_size);
+void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
 uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
 int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 				    struct amdgpu_vm *vm);
@@ -1006,7 +1009,7 @@ struct amdgpu_bo_list {
 	struct amdgpu_bo *gds_obj;
 	struct amdgpu_bo *gws_obj;
 	struct amdgpu_bo *oa_obj;
-	bool has_userptr;
+	unsigned first_userptr;
 	unsigned num_entries;
 	struct amdgpu_bo_list_entry *array;
 };
@@ -1133,10 +1136,9 @@ struct amdgpu_gfx {
 
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		  unsigned size, struct amdgpu_ib *ib);
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib);
+void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fence *f);
 int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
-		       struct amdgpu_ib *ib, void *owner,
-		       struct fence *last_vm_update,
+		       struct amdgpu_ib *ib, struct fence *last_vm_update,
 		       struct fence **f);
 int amdgpu_ib_pool_init(struct amdgpu_device *adev);
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
@@ -1155,7 +1157,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     struct amdgpu_irq_src *irq_src, unsigned irq_type,
 		     enum amdgpu_ring_type ring_type);
 void amdgpu_ring_fini(struct amdgpu_ring *ring);
-struct amdgpu_ring *amdgpu_ring_from_fence(struct fence *f);
 
 /*
  * CS.
@@ -1197,6 +1198,7 @@ struct amdgpu_job {
 	struct amdgpu_ring	*ring;
 	struct amdgpu_sync	sync;
 	struct amdgpu_ib	*ibs;
+	struct fence		*fence; /* the hw fence */
 	uint32_t		num_ibs;
 	void			*owner;
 	struct amdgpu_user_fence uf;
@@ -1589,6 +1591,7 @@ struct amdgpu_uvd {
 	struct amdgpu_bo	*vcpu_bo;
 	void			*cpu_addr;
 	uint64_t		gpu_addr;
+	void			*saved_bo;
 	atomic_t		handles[AMDGPU_MAX_UVD_HANDLES];
 	struct drm_file		*filp[AMDGPU_MAX_UVD_HANDLES];
 	struct delayed_work	idle_work;
@@ -2012,7 +2015,6 @@ struct amdgpu_device {
 	struct amdgpu_sdma		sdma;
 
 	/* uvd */
-	bool				has_uvd;
 	struct amdgpu_uvd		uvd;
 
 	/* vce */
@@ -2059,20 +2061,6 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
 
 /*
- * Cast helper
- */
-extern const struct fence_ops amdgpu_fence_ops;
-static inline struct amdgpu_fence *to_amdgpu_fence(struct fence *f)
-{
-	struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
-
-	if (__f->base.ops == &amdgpu_fence_ops)
-		return __f;
-
-	return NULL;
-}
-
-/*
  * Registers read & write functions.
  */
 #define RREG32(reg) amdgpu_mm_rreg(adev, (reg), false)
@@ -2186,10 +2174,12 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
 #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
 #define amdgpu_ring_emit_ib(r, ib) (r)->funcs->emit_ib((r), (ib))
+#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
 #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
 #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
 #define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
 #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
+#define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
 #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
@@ -2314,12 +2304,15 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
 		       struct amdgpu_ring **out_ring);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *rbo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
+int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 				     uint32_t flags);
 bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
 struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm);
 bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end);
+bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
+				       int *last_invalidated);
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
 uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
 				 struct ttm_mem_reg *mem);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index fa948dcbdd5d..0020a0ea43ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -63,6 +63,10 @@ bool amdgpu_has_atpx(void) {
 	return amdgpu_atpx_priv.atpx_detected;
 }
 
+bool amdgpu_has_atpx_dgpu_power_cntl(void) {
+	return amdgpu_atpx_priv.atpx.functions.power_cntl;
+}
+
 /**
  * amdgpu_atpx_call - call an ATPX method
  *
@@ -142,10 +146,6 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas
  */
 static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
 {
-	/* make sure required functions are enabled */
-	/* dGPU power control is required */
-	atpx->functions.power_cntl = true;
-
 	if (atpx->functions.px_params) {
 		union acpi_object *info;
 		struct atpx_px_params output;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 4792f9d0b7d4..eacd810fc09b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -91,7 +91,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	struct amdgpu_bo *gws_obj = adev->gds.gws_gfx_bo;
 	struct amdgpu_bo *oa_obj = adev->gds.oa_gfx_bo;
 
-	bool has_userptr = false;
+	unsigned last_entry = 0, first_userptr = num_entries;
 	unsigned i;
 	int r;
 
@@ -101,8 +101,9 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
 
 	for (i = 0; i < num_entries; ++i) {
-		struct amdgpu_bo_list_entry *entry = &array[i];
+		struct amdgpu_bo_list_entry *entry;
 		struct drm_gem_object *gobj;
+		struct amdgpu_bo *bo;
 		struct mm_struct *usermm;
 
 		gobj = drm_gem_object_lookup(adev->ddev, filp, info[i].bo_handle);
@@ -111,19 +112,24 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 			goto error_free;
 		}
 
-		entry->robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+		bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
 		drm_gem_object_unreference_unlocked(gobj);
-		entry->priority = min(info[i].bo_priority,
-				      AMDGPU_BO_LIST_MAX_PRIORITY);
-		usermm = amdgpu_ttm_tt_get_usermm(entry->robj->tbo.ttm);
+
+		usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
 		if (usermm) {
 			if (usermm != current->mm) {
-				amdgpu_bo_unref(&entry->robj);
+				amdgpu_bo_unref(&bo);
 				r = -EPERM;
 				goto error_free;
 			}
-			has_userptr = true;
+			entry = &array[--first_userptr];
+		} else {
+			entry = &array[last_entry++];
 		}
+
+		entry->robj = bo;
+		entry->priority = min(info[i].bo_priority,
+				      AMDGPU_BO_LIST_MAX_PRIORITY);
 		entry->tv.bo = &entry->robj->tbo;
 		entry->tv.shared = true;
 
@@ -145,7 +151,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	list->gds_obj = gds_obj;
 	list->gws_obj = gws_obj;
 	list->oa_obj = oa_obj;
-	list->has_userptr = has_userptr;
+	list->first_userptr = first_userptr;
 	list->array = array;
 	list->num_entries = num_entries;
 
@@ -194,6 +200,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
 
 		list_add_tail(&list->array[i].tv.head,
 			      &bucket[priority]);
+		list->array[i].user_pages = NULL;
 	}
 
 	/* Connect the sorted buckets in the output list. */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 7a4b101e10c6..6043dc7c3a94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -816,10 +816,13 @@ static int amdgpu_cgs_get_active_displays_info(void *cgs_device,
 	struct drm_device *ddev = adev->ddev;
 	struct drm_crtc *crtc;
 	uint32_t line_time_us, vblank_lines;
+	struct cgs_mode_info *mode_info;
 
 	if (info == NULL)
 		return -EINVAL;
 
+	mode_info = info->mode_info;
+
 	if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
 		list_for_each_entry(crtc,
 				&ddev->mode_config.crtc_list, head) {
@@ -828,7 +831,7 @@ static int amdgpu_cgs_get_active_displays_info(void *cgs_device,
 				info->active_display_mask |= (1 << amdgpu_crtc->crtc_id);
 				info->display_count++;
 			}
-			if (info->mode_info != NULL &&
+			if (mode_info != NULL &&
 				crtc->enabled && amdgpu_crtc->enabled &&
 				amdgpu_crtc->hw_mode.clock) {
 				line_time_us = (amdgpu_crtc->hw_mode.crtc_htotal * 1000) /
@@ -836,10 +839,10 @@ static int amdgpu_cgs_get_active_displays_info(void *cgs_device,
 				vblank_lines = amdgpu_crtc->hw_mode.crtc_vblank_end -
 							amdgpu_crtc->hw_mode.crtc_vdisplay +
 							(amdgpu_crtc->v_border * 2);
-				info->mode_info->vblank_time_us = vblank_lines * line_time_us;
-				info->mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode);
-				info->mode_info->ref_clock = adev->clock.spll.reference_freq;
-				info->mode_info++;
+				mode_info->vblank_time_us = vblank_lines * line_time_us;
+				mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode);
+				mode_info->ref_clock = adev->clock.spll.reference_freq;
+				mode_info = NULL;
 			}
 		}
 	}
@@ -847,6 +850,16 @@ static int amdgpu_cgs_get_active_displays_info(void *cgs_device,
 	return 0;
 }
 
+
+static int amdgpu_cgs_notify_dpm_enabled(void *cgs_device, bool enabled)
+{
+	CGS_FUNC_ADEV;
+
+	adev->pm.dpm_enabled = enabled;
+
+	return 0;
+}
+
 /** \brief evaluate acpi namespace object, handle or pathname must be valid
  *  \param cgs_device
  *  \param info input/output arguments for the control method
@@ -1097,6 +1110,7 @@ static const struct cgs_ops amdgpu_cgs_ops = {
 	amdgpu_cgs_set_powergating_state,
 	amdgpu_cgs_set_clockgating_state,
 	amdgpu_cgs_get_active_displays_info,
+	amdgpu_cgs_notify_dpm_enabled,
 	amdgpu_cgs_call_acpi_method,
 	amdgpu_cgs_query_system_info,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 52c3eb96b199..9392e50a7ba4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -25,6 +25,7 @@
  *    Jerome Glisse <glisse@freedesktop.org>
  */
 #include <linux/list_sort.h>
+#include <linux/pagemap.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
@@ -111,6 +112,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	p->uf_entry.priority = 0;
 	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
 	p->uf_entry.tv.shared = true;
+	p->uf_entry.user_pages = NULL;
 
 	drm_gem_object_unreference_unlocked(gobj);
 	return 0;
@@ -297,6 +299,7 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 
 	list_for_each_entry(lobj, validated, tv.head) {
 		struct amdgpu_bo *bo = lobj->robj;
+		bool binding_userptr = false;
 		struct mm_struct *usermm;
 		uint32_t domain;
 
@@ -304,6 +307,15 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 		if (usermm && usermm != current->mm)
 			return -EPERM;
 
+		/* Check if we have user pages and nobody bound the BO already */
+		if (lobj->user_pages && bo->tbo.ttm->state != tt_bound) {
+			size_t size = sizeof(struct page *);
+
+			size *= bo->tbo.ttm->num_pages;
+			memcpy(bo->tbo.ttm->pages, lobj->user_pages, size);
+			binding_userptr = true;
+		}
+
 		if (bo->pin_count)
 			continue;
 
@@ -334,6 +346,11 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 			}
 			return r;
 		}
+
+		if (binding_userptr) {
+			drm_free_large(lobj->user_pages);
+			lobj->user_pages = NULL;
+		}
 	}
 	return 0;
 }
@@ -342,15 +359,18 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 				union drm_amdgpu_cs *cs)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	struct amdgpu_bo_list_entry *e;
 	struct list_head duplicates;
 	bool need_mmap_lock = false;
+	unsigned i, tries = 10;
 	int r;
 
 	INIT_LIST_HEAD(&p->validated);
 
 	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
 	if (p->bo_list) {
-		need_mmap_lock = p->bo_list->has_userptr;
+		need_mmap_lock = p->bo_list->first_userptr !=
+			p->bo_list->num_entries;
 		amdgpu_bo_list_get_list(p->bo_list, &p->validated);
 	}
 
@@ -363,9 +383,81 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (need_mmap_lock)
 		down_read(&current->mm->mmap_sem);
 
-	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates);
-	if (unlikely(r != 0))
-		goto error_reserve;
+	while (1) {
+		struct list_head need_pages;
+		unsigned i;
+
+		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+					   &duplicates);
+		if (unlikely(r != 0))
+			goto error_free_pages;
+
+		/* Without a BO list we don't have userptr BOs */
+		if (!p->bo_list)
+			break;
+
+		INIT_LIST_HEAD(&need_pages);
+		for (i = p->bo_list->first_userptr;
+		     i < p->bo_list->num_entries; ++i) {
+
+			e = &p->bo_list->array[i];
+
+			if (amdgpu_ttm_tt_userptr_invalidated(e->robj->tbo.ttm,
+				 &e->user_invalidated) && e->user_pages) {
+
+				/* We acquired a page array, but somebody
+				 * invalidated it. Free it and try again
+				 */
+				release_pages(e->user_pages,
+					      e->robj->tbo.ttm->num_pages,
+					      false);
+				drm_free_large(e->user_pages);
+				e->user_pages = NULL;
+			}
+
+			if (e->robj->tbo.ttm->state != tt_bound &&
+			    !e->user_pages) {
+				list_del(&e->tv.head);
+				list_add(&e->tv.head, &need_pages);
+
+				amdgpu_bo_unreserve(e->robj);
+			}
+		}
+
+		if (list_empty(&need_pages))
+			break;
+
+		/* Unreserve everything again. */
+		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+
+		/* We tried too often, just abort */
+		if (!--tries) {
+			r = -EDEADLK;
+			goto error_free_pages;
+		}
+
+		/* Fill the page arrays for all userptrs. */
+		list_for_each_entry(e, &need_pages, tv.head) {
+			struct ttm_tt *ttm = e->robj->tbo.ttm;
+
+			e->user_pages = drm_calloc_large(ttm->num_pages,
+							 sizeof(struct page*));
+			if (!e->user_pages) {
+				r = -ENOMEM;
+				goto error_free_pages;
+			}
+
+			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
+			if (r) {
+				drm_free_large(e->user_pages);
+				e->user_pages = NULL;
+				goto error_free_pages;
+			}
+		}
+
+		/* And try again. */
+		list_splice(&need_pages, &p->validated);
+	}
 
 	amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates);
 
@@ -397,10 +489,26 @@ error_validate:
 		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
 	}
 
-error_reserve:
+error_free_pages:
+
 	if (need_mmap_lock)
 		up_read(&current->mm->mmap_sem);
 
+	if (p->bo_list) {
+		for (i = p->bo_list->first_userptr;
+		     i < p->bo_list->num_entries; ++i) {
+			e = &p->bo_list->array[i];
+
+			if (!e->user_pages)
+				continue;
+
+			release_pages(e->user_pages,
+				      e->robj->tbo.ttm->num_pages,
+				      false);
+			drm_free_large(e->user_pages);
+		}
+	}
+
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2139da773da6..612117478b57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -62,6 +62,12 @@ static const char *amdgpu_asic_name[] = {
 	"LAST",
 };
 
+#if defined(CONFIG_VGA_SWITCHEROO)
+bool amdgpu_has_atpx_dgpu_power_cntl(void);
+#else
+static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
+#endif
+
 bool amdgpu_device_is_px(struct drm_device *dev)
 {
 	struct amdgpu_device *adev = dev->dev_private;
@@ -1479,7 +1485,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 	if (amdgpu_runtime_pm == 1)
 		runtime = true;
-	if (amdgpu_device_is_px(ddev))
+	if (amdgpu_device_is_px(ddev) && amdgpu_has_atpx_dgpu_power_cntl())
 		runtime = true;
 	vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, runtime);
 	if (runtime)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index f0ed974bd4e0..3fb405b3a614 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -57,7 +57,7 @@ static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work,
 	if (!fence_add_callback(fence, &work->cb, amdgpu_flip_callback))
 		return true;
 
-	fence_put(*f);
+	fence_put(fence);
 	return false;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 74a2f8a6be1f..f1e17d60055a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -555,6 +555,7 @@ static struct pci_driver amdgpu_kms_pci_driver = {
 
 static int __init amdgpu_init(void)
 {
+	amdgpu_sync_init();
 #ifdef CONFIG_VGA_CONSOLE
 	if (vgacon_text_force()) {
 		DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n");
@@ -577,6 +578,7 @@ static void __exit amdgpu_exit(void)
 	amdgpu_amdkfd_fini();
 	drm_pci_exit(driver, pdriver);
 	amdgpu_unregister_atpx_handler();
+	amdgpu_sync_fini();
 }
 
 module_init(amdgpu_init);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 97db196dc6f8..d81f1f4883a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -47,9 +47,30 @@
  * that the the relevant GPU caches have been flushed.
  */
 
+struct amdgpu_fence {
+	struct fence base;
+
+	/* RB, DMA, etc. */
+	struct amdgpu_ring		*ring;
+};
+
 static struct kmem_cache *amdgpu_fence_slab;
 static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0);
 
+/*
+ * Cast helper
+ */
+static const struct fence_ops amdgpu_fence_ops;
+static inline struct amdgpu_fence *to_amdgpu_fence(struct fence *f)
+{
+	struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
+
+	if (__f->base.ops == &amdgpu_fence_ops)
+		return __f;
+
+	return NULL;
+}
+
 /**
  * amdgpu_fence_write - write a fence value
  *
@@ -82,7 +103,7 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
 	if (drv->cpu_addr)
 		seq = le32_to_cpu(*drv->cpu_addr);
 	else
-		seq = lower_32_bits(atomic64_read(&drv->last_seq));
+		seq = atomic_read(&drv->last_seq);
 
 	return seq;
 }
@@ -91,32 +112,45 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
  * amdgpu_fence_emit - emit a fence on the requested ring
  *
  * @ring: ring the fence is associated with
- * @owner: creator of the fence
- * @fence: amdgpu fence object
+ * @f: resulting fence object
  *
  * Emits a fence command on the requested ring (all asics).
  * Returns 0 on success, -ENOMEM on failure.
  */
-int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
-		      struct amdgpu_fence **fence)
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
 {
 	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_fence *fence;
+	struct fence *old, **ptr;
+	uint32_t seq;
 
-	/* we are protected by the ring emission mutex */
-	*fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
-	if ((*fence) == NULL) {
+	fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
+	if (fence == NULL)
 		return -ENOMEM;
-	}
-	(*fence)->seq = ++ring->fence_drv.sync_seq;
-	(*fence)->ring = ring;
-	(*fence)->owner = owner;
-	fence_init(&(*fence)->base, &amdgpu_fence_ops,
-		&ring->fence_drv.fence_queue.lock,
-		adev->fence_context + ring->idx,
-		(*fence)->seq);
+
+	seq = ++ring->fence_drv.sync_seq;
+	fence->ring = ring;
+	fence_init(&fence->base, &amdgpu_fence_ops,
+		   &ring->fence_drv.lock,
+		   adev->fence_context + ring->idx,
+		   seq);
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
-			       (*fence)->seq,
-			       AMDGPU_FENCE_FLAG_INT);
+			       seq, AMDGPU_FENCE_FLAG_INT);
+
+	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
+	/* This function can't be called concurrently anyway, otherwise
+	 * emitting the fence would mess up the hardware ring buffer.
+	 */
+	old = rcu_dereference_protected(*ptr, 1);
+	if (old && !fence_is_signaled(old)) {
+		DRM_INFO("rcu slot is busy\n");
+		fence_wait(old, false);
+	}
+
+	rcu_assign_pointer(*ptr, fence_get(&fence->base));
+
+	*f = &fence->base;
+
 	return 0;
 }
 
@@ -134,89 +168,48 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
 }
 
 /**
- * amdgpu_fence_activity - check for fence activity
+ * amdgpu_fence_process - check for fence activity
  *
  * @ring: pointer to struct amdgpu_ring
  *
  * Checks the current fence value and calculates the last
- * signalled fence value. Returns true if activity occured
- * on the ring, and the fence_queue should be waken up.
+ * signalled fence value. Wakes the fence queue if the
+ * sequence number has increased.
  */
-static bool amdgpu_fence_activity(struct amdgpu_ring *ring)
+void amdgpu_fence_process(struct amdgpu_ring *ring)
 {
-	uint64_t seq, last_seq, last_emitted;
-	unsigned count_loop = 0;
-	bool wake = false;
-
-	/* Note there is a scenario here for an infinite loop but it's
-	 * very unlikely to happen. For it to happen, the current polling
-	 * process need to be interrupted by another process and another
-	 * process needs to update the last_seq btw the atomic read and
-	 * xchg of the current process.
-	 *
-	 * More over for this to go in infinite loop there need to be
-	 * continuously new fence signaled ie amdgpu_fence_read needs
-	 * to return a different value each time for both the currently
-	 * polling process and the other process that xchg the last_seq
-	 * btw atomic read and xchg of the current process. And the
-	 * value the other process set as last seq must be higher than
-	 * the seq value we just read. Which means that current process
-	 * need to be interrupted after amdgpu_fence_read and before
-	 * atomic xchg.
-	 *
-	 * To be even more safe we count the number of time we loop and
-	 * we bail after 10 loop just accepting the fact that we might
-	 * have temporarly set the last_seq not to the true real last
-	 * seq but to an older one.
-	 */
-	last_seq = atomic64_read(&ring->fence_drv.last_seq);
+	struct amdgpu_fence_driver *drv = &ring->fence_drv;
+	uint32_t seq, last_seq;
+	int r;
+
 	do {
-		last_emitted = ring->fence_drv.sync_seq;
+		last_seq = atomic_read(&ring->fence_drv.last_seq);
 		seq = amdgpu_fence_read(ring);
-		seq |= last_seq & 0xffffffff00000000LL;
-		if (seq < last_seq) {
-			seq &= 0xffffffff;
-			seq |= last_emitted & 0xffffffff00000000LL;
-		}
 
-		if (seq <= last_seq || seq > last_emitted) {
-			break;
-		}
-		/* If we loop over we don't want to return without
-		 * checking if a fence is signaled as it means that the
-		 * seq we just read is different from the previous on.
-		 */
-		wake = true;
-		last_seq = seq;
-		if ((count_loop++) > 10) {
-			/* We looped over too many time leave with the
-			 * fact that we might have set an older fence
-			 * seq then the current real last seq as signaled
-			 * by the hw.
-			 */
-			break;
-		}
-	} while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);
+	} while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
 
-	if (seq < last_emitted)
+	if (seq != ring->fence_drv.sync_seq)
 		amdgpu_fence_schedule_fallback(ring);
 
-	return wake;
-}
+	while (last_seq != seq) {
+		struct fence *fence, **ptr;
 
-/**
- * amdgpu_fence_process - process a fence
- *
- * @adev: amdgpu_device pointer
- * @ring: ring index the fence is associated with
- *
- * Checks the current fence value and wakes the fence queue
- * if the sequence number has increased (all asics).
- */
-void amdgpu_fence_process(struct amdgpu_ring *ring)
-{
-	if (amdgpu_fence_activity(ring))
-		wake_up_all(&ring->fence_drv.fence_queue);
+		ptr = &drv->fences[++last_seq & drv->num_fences_mask];
+
+		/* There is always exactly one thread signaling this fence slot */
+		fence = rcu_dereference_protected(*ptr, 1);
+		rcu_assign_pointer(*ptr, NULL);
+
+		BUG_ON(!fence);
+
+		r = fence_signal(fence);
+		if (!r)
+			FENCE_TRACE(fence, "signaled from irq context\n");
+		else
+			BUG();
+
+		fence_put(fence);
+	}
 }
 
 /**
@@ -234,77 +227,6 @@ static void amdgpu_fence_fallback(unsigned long arg)
 }
 
 /**
- * amdgpu_fence_seq_signaled - check if a fence sequence number has signaled
- *
- * @ring: ring the fence is associated with
- * @seq: sequence number
- *
- * Check if the last signaled fence sequnce number is >= the requested
- * sequence number (all asics).
- * Returns true if the fence has signaled (current fence value
- * is >= requested value) or false if it has not (current fence
- * value is < the requested value.  Helper function for
- * amdgpu_fence_signaled().
- */
-static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq)
-{
-	if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
-		return true;
-
-	/* poll new last sequence at least once */
-	amdgpu_fence_process(ring);
-	if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
-		return true;
-
-	return false;
-}
-
-/*
- * amdgpu_ring_wait_seq - wait for seq of the specific ring to signal
- * @ring: ring to wait on for the seq number
- * @seq: seq number wait for
- *
- * return value:
- * 0: seq signaled, and gpu not hang
- * -EINVAL: some paramter is not valid
- */
-static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
-{
-	BUG_ON(!ring);
-	if (seq > ring->fence_drv.sync_seq)
-		return -EINVAL;
-
-	if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
-		return 0;
-
-	amdgpu_fence_schedule_fallback(ring);
-	wait_event(ring->fence_drv.fence_queue,
-		   amdgpu_fence_seq_signaled(ring, seq));
-
-	return 0;
-}
-
-/**
- * amdgpu_fence_wait_next - wait for the next fence to signal
- *
- * @adev: amdgpu device pointer
- * @ring: ring index the fence is associated with
- *
- * Wait for the next fence on the requested ring to signal (all asics).
- * Returns 0 if the next fence has passed, error for all other cases.
- * Caller must hold ring lock.
- */
-int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
-{
-	uint64_t seq = atomic64_read(&ring->fence_drv.last_seq) + 1ULL;
-
-	if (seq >= ring->fence_drv.sync_seq)
-		return -ENOENT;
-
-	return amdgpu_fence_ring_wait_seq(ring, seq);
-}
-
-/**
  * amdgpu_fence_wait_empty - wait for all fences to signal
  *
  * @adev: amdgpu device pointer
@@ -312,16 +234,28 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
  *
  * Wait for all fences on the requested ring to signal (all asics).
  * Returns 0 if the fences have passed, error for all other cases.
- * Caller must hold ring lock.
  */
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
 {
-	uint64_t seq = ring->fence_drv.sync_seq;
+	uint64_t seq = ACCESS_ONCE(ring->fence_drv.sync_seq);
+	struct fence *fence, **ptr;
+	int r;
 
 	if (!seq)
 		return 0;
 
-	return amdgpu_fence_ring_wait_seq(ring, seq);
+	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
+	rcu_read_lock();
+	fence = rcu_dereference(*ptr);
+	if (!fence || !fence_get_rcu(fence)) {
+		rcu_read_unlock();
+		return 0;
+	}
+	rcu_read_unlock();
+
+	r = fence_wait(fence, false);
+	fence_put(fence);
+	return r;
 }
 
 /**
@@ -341,13 +275,10 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
 	 * but it's ok to report slightly wrong fence count here.
 	 */
 	amdgpu_fence_process(ring);
-	emitted = ring->fence_drv.sync_seq
-		- atomic64_read(&ring->fence_drv.last_seq);
-	/* to avoid 32bits warp around */
-	if (emitted > 0x10000000)
-		emitted = 0x10000000;
-
-	return (unsigned)emitted;
+	emitted = 0x100000000ull;
+	emitted -= atomic_read(&ring->fence_drv.last_seq);
+	emitted += ACCESS_ONCE(ring->fence_drv.sync_seq);
+	return lower_32_bits(emitted);
 }
 
 /**
@@ -379,7 +310,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
 		ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index;
 		ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
 	}
-	amdgpu_fence_write(ring, atomic64_read(&ring->fence_drv.last_seq));
+	amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
 	amdgpu_irq_get(adev, irq_src, irq_type);
 
 	ring->fence_drv.irq_src = irq_src;
@@ -397,25 +328,36 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
  * for the requested ring.
  *
  * @ring: ring to init the fence driver on
+ * @num_hw_submission: number of entries on the hardware queue
  *
  * Init the fence driver for the requested ring (all asics).
  * Helper function for amdgpu_fence_driver_init().
  */
-int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
+				  unsigned num_hw_submission)
 {
 	long timeout;
 	int r;
 
+	/* Check that num_hw_submission is a non-zero power of two */
+	if (!num_hw_submission || (num_hw_submission & (num_hw_submission - 1)))
+		return -EINVAL;
+
 	ring->fence_drv.cpu_addr = NULL;
 	ring->fence_drv.gpu_addr = 0;
 	ring->fence_drv.sync_seq = 0;
-	atomic64_set(&ring->fence_drv.last_seq, 0);
+	atomic_set(&ring->fence_drv.last_seq, 0);
 	ring->fence_drv.initialized = false;
 
 	setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
 		    (unsigned long)ring);
 
-	init_waitqueue_head(&ring->fence_drv.fence_queue);
+	ring->fence_drv.num_fences_mask = num_hw_submission - 1;
+	spin_lock_init(&ring->fence_drv.lock);
+	ring->fence_drv.fences = kcalloc(num_hw_submission, sizeof(void *),
+					 GFP_KERNEL);
+	if (!ring->fence_drv.fences)
+		return -ENOMEM;
 
 	timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
 	if (timeout == 0) {
@@ -429,7 +371,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
 		timeout = MAX_SCHEDULE_TIMEOUT;
 	}
 	r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
-			   amdgpu_sched_hw_submission,
+			   num_hw_submission,
 			   timeout, ring->name);
 	if (r) {
 		DRM_ERROR("Failed to create scheduler on ring %s.\n",
@@ -477,10 +419,9 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev)
  */
 void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
 {
-	int i, r;
+	unsigned i, j;
+	int r;
 
-	if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
-		kmem_cache_destroy(amdgpu_fence_slab);
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
@@ -491,13 +432,18 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
 			/* no need to trigger GPU reset as we are unloading */
 			amdgpu_fence_driver_force_completion(adev);
 		}
-		wake_up_all(&ring->fence_drv.fence_queue);
 		amdgpu_irq_put(adev, ring->fence_drv.irq_src,
 			       ring->fence_drv.irq_type);
 		amd_sched_fini(&ring->sched);
 		del_timer_sync(&ring->fence_drv.fallback_timer);
+		for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
+			fence_put(ring->fence_drv.fences[j]);
+		kfree(ring->fence_drv.fences);
 		ring->fence_drv.initialized = false;
 	}
+
+	if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
+		kmem_cache_destroy(amdgpu_fence_slab);
 }
 
 /**
@@ -594,103 +540,57 @@ static const char *amdgpu_fence_get_timeline_name(struct fence *f)
 }
 
 /**
- * amdgpu_fence_is_signaled - test if fence is signaled
- *
- * @f: fence to test
+ * amdgpu_fence_enable_signaling - enable signalling on fence
+ * @f: fence
  *
- * Test the fence sequence number if it is already signaled. If it isn't
- * signaled start fence processing. Returns True if the fence is signaled.
+ * Schedules the fallback timer if it is not already pending and always
+ * returns true; the fence itself is signaled later, when the ring's
+ * fences are processed.
  */
-static bool amdgpu_fence_is_signaled(struct fence *f)
+static bool amdgpu_fence_enable_signaling(struct fence *f)
 {
 	struct amdgpu_fence *fence = to_amdgpu_fence(f);
 	struct amdgpu_ring *ring = fence->ring;
 
-	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
-		return true;
-
-	amdgpu_fence_process(ring);
+	if (!timer_pending(&ring->fence_drv.fallback_timer))
+		amdgpu_fence_schedule_fallback(ring);
 
-	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
-		return true;
+	FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
 
-	return false;
+	return true;
 }
 
 /**
- * amdgpu_fence_check_signaled - callback from fence_queue
+ * amdgpu_fence_free - free up the fence memory
+ *
+ * @rcu: RCU callback head
  *
- * this function is called with fence_queue lock held, which is also used
- * for the fence locking itself, so unlocked variants are used for
- * fence_signal, and remove_wait_queue.
+ * Free up the fence memory after the RCU grace period.
  */
-static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
+static void amdgpu_fence_free(struct rcu_head *rcu)
 {
-	struct amdgpu_fence *fence;
-	struct amdgpu_device *adev;
-	u64 seq;
-	int ret;
-
-	fence = container_of(wait, struct amdgpu_fence, fence_wake);
-	adev = fence->ring->adev;
-
-	/*
-	 * We cannot use amdgpu_fence_process here because we're already
-	 * in the waitqueue, in a call from wake_up_all.
-	 */
-	seq = atomic64_read(&fence->ring->fence_drv.last_seq);
-	if (seq >= fence->seq) {
-		ret = fence_signal_locked(&fence->base);
-		if (!ret)
-			FENCE_TRACE(&fence->base, "signaled from irq context\n");
-		else
-			FENCE_TRACE(&fence->base, "was already signaled\n");
-
-		__remove_wait_queue(&fence->ring->fence_drv.fence_queue, &fence->fence_wake);
-		fence_put(&fence->base);
-	} else
-		FENCE_TRACE(&fence->base, "pending\n");
-	return 0;
+	struct fence *f = container_of(rcu, struct fence, rcu);
+	struct amdgpu_fence *fence = to_amdgpu_fence(f);
+	kmem_cache_free(amdgpu_fence_slab, fence);
 }
 
 /**
- * amdgpu_fence_enable_signaling - enable signalling on fence
+ * amdgpu_fence_release - callback that fence can be freed
+ *
  * @fence: fence
  *
- * This function is called with fence_queue lock held, and adds a callback
- * to fence_queue that checks if this fence is signaled, and if so it
- * signals the fence and removes itself.
+ * This function is called when the reference count becomes zero.
+ * It just RCU schedules freeing up the fence.
  */
-static bool amdgpu_fence_enable_signaling(struct fence *f)
-{
-	struct amdgpu_fence *fence = to_amdgpu_fence(f);
-	struct amdgpu_ring *ring = fence->ring;
-
-	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
-		return false;
-
-	fence->fence_wake.flags = 0;
-	fence->fence_wake.private = NULL;
-	fence->fence_wake.func = amdgpu_fence_check_signaled;
-	__add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
-	fence_get(f);
-	if (!timer_pending(&ring->fence_drv.fallback_timer))
-		amdgpu_fence_schedule_fallback(ring);
-	FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
-	return true;
-}
-
 static void amdgpu_fence_release(struct fence *f)
 {
-	struct amdgpu_fence *fence = to_amdgpu_fence(f);
-	kmem_cache_free(amdgpu_fence_slab, fence);
+	call_rcu(&f->rcu, amdgpu_fence_free);
 }
 
-const struct fence_ops amdgpu_fence_ops = {
+static const struct fence_ops amdgpu_fence_ops = {
 	.get_driver_name = amdgpu_fence_get_driver_name,
 	.get_timeline_name = amdgpu_fence_get_timeline_name,
 	.enable_signaling = amdgpu_fence_enable_signaling,
-	.signaled = amdgpu_fence_is_signaled,
 	.wait = fence_default_wait,
 	.release = amdgpu_fence_release,
 };
@@ -714,9 +614,9 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
 		amdgpu_fence_process(ring);
 
 		seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
-		seq_printf(m, "Last signaled fence 0x%016llx\n",
-			   (unsigned long long)atomic64_read(&ring->fence_drv.last_seq));
-		seq_printf(m, "Last emitted        0x%016llx\n",
+		seq_printf(m, "Last signaled fence 0x%08x\n",
+			   atomic_read(&ring->fence_drv.last_seq));
+		seq_printf(m, "Last emitted        0x%08x\n",
 			   ring->fence_drv.sync_seq);
 	}
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 7a47c45b2131..fa6a27bff298 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -26,6 +26,7 @@
  *          Jerome Glisse
  */
 #include <linux/ktime.h>
+#include <linux/pagemap.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
@@ -140,25 +141,40 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_pri
 void amdgpu_gem_object_close(struct drm_gem_object *obj,
 			     struct drm_file *file_priv)
 {
-	struct amdgpu_bo *rbo = gem_to_amdgpu_bo(obj);
-	struct amdgpu_device *adev = rbo->adev;
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+	struct amdgpu_device *adev = bo->adev;
 	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
+
+	struct amdgpu_bo_list_entry vm_pd;
+	struct list_head list, duplicates;
+	struct ttm_validate_buffer tv;
+	struct ww_acquire_ctx ticket;
 	struct amdgpu_bo_va *bo_va;
 	int r;
-	r = amdgpu_bo_reserve(rbo, true);
+
+	INIT_LIST_HEAD(&list);
+	INIT_LIST_HEAD(&duplicates);
+
+	tv.bo = &bo->tbo;
+	tv.shared = true;
+	list_add(&tv.head, &list);
+
+	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
+
+	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
 	if (r) {
 		dev_err(adev->dev, "leaking bo va because "
 			"we fail to reserve bo (%d)\n", r);
 		return;
 	}
-	bo_va = amdgpu_vm_bo_find(vm, rbo);
+	bo_va = amdgpu_vm_bo_find(vm, bo);
 	if (bo_va) {
 		if (--bo_va->ref_count == 0) {
 			amdgpu_vm_bo_rmv(adev, bo_va);
 		}
 	}
-	amdgpu_bo_unreserve(rbo);
+	ttm_eu_backoff_reservation(&ticket, &list);
 }
 
 static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r)
@@ -243,12 +259,10 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 	    AMDGPU_GEM_USERPTR_REGISTER))
 		return -EINVAL;
 
-	if (!(args->flags & AMDGPU_GEM_USERPTR_READONLY) && (
-	     !(args->flags & AMDGPU_GEM_USERPTR_ANONONLY) ||
-	     !(args->flags & AMDGPU_GEM_USERPTR_REGISTER))) {
+	if (!(args->flags & AMDGPU_GEM_USERPTR_READONLY) &&
+	     !(args->flags & AMDGPU_GEM_USERPTR_REGISTER)) {
 
-		/* if we want to write to it we must require anonymous
-		   memory and install a MMU notifier */
+		/* if we want to write to it we must install a MMU notifier */
 		return -EACCES;
 	}
 
@@ -274,18 +288,23 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 
 	if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
 		down_read(&current->mm->mmap_sem);
+
+		r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
+						 bo->tbo.ttm->pages);
+		if (r)
+			goto unlock_mmap_sem;
+
 		r = amdgpu_bo_reserve(bo, true);
-		if (r) {
-			up_read(&current->mm->mmap_sem);
-			goto release_object;
-		}
+		if (r)
+			goto free_pages;
 
 		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
 		amdgpu_bo_unreserve(bo);
-		up_read(&current->mm->mmap_sem);
 		if (r)
-			goto release_object;
+			goto free_pages;
+
+		up_read(&current->mm->mmap_sem);
 	}
 
 	r = drm_gem_handle_create(filp, gobj, &handle);
@@ -297,6 +316,12 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 	args->handle = handle;
 	return 0;
 
+free_pages:
+	release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages, false);
+
+unlock_mmap_sem:
+	up_read(&current->mm->mmap_sem);
+
 release_object:
 	drm_gem_object_unreference_unlocked(gobj);
 
@@ -569,11 +594,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 	tv.shared = true;
 	list_add(&tv.head, &list);
 
-	if (args->operation == AMDGPU_VA_OP_MAP) {
-		tv_pd.bo = &fpriv->vm.page_directory->tbo;
-		tv_pd.shared = true;
-		list_add(&tv_pd.head, &list);
-	}
+	tv_pd.bo = &fpriv->vm.page_directory->tbo;
+	tv_pd.shared = true;
+	list_add(&tv_pd.head, &list);
+
 	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
 	if (r) {
 		drm_gem_object_unreference_unlocked(gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index db14a7bbb8f4..8443cea6821a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -85,14 +85,13 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  *
  * @adev: amdgpu_device pointer
  * @ib: IB object to free
+ * @f: the fence the SA bo needs to wait on for the ib allocation
  *
  * Free an IB (all asics).
  */
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
+void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fence *f)
 {
-	amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base);
-	if (ib->fence)
-		fence_put(&ib->fence->base);
+	amdgpu_sa_bo_free(adev, &ib->sa_bo, f);
 }
 
 /**
@@ -101,7 +100,6 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
  * @adev: amdgpu_device pointer
  * @num_ibs: number of IBs to schedule
  * @ibs: IB objects to schedule
- * @owner: owner for creating the fences
  * @f: fence created during this submission
  *
  * Schedule an IB on the associated ring (all asics).
@@ -118,14 +116,14 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
  * to SI there was just a DE IB.
  */
 int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
-		       struct amdgpu_ib *ibs, void *owner,
-		       struct fence *last_vm_update,
+		       struct amdgpu_ib *ibs, struct fence *last_vm_update,
 		       struct fence **f)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib *ib = &ibs[0];
 	struct amdgpu_ctx *ctx, *old_ctx;
 	struct amdgpu_vm *vm;
+	struct fence *hwf;
 	unsigned i;
 	int r = 0;
 
@@ -153,13 +151,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 
 	if (vm) {
 		/* do context switch */
-		amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr);
-
-		if (ring->funcs->emit_gds_switch)
-			amdgpu_ring_emit_gds_switch(ring, ib->vm_id,
-						    ib->gds_base, ib->gds_size,
-						    ib->gws_base, ib->gws_size,
-						    ib->oa_base, ib->oa_size);
+		amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr,
+				ib->gds_base, ib->gds_size,
+				ib->gws_base, ib->gws_size,
+				ib->oa_base, ib->oa_size);
 
 		if (ring->funcs->emit_hdp_flush)
 			amdgpu_ring_emit_hdp_flush(ring);
@@ -171,6 +166,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 
 		if (ib->ctx != ctx || ib->vm != vm) {
 			ring->current_ctx = old_ctx;
+			if (ib->vm_id)
+				amdgpu_vm_reset_id(adev, ib->vm_id);
 			amdgpu_ring_undo(ring);
 			return -EINVAL;
 		}
@@ -178,10 +175,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		ring->current_ctx = ctx;
 	}
 
-	r = amdgpu_fence_emit(ring, owner, &ib->fence);
+	if (vm) {
+		if (ring->funcs->emit_hdp_invalidate)
+			amdgpu_ring_emit_hdp_invalidate(ring);
+	}
+
+	r = amdgpu_fence_emit(ring, &hwf);
 	if (r) {
 		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
 		ring->current_ctx = old_ctx;
+		if (ib->vm_id)
+			amdgpu_vm_reset_id(adev, ib->vm_id);
 		amdgpu_ring_undo(ring);
 		return r;
 	}
@@ -195,7 +199,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	}
 
 	if (f)
-		*f = fence_get(&ib->fence->base);
+		*f = fence_get(hwf);
 
 	amdgpu_ring_commit(ring);
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index f594cfaa97e5..762cfdb85147 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -219,6 +219,8 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
 	if (r) {
 		return r;
 	}
+	adev->ddev->vblank_disable_allowed = true;
+
 	/* enable msi */
 	adev->irq.msi_enabled = false;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 90e52f7e17a0..9c9b19e2f353 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -70,9 +70,13 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
 void amdgpu_job_free(struct amdgpu_job *job)
 {
 	unsigned i;
+	struct fence *f;
+	/* use sched fence if available */
+	f = (job->base.s_fence)? &job->base.s_fence->base : job->fence;
 
 	for (i = 0; i < job->num_ibs; ++i)
-		amdgpu_ib_free(job->adev, &job->ibs[i]);
+		amdgpu_sa_bo_free(job->adev, &job->ibs[i].sa_bo, f);
+	fence_put(job->fence);
 
 	amdgpu_bo_unref(&job->uf.bo);
 	amdgpu_sync_free(&job->sync);
@@ -148,7 +152,7 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 	}
 
 	trace_amdgpu_sched_run_job(job);
-	r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job->owner,
+	r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs,
 			       job->sync.last_vm_update, &fence);
 	if (r) {
 		DRM_ERROR("Error scheduling IBs (%d)\n", r);
@@ -156,6 +160,7 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 	}
 
 err:
+	job->fence = fence;
 	amdgpu_job_free(job);
 	return fence;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 7805a8706af7..598eb0cd5aab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -382,6 +382,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		struct drm_amdgpu_info_vram_gtt vram_gtt;
 
 		vram_gtt.vram_size = adev->mc.real_vram_size;
+		vram_gtt.vram_size -= adev->vram_pin_size;
 		vram_gtt.vram_cpu_accessible_size = adev->mc.visible_vram_size;
 		vram_gtt.vram_cpu_accessible_size -= adev->vram_pin_size;
 		vram_gtt.gtt_size  = adev->mc.gtt_size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index d7ec9bd6755f..9f4a45cd2aab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -48,7 +48,8 @@ struct amdgpu_mn {
 	/* protected by adev->mn_lock */
 	struct hlist_node	node;
 
-	/* objects protected by mm->mmap_sem */
+	/* objects protected by lock */
+	struct mutex		lock;
 	struct rb_root		objects;
 };
 
@@ -72,7 +73,7 @@ static void amdgpu_mn_destroy(struct work_struct *work)
 	struct amdgpu_bo *bo, *next_bo;
 
 	mutex_lock(&adev->mn_lock);
-	down_write(&rmn->mm->mmap_sem);
+	mutex_lock(&rmn->lock);
 	hash_del(&rmn->node);
 	rbtree_postorder_for_each_entry_safe(node, next_node, &rmn->objects,
 					     it.rb) {
@@ -82,7 +83,7 @@ static void amdgpu_mn_destroy(struct work_struct *work)
 		}
 		kfree(node);
 	}
-	up_write(&rmn->mm->mmap_sem);
+	mutex_unlock(&rmn->lock);
 	mutex_unlock(&adev->mn_lock);
 	mmu_notifier_unregister_no_release(&rmn->mn, rmn->mm);
 	kfree(rmn);
@@ -105,6 +106,76 @@ static void amdgpu_mn_release(struct mmu_notifier *mn,
 }
 
 /**
+ * amdgpu_mn_invalidate_node - unmap all BOs of a node
+ *
+ * @node: the node with the BOs to unmap
+ *
+ * We block for all BOs and unmap them by moving them
+ * into the system domain again.
+ */
+static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
+				      unsigned long start,
+				      unsigned long end)
+{
+	struct amdgpu_bo *bo;
+	long r;
+
+	list_for_each_entry(bo, &node->bos, mn_list) {
+
+		if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end))
+			continue;
+
+		r = amdgpu_bo_reserve(bo, true);
+		if (r) {
+			DRM_ERROR("(%ld) failed to reserve user bo\n", r);
+			continue;
+		}
+
+		r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
+			true, false, MAX_SCHEDULE_TIMEOUT);
+		if (r <= 0)
+			DRM_ERROR("(%ld) failed to wait for user bo\n", r);
+
+		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
+		if (r)
+			DRM_ERROR("(%ld) failed to validate user bo\n", r);
+
+		amdgpu_bo_unreserve(bo);
+	}
+}
+
+/**
+ * amdgpu_mn_invalidate_page - callback to notify about mm change
+ *
+ * @mn: our notifier
+ * @mm: the mm this callback is about
+ * @address: address of invalidate page
+ *
+ * Invalidation of a single page. Blocks for all BOs mapping it
+ * and unmaps them by moving them into the system domain again.
+ */
+static void amdgpu_mn_invalidate_page(struct mmu_notifier *mn,
+				      struct mm_struct *mm,
+				      unsigned long address)
+{
+	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
+	struct interval_tree_node *it;
+
+	mutex_lock(&rmn->lock);
+
+	it = interval_tree_iter_first(&rmn->objects, address, address);
+	if (it) {
+		struct amdgpu_mn_node *node;
+
+		node = container_of(it, struct amdgpu_mn_node, it);
+		amdgpu_mn_invalidate_node(node, address, address);
+	}
+
+	mutex_unlock(&rmn->lock);
+}
+
+/**
  * amdgpu_mn_invalidate_range_start - callback to notify about mm change
  *
  * @mn: our notifier
@@ -126,44 +197,24 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
 	/* notification is exclusive, but interval is inclusive */
 	end -= 1;
 
+	mutex_lock(&rmn->lock);
+
 	it = interval_tree_iter_first(&rmn->objects, start, end);
 	while (it) {
 		struct amdgpu_mn_node *node;
-		struct amdgpu_bo *bo;
-		long r;
 
 		node = container_of(it, struct amdgpu_mn_node, it);
 		it = interval_tree_iter_next(it, start, end);
 
-		list_for_each_entry(bo, &node->bos, mn_list) {
-
-			if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start,
-							  end))
-				continue;
-
-			r = amdgpu_bo_reserve(bo, true);
-			if (r) {
-				DRM_ERROR("(%ld) failed to reserve user bo\n", r);
-				continue;
-			}
-
-			r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
-				true, false, MAX_SCHEDULE_TIMEOUT);
-			if (r <= 0)
-				DRM_ERROR("(%ld) failed to wait for user bo\n", r);
-
-			amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
-			r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
-			if (r)
-				DRM_ERROR("(%ld) failed to validate user bo\n", r);
-
-			amdgpu_bo_unreserve(bo);
-		}
+		amdgpu_mn_invalidate_node(node, start, end);
 	}
+
+	mutex_unlock(&rmn->lock);
 }
 
 static const struct mmu_notifier_ops amdgpu_mn_ops = {
 	.release = amdgpu_mn_release,
+	.invalidate_page = amdgpu_mn_invalidate_page,
 	.invalidate_range_start = amdgpu_mn_invalidate_range_start,
 };
 
@@ -196,6 +247,7 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
 	rmn->adev = adev;
 	rmn->mm = mm;
 	rmn->mn.ops = &amdgpu_mn_ops;
+	mutex_init(&rmn->lock);
 	rmn->objects = RB_ROOT;
 
 	r = __mmu_notifier_register(&rmn->mn, mm);
@@ -242,7 +294,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 
 	INIT_LIST_HEAD(&bos);
 
-	down_write(&rmn->mm->mmap_sem);
+	mutex_lock(&rmn->lock);
 
 	while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) {
 		kfree(node);
@@ -256,7 +308,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 	if (!node) {
 		node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
 		if (!node) {
-			up_write(&rmn->mm->mmap_sem);
+			mutex_unlock(&rmn->lock);
 			return -ENOMEM;
 		}
 	}
@@ -271,7 +323,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 
 	interval_tree_insert(&node->it, &rmn->objects);
 
-	up_write(&rmn->mm->mmap_sem);
+	mutex_unlock(&rmn->lock);
 
 	return 0;
 }
@@ -297,7 +349,7 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 		return;
 	}
 
-	down_write(&rmn->mm->mmap_sem);
+	mutex_lock(&rmn->lock);
 
 	/* save the next list entry for later */
 	head = bo->mn_list.next;
@@ -312,6 +364,6 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 		kfree(node);
 	}
 
-	up_write(&rmn->mm->mmap_sem);
+	mutex_unlock(&rmn->lock);
 	mutex_unlock(&adev->mn_lock);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 9a025a77958d..5b6639faa731 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -308,7 +308,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
 {
 	bool is_iomem;
-	int r;
+	long r;
 
 	if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
 		return -EPERM;
@@ -319,14 +319,20 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
 		}
 		return 0;
 	}
+
+	r = reservation_object_wait_timeout_rcu(bo->tbo.resv, false, false,
+						MAX_SCHEDULE_TIMEOUT);
+	if (r < 0)
+		return r;
+
 	r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
-	if (r) {
+	if (r)
 		return r;
-	}
+
 	bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
-	if (ptr) {
+	if (ptr)
 		*ptr = bo->kptr;
-	}
+
 	return 0;
 }
 
@@ -470,6 +476,17 @@ int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
 	return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM);
 }
 
+static const char *amdgpu_vram_names[] = {
+	"UNKNOWN",
+	"GDDR1",
+	"DDR2",
+	"GDDR3",
+	"GDDR4",
+	"GDDR5",
+	"HBM",
+	"DDR3"
+};
+
 int amdgpu_bo_init(struct amdgpu_device *adev)
 {
 	/* Add an MTRR for the VRAM */
@@ -478,8 +495,8 @@ int amdgpu_bo_init(struct amdgpu_device *adev)
 	DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
 		adev->mc.mc_vram_size >> 20,
 		(unsigned long long)adev->mc.aper_size >> 20);
-	DRM_INFO("RAM width %dbits DDR\n",
-			adev->mc.vram_width);
+	DRM_INFO("RAM width %dbits %s\n",
+		 adev->mc.vram_width, amdgpu_vram_names[adev->mc.vram_type]);
 	return amdgpu_ttm_init(adev);
 }
 
@@ -602,6 +619,10 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 	if ((offset + size) <= adev->mc.visible_vram_size)
 		return 0;
 
+	/* Can't move a pinned BO to visible VRAM */
+	if (abo->pin_count > 0)
+		return -EINVAL;
+
 	/* hurrah the memory is not visible ! */
 	amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM);
 	lpfn =	adev->mc.visible_vram_size >> PAGE_SHIFT;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
index 3cb6d6c413c7..e9c6ae6ed2f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
@@ -143,7 +143,7 @@ static int amdgpu_pp_late_init(void *handle)
 					adev->powerplay.pp_handle);
 
 #ifdef CONFIG_DRM_AMD_POWERPLAY
-	if (adev->pp_enabled) {
+	if (adev->pp_enabled && adev->pm.dpm_enabled) {
 		amdgpu_pm_sysfs_init(adev);
 		amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_COMPLETE_INIT, NULL, NULL);
 	}
@@ -161,12 +161,8 @@ static int amdgpu_pp_sw_init(void *handle)
 					adev->powerplay.pp_handle);
 
 #ifdef CONFIG_DRM_AMD_POWERPLAY
-	if (adev->pp_enabled) {
-		if (amdgpu_dpm == 0)
-			adev->pm.dpm_enabled = false;
-		else
-			adev->pm.dpm_enabled = true;
-	}
+	if (adev->pp_enabled)
+		adev->pm.dpm_enabled = true;
 #endif
 
 	return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 56c07e3fdb33..972eed2ef787 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -236,7 +236,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		ring->adev = adev;
 		ring->idx = adev->num_rings++;
 		adev->rings[ring->idx] = ring;
-		r = amdgpu_fence_driver_init_ring(ring);
+		r = amdgpu_fence_driver_init_ring(ring,
+			amdgpu_sched_hw_submission);
 		if (r)
 			return r;
 	}
@@ -352,30 +353,6 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 	}
 }
 
-/**
- * amdgpu_ring_from_fence - get ring from fence
- *
- * @f: fence structure
- *
- * Extract the ring a fence belongs to. Handles both scheduler as
- * well as hardware fences.
- */
-struct amdgpu_ring *amdgpu_ring_from_fence(struct fence *f)
-{
-	struct amdgpu_fence *a_fence;
-	struct amd_sched_fence *s_fence;
-
-	s_fence = to_amd_sched_fence(f);
-	if (s_fence)
-		return container_of(s_fence->sched, struct amdgpu_ring, sched);
-
-	a_fence = to_amdgpu_fence(f);
-	if (a_fence)
-		return a_fence->ring;
-
-	return NULL;
-}
-
 /*
  * Debugfs info
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index 2faf03bcda21..8bf84efafb04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -60,9 +60,8 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
 	sa_manager->align = align;
 	sa_manager->hole = &sa_manager->olist;
 	INIT_LIST_HEAD(&sa_manager->olist);
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+	for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
 		INIT_LIST_HEAD(&sa_manager->flist[i]);
-	}
 
 	r = amdgpu_bo_create(adev, size, align, true, domain,
 			     0, NULL, NULL, &sa_manager->bo);
@@ -228,11 +227,9 @@ static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
 	unsigned soffset, eoffset, wasted;
 	int i;
 
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		if (!list_empty(&sa_manager->flist[i])) {
+	for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
+		if (!list_empty(&sa_manager->flist[i]))
 			return true;
-		}
-	}
 
 	soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
 	eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
@@ -265,12 +262,11 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
 	/* go over all fence list and try to find the closest sa_bo
 	 * of the current last
 	 */
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+	for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
 		struct amdgpu_sa_bo *sa_bo;
 
-		if (list_empty(&sa_manager->flist[i])) {
+		if (list_empty(&sa_manager->flist[i]))
 			continue;
-		}
 
 		sa_bo = list_first_entry(&sa_manager->flist[i],
 					 struct amdgpu_sa_bo, flist);
@@ -299,7 +295,9 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
 	}
 
 	if (best_bo) {
-		uint32_t idx = amdgpu_ring_from_fence(best_bo->fence)->idx;
+		uint32_t idx = best_bo->fence->context;
+
+		idx %= AMDGPU_SA_NUM_FENCE_LISTS;
 		++tries[idx];
 		sa_manager->hole = best_bo->olist.prev;
 
@@ -315,8 +313,8 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 		     struct amdgpu_sa_bo **sa_bo,
 		     unsigned size, unsigned align)
 {
-	struct fence *fences[AMDGPU_MAX_RINGS];
-	unsigned tries[AMDGPU_MAX_RINGS];
+	struct fence *fences[AMDGPU_SA_NUM_FENCE_LISTS];
+	unsigned tries[AMDGPU_SA_NUM_FENCE_LISTS];
 	unsigned count;
 	int i, r;
 	signed long t;
@@ -338,7 +336,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 
 	spin_lock(&sa_manager->wq.lock);
 	do {
-		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+		for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
 			fences[i] = NULL;
 			tries[i] = 0;
 		}
@@ -355,7 +353,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 			/* see if we can skip over some allocations */
 		} while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
 
-		for (i = 0, count = 0; i < AMDGPU_MAX_RINGS; ++i)
+		for (i = 0, count = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
 			if (fences[i])
 				fences[count++] = fence_get(fences[i]);
 
@@ -397,8 +395,9 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
 	spin_lock(&sa_manager->wq.lock);
 	if (fence && !fence_is_signaled(fence)) {
 		uint32_t idx;
+
 		(*sa_bo)->fence = fence_get(fence);
-		idx = amdgpu_ring_from_fence(fence)->idx;
+		idx = fence->context % AMDGPU_SA_NUM_FENCE_LISTS;
 		list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
 	} else {
 		amdgpu_sa_bo_remove_locked(*sa_bo);
@@ -410,25 +409,6 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
 
 #if defined(CONFIG_DEBUG_FS)
 
-static void amdgpu_sa_bo_dump_fence(struct fence *fence, struct seq_file *m)
-{
-	struct amdgpu_fence *a_fence = to_amdgpu_fence(fence);
-	struct amd_sched_fence *s_fence = to_amd_sched_fence(fence);
-
-	if (a_fence)
-		seq_printf(m, " protected by 0x%016llx on ring %d",
-			   a_fence->seq, a_fence->ring->idx);
-
-	if (s_fence) {
-		struct amdgpu_ring *ring;
-
-
-		ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
-		seq_printf(m, " protected by 0x%016x on ring %d",
-			   s_fence->base.seqno, ring->idx);
-	}
-}
-
 void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
 				  struct seq_file *m)
 {
@@ -445,8 +425,11 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
 		}
 		seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
 			   soffset, eoffset, eoffset - soffset);
+
 		if (i->fence)
-			amdgpu_sa_bo_dump_fence(i->fence, m);
+			seq_printf(m, " protected by 0x%08x on context %d",
+				   i->fence->seqno, i->fence->context);
+
 		seq_printf(m, "\n");
 	}
 	spin_unlock(&sa_manager->wq.lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index c15be00de904..c48b4fce5e57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -37,6 +37,8 @@ struct amdgpu_sync_entry {
 	struct fence		*fence;
 };
 
+static struct kmem_cache *amdgpu_sync_slab;
+
 /**
  * amdgpu_sync_create - zero init sync object
  *
@@ -50,14 +52,18 @@ void amdgpu_sync_create(struct amdgpu_sync *sync)
 	sync->last_vm_update = NULL;
 }
 
+/**
+ * amdgpu_sync_same_dev - test if fence belongs to us
+ *
+ * @adev: amdgpu device to use for the test
+ * @f: fence to test
+ *
+ * Test if the fence was issued by us.
+ */
 static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f)
 {
-	struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
 	struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
 
-	if (a_fence)
-		return a_fence->ring->adev == adev;
-
 	if (s_fence) {
 		struct amdgpu_ring *ring;
 
@@ -68,17 +74,31 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f)
 	return false;
 }
 
-static bool amdgpu_sync_test_owner(struct fence *f, void *owner)
+/**
+ * amdgpu_sync_get_owner - extract the owner of a fence
+ *
+ * @f: fence to get the owner from
+ *
+ * Extract who originally created the fence.
+ */
+static void *amdgpu_sync_get_owner(struct fence *f)
 {
-	struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
 	struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
+
 	if (s_fence)
-		return s_fence->owner == owner;
-	if (a_fence)
-		return a_fence->owner == owner;
-	return false;
+		return s_fence->owner;
+
+	return AMDGPU_FENCE_OWNER_UNDEFINED;
 }
 
+/**
+ * amdgpu_sync_keep_later - Keep the later fence
+ *
+ * @keep: existing fence to test
+ * @fence: new fence
+ *
+ * Either keep the existing fence or the new one, depending which one is later.
+ */
 static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence)
 {
 	if (*keep && fence_is_later(*keep, fence))
@@ -104,7 +124,7 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 		return 0;
 
 	if (amdgpu_sync_same_dev(adev, f) &&
-	    amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM))
+	    amdgpu_sync_get_owner(f) == AMDGPU_FENCE_OWNER_VM)
 		amdgpu_sync_keep_later(&sync->last_vm_update, f);
 
 	hash_for_each_possible(sync->fences, e, node, f->context) {
@@ -115,7 +135,7 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 		return 0;
 	}
 
-	e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL);
+	e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
 	if (!e)
 		return -ENOMEM;
 
@@ -124,18 +144,6 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 	return 0;
 }
 
-static void *amdgpu_sync_get_owner(struct fence *f)
-{
-	struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
-	struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
-
-	if (s_fence)
-		return s_fence->owner;
-	else if (a_fence)
-		return a_fence->owner;
-	return AMDGPU_FENCE_OWNER_UNDEFINED;
-}
-
 /**
  * amdgpu_sync_resv - sync to a reservation object
  *
@@ -208,7 +216,7 @@ struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
 		f = e->fence;
 
 		hash_del(&e->node);
-		kfree(e);
+		kmem_cache_free(amdgpu_sync_slab, e);
 
 		if (!fence_is_signaled(f))
 			return f;
@@ -231,7 +239,7 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync)
 
 		hash_del(&e->node);
 		fence_put(e->fence);
-		kfree(e);
+		kmem_cache_free(amdgpu_sync_slab, e);
 	}
 
 	return 0;
@@ -253,8 +261,34 @@ void amdgpu_sync_free(struct amdgpu_sync *sync)
 	hash_for_each_safe(sync->fences, i, tmp, e, node) {
 		hash_del(&e->node);
 		fence_put(e->fence);
-		kfree(e);
+		kmem_cache_free(amdgpu_sync_slab, e);
 	}
 
 	fence_put(sync->last_vm_update);
 }
+
+/**
+ * amdgpu_sync_init - init sync object subsystem
+ *
+ * Allocate the slab allocator.
+ */
+int amdgpu_sync_init(void)
+{
+	amdgpu_sync_slab = kmem_cache_create(
+		"amdgpu_sync", sizeof(struct amdgpu_sync_entry), 0,
+		SLAB_HWCACHE_ALIGN, NULL);
+	if (!amdgpu_sync_slab)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * amdgpu_sync_fini - fini sync object subsystem
+ *
+ * Free the slab allocator.
+ */
+void amdgpu_sync_fini(void)
+{
+	kmem_cache_destroy(amdgpu_sync_slab);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 9ccdd189d717..6f3369de232f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -384,9 +384,15 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
 			struct ttm_mem_reg *new_mem)
 {
 	struct amdgpu_device *adev;
+	struct amdgpu_bo *abo;
 	struct ttm_mem_reg *old_mem = &bo->mem;
 	int r;
 
+	/* Can't move a pinned BO */
+	abo = container_of(bo, struct amdgpu_bo, tbo);
+	if (WARN_ON_ONCE(abo->pin_count > 0))
+		return -EINVAL;
+
 	adev = amdgpu_get_adev(bo->bdev);
 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
 		amdgpu_move_null(bo, new_mem);
@@ -494,29 +500,32 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
 /*
  * TTM backend functions.
  */
+struct amdgpu_ttm_gup_task_list {
+	struct list_head	list;
+	struct task_struct	*task;
+};
+
 struct amdgpu_ttm_tt {
-	struct ttm_dma_tt		ttm;
-	struct amdgpu_device		*adev;
-	u64				offset;
-	uint64_t			userptr;
-	struct mm_struct		*usermm;
-	uint32_t			userflags;
+	struct ttm_dma_tt	ttm;
+	struct amdgpu_device	*adev;
+	u64			offset;
+	uint64_t		userptr;
+	struct mm_struct	*usermm;
+	uint32_t		userflags;
+	spinlock_t              guptasklock;
+	struct list_head        guptasks;
+	atomic_t		mmu_invalidations;
 };
 
-/* prepare the sg table with the user pages */
-static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
+int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
-	struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	unsigned pinned = 0, nents;
-	int r;
-
 	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
-	enum dma_data_direction direction = write ?
-		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+	unsigned pinned = 0;
+	int r;
 
 	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
-		/* check that we only pin down anonymous memory
+		/* check that we only use anonymous memory
 		   to prevent problems with writeback */
 		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
 		struct vm_area_struct *vma;
@@ -529,10 +538,20 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 	do {
 		unsigned num_pages = ttm->num_pages - pinned;
 		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
-		struct page **pages = ttm->pages + pinned;
+		struct page **p = pages + pinned;
+		struct amdgpu_ttm_gup_task_list guptask;
+
+		guptask.task = current;
+		spin_lock(&gtt->guptasklock);
+		list_add(&guptask.list, &gtt->guptasks);
+		spin_unlock(&gtt->guptasklock);
+
+		r = get_user_pages(userptr, num_pages, write, 0, p, NULL);
+
+		spin_lock(&gtt->guptasklock);
+		list_del(&guptask.list);
+		spin_unlock(&gtt->guptasklock);
 
-		r = get_user_pages(current, current->mm, userptr, num_pages,
-				   write, 0, pages, NULL);
 		if (r < 0)
 			goto release_pages;
 
@@ -540,6 +559,25 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 
 	} while (pinned < ttm->num_pages);
 
+	return 0;
+
+release_pages:
+	release_pages(pages, pinned, 0);
+	return r;
+}
+
+/* prepare the sg table with the user pages */
+static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
+{
+	struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	unsigned nents;
+	int r;
+
+	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
+	enum dma_data_direction direction = write ?
+		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+
 	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
 				      ttm->num_pages << PAGE_SHIFT,
 				      GFP_KERNEL);
@@ -558,9 +596,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 
 release_sg:
 	kfree(ttm->sg);
-
-release_pages:
-	release_pages(ttm->pages, pinned, 0);
 	return r;
 }
 
@@ -587,7 +622,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 			set_page_dirty(page);
 
 		mark_page_accessed(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 
 	sg_free_table(ttm->sg);
@@ -783,6 +818,10 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 	gtt->userptr = addr;
 	gtt->usermm = current->mm;
 	gtt->userflags = flags;
+	spin_lock_init(&gtt->guptasklock);
+	INIT_LIST_HEAD(&gtt->guptasks);
+	atomic_set(&gtt->mmu_invalidations, 0);
+
 	return 0;
 }
 
@@ -800,21 +839,40 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	struct amdgpu_ttm_gup_task_list *entry;
 	unsigned long size;
 
-	if (gtt == NULL)
-		return false;
-
-	if (gtt->ttm.ttm.state != tt_bound || !gtt->userptr)
+	if (gtt == NULL || !gtt->userptr)
 		return false;
 
 	size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
 	if (gtt->userptr > end || gtt->userptr + size <= start)
 		return false;
 
+	spin_lock(&gtt->guptasklock);
+	list_for_each_entry(entry, &gtt->guptasks, list) {
+		if (entry->task == current) {
+			spin_unlock(&gtt->guptasklock);
+			return false;
+		}
+	}
+	spin_unlock(&gtt->guptasklock);
+
+	atomic_inc(&gtt->mmu_invalidations);
+
 	return true;
 }
 
+bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
+				       int *last_invalidated)
+{
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	int prev_invalidated = *last_invalidated;
+
+	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
+	return prev_invalidated != *last_invalidated;
+}
+
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 1de82bf4fc79..338da80006b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -241,32 +241,28 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 
 int amdgpu_uvd_suspend(struct amdgpu_device *adev)
 {
-	struct amdgpu_ring *ring = &adev->uvd.ring;
-	int i, r;
+	unsigned size;
+	void *ptr;
+	int i;
 
 	if (adev->uvd.vcpu_bo == NULL)
 		return 0;
 
-	for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
-		uint32_t handle = atomic_read(&adev->uvd.handles[i]);
-		if (handle != 0) {
-			struct fence *fence;
+	for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i)
+		if (atomic_read(&adev->uvd.handles[i]))
+			break;
 
-			amdgpu_uvd_note_usage(adev);
+	if (i == AMDGPU_MAX_UVD_HANDLES)
+		return 0;
 
-			r = amdgpu_uvd_get_destroy_msg(ring, handle, false, &fence);
-			if (r) {
-				DRM_ERROR("Error destroying UVD (%d)!\n", r);
-				continue;
-			}
+	size = amdgpu_bo_size(adev->uvd.vcpu_bo);
+	ptr = adev->uvd.cpu_addr;
 
-			fence_wait(fence, false);
-			fence_put(fence);
+	adev->uvd.saved_bo = kmalloc(size, GFP_KERNEL);
+	if (!adev->uvd.saved_bo)
+		return -ENOMEM;
 
-			adev->uvd.filp[i] = NULL;
-			atomic_set(&adev->uvd.handles[i], 0);
-		}
-	}
+	memcpy(adev->uvd.saved_bo, ptr, size);
 
 	return 0;
 }
@@ -275,23 +271,29 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
 {
 	unsigned size;
 	void *ptr;
-	const struct common_firmware_header *hdr;
-	unsigned offset;
 
 	if (adev->uvd.vcpu_bo == NULL)
 		return -EINVAL;
 
-	hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
-	offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
-	memcpy(adev->uvd.cpu_addr, (adev->uvd.fw->data) + offset,
-		(adev->uvd.fw->size) - offset);
-
 	size = amdgpu_bo_size(adev->uvd.vcpu_bo);
-	size -= le32_to_cpu(hdr->ucode_size_bytes);
 	ptr = adev->uvd.cpu_addr;
-	ptr += le32_to_cpu(hdr->ucode_size_bytes);
 
-	memset(ptr, 0, size);
+	if (adev->uvd.saved_bo != NULL) {
+		memcpy(ptr, adev->uvd.saved_bo, size);
+		kfree(adev->uvd.saved_bo);
+		adev->uvd.saved_bo = NULL;
+	} else {
+		const struct common_firmware_header *hdr;
+		unsigned offset;
+
+		hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
+		offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+		memcpy(adev->uvd.cpu_addr, (adev->uvd.fw->data) + offset,
+			(adev->uvd.fw->size) - offset);
+		size -= le32_to_cpu(hdr->ucode_size_bytes);
+		ptr += le32_to_cpu(hdr->ucode_size_bytes);
+		memset(ptr, 0, size);
+	}
 
 	return 0;
 }
@@ -539,13 +541,6 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
 		return -EINVAL;
 	}
 
-	r = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
-						MAX_SCHEDULE_TIMEOUT);
-	if (r < 0) {
-		DRM_ERROR("Failed waiting for UVD message (%ld)!\n", r);
-		return r;
-	}
-
 	r = amdgpu_bo_kmap(bo, &ptr);
 	if (r) {
 		DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
@@ -886,8 +881,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 	ib->length_dw = 16;
 
 	if (direct) {
-		r = amdgpu_ib_schedule(ring, 1, ib,
-				       AMDGPU_FENCE_OWNER_UNDEFINED, NULL, &f);
+		r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
+		job->fence = f;
 		if (r)
 			goto err_free;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 39c3aa60381a..4bec0c108cea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -425,8 +425,8 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 	for (i = ib->length_dw; i < ib_size_dw; ++i)
 		ib->ptr[i] = 0x0;
 
-	r = amdgpu_ib_schedule(ring, 1, ib, AMDGPU_FENCE_OWNER_UNDEFINED,
-			       NULL, &f);
+	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
+	job->fence = f;
 	if (r)
 		goto err;
 
@@ -487,9 +487,8 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 		ib->ptr[i] = 0x0;
 
 	if (direct) {
-		r = amdgpu_ib_schedule(ring, 1, ib,
-				       AMDGPU_FENCE_OWNER_UNDEFINED,
-				       NULL, &f);
+		r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
+		job->fence = f;
 		if (r)
 			goto err;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d9dc8bea5e98..b6c011b83641 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -95,6 +95,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 	entry->priority = 0;
 	entry->tv.bo = &vm->page_directory->tbo;
 	entry->tv.shared = true;
+	entry->user_pages = NULL;
 	list_add(&entry->tv.head, validated);
 }
 
@@ -188,6 +189,13 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		if (!is_later && owner == (long)id &&
 		    pd_addr == id->pd_gpu_addr) {
 
+			r = amdgpu_sync_fence(ring->adev, sync,
+					      id->mgr_id->active);
+			if (r) {
+				mutex_unlock(&adev->vm_manager.lock);
+				return r;
+			}
+
 			fence_put(id->mgr_id->active);
 			id->mgr_id->active = fence_get(fence);
 
@@ -234,19 +242,68 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
  * amdgpu_vm_flush - hardware flush the vm
  *
  * @ring: ring to use for flush
- * @vmid: vmid number to use
+ * @vm_id: vmid number to use
  * @pd_addr: address of the page directory
  *
  * Emit a VM flush when it is necessary.
  */
 void amdgpu_vm_flush(struct amdgpu_ring *ring,
-		     unsigned vmid,
-		     uint64_t pd_addr)
+		     unsigned vm_id, uint64_t pd_addr,
+		     uint32_t gds_base, uint32_t gds_size,
+		     uint32_t gws_base, uint32_t gws_size,
+		     uint32_t oa_base, uint32_t oa_size)
 {
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id];
+	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
+		mgr_id->gds_base != gds_base ||
+		mgr_id->gds_size != gds_size ||
+		mgr_id->gws_base != gws_base ||
+		mgr_id->gws_size != gws_size ||
+		mgr_id->oa_base != oa_base ||
+		mgr_id->oa_size != oa_size);
+
+	if (ring->funcs->emit_pipeline_sync && (
+	    pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed))
+		amdgpu_ring_emit_pipeline_sync(ring);
+
 	if (pd_addr != AMDGPU_VM_NO_FLUSH) {
-		trace_amdgpu_vm_flush(pd_addr, ring->idx, vmid);
-		amdgpu_ring_emit_vm_flush(ring, vmid, pd_addr);
+		trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id);
+		amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr);
 	}
+
+	if (gds_switch_needed) {
+		mgr_id->gds_base = gds_base;
+		mgr_id->gds_size = gds_size;
+		mgr_id->gws_base = gws_base;
+		mgr_id->gws_size = gws_size;
+		mgr_id->oa_base = oa_base;
+		mgr_id->oa_size = oa_size;
+		amdgpu_ring_emit_gds_switch(ring, vm_id,
+					    gds_base, gds_size,
+					    gws_base, gws_size,
+					    oa_base, oa_size);
+	}
+}
+
+/**
+ * amdgpu_vm_reset_id - reset VMID to zero
+ *
+ * @adev: amdgpu device structure
+ * @vm_id: vmid number to use
+ *
+ * Reset saved GDS, GWS and OA to force switch on next flush.
+ */
+void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id)
+{
+	struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id];
+
+	mgr_id->gds_base = 0;
+	mgr_id->gds_size = 0;
+	mgr_id->gws_base = 0;
+	mgr_id->gws_size = 0;
+	mgr_id->oa_base = 0;
+	mgr_id->oa_size = 0;
 }
 
 /**
@@ -810,7 +867,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 	while (start != mapping->it.last + 1) {
 		uint64_t last;
 
-		last = min((uint64_t)mapping->it.last, start + max_size);
+		last = min((uint64_t)mapping->it.last, start + max_size - 1);
 		r = amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm,
 						start, last, flags, addr,
 						fence);
@@ -818,7 +875,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 			return r;
 
 		start = last + 1;
-		addr += max_size;
+		addr += max_size * AMDGPU_GPU_PAGE_SIZE;
 	}
 
 	return 0;
@@ -914,22 +971,18 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 	struct amdgpu_bo_va_mapping *mapping;
 	int r;
 
-	spin_lock(&vm->freed_lock);
 	while (!list_empty(&vm->freed)) {
 		mapping = list_first_entry(&vm->freed,
 			struct amdgpu_bo_va_mapping, list);
 		list_del(&mapping->list);
-		spin_unlock(&vm->freed_lock);
+
 		r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, vm, mapping,
 					       0, NULL);
 		kfree(mapping);
 		if (r)
 			return r;
 
-		spin_lock(&vm->freed_lock);
 	}
-	spin_unlock(&vm->freed_lock);
-
 	return 0;
 
 }
@@ -956,9 +1009,8 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
 		bo_va = list_first_entry(&vm->invalidated,
 			struct amdgpu_bo_va, vm_status);
 		spin_unlock(&vm->status_lock);
-		mutex_lock(&bo_va->mutex);
+
 		r = amdgpu_vm_bo_update(adev, bo_va, NULL);
-		mutex_unlock(&bo_va->mutex);
 		if (r)
 			return r;
 
@@ -1002,7 +1054,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
 	INIT_LIST_HEAD(&bo_va->valids);
 	INIT_LIST_HEAD(&bo_va->invalids);
 	INIT_LIST_HEAD(&bo_va->vm_status);
-	mutex_init(&bo_va->mutex);
+
 	list_add_tail(&bo_va->bo_list, &bo->va);
 
 	return bo_va;
@@ -1054,9 +1106,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
 	eaddr /= AMDGPU_GPU_PAGE_SIZE;
 
-	spin_lock(&vm->it_lock);
 	it = interval_tree_iter_first(&vm->va, saddr, eaddr);
-	spin_unlock(&vm->it_lock);
 	if (it) {
 		struct amdgpu_bo_va_mapping *tmp;
 		tmp = container_of(it, struct amdgpu_bo_va_mapping, it);
@@ -1080,13 +1130,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 	mapping->offset = offset;
 	mapping->flags = flags;
 
-	mutex_lock(&bo_va->mutex);
 	list_add(&mapping->list, &bo_va->invalids);
-	mutex_unlock(&bo_va->mutex);
-	spin_lock(&vm->it_lock);
 	interval_tree_insert(&mapping->it, &vm->va);
-	spin_unlock(&vm->it_lock);
-	trace_amdgpu_vm_bo_map(bo_va, mapping);
 
 	/* Make sure the page tables are allocated */
 	saddr >>= amdgpu_vm_block_size;
@@ -1130,6 +1175,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 		entry->priority = 0;
 		entry->tv.bo = &entry->robj->tbo;
 		entry->tv.shared = true;
+		entry->user_pages = NULL;
 		vm->page_tables[pt_idx].addr = 0;
 	}
 
@@ -1137,9 +1183,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 
 error_free:
 	list_del(&mapping->list);
-	spin_lock(&vm->it_lock);
 	interval_tree_remove(&mapping->it, &vm->va);
-	spin_unlock(&vm->it_lock);
 	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
 	kfree(mapping);
 
@@ -1168,7 +1212,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 	bool valid = true;
 
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
-	mutex_lock(&bo_va->mutex);
+
 	list_for_each_entry(mapping, &bo_va->valids, list) {
 		if (mapping->it.start == saddr)
 			break;
@@ -1182,25 +1226,18 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 				break;
 		}
 
-		if (&mapping->list == &bo_va->invalids) {
-			mutex_unlock(&bo_va->mutex);
+		if (&mapping->list == &bo_va->invalids)
 			return -ENOENT;
-		}
 	}
-	mutex_unlock(&bo_va->mutex);
+
 	list_del(&mapping->list);
-	spin_lock(&vm->it_lock);
 	interval_tree_remove(&mapping->it, &vm->va);
-	spin_unlock(&vm->it_lock);
 	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
 
-	if (valid) {
-		spin_lock(&vm->freed_lock);
+	if (valid)
 		list_add(&mapping->list, &vm->freed);
-		spin_unlock(&vm->freed_lock);
-	} else {
+	else
 		kfree(mapping);
-	}
 
 	return 0;
 }
@@ -1229,23 +1266,17 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 
 	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
 		list_del(&mapping->list);
-		spin_lock(&vm->it_lock);
 		interval_tree_remove(&mapping->it, &vm->va);
-		spin_unlock(&vm->it_lock);
 		trace_amdgpu_vm_bo_unmap(bo_va, mapping);
-		spin_lock(&vm->freed_lock);
 		list_add(&mapping->list, &vm->freed);
-		spin_unlock(&vm->freed_lock);
 	}
 	list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
 		list_del(&mapping->list);
-		spin_lock(&vm->it_lock);
 		interval_tree_remove(&mapping->it, &vm->va);
-		spin_unlock(&vm->it_lock);
 		kfree(mapping);
 	}
+
 	fence_put(bo_va->last_pt_update);
-	mutex_destroy(&bo_va->mutex);
 	kfree(bo_va);
 }
 
@@ -1298,8 +1329,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	INIT_LIST_HEAD(&vm->invalidated);
 	INIT_LIST_HEAD(&vm->cleared);
 	INIT_LIST_HEAD(&vm->freed);
-	spin_lock_init(&vm->it_lock);
-	spin_lock_init(&vm->freed_lock);
+
 	pd_size = amdgpu_vm_directory_size(adev);
 	pd_entries = amdgpu_vm_num_pdes(adev);
 
@@ -1386,6 +1416,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
 	amdgpu_bo_unref(&vm->page_directory);
 	fence_put(vm->page_directory_fence);
+
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_vm_id *id = &vm->ids[i];
 
@@ -1410,9 +1441,11 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 	INIT_LIST_HEAD(&adev->vm_manager.ids_lru);
 
 	/* skip over VMID 0, since it is the system VM */
-	for (i = 1; i < adev->vm_manager.num_ids; ++i)
+	for (i = 1; i < adev->vm_manager.num_ids; ++i) {
+		amdgpu_vm_reset_id(adev, i);
 		list_add_tail(&adev->vm_manager.ids[i].list,
 			      &adev->vm_manager.ids_lru);
+	}
 
 	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 474ca02b0949..1f9109d3348b 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -3017,7 +3017,6 @@ static int ci_populate_single_memory_level(struct amdgpu_device *adev,
 						      &memory_level->MinVddcPhases);
 
 	memory_level->EnabledForThrottle = 1;
-	memory_level->EnabledForActivity = 1;
 	memory_level->UpH = 0;
 	memory_level->DownH = 100;
 	memory_level->VoltageDownH = 0;
@@ -3376,7 +3375,6 @@ static int ci_populate_single_graphic_level(struct amdgpu_device *adev,
 	graphic_level->SpllSpreadSpectrum2 = cpu_to_be32(graphic_level->SpllSpreadSpectrum2);
 	graphic_level->CcPwrDynRm = cpu_to_be32(graphic_level->CcPwrDynRm);
 	graphic_level->CcPwrDynRm1 = cpu_to_be32(graphic_level->CcPwrDynRm1);
-	graphic_level->EnabledForActivity = 1;
 
 	return 0;
 }
@@ -3407,6 +3405,7 @@ static int ci_populate_all_graphic_levels(struct amdgpu_device *adev)
 			pi->smc_state_table.GraphicsLevel[i].DisplayWatermark =
 				PPSMC_DISPLAY_WATERMARK_HIGH;
 	}
+	pi->smc_state_table.GraphicsLevel[0].EnabledForActivity = 1;
 
 	pi->smc_state_table.GraphicsDpmLevelCount = (u8)dpm_table->sclk_table.count;
 	pi->dpm_level_enable_mask.sclk_dpm_enable_mask =
@@ -3450,6 +3449,8 @@ static int ci_populate_all_memory_levels(struct amdgpu_device *adev)
 			return ret;
 	}
 
+	pi->smc_state_table.MemoryLevel[0].EnabledForActivity = 1;
+
 	if ((dpm_table->mclk_table.count >= 2) &&
 	    ((adev->pdev->device == 0x67B0) || (adev->pdev->device == 0x67B1))) {
 		pi->smc_state_table.MemoryLevel[1].MinVddc =
@@ -4381,26 +4382,6 @@ static int ci_dpm_force_performance_level(struct amdgpu_device *adev,
 				}
 			}
 		}
-		if ((!pi->pcie_dpm_key_disabled) &&
-		    pi->dpm_level_enable_mask.pcie_dpm_enable_mask) {
-			levels = 0;
-			tmp = pi->dpm_level_enable_mask.pcie_dpm_enable_mask;
-			while (tmp >>= 1)
-				levels++;
-			if (levels) {
-				ret = ci_dpm_force_state_pcie(adev, level);
-				if (ret)
-					return ret;
-				for (i = 0; i < adev->usec_timeout; i++) {
-					tmp = (RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX_1) &
-					TARGET_AND_CURRENT_PROFILE_INDEX_1__CURR_PCIE_INDEX_MASK) >>
-					TARGET_AND_CURRENT_PROFILE_INDEX_1__CURR_PCIE_INDEX__SHIFT;
-					if (tmp == levels)
-						break;
-					udelay(1);
-				}
-			}
-		}
 	} else if (level == AMDGPU_DPM_FORCED_LEVEL_LOW) {
 		if ((!pi->sclk_dpm_key_disabled) &&
 		    pi->dpm_level_enable_mask.sclk_dpm_enable_mask) {
@@ -5395,30 +5376,6 @@ static int ci_dpm_enable(struct amdgpu_device *adev)
 
 	ci_update_current_ps(adev, boot_ps);
 
-	if (adev->irq.installed &&
-	    amdgpu_is_internal_thermal_sensor(adev->pm.int_thermal_type)) {
-#if 0
-		PPSMC_Result result;
-#endif
-		ret = ci_thermal_set_temperature_range(adev, CISLANDS_TEMP_RANGE_MIN,
-						       CISLANDS_TEMP_RANGE_MAX);
-		if (ret) {
-			DRM_ERROR("ci_thermal_set_temperature_range failed\n");
-			return ret;
-		}
-		amdgpu_irq_get(adev, &adev->pm.dpm.thermal.irq,
-			       AMDGPU_THERMAL_IRQ_LOW_TO_HIGH);
-		amdgpu_irq_get(adev, &adev->pm.dpm.thermal.irq,
-			       AMDGPU_THERMAL_IRQ_HIGH_TO_LOW);
-
-#if 0
-		result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_EnableThermalInterrupt);
-
-		if (result != PPSMC_Result_OK)
-			DRM_DEBUG_KMS("Could not enable thermal interrupts.\n");
-#endif
-	}
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 192ab13e9f05..bddc9ba11495 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -2028,8 +2028,6 @@ static int cik_common_early_init(void *handle)
 
 	adev->asic_funcs = &cik_asic_funcs;
 
-	adev->has_uvd = true;
-
 	adev->rev_id = cik_get_rev_id(adev);
 	adev->external_rev_id = 0xFF;
 	switch (adev->asic_type) {
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 266db15daf2c..d3ac3298fba8 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -261,6 +261,13 @@ static void cik_sdma_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
 }
 
+static void cik_sdma_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+	amdgpu_ring_write(ring, mmHDP_DEBUG0);
+	amdgpu_ring_write(ring, 1);
+}
+
 /**
  * cik_sdma_ring_emit_fence - emit a fence on the DMA ring
  *
@@ -636,8 +643,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
 	ib.ptr[3] = 1;
 	ib.ptr[4] = 0xDEADBEEF;
 	ib.length_dw = 5;
-	r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
-			       NULL, &f);
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 	if (r)
 		goto err1;
 
@@ -663,7 +669,8 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
 
 err1:
 	fence_put(f);
-	amdgpu_ib_free(adev, &ib);
+	amdgpu_ib_free(adev, &ib, NULL);
+	fence_put(f);
 err0:
 	amdgpu_wb_free(adev, index);
 	return r;
@@ -816,6 +823,30 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
 }
 
 /**
+ * cik_sdma_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void cik_sdma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+	uint32_t seq = ring->fence_drv.sync_seq;
+	uint64_t addr = ring->fence_drv.gpu_addr;
+
+	/* wait for idle */
+	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0,
+					    SDMA_POLL_REG_MEM_EXTRA_OP(0) |
+					    SDMA_POLL_REG_MEM_EXTRA_FUNC(3) | /* equal */
+					    SDMA_POLL_REG_MEM_EXTRA_M));
+	amdgpu_ring_write(ring, addr & 0xfffffffc);
+	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+	amdgpu_ring_write(ring, seq); /* reference */
+	amdgpu_ring_write(ring, 0xfffffff); /* mask */
+	amdgpu_ring_write(ring, (0xfff << 16) | 4); /* retry count, poll interval */
+}
+
+/**
  * cik_sdma_ring_emit_vm_flush - cik vm flush using sDMA
  *
  * @ring: amdgpu_ring pointer
@@ -1270,8 +1301,10 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
 	.parse_cs = NULL,
 	.emit_ib = cik_sdma_ring_emit_ib,
 	.emit_fence = cik_sdma_ring_emit_fence,
+	.emit_pipeline_sync = cik_sdma_ring_emit_pipeline_sync,
 	.emit_vm_flush = cik_sdma_ring_emit_vm_flush,
 	.emit_hdp_flush = cik_sdma_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = cik_sdma_ring_emit_hdp_invalidate,
 	.test_ring = cik_sdma_ring_test_ring,
 	.test_ib = cik_sdma_ring_test_ib,
 	.insert_nop = cik_sdma_ring_insert_nop,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index e3ff809a0cae..6de2ce535e37 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -1668,6 +1668,9 @@ static void dce_v10_0_audio_fini(struct amdgpu_device *adev)
 {
 	int i;
 
+	if (!amdgpu_audio)
+		return;
+
 	if (!adev->mode_info.audio.enabled)
 		return;
 
@@ -1973,7 +1976,7 @@ static void dce_v10_0_afmt_enable(struct drm_encoder *encoder, bool enable)
 		  enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
 }
 
-static void dce_v10_0_afmt_init(struct amdgpu_device *adev)
+static int dce_v10_0_afmt_init(struct amdgpu_device *adev)
 {
 	int i;
 
@@ -1986,8 +1989,16 @@ static void dce_v10_0_afmt_init(struct amdgpu_device *adev)
 		if (adev->mode_info.afmt[i]) {
 			adev->mode_info.afmt[i]->offset = dig_offsets[i];
 			adev->mode_info.afmt[i]->id = i;
+		} else {
+			int j;
+			for (j = 0; j < i; j++) {
+				kfree(adev->mode_info.afmt[j]);
+				adev->mode_info.afmt[j] = NULL;
+			}
+			return -ENOMEM;
 		}
 	}
+	return 0;
 }
 
 static void dce_v10_0_afmt_fini(struct amdgpu_device *adev)
@@ -2064,8 +2075,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
 	if (atomic) {
 		amdgpu_fb = to_amdgpu_framebuffer(fb);
 		target_fb = fb;
-	}
-	else {
+	} else {
 		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
 		target_fb = crtc->primary->fb;
 	}
@@ -2079,9 +2089,9 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
 	if (unlikely(r != 0))
 		return r;
 
-	if (atomic)
+	if (atomic) {
 		fb_location = amdgpu_bo_gpu_offset(rbo);
-	else {
+	} else {
 		r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location);
 		if (unlikely(r != 0)) {
 			amdgpu_bo_unreserve(rbo);
@@ -2700,13 +2710,13 @@ static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode)
 		type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
 		amdgpu_irq_update(adev, &adev->crtc_irq, type);
 		amdgpu_irq_update(adev, &adev->pageflip_irq, type);
-		drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id);
+		drm_vblank_on(dev, amdgpu_crtc->crtc_id);
 		dce_v10_0_crtc_load_lut(crtc);
 		break;
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
 	case DRM_MODE_DPMS_OFF:
-		drm_vblank_pre_modeset(dev, amdgpu_crtc->crtc_id);
+		drm_vblank_off(dev, amdgpu_crtc->crtc_id);
 		if (amdgpu_crtc->enabled) {
 			dce_v10_0_vga_enable(crtc, true);
 			amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
@@ -2980,8 +2990,6 @@ static int dce_v10_0_sw_init(void *handle)
 	if (r)
 		return r;
 
-	adev->mode_info.mode_config_initialized = true;
-
 	adev->ddev->mode_config.funcs = &amdgpu_mode_funcs;
 
 	adev->ddev->mode_config.max_width = 16384;
@@ -3012,7 +3020,9 @@ static int dce_v10_0_sw_init(void *handle)
 		return -EINVAL;
 
 	/* setup afmt */
-	dce_v10_0_afmt_init(adev);
+	r = dce_v10_0_afmt_init(adev);
+	if (r)
+		return r;
 
 	r = dce_v10_0_audio_init(adev);
 	if (r)
@@ -3020,7 +3030,8 @@ static int dce_v10_0_sw_init(void *handle)
 
 	drm_kms_helper_poll_init(adev->ddev);
 
-	return r;
+	adev->mode_info.mode_config_initialized = true;
+	return 0;
 }
 
 static int dce_v10_0_sw_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 6b6c9b6879ae..e9ccc6b787f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -1658,6 +1658,9 @@ static void dce_v11_0_audio_fini(struct amdgpu_device *adev)
 {
 	int i;
 
+	if (!amdgpu_audio)
+		return;
+
 	if (!adev->mode_info.audio.enabled)
 		return;
 
@@ -1963,7 +1966,7 @@ static void dce_v11_0_afmt_enable(struct drm_encoder *encoder, bool enable)
 		  enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
 }
 
-static void dce_v11_0_afmt_init(struct amdgpu_device *adev)
+static int dce_v11_0_afmt_init(struct amdgpu_device *adev)
 {
 	int i;
 
@@ -1976,8 +1979,16 @@ static void dce_v11_0_afmt_init(struct amdgpu_device *adev)
 		if (adev->mode_info.afmt[i]) {
 			adev->mode_info.afmt[i]->offset = dig_offsets[i];
 			adev->mode_info.afmt[i]->id = i;
+		} else {
+			int j;
+			for (j = 0; j < i; j++) {
+				kfree(adev->mode_info.afmt[j]);
+				adev->mode_info.afmt[j] = NULL;
+			}
+			return -ENOMEM;
 		}
 	}
+	return 0;
 }
 
 static void dce_v11_0_afmt_fini(struct amdgpu_device *adev)
@@ -2054,8 +2065,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
 	if (atomic) {
 		amdgpu_fb = to_amdgpu_framebuffer(fb);
 		target_fb = fb;
-	}
-	else {
+	} else {
 		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
 		target_fb = crtc->primary->fb;
 	}
@@ -2069,9 +2079,9 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
 	if (unlikely(r != 0))
 		return r;
 
-	if (atomic)
+	if (atomic) {
 		fb_location = amdgpu_bo_gpu_offset(rbo);
-	else {
+	} else {
 		r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location);
 		if (unlikely(r != 0)) {
 			amdgpu_bo_unreserve(rbo);
@@ -2691,13 +2701,13 @@ static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode)
 		type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
 		amdgpu_irq_update(adev, &adev->crtc_irq, type);
 		amdgpu_irq_update(adev, &adev->pageflip_irq, type);
-		drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id);
+		drm_vblank_on(dev, amdgpu_crtc->crtc_id);
 		dce_v11_0_crtc_load_lut(crtc);
 		break;
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
 	case DRM_MODE_DPMS_OFF:
-		drm_vblank_pre_modeset(dev, amdgpu_crtc->crtc_id);
+		drm_vblank_off(dev, amdgpu_crtc->crtc_id);
 		if (amdgpu_crtc->enabled) {
 			dce_v11_0_vga_enable(crtc, true);
 			amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
@@ -2961,7 +2971,7 @@ static int dce_v11_0_sw_init(void *handle)
 	for (i = 0; i < adev->mode_info.num_crtc; i++) {
 		r = amdgpu_irq_add_id(adev, i + 1, &adev->crtc_irq);
 		if (r)
-		return r;
+			return r;
 	}
 
 	for (i = 8; i < 20; i += 2) {
@@ -2973,9 +2983,7 @@ static int dce_v11_0_sw_init(void *handle)
 	/* HPD hotplug */
 	r = amdgpu_irq_add_id(adev, 42, &adev->hpd_irq);
 	if (r)
-	return r;
-
-	adev->mode_info.mode_config_initialized = true;
+		return r;
 
 	adev->ddev->mode_config.funcs = &amdgpu_mode_funcs;
 
@@ -2994,6 +3002,7 @@ static int dce_v11_0_sw_init(void *handle)
 	adev->ddev->mode_config.max_width = 16384;
 	adev->ddev->mode_config.max_height = 16384;
 
+
 	/* allocate crtcs */
 	for (i = 0; i < adev->mode_info.num_crtc; i++) {
 		r = dce_v11_0_crtc_init(adev, i);
@@ -3007,7 +3016,9 @@ static int dce_v11_0_sw_init(void *handle)
 		return -EINVAL;
 
 	/* setup afmt */
-	dce_v11_0_afmt_init(adev);
+	r = dce_v11_0_afmt_init(adev);
+	if (r)
+		return r;
 
 	r = dce_v11_0_audio_init(adev);
 	if (r)
@@ -3015,7 +3026,8 @@ static int dce_v11_0_sw_init(void *handle)
 
 	drm_kms_helper_poll_init(adev->ddev);
 
-	return r;
+	adev->mode_info.mode_config_initialized = true;
+	return 0;
 }
 
 static int dce_v11_0_sw_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 56bea36a6b18..e56b55d8c280 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -1639,6 +1639,9 @@ static void dce_v8_0_audio_fini(struct amdgpu_device *adev)
 {
 	int i;
 
+	if (!amdgpu_audio)
+		return;
+
 	if (!adev->mode_info.audio.enabled)
 		return;
 
@@ -1910,7 +1913,7 @@ static void dce_v8_0_afmt_enable(struct drm_encoder *encoder, bool enable)
 		  enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
 }
 
-static void dce_v8_0_afmt_init(struct amdgpu_device *adev)
+static int dce_v8_0_afmt_init(struct amdgpu_device *adev)
 {
 	int i;
 
@@ -1923,8 +1926,16 @@ static void dce_v8_0_afmt_init(struct amdgpu_device *adev)
 		if (adev->mode_info.afmt[i]) {
 			adev->mode_info.afmt[i]->offset = dig_offsets[i];
 			adev->mode_info.afmt[i]->id = i;
+		} else {
+			int j;
+			for (j = 0; j < i; j++) {
+				kfree(adev->mode_info.afmt[j]);
+				adev->mode_info.afmt[j] = NULL;
+			}
+			return -ENOMEM;
 		}
 	}
+	return 0;
 }
 
 static void dce_v8_0_afmt_fini(struct amdgpu_device *adev)
@@ -2001,8 +2012,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
 	if (atomic) {
 		amdgpu_fb = to_amdgpu_framebuffer(fb);
 		target_fb = fb;
-	}
-	else {
+	} else {
 		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
 		target_fb = crtc->primary->fb;
 	}
@@ -2016,9 +2026,9 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
 	if (unlikely(r != 0))
 		return r;
 
-	if (atomic)
+	if (atomic) {
 		fb_location = amdgpu_bo_gpu_offset(rbo);
-	else {
+	} else {
 		r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location);
 		if (unlikely(r != 0)) {
 			amdgpu_bo_unreserve(rbo);
@@ -2612,13 +2622,13 @@ static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode)
 		type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
 		amdgpu_irq_update(adev, &adev->crtc_irq, type);
 		amdgpu_irq_update(adev, &adev->pageflip_irq, type);
-		drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id);
+		drm_vblank_on(dev, amdgpu_crtc->crtc_id);
 		dce_v8_0_crtc_load_lut(crtc);
 		break;
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
 	case DRM_MODE_DPMS_OFF:
-		drm_vblank_pre_modeset(dev, amdgpu_crtc->crtc_id);
+		drm_vblank_off(dev, amdgpu_crtc->crtc_id);
 		if (amdgpu_crtc->enabled) {
 			dce_v8_0_vga_enable(crtc, true);
 			amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
@@ -2890,8 +2900,6 @@ static int dce_v8_0_sw_init(void *handle)
 	if (r)
 		return r;
 
-	adev->mode_info.mode_config_initialized = true;
-
 	adev->ddev->mode_config.funcs = &amdgpu_mode_funcs;
 
 	adev->ddev->mode_config.max_width = 16384;
@@ -2922,7 +2930,9 @@ static int dce_v8_0_sw_init(void *handle)
 		return -EINVAL;
 
 	/* setup afmt */
-	dce_v8_0_afmt_init(adev);
+	r = dce_v8_0_afmt_init(adev);
+	if (r)
+		return r;
 
 	r = dce_v8_0_audio_init(adev);
 	if (r)
@@ -2930,7 +2940,8 @@ static int dce_v8_0_sw_init(void *handle)
 
 	drm_kms_helper_poll_init(adev->ddev);
 
-	return r;
+	adev->mode_info.mode_config_initialized = true;
+	return 0;
 }
 
 static int dce_v8_0_sw_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 4411b94775db..bb8709066fd8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -1925,6 +1925,25 @@ static void gfx_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 }
 
 /**
+ * gfx_v7_0_ring_emit_hdp_invalidate - emit an hdp invalidate on the cp
+ *
+ * @ring: amdgpu_ring pointer; the invalidate packet is
+ *        written to this ring
+ *
+ * Emits an hdp invalidate on the cp.
+ */
+static void gfx_v7_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+				 WRITE_DATA_DST_SEL(0) |
+				 WR_CONFIRM));
+	amdgpu_ring_write(ring, mmHDP_DEBUG0);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, 1);
+}
+
+/**
  * gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring
  *
  * @adev: amdgpu_device pointer
@@ -2117,8 +2136,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
 	ib.ptr[2] = 0xDEADBEEF;
 	ib.length_dw = 3;
 
-	r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
-			       NULL, &f);
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 	if (r)
 		goto err2;
 
@@ -2145,7 +2163,8 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
 
 err2:
 	fence_put(f);
-	amdgpu_ib_free(adev, &ib);
+	amdgpu_ib_free(adev, &ib, NULL);
+	fence_put(f);
 err1:
 	amdgpu_gfx_scratch_free(adev, scratch);
 	return r;
@@ -3023,6 +3042,26 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * gfx_v7_0_ring_emit_pipeline_sync - sync the command pipeline using the CP
+ *
+ * @ring: the ring to emit the commands to
+ *
+ * Sync the command pipeline with the PFP. E.g. wait for everything
+ * to be completed.
+ */
+static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
+	if (usepfp) {
+		/* sync CE with ME to prevent the CE from fetching the CEIB before the context switch is done */
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		amdgpu_ring_write(ring, 0);
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		amdgpu_ring_write(ring, 0);
+	}
+}
+
 /*
  * vm
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -3054,14 +3093,6 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, 0xffffffff);
 	amdgpu_ring_write(ring, 4); /* poll interval */
 
-	if (usepfp) {
-		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
-		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-		amdgpu_ring_write(ring, 0);
-		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-		amdgpu_ring_write(ring, 0);
-	}
-
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
 				 WRITE_DATA_DST_SEL(0)));
@@ -5142,9 +5173,11 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
 	.parse_cs = NULL,
 	.emit_ib = gfx_v7_0_ring_emit_ib_gfx,
 	.emit_fence = gfx_v7_0_ring_emit_fence_gfx,
+	.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
 	.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
 	.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
 	.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = gfx_v7_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v7_0_ring_test_ring,
 	.test_ib = gfx_v7_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
@@ -5158,9 +5191,11 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
 	.parse_cs = NULL,
 	.emit_ib = gfx_v7_0_ring_emit_ib_compute,
 	.emit_fence = gfx_v7_0_ring_emit_fence_compute,
+	.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
 	.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
 	.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
 	.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = gfx_v7_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v7_0_ring_test_ring,
 	.test_ib = gfx_v7_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 1b85c001f860..f0c7b3596480 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -706,8 +706,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
 	ib.ptr[2] = 0xDEADBEEF;
 	ib.length_dw = 3;
 
-	r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
-			       NULL, &f);
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 	if (r)
 		goto err2;
 
@@ -733,7 +732,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
 	}
 err2:
 	fence_put(f);
-	amdgpu_ib_free(adev, &ib);
+	amdgpu_ib_free(adev, &ib, NULL);
+	fence_put(f);
 err1:
 	amdgpu_gfx_scratch_free(adev, scratch);
 	return r;
@@ -1262,8 +1262,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
 
 	/* shedule the ib on the ring */
-	r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
-			       NULL, &f);
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 	if (r) {
 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
 		goto fail;
@@ -1291,7 +1290,8 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 
 fail:
 	fence_put(f);
-	amdgpu_ib_free(adev, &ib);
+	amdgpu_ib_free(adev, &ib, NULL);
+	fence_put(f);
 
 	return r;
 }
@@ -4589,6 +4589,18 @@ static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, 0x20); /* poll interval */
 }
 
+static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+				 WRITE_DATA_DST_SEL(0) |
+				 WR_CONFIRM));
+	amdgpu_ring_write(ring, mmHDP_DEBUG0);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, 1);
+
+}
+
 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 				  struct amdgpu_ib *ib)
 {
@@ -4682,8 +4694,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
 
 }
 
-static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
-					unsigned vm_id, uint64_t pd_addr)
+static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 {
 	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
 	uint32_t seq = ring->fence_drv.sync_seq;
@@ -4706,6 +4717,12 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
 		amdgpu_ring_write(ring, 0);
 	}
+}
+
+static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+					unsigned vm_id, uint64_t pd_addr)
+{
+	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
@@ -5028,9 +5045,11 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 	.parse_cs = NULL,
 	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
 	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
+	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v8_0_ring_test_ring,
 	.test_ib = gfx_v8_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
@@ -5044,9 +5063,11 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
 	.parse_cs = NULL,
 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
 	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
+	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v8_0_ring_test_ring,
 	.test_ib = gfx_v8_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 711840a23bd3..05b0353d3880 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -339,7 +339,7 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
 	WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
 
 	tmp = RREG32(mmHDP_MISC_CNTL);
-	tmp = REG_SET_FIELD(tmp, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
+	tmp = REG_SET_FIELD(tmp, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 0);
 	WREG32(mmHDP_MISC_CNTL, tmp);
 
 	tmp = RREG32(mmHDP_HOST_PATH_CNTL);
@@ -903,14 +903,6 @@ static int gmc_v7_0_early_init(void *handle)
 	gmc_v7_0_set_gart_funcs(adev);
 	gmc_v7_0_set_irq_funcs(adev);
 
-	if (adev->flags & AMD_IS_APU) {
-		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
-	} else {
-		u32 tmp = RREG32(mmMC_SEQ_MISC0);
-		tmp &= MC_SEQ_MISC0__MT__MASK;
-		adev->mc.vram_type = gmc_v7_0_convert_vram_type(tmp);
-	}
-
 	return 0;
 }
 
@@ -927,6 +919,14 @@ static int gmc_v7_0_sw_init(void *handle)
 	int dma_bits;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (adev->flags & AMD_IS_APU) {
+		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+	} else {
+		u32 tmp = RREG32(mmMC_SEQ_MISC0);
+		tmp &= MC_SEQ_MISC0__MT__MASK;
+		adev->mc.vram_type = gmc_v7_0_convert_vram_type(tmp);
+	}
+
 	r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault);
 	if (r)
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 757803ae7c4a..02deb3229405 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -386,7 +386,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
 	WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
 
 	tmp = RREG32(mmHDP_MISC_CNTL);
-	tmp = REG_SET_FIELD(tmp, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
+	tmp = REG_SET_FIELD(tmp, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 0);
 	WREG32(mmHDP_MISC_CNTL, tmp);
 
 	tmp = RREG32(mmHDP_HOST_PATH_CNTL);
@@ -863,14 +863,6 @@ static int gmc_v8_0_early_init(void *handle)
 	gmc_v8_0_set_gart_funcs(adev);
 	gmc_v8_0_set_irq_funcs(adev);
 
-	if (adev->flags & AMD_IS_APU) {
-		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
-	} else {
-		u32 tmp = RREG32(mmMC_SEQ_MISC0);
-		tmp &= MC_SEQ_MISC0__MT__MASK;
-		adev->mc.vram_type = gmc_v8_0_convert_vram_type(tmp);
-	}
-
 	return 0;
 }
 
@@ -881,12 +873,27 @@ static int gmc_v8_0_late_init(void *handle)
 	return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
 }
 
+#define mmMC_SEQ_MISC0_FIJI 0xA71
+
 static int gmc_v8_0_sw_init(void *handle)
 {
 	int r;
 	int dma_bits;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (adev->flags & AMD_IS_APU) {
+		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+	} else {
+		u32 tmp;
+
+		if (adev->asic_type == CHIP_FIJI)
+			tmp = RREG32(mmMC_SEQ_MISC0_FIJI);
+		else
+			tmp = RREG32(mmMC_SEQ_MISC0);
+		tmp &= MC_SEQ_MISC0__MT__MASK;
+		adev->mc.vram_type = gmc_v8_0_convert_vram_type(tmp);
+	}
+
 	r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault);
 	if (r)
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index dddb8d6a81f3..6e0a86a563f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -32,8 +32,8 @@
 #include "oss/oss_2_4_d.h"
 #include "oss/oss_2_4_sh_mask.h"
 
-#include "gmc/gmc_8_1_d.h"
-#include "gmc/gmc_8_1_sh_mask.h"
+#include "gmc/gmc_7_1_d.h"
+#include "gmc/gmc_7_1_sh_mask.h"
 
 #include "gca/gfx_8_0_d.h"
 #include "gca/gfx_8_0_enum.h"
@@ -300,6 +300,13 @@ static void sdma_v2_4_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
 }
 
+static void sdma_v2_4_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+	amdgpu_ring_write(ring, mmHDP_DEBUG0);
+	amdgpu_ring_write(ring, 1);
+}
 /**
  * sdma_v2_4_ring_emit_fence - emit a fence on the DMA ring
  *
@@ -694,8 +701,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
 	ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
 	ib.length_dw = 8;
 
-	r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
-			       NULL, &f);
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 	if (r)
 		goto err1;
 
@@ -721,7 +727,8 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
 
 err1:
 	fence_put(f);
-	amdgpu_ib_free(adev, &ib);
+	amdgpu_ib_free(adev, &ib, NULL);
+	fence_put(f);
 err0:
 	amdgpu_wb_free(adev, index);
 	return r;
@@ -874,6 +881,31 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
 }
 
 /**
+ * sdma_v2_4_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+	uint32_t seq = ring->fence_drv.sync_seq;
+	uint64_t addr = ring->fence_drv.gpu_addr;
+
+	/* wait for idle */
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
+	amdgpu_ring_write(ring, addr & 0xfffffffc);
+	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+	amdgpu_ring_write(ring, seq); /* reference */
+	amdgpu_ring_write(ring, 0xfffffff); /* mask */
+	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
+
+/**
  * sdma_v2_4_ring_emit_vm_flush - cik vm flush using sDMA
  *
  * @ring: amdgpu_ring pointer
@@ -1274,8 +1306,10 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
 	.parse_cs = NULL,
 	.emit_ib = sdma_v2_4_ring_emit_ib,
 	.emit_fence = sdma_v2_4_ring_emit_fence,
+	.emit_pipeline_sync = sdma_v2_4_ring_emit_pipeline_sync,
 	.emit_vm_flush = sdma_v2_4_ring_emit_vm_flush,
 	.emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = sdma_v2_4_ring_emit_hdp_invalidate,
 	.test_ring = sdma_v2_4_ring_test_ring,
 	.test_ib = sdma_v2_4_ring_test_ib,
 	.insert_nop = sdma_v2_4_ring_insert_nop,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 19e02f7a06f3..8c8ca98dd129 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -410,6 +410,14 @@ static void sdma_v3_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
 }
 
+static void sdma_v3_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+	amdgpu_ring_write(ring, mmHDP_DEBUG0);
+	amdgpu_ring_write(ring, 1);
+}
+
 /**
  * sdma_v3_0_ring_emit_fence - emit a fence on the DMA ring
  *
@@ -845,8 +853,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
 	ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
 	ib.length_dw = 8;
 
-	r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
-			       NULL, &f);
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 	if (r)
 		goto err1;
 
@@ -871,7 +878,8 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
 	}
 err1:
 	fence_put(f);
-	amdgpu_ib_free(adev, &ib);
+	amdgpu_ib_free(adev, &ib, NULL);
+	fence_put(f);
 err0:
 	amdgpu_wb_free(adev, index);
 	return r;
@@ -1024,6 +1032,31 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
 }
 
 /**
+ * sdma_v3_0_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void sdma_v3_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+	uint32_t seq = ring->fence_drv.sync_seq;
+	uint64_t addr = ring->fence_drv.gpu_addr;
+
+	/* wait for idle */
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
+	amdgpu_ring_write(ring, addr & 0xfffffffc);
+	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+	amdgpu_ring_write(ring, seq); /* reference */
+	amdgpu_ring_write(ring, 0xfffffff); /* mask */
+	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
+
+/**
  * sdma_v3_0_ring_emit_vm_flush - cik vm flush using sDMA
  *
  * @ring: amdgpu_ring pointer
@@ -1541,8 +1574,10 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
 	.parse_cs = NULL,
 	.emit_ib = sdma_v3_0_ring_emit_ib,
 	.emit_fence = sdma_v3_0_ring_emit_fence,
+	.emit_pipeline_sync = sdma_v3_0_ring_emit_pipeline_sync,
 	.emit_vm_flush = sdma_v3_0_ring_emit_vm_flush,
 	.emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = sdma_v3_0_ring_emit_hdp_invalidate,
 	.test_ring = sdma_v3_0_ring_test_ring,
 	.test_ib = sdma_v3_0_ring_test_ib,
 	.insert_nop = sdma_v3_0_ring_insert_nop,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index c606ccb38d8b..cb463753115b 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -224,11 +224,11 @@ static int uvd_v4_2_suspend(void *handle)
 	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	r = amdgpu_uvd_suspend(adev);
+	r = uvd_v4_2_hw_fini(adev);
 	if (r)
 		return r;
 
-	r = uvd_v4_2_hw_fini(adev);
+	r = amdgpu_uvd_suspend(adev);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index e3c852d9d79a..16476d80f475 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -220,11 +220,11 @@ static int uvd_v5_0_suspend(void *handle)
 	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	r = amdgpu_uvd_suspend(adev);
+	r = uvd_v5_0_hw_fini(adev);
 	if (r)
 		return r;
 
-	r = uvd_v5_0_hw_fini(adev);
+	r = amdgpu_uvd_suspend(adev);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 3375e614ac67..d49379145ef2 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -214,15 +214,16 @@ static int uvd_v6_0_suspend(void *handle)
 	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	r = uvd_v6_0_hw_fini(adev);
+	if (r)
+		return r;
+
 	/* Skip this for APU for now */
 	if (!(adev->flags & AMD_IS_APU)) {
 		r = amdgpu_uvd_suspend(adev);
 		if (r)
 			return r;
 	}
-	r = uvd_v6_0_hw_fini(adev);
-	if (r)
-		return r;
 
 	return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index b72cf063df1a..1c120efa292c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -1071,26 +1071,22 @@ static int vi_common_early_init(void *handle)
 	adev->external_rev_id = 0xFF;
 	switch (adev->asic_type) {
 	case CHIP_TOPAZ:
-		adev->has_uvd = false;
 		adev->cg_flags = 0;
 		adev->pg_flags = 0;
 		adev->external_rev_id = 0x1;
 		break;
 	case CHIP_FIJI:
-		adev->has_uvd = true;
 		adev->cg_flags = 0;
 		adev->pg_flags = 0;
 		adev->external_rev_id = adev->rev_id + 0x3c;
 		break;
 	case CHIP_TONGA:
-		adev->has_uvd = true;
 		adev->cg_flags = 0;
 		adev->pg_flags = 0;
 		adev->external_rev_id = adev->rev_id + 0x14;
 		break;
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
-		adev->has_uvd = true;
 		adev->cg_flags = 0;
 		adev->pg_flags = 0;
 		adev->external_rev_id = adev->rev_id + 0x1;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index d2b49c026cf6..07ac724e3ec9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -107,7 +107,7 @@ static int kfd_open(struct inode *inode, struct file *filep)
 	if (iminor(inode) != 0)
 		return -ENODEV;
 
-	is_32bit_user_mode = is_compat_task();
+	is_32bit_user_mode = in_compat_syscall();
 
 	if (is_32bit_user_mode == true) {
 		dev_warn(kfd_device,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index a902ae037398..ac005796b71c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -311,7 +311,7 @@ static struct kfd_process *create_process(const struct task_struct *thread)
 		goto err_process_pqm_init;
 
 	/* init process apertures*/
-	process->is_32bit_user_mode = is_compat_task();
+	process->is_32bit_user_mode = in_compat_syscall();
 	if (kfd_init_apertures(process) != 0)
 		goto err_init_apretures;
 
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_7_2_enum.h b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_7_2_enum.h
index 9d4347dd6125..dfe78799100d 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_7_2_enum.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_7_2_enum.h
@@ -6225,6 +6225,12 @@ typedef enum TCC_CACHE_POLICIES {
 	TCC_CACHE_POLICY_STREAM                          = 0x1,
 	TCC_CACHE_POLICY_BYPASS                          = 0x2,
 } TCC_CACHE_POLICIES;
+typedef enum MTYPE {
+	MTYPE_NC_NV                                      = 0x0,
+	MTYPE_NC                                         = 0x1,
+	MTYPE_CC                                         = 0x2,
+	MTYPE_UC                                         = 0x3,
+} MTYPE;
 typedef enum PERFMON_COUNTER_MODE {
 	PERFMON_COUNTER_MODE_ACCUM                       = 0x0,
 	PERFMON_COUNTER_MODE_ACTIVE_CYCLES               = 0x1,
diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h
index aec38fc3834f..ab84d4947247 100644
--- a/drivers/gpu/drm/amd/include/cgs_common.h
+++ b/drivers/gpu/drm/amd/include/cgs_common.h
@@ -589,6 +589,8 @@ typedef int(*cgs_get_active_displays_info)(
 					void *cgs_device,
 					struct cgs_display_info *info);
 
+typedef int (*cgs_notify_dpm_enabled)(void *cgs_device, bool enabled);
+
 typedef int (*cgs_call_acpi_method)(void *cgs_device,
 					uint32_t acpi_method,
 					uint32_t acpi_function,
@@ -644,6 +646,8 @@ struct cgs_ops {
 	cgs_set_clockgating_state set_clockgating_state;
 	/* display manager */
 	cgs_get_active_displays_info get_active_displays_info;
+	/* notify dpm enabled */
+	cgs_notify_dpm_enabled notify_dpm_enabled;
 	/* ACPI */
 	cgs_call_acpi_method call_acpi_method;
 	/* get system info */
@@ -734,8 +738,12 @@ struct cgs_device
 	CGS_CALL(set_powergating_state, dev, block_type, state)
 #define cgs_set_clockgating_state(dev, block_type, state)	\
 	CGS_CALL(set_clockgating_state, dev, block_type, state)
+#define cgs_notify_dpm_enabled(dev, enabled)	\
+	CGS_CALL(notify_dpm_enabled, dev, enabled)
+
 #define cgs_get_active_displays_info(dev, info)	\
 	CGS_CALL(get_active_displays_info, dev, info)
+
 #define cgs_call_acpi_method(dev, acpi_method, acpi_function, pintput, poutput, output_count, input_size, output_size)	\
 	CGS_CALL(call_acpi_method, dev, acpi_method, acpi_function, pintput, poutput, output_count, input_size, output_size)
 #define cgs_query_system_info(dev, sys_info)	\
diff --git a/drivers/gpu/drm/amd/powerplay/Makefile b/drivers/gpu/drm/amd/powerplay/Makefile
index e195bf59da86..043e6ebab575 100644
--- a/drivers/gpu/drm/amd/powerplay/Makefile
+++ b/drivers/gpu/drm/amd/powerplay/Makefile
@@ -1,17 +1,17 @@
 
 subdir-ccflags-y += -Iinclude/drm  \
-		-Idrivers/gpu/drm/amd/powerplay/inc/  \
-		-Idrivers/gpu/drm/amd/include/asic_reg  \
-		-Idrivers/gpu/drm/amd/include  \
-		-Idrivers/gpu/drm/amd/powerplay/smumgr\
-		-Idrivers/gpu/drm/amd/powerplay/hwmgr \
-		-Idrivers/gpu/drm/amd/powerplay/eventmgr
+		-I$(FULL_AMD_PATH)/powerplay/inc/  \
+		-I$(FULL_AMD_PATH)/include/asic_reg  \
+		-I$(FULL_AMD_PATH)/include  \
+		-I$(FULL_AMD_PATH)/powerplay/smumgr\
+		-I$(FULL_AMD_PATH)/powerplay/hwmgr \
+		-I$(FULL_AMD_PATH)/powerplay/eventmgr
 
 AMD_PP_PATH = ../powerplay
 
 PP_LIBS = smumgr hwmgr eventmgr
 
-AMD_POWERPLAY = $(addsuffix /Makefile,$(addprefix drivers/gpu/drm/amd/powerplay/,$(PP_LIBS)))
+AMD_POWERPLAY = $(addsuffix /Makefile,$(addprefix $(FULL_AMD_PATH)/powerplay/,$(PP_LIBS)))
 
 include $(AMD_POWERPLAY)
 
diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c
index 6b52c78cb404..56856a2864d1 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c
@@ -137,14 +137,14 @@ static const pem_event_action *resume_event[] = {
 	reset_display_configCounter_tasks,
 	update_dal_configuration_tasks,
 	vari_bright_resume_tasks,
-	block_adjust_power_state_tasks,
 	setup_asic_tasks,
 	enable_stutter_mode_tasks, /*must do this in boot state and before SMC is started */
 	enable_dynamic_state_management_tasks,
 	enable_clock_power_gatings_tasks,
 	enable_disable_bapm_tasks,
 	initialize_thermal_controller_tasks,
-	reset_boot_state_tasks,
+	get_2d_performance_state_tasks,
+	set_performance_state_tasks,
 	adjust_power_state_tasks,
 	enable_disable_fps_tasks,
 	notify_hw_power_source_tasks,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
index b8d6a82c1be2..5682490337e3 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
@@ -241,6 +241,11 @@ static int cz_initialize_dpm_defaults(struct pp_hwmgr *hwmgr)
 	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 					PHM_PlatformCaps_DynamicUVDState);
 
+	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_UVDDPM);
+	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_VCEDPM);
+
 	cz_hwmgr->cc6_settings.cpu_cc6_disable = false;
 	cz_hwmgr->cc6_settings.cpu_pstate_disable = false;
 	cz_hwmgr->cc6_settings.nb_pstate_switch_disable = false;
@@ -744,8 +749,9 @@ static int cz_tf_update_sclk_limit(struct pp_hwmgr *hwmgr,
 		cz_hwmgr->sclk_dpm.soft_max_clk  = table->entries[table->count - 1].clk;
 
 	clock = hwmgr->display_config.min_core_set_clock;
+;
 	if (clock == 0)
-		printk(KERN_ERR "[ powerplay ] min_core_set_clock not set\n");
+		printk(KERN_INFO "[ powerplay ] min_core_set_clock not set\n");
 
 	if (cz_hwmgr->sclk_dpm.hard_min_clk != clock) {
 		cz_hwmgr->sclk_dpm.hard_min_clk = clock;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
index 5cca2ecc6bea..89f31bc5b68b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
@@ -2389,6 +2389,7 @@ static int fiji_populate_smc_vce_level(struct pp_hwmgr *hwmgr,
 
 	for(count = 0; count < table->VceLevelCount; count++) {
 		table->VceLevel[count].Frequency = mm_table->entries[count].eclk;
+		table->VceLevel[count].MinVoltage = 0;
 		table->VceLevel[count].MinVoltage |=
 				(mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT;
 		table->VceLevel[count].MinVoltage |=
@@ -2465,6 +2466,7 @@ static int fiji_populate_smc_samu_level(struct pp_hwmgr *hwmgr,
 
 	for (count = 0; count < table->SamuLevelCount; count++) {
 		/* not sure whether we need evclk or not */
+		table->SamuLevel[count].MinVoltage = 0;
 		table->SamuLevel[count].Frequency = mm_table->entries[count].samclock;
 		table->SamuLevel[count].MinVoltage |= (mm_table->entries[count].vddc *
 				VOLTAGE_SCALE) << VDDC_SHIFT;
@@ -2562,6 +2564,7 @@ static int fiji_populate_smc_uvd_level(struct pp_hwmgr *hwmgr,
 	table->UvdBootLevel = 0;
 
 	for (count = 0; count < table->UvdLevelCount; count++) {
+		table->UvdLevel[count].MinVoltage = 0;
 		table->UvdLevel[count].VclkFrequency = mm_table->entries[count].vclk;
 		table->UvdLevel[count].DclkFrequency = mm_table->entries[count].dclk;
 		table->UvdLevel[count].MinVoltage |= (mm_table->entries[count].vddc *
@@ -2900,6 +2903,8 @@ static int fiji_init_smc_table(struct pp_hwmgr *hwmgr)
 	if(FIJI_VOLTAGE_CONTROL_NONE != data->voltage_control)
 		fiji_populate_smc_voltage_tables(hwmgr, table);
 
+	table->SystemFlags = 0;
+
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_AutomaticDCTransition))
 		table->SystemFlags |= PPSMC_SYSTEMFLAG_GPIO_DC;
@@ -2997,6 +3002,7 @@ static int fiji_init_smc_table(struct pp_hwmgr *hwmgr)
 	table->MemoryThermThrottleEnable = 1;
 	table->PCIeBootLinkLevel = 0;      /* 0:Gen1 1:Gen2 2:Gen3*/
 	table->PCIeGenInterval = 1;
+	table->VRConfig = 0;
 
 	result = fiji_populate_vr_config(hwmgr, table);
 	PP_ASSERT_WITH_CODE(0 == result,
@@ -4275,7 +4281,6 @@ static int fiji_populate_and_upload_sclk_mclk_dpm_levels(
 	if (data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK) {
 		dpm_table->mclk_table.dpm_levels
 			[dpm_table->mclk_table.count - 1].value = mclk;
-
 		if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
 				PHM_PlatformCaps_OD6PlusinACSupport) ||
 			phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
@@ -4886,6 +4891,10 @@ static void fiji_print_current_perforce_level(
 	activity_percent >>= 8;
 
 	seq_printf(m, "\n [GPU load]: %u%%\n\n", activity_percent > 100 ? 100 : activity_percent);
+
+	seq_printf(m, "uvd    %sabled\n", data->uvd_power_gated ? "dis" : "en");
+
+	seq_printf(m, "vce    %sabled\n", data->vce_power_gated ? "dis" : "en");
 }
 
 static int fiji_program_display_gap(struct pp_hwmgr *hwmgr)
@@ -5192,6 +5201,67 @@ static int fiji_print_clock_levels(struct pp_hwmgr *hwmgr,
 	return size;
 }
 
+static inline bool fiji_are_power_levels_equal(const struct fiji_performance_level *pl1,
+							   const struct fiji_performance_level *pl2)
+{
+	return ((pl1->memory_clock == pl2->memory_clock) &&
+		  (pl1->engine_clock == pl2->engine_clock) &&
+		  (pl1->pcie_gen == pl2->pcie_gen) &&
+		  (pl1->pcie_lane == pl2->pcie_lane));
+}
+
+int fiji_check_states_equal(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *pstate1, const struct pp_hw_power_state *pstate2, bool *equal)
+{
+	const struct fiji_power_state *psa = cast_const_phw_fiji_power_state(pstate1);
+	const struct fiji_power_state *psb = cast_const_phw_fiji_power_state(pstate2);
+	int i;
+
+	if (equal == NULL || psa == NULL || psb == NULL)
+		return -EINVAL;
+
+	/* If the two states don't even have the same number of performance levels they cannot be the same state. */
+	if (psa->performance_level_count != psb->performance_level_count) {
+		*equal = false;
+		return 0;
+	}
+
+	for (i = 0; i < psa->performance_level_count; i++) {
+		if (!fiji_are_power_levels_equal(&(psa->performance_levels[i]), &(psb->performance_levels[i]))) {
+			/* If we have found even one performance level pair that is different the states are different. */
+			*equal = false;
+			return 0;
+		}
+	}
+
+	/* If all performance levels are the same, use the UVD and VCE clocks, the sclk threshold and the ACP clock to break the tie. */
+	*equal = ((psa->uvd_clks.vclk == psb->uvd_clks.vclk) && (psa->uvd_clks.dclk == psb->uvd_clks.dclk));
+	*equal &= ((psa->vce_clks.evclk == psb->vce_clks.evclk) && (psa->vce_clks.ecclk == psb->vce_clks.ecclk));
+	*equal &= (psa->sclk_threshold == psb->sclk_threshold);
+	*equal &= (psa->acp_clk == psb->acp_clk);
+
+	return 0;
+}
+
+bool fiji_check_smc_update_required_for_display_configuration(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+	bool is_update_required = false;
+	struct cgs_display_info info = {0,0,NULL};
+
+	cgs_get_active_displays_info(hwmgr->device, &info);
+
+	if (data->display_timing.num_existing_displays != info.display_count)
+		is_update_required = true;
+/* TO DO NEED TO GET DEEP SLEEP CLOCK FROM DAL
+	if (phm_cap_enabled(hwmgr->hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep)) {
+		cgs_get_min_clock_settings(hwmgr->device, &min_clocks);
+		if(min_clocks.engineClockInSR != data->display_timing.minClockInSR)
+			is_update_required = true;
+*/
+	return is_update_required;
+}
+
+
 static const struct pp_hwmgr_func fiji_hwmgr_funcs = {
 	.backend_init = &fiji_hwmgr_backend_init,
 	.backend_fini = &tonga_hwmgr_backend_fini,
@@ -5227,6 +5297,8 @@ static const struct pp_hwmgr_func fiji_hwmgr_funcs = {
 	.register_internal_thermal_interrupt = fiji_register_internal_thermal_interrupt,
 	.set_fan_control_mode = fiji_set_fan_control_mode,
 	.get_fan_control_mode = fiji_get_fan_control_mode,
+	.check_states_equal = fiji_check_states_equal,
+	.check_smc_update_required_for_display_configuration = fiji_check_smc_update_required_for_display_configuration,
 	.get_pp_table = fiji_get_pp_table,
 	.set_pp_table = fiji_set_pp_table,
 	.force_clock_level = fiji_force_clock_level,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h
index 22e273b1c1c5..a16f7cd4c238 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h
@@ -29,6 +29,7 @@
 #include "smu73_discrete.h"
 #include "ppatomctrl.h"
 #include "fiji_ppsmc.h"
+#include "pp_endian.h"
 
 #define FIJI_MAX_HARDWARE_POWERLEVELS	2
 #define FIJI_AT_DFLT	30
@@ -347,15 +348,4 @@ int fiji_update_samu_dpm(struct pp_hwmgr *hwmgr, bool bgate);
 int fiji_update_acp_dpm(struct pp_hwmgr *hwmgr, bool bgate);
 int fiji_enable_disable_vce_dpm(struct pp_hwmgr *hwmgr, bool enable);
 
-#define PP_HOST_TO_SMC_UL(X) cpu_to_be32(X)
-#define PP_SMC_TO_HOST_UL(X) be32_to_cpu(X)
-
-#define PP_HOST_TO_SMC_US(X) cpu_to_be16(X)
-#define PP_SMC_TO_HOST_US(X) be16_to_cpu(X)
-
-#define CONVERT_FROM_HOST_TO_SMC_UL(X) ((X) = PP_HOST_TO_SMC_UL(X))
-#define CONVERT_FROM_SMC_TO_HOST_UL(X) ((X) = PP_SMC_TO_HOST_UL(X))
-
-#define CONVERT_FROM_HOST_TO_SMC_US(X) ((X) = PP_HOST_TO_SMC_US(X))
-
 #endif /* _FIJI_HWMGR_H_ */
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/functiontables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/functiontables.c
index 9deadabbc81c..72cfecc4f9f7 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/functiontables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/functiontables.c
@@ -34,6 +34,11 @@ static int phm_run_table(struct pp_hwmgr *hwmgr,
 	int result = 0;
 	phm_table_function *function;
 
+	if (rt_table->function_list == NULL) {
+		printk(KERN_INFO "[ powerplay ] this function not implement!\n");
+		return 0;
+	}
+
 	for (function = rt_table->function_list; NULL != *function; function++) {
 		int tmp = (*function)(hwmgr, input, output, temp_storage, result);
 
@@ -57,9 +62,9 @@ int phm_dispatch_table(struct pp_hwmgr *hwmgr,
 	int result = 0;
 	void *temp_storage = NULL;
 
-	if (hwmgr == NULL || rt_table == NULL || rt_table->function_list == NULL) {
+	if (hwmgr == NULL || rt_table == NULL) {
 		printk(KERN_ERR "[ powerplay ] Invalid Parameter!\n");
-		return 0; /*temp return ture because some function not implement on some asic */
+		return -EINVAL;
 	}
 
 	if (0 != rt_table->storage_size) {
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index be31bed2538a..fa208ada6892 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -58,6 +58,9 @@ void phm_init_dynamic_caps(struct pp_hwmgr *hwmgr)
 
 	phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_VpuRecoveryInProgress);
 
+	phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_UVDDPM);
+	phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_VCEDPM);
+
 	if (acpi_atcs_functions_supported(hwmgr->device, ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST) &&
 		acpi_atcs_functions_supported(hwmgr->device, ATCS_FUNCTION_PCIE_DEVICE_READY_NOTIFICATION))
 		phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_PCIEPerformanceRequest);
@@ -130,18 +133,25 @@ int phm_set_power_state(struct pp_hwmgr *hwmgr,
 
 int phm_enable_dynamic_state_management(struct pp_hwmgr *hwmgr)
 {
+	int ret = 1;
+	bool enabled;
 	PHM_FUNC_CHECK(hwmgr);
 
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
 		PHM_PlatformCaps_TablelessHardwareInterface)) {
 		if (NULL != hwmgr->hwmgr_func->dynamic_state_management_enable)
-			return hwmgr->hwmgr_func->dynamic_state_management_enable(hwmgr);
+			ret = hwmgr->hwmgr_func->dynamic_state_management_enable(hwmgr);
 	} else {
-		return phm_dispatch_table(hwmgr,
+		ret = phm_dispatch_table(hwmgr,
 				&(hwmgr->enable_dynamic_state_management),
 				NULL, NULL);
 	}
-	return 0;
+
+	enabled = ret == 0 ? true : false;
+
+	cgs_notify_dpm_enabled(hwmgr->device, enabled);
+
+	return ret;
 }
 
 int phm_force_dpm_levels(struct pp_hwmgr *hwmgr, enum amd_dpm_forced_level level)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
index aec4f8346d9c..0d5d8372953e 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
@@ -5185,7 +5185,6 @@ tonga_print_current_perforce_level(struct pp_hwmgr *hwmgr, struct seq_file *m)
 	mclk = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
 	seq_printf(m, "\n [  mclk  ]: %u MHz\n\n [  sclk  ]: %u MHz\n", mclk/100, sclk/100);
 
-
 	offset = data->soft_regs_start + offsetof(SMU72_SoftRegisters, AverageGraphicsActivity);
 	activity_percent = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset);
 	activity_percent += 0x80;
@@ -5193,6 +5192,9 @@ tonga_print_current_perforce_level(struct pp_hwmgr *hwmgr, struct seq_file *m)
 
 	seq_printf(m, "\n [GPU load]: %u%%\n\n", activity_percent > 100 ? 100 : activity_percent);
 
+	seq_printf(m, "uvd    %sabled\n", data->uvd_power_gated ? "dis" : "en");
+
+	seq_printf(m, "vce    %sabled\n", data->vce_power_gated ? "dis" : "en");
 }
 
 static int tonga_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, const void *input)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h
index 49168d262ccc..f88d3bbe6671 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h
@@ -28,6 +28,7 @@
 #include "ppatomctrl.h"
 #include "ppinterrupt.h"
 #include "tonga_powertune.h"
+#include "pp_endian.h"
 
 #define TONGA_MAX_HARDWARE_POWERLEVELS 2
 #define TONGA_DYNCLK_NUMBER_OF_TREND_COEFFICIENTS 15
@@ -386,17 +387,6 @@ typedef struct tonga_hwmgr tonga_hwmgr;
 
 #define TONGA_UNUSED_GPIO_PIN                        0x7F
 
-#define PP_HOST_TO_SMC_UL(X) cpu_to_be32(X)
-#define PP_SMC_TO_HOST_UL(X) be32_to_cpu(X)
-
-#define PP_HOST_TO_SMC_US(X) cpu_to_be16(X)
-#define PP_SMC_TO_HOST_US(X) be16_to_cpu(X)
-
-#define CONVERT_FROM_HOST_TO_SMC_UL(X) ((X) = PP_HOST_TO_SMC_UL(X))
-#define CONVERT_FROM_SMC_TO_HOST_UL(X) ((X) = PP_SMC_TO_HOST_UL(X))
-
-#define CONVERT_FROM_HOST_TO_SMC_US(X) ((X) = PP_HOST_TO_SMC_US(X))
-
 int tonga_hwmgr_init(struct pp_hwmgr *hwmgr);
 int tonga_update_vce_dpm(struct pp_hwmgr *hwmgr, const void *input);
 int tonga_update_uvd_dpm(struct pp_hwmgr *hwmgr, bool bgate);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
index 34f4bef3691f..b156481b50e8 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
@@ -512,8 +512,10 @@ static int get_cac_tdp_table(
 
 	hwmgr->dyn_state.cac_dtp_table = kzalloc(table_size, GFP_KERNEL);
 
-	if (NULL == hwmgr->dyn_state.cac_dtp_table)
+	if (NULL == hwmgr->dyn_state.cac_dtp_table) {
+		kfree(tdp_table);
 		return -ENOMEM;
+	}
 
 	memset(hwmgr->dyn_state.cac_dtp_table, 0x00, table_size);
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_endian.h b/drivers/gpu/drm/amd/powerplay/inc/pp_endian.h
new file mode 100644
index 000000000000..f49d1963fe85
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/inc/pp_endian.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _PP_ENDIAN_H_
+#define _PP_ENDIAN_H_
+
+#define PP_HOST_TO_SMC_UL(X) cpu_to_be32(X)
+#define PP_SMC_TO_HOST_UL(X) be32_to_cpu(X)
+
+#define PP_HOST_TO_SMC_US(X) cpu_to_be16(X)
+#define PP_SMC_TO_HOST_US(X) be16_to_cpu(X)
+
+#define CONVERT_FROM_HOST_TO_SMC_UL(X) ((X) = PP_HOST_TO_SMC_UL(X))
+#define CONVERT_FROM_SMC_TO_HOST_UL(X) ((X) = PP_SMC_TO_HOST_UL(X))
+
+#define CONVERT_FROM_HOST_TO_SMC_US(X) ((X) = PP_HOST_TO_SMC_US(X))
+
+#endif /* _PP_ENDIAN_H_ */
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
index 504f035d1843..fc9e3d1dd409 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
@@ -32,6 +32,27 @@ struct pp_instance;
 #define smu_lower_32_bits(n) ((uint32_t)(n))
 #define smu_upper_32_bits(n) ((uint32_t)(((n)>>16)>>16))
 
+enum AVFS_BTC_STATUS {
+	AVFS_BTC_BOOT = 0,
+	AVFS_BTC_BOOT_STARTEDSMU,
+	AVFS_LOAD_VIRUS,
+	AVFS_BTC_VIRUS_LOADED,
+	AVFS_BTC_VIRUS_FAIL,
+	AVFS_BTC_COMPLETED_PREVIOUSLY,
+	AVFS_BTC_ENABLEAVFS,
+	AVFS_BTC_STARTED,
+	AVFS_BTC_FAILED,
+	AVFS_BTC_RESTOREVFT_FAILED,
+	AVFS_BTC_SAVEVFT_FAILED,
+	AVFS_BTC_DPMTABLESETUP_FAILED,
+	AVFS_BTC_COMPLETED_UNSAVED,
+	AVFS_BTC_COMPLETED_SAVED,
+	AVFS_BTC_COMPLETED_RESTORED,
+	AVFS_BTC_DISABLED,
+	AVFS_BTC_NOTSUPPORTED,
+	AVFS_BTC_SMUMSG_ERROR
+};
+
 struct pp_smumgr_func {
 	int (*smu_init)(struct pp_smumgr *smumgr);
 	int (*smu_fini)(struct pp_smumgr *smumgr);
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h
index 8cd22d9c9140..b4eb483215b1 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h
@@ -23,24 +23,6 @@
 #ifndef _FIJI_SMUMANAGER_H_
 #define _FIJI_SMUMANAGER_H_
 
-enum AVFS_BTC_STATUS {
-	AVFS_BTC_BOOT = 0,
-	AVFS_BTC_BOOT_STARTEDSMU,
-	AVFS_LOAD_VIRUS,
-	AVFS_BTC_VIRUS_LOADED,
-	AVFS_BTC_VIRUS_FAIL,
-	AVFS_BTC_STARTED,
-	AVFS_BTC_FAILED,
-	AVFS_BTC_RESTOREVFT_FAILED,
-	AVFS_BTC_SAVEVFT_FAILED,
-	AVFS_BTC_DPMTABLESETUP_FAILED,
-	AVFS_BTC_COMPLETED_UNSAVED,
-	AVFS_BTC_COMPLETED_SAVED,
-	AVFS_BTC_COMPLETED_RESTORED,
-	AVFS_BTC_DISABLED,
-	AVFS_BTC_NOTSUPPORTED,
-	AVFS_BTC_SMUMSG_ERROR
-};
 
 struct fiji_smu_avfs {
 	enum AVFS_BTC_STATUS AvfsBtcStatus;
diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c
index 87c78eecea64..dc115aea352b 100644
--- a/drivers/gpu/drm/amd/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c
@@ -84,12 +84,33 @@ static bool amd_sched_fence_enable_signaling(struct fence *f)
 	return true;
 }
 
-static void amd_sched_fence_release(struct fence *f)
+/**
+ * amd_sched_fence_free - free up the fence memory
+ *
+ * @rcu: RCU callback head
+ *
+ * Free up the fence memory after the RCU grace period.
+ */
+static void amd_sched_fence_free(struct rcu_head *rcu)
 {
+	struct fence *f = container_of(rcu, struct fence, rcu);
 	struct amd_sched_fence *fence = to_amd_sched_fence(f);
 	kmem_cache_free(sched_fence_slab, fence);
 }
 
+/**
+ * amd_sched_fence_release - callback that fence can be freed
+ *
+ * @f: fence
+ *
+ * This function is called when the reference count becomes zero.
+ * It just RCU schedules freeing up the fence.
+ */
+static void amd_sched_fence_release(struct fence *f)
+{
+	call_rcu(&f->rcu, amd_sched_fence_free);
+}
+
 const struct fence_ops amd_sched_fence_ops = {
 	.get_driver_name = amd_sched_fence_get_driver_name,
 	.get_timeline_name = amd_sched_fence_get_timeline_name,
diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c
index 56b829f97699..3ac1ae4d8caf 100644
--- a/drivers/gpu/drm/arm/hdlcd_drv.c
+++ b/drivers/gpu/drm/arm/hdlcd_drv.c
@@ -57,14 +57,13 @@ static int hdlcd_load(struct drm_device *drm, unsigned long flags)
 		DRM_ERROR("failed to map control registers area\n");
 		ret = PTR_ERR(hdlcd->mmio);
 		hdlcd->mmio = NULL;
-		goto fail;
+		return ret;
 	}
 
 	version = hdlcd_read(hdlcd, HDLCD_REG_VERSION);
 	if ((version & HDLCD_PRODUCT_MASK) != HDLCD_PRODUCT_ID) {
 		DRM_ERROR("unknown product id: 0x%x\n", version);
-		ret = -EINVAL;
-		goto fail;
+		return -EINVAL;
 	}
 	DRM_INFO("found ARM HDLCD version r%dp%d\n",
 		(version & HDLCD_VERSION_MAJOR_MASK) >> 8,
@@ -73,7 +72,7 @@ static int hdlcd_load(struct drm_device *drm, unsigned long flags)
 	/* Get the optional framebuffer memory resource */
 	ret = of_reserved_mem_device_init(drm->dev);
 	if (ret && ret != -ENODEV)
-		goto fail;
+		return ret;
 
 	ret = dma_set_mask_and_coherent(drm->dev, DMA_BIT_MASK(32));
 	if (ret)
@@ -101,8 +100,6 @@ irq_fail:
 	drm_crtc_cleanup(&hdlcd->crtc);
 setup_fail:
 	of_reserved_mem_device_release(drm->dev);
-fail:
-	devm_clk_put(drm->dev, hdlcd->clk);
 
 	return ret;
 }
@@ -412,7 +409,6 @@ err_unload:
 	pm_runtime_put_sync(drm->dev);
 	pm_runtime_disable(drm->dev);
 	of_reserved_mem_device_release(drm->dev);
-	devm_clk_put(dev, hdlcd->clk);
 err_free:
 	drm_dev_unref(drm);
 
@@ -436,10 +432,6 @@ static void hdlcd_drm_unbind(struct device *dev)
 	pm_runtime_put_sync(drm->dev);
 	pm_runtime_disable(drm->dev);
 	of_reserved_mem_device_release(drm->dev);
-	if (!IS_ERR(hdlcd->clk)) {
-		devm_clk_put(drm->dev, hdlcd->clk);
-		hdlcd->clk = NULL;
-	}
 	drm_mode_config_cleanup(drm);
 	drm_dev_unregister(drm);
 	drm_dev_unref(drm);
diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c
index 6e731db31aa4..aca7f9cc6109 100644
--- a/drivers/gpu/drm/armada/armada_gem.c
+++ b/drivers/gpu/drm/armada/armada_gem.c
@@ -481,7 +481,7 @@ armada_gem_prime_map_dma_buf(struct dma_buf_attachment *attach,
 
  release:
 	for_each_sg(sgt->sgl, sg, num, i)
-		page_cache_release(sg_page(sg));
+		put_page(sg_page(sg));
  free_table:
 	sg_free_table(sgt);
  free_sgt:
@@ -502,7 +502,7 @@ static void armada_gem_prime_unmap_dma_buf(struct dma_buf_attachment *attach,
 	if (dobj->obj.filp) {
 		struct scatterlist *sg;
 		for_each_sg(sgt->sgl, sg, sgt->nents, i)
-			page_cache_release(sg_page(sg));
+			put_page(sg_page(sg));
 	}
 
 	sg_free_table(sgt);
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
index 1ffe9c329c46..d65dcaee3832 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
@@ -558,7 +558,7 @@ static int atmel_hlcdc_plane_atomic_check(struct drm_plane *p,
 	if (!state->base.crtc || !fb)
 		return 0;
 
-	crtc_state = s->state->crtc_states[drm_crtc_index(s->crtc)];
+	crtc_state = drm_atomic_get_existing_crtc_state(s->state, s->crtc);
 	mode = &crtc_state->adjusted_mode;
 
 	state->src_x = s->src_x;
diff --git a/drivers/gpu/drm/bochs/bochs_drv.c b/drivers/gpu/drm/bochs/bochs_drv.c
index 7f1a3604b19f..b332b4d3b0e2 100644
--- a/drivers/gpu/drm/bochs/bochs_drv.c
+++ b/drivers/gpu/drm/bochs/bochs_drv.c
@@ -182,8 +182,8 @@ static const struct pci_device_id bochs_pci_tbl[] = {
 	{
 		.vendor      = 0x1234,
 		.device      = 0x1111,
-		.subvendor   = 0x1af4,
-		.subdevice   = 0x1100,
+		.subvendor   = PCI_SUBVENDOR_ID_REDHAT_QUMRANET,
+		.subdevice   = PCI_SUBDEVICE_ID_QEMU,
 		.driver_data = BOCHS_QEMU_STDVGA,
 	},
 	{
diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.c b/drivers/gpu/drm/cirrus/cirrus_drv.c
index b1619e29a564..7bc394ec9fb3 100644
--- a/drivers/gpu/drm/cirrus/cirrus_drv.c
+++ b/drivers/gpu/drm/cirrus/cirrus_drv.c
@@ -33,8 +33,9 @@ static struct drm_driver driver;
 
 /* only bind to the cirrus chip in qemu */
 static const struct pci_device_id pciidlist[] = {
-	{ PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_5446, 0x1af4, 0x1100, 0,
-	  0, 0 },
+	{ PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_5446,
+	  PCI_SUBVENDOR_ID_REDHAT_QUMRANET, PCI_SUBDEVICE_ID_QEMU,
+	  0, 0, 0 },
 	{ PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_5446, PCI_VENDOR_ID_XEN,
 	  0x0001, 0, 0, 0 },
 	{0,}
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index a2596eb803fc..8ee1db866e80 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -380,7 +380,6 @@ EXPORT_SYMBOL(drm_atomic_set_mode_prop_for_crtc);
  * drm_atomic_replace_property_blob - replace a blob property
  * @blob: a pointer to the member blob to be replaced
  * @new_blob: the new blob to replace with
- * @expected_size: the expected size of the new blob
  * @replaced: whether the blob has been replaced
  *
  * RETURNS:
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 2bb90faa0ee2..4befe25c81c7 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -67,7 +67,8 @@ drm_atomic_helper_plane_changed(struct drm_atomic_state *state,
 	struct drm_crtc_state *crtc_state;
 
 	if (plane->state->crtc) {
-		crtc_state = state->crtc_states[drm_crtc_index(plane->state->crtc)];
+		crtc_state = drm_atomic_get_existing_crtc_state(state,
+								plane->state->crtc);
 
 		if (WARN_ON(!crtc_state))
 			return;
@@ -76,8 +77,8 @@ drm_atomic_helper_plane_changed(struct drm_atomic_state *state,
 	}
 
 	if (plane_state->crtc) {
-		crtc_state =
-			state->crtc_states[drm_crtc_index(plane_state->crtc)];
+		crtc_state = drm_atomic_get_existing_crtc_state(state,
+								plane_state->crtc);
 
 		if (WARN_ON(!crtc_state))
 			return;
@@ -374,8 +375,8 @@ mode_fixup(struct drm_atomic_state *state)
 		if (!conn_state->crtc || !conn_state->best_encoder)
 			continue;
 
-		crtc_state =
-			state->crtc_states[drm_crtc_index(conn_state->crtc)];
+		crtc_state = drm_atomic_get_existing_crtc_state(state,
+								conn_state->crtc);
 
 		/*
 		 * Each encoder has at most one connector (since we always steal
@@ -679,7 +680,8 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state)
 		if (!old_conn_state->crtc)
 			continue;
 
-		old_crtc_state = old_state->crtc_states[drm_crtc_index(old_conn_state->crtc)];
+		old_crtc_state = drm_atomic_get_existing_crtc_state(old_state,
+								    old_conn_state->crtc);
 
 		if (!old_crtc_state->active ||
 		    !drm_atomic_crtc_needs_modeset(old_conn_state->crtc->state))
diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c
index bd93453afa61..b3654404abd0 100644
--- a/drivers/gpu/drm/drm_bridge.c
+++ b/drivers/gpu/drm/drm_bridge.c
@@ -186,7 +186,8 @@ void drm_bridge_disable(struct drm_bridge *bridge)
 
 	drm_bridge_disable(bridge->next);
 
-	bridge->funcs->disable(bridge);
+	if (bridge->funcs->disable)
+		bridge->funcs->disable(bridge);
 }
 EXPORT_SYMBOL(drm_bridge_disable);
 
@@ -206,7 +207,8 @@ void drm_bridge_post_disable(struct drm_bridge *bridge)
 	if (!bridge)
 		return;
 
-	bridge->funcs->post_disable(bridge);
+	if (bridge->funcs->post_disable)
+		bridge->funcs->post_disable(bridge);
 
 	drm_bridge_post_disable(bridge->next);
 }
@@ -256,7 +258,8 @@ void drm_bridge_pre_enable(struct drm_bridge *bridge)
 
 	drm_bridge_pre_enable(bridge->next);
 
-	bridge->funcs->pre_enable(bridge);
+	if (bridge->funcs->pre_enable)
+		bridge->funcs->pre_enable(bridge);
 }
 EXPORT_SYMBOL(drm_bridge_pre_enable);
 
@@ -276,7 +279,8 @@ void drm_bridge_enable(struct drm_bridge *bridge)
 	if (!bridge)
 		return;
 
-	bridge->funcs->enable(bridge);
+	if (bridge->funcs->enable)
+		bridge->funcs->enable(bridge);
 
 	drm_bridge_enable(bridge->next);
 }
diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index 7d58f594cffe..df64ed1c0139 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -179,7 +179,7 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request,
 {
 	struct drm_dp_aux_msg msg;
 	unsigned int retry;
-	int err;
+	int err = 0;
 
 	memset(&msg, 0, sizeof(msg));
 	msg.address = offset;
@@ -187,6 +187,8 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request,
 	msg.buffer = buffer;
 	msg.size = size;
 
+	mutex_lock(&aux->hw_mutex);
+
 	/*
 	 * The specification doesn't give any recommendation on how often to
 	 * retry native transactions. We used to retry 7 times like for
@@ -195,25 +197,24 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request,
 	 */
 	for (retry = 0; retry < 32; retry++) {
 
-		mutex_lock(&aux->hw_mutex);
 		err = aux->transfer(aux, &msg);
-		mutex_unlock(&aux->hw_mutex);
 		if (err < 0) {
 			if (err == -EBUSY)
 				continue;
 
-			return err;
+			goto unlock;
 		}
 
 
 		switch (msg.reply & DP_AUX_NATIVE_REPLY_MASK) {
 		case DP_AUX_NATIVE_REPLY_ACK:
 			if (err < size)
-				return -EPROTO;
-			return err;
+				err = -EPROTO;
+			goto unlock;
 
 		case DP_AUX_NATIVE_REPLY_NACK:
-			return -EIO;
+			err = -EIO;
+			goto unlock;
 
 		case DP_AUX_NATIVE_REPLY_DEFER:
 			usleep_range(AUX_RETRY_INTERVAL, AUX_RETRY_INTERVAL + 100);
@@ -222,7 +223,11 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request,
 	}
 
 	DRM_DEBUG_KMS("too many retries, giving up\n");
-	return -EIO;
+	err = -EIO;
+
+unlock:
+	mutex_unlock(&aux->hw_mutex);
+	return err;
 }
 
 /**
@@ -544,9 +549,7 @@ static int drm_dp_i2c_do_msg(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg)
 	int max_retries = max(7, drm_dp_i2c_retry_count(msg, dp_aux_i2c_speed_khz));
 
 	for (retry = 0, defer_i2c = 0; retry < (max_retries + defer_i2c); retry++) {
-		mutex_lock(&aux->hw_mutex);
 		ret = aux->transfer(aux, msg);
-		mutex_unlock(&aux->hw_mutex);
 		if (ret < 0) {
 			if (ret == -EBUSY)
 				continue;
@@ -685,6 +688,8 @@ static int drm_dp_i2c_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs,
 
 	memset(&msg, 0, sizeof(msg));
 
+	mutex_lock(&aux->hw_mutex);
+
 	for (i = 0; i < num; i++) {
 		msg.address = msgs[i].addr;
 		drm_dp_i2c_msg_set_request(&msg, &msgs[i]);
@@ -739,6 +744,8 @@ static int drm_dp_i2c_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs,
 	msg.size = 0;
 	(void)drm_dp_i2c_do_msg(aux, &msg);
 
+	mutex_unlock(&aux->hw_mutex);
+
 	return err;
 }
 
diff --git a/drivers/gpu/drm/drm_edid_load.c b/drivers/gpu/drm/drm_edid_load.c
index 698b8c3b09d9..9a401aed98e0 100644
--- a/drivers/gpu/drm/drm_edid_load.c
+++ b/drivers/gpu/drm/drm_edid_load.c
@@ -170,16 +170,11 @@ static void *edid_load(struct drm_connector *connector, const char *name,
 	int i, valid_extensions = 0;
 	bool print_bad_edid = !connector->bad_edid_counter || (drm_debug & DRM_UT_KMS);
 
-	builtin = 0;
-	for (i = 0; i < GENERIC_EDIDS; i++) {
-		if (strcmp(name, generic_edid_name[i]) == 0) {
-			fwdata = generic_edid[i];
-			fwsize = sizeof(generic_edid[i]);
-			builtin = 1;
-			break;
-		}
-	}
-	if (!builtin) {
+	builtin = match_string(generic_edid_name, GENERIC_EDIDS, name);
+	if (builtin >= 0) {
+		fwdata = generic_edid[builtin];
+		fwsize = sizeof(generic_edid[builtin]);
+	} else {
 		struct platform_device *pdev;
 		int err;
 
@@ -252,7 +247,7 @@ static void *edid_load(struct drm_connector *connector, const char *name,
 	}
 
 	DRM_INFO("Got %s EDID base block and %d extension%s from "
-	    "\"%s\" for connector \"%s\"\n", builtin ? "built-in" :
+	    "\"%s\" for connector \"%s\"\n", (builtin >= 0) ? "built-in" :
 	    "external", valid_extensions, valid_extensions == 1 ? "" : "s",
 	    name, connector_name);
 
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 2e8c77e71e1f..da0c5320789f 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -534,7 +534,7 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj)
 
 fail:
 	while (i--)
-		page_cache_release(pages[i]);
+		put_page(pages[i]);
 
 	drm_free_large(pages);
 	return ERR_CAST(p);
@@ -569,7 +569,7 @@ void drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
 			mark_page_accessed(pages[i]);
 
 		/* Undo the reference we took when populating the table */
-		page_cache_release(pages[i]);
+		put_page(pages[i]);
 	}
 
 	drm_free_large(pages);
diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c
index e5df53b6e229..1f500a1b9969 100644
--- a/drivers/gpu/drm/drm_gem_cma_helper.c
+++ b/drivers/gpu/drm/drm_gem_cma_helper.c
@@ -109,8 +109,8 @@ struct drm_gem_cma_object *drm_gem_cma_create(struct drm_device *drm,
 	if (IS_ERR(cma_obj))
 		return cma_obj;
 
-	cma_obj->vaddr = dma_alloc_writecombine(drm->dev, size,
-			&cma_obj->paddr, GFP_KERNEL | __GFP_NOWARN);
+	cma_obj->vaddr = dma_alloc_wc(drm->dev, size, &cma_obj->paddr,
+				      GFP_KERNEL | __GFP_NOWARN);
 	if (!cma_obj->vaddr) {
 		dev_err(drm->dev, "failed to allocate buffer with size %zu\n",
 			size);
@@ -192,8 +192,8 @@ void drm_gem_cma_free_object(struct drm_gem_object *gem_obj)
 	cma_obj = to_drm_gem_cma_obj(gem_obj);
 
 	if (cma_obj->vaddr) {
-		dma_free_writecombine(gem_obj->dev->dev, cma_obj->base.size,
-				      cma_obj->vaddr, cma_obj->paddr);
+		dma_free_wc(gem_obj->dev->dev, cma_obj->base.size,
+			    cma_obj->vaddr, cma_obj->paddr);
 	} else if (gem_obj->import_attach) {
 		drm_prime_gem_destroy(gem_obj, cma_obj->sgt);
 	}
@@ -324,9 +324,8 @@ static int drm_gem_cma_mmap_obj(struct drm_gem_cma_object *cma_obj,
 	vma->vm_flags &= ~VM_PFNMAP;
 	vma->vm_pgoff = 0;
 
-	ret = dma_mmap_writecombine(cma_obj->base.dev->dev, vma,
-				    cma_obj->vaddr, cma_obj->paddr,
-				    vma->vm_end - vma->vm_start);
+	ret = dma_mmap_wc(cma_obj->base.dev->dev, vma, cma_obj->vaddr,
+			  cma_obj->paddr, vma->vm_end - vma->vm_start);
 	if (ret)
 		drm_gem_vm_close(vma);
 
diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c
index 6e6a9c58d404..f5d80839a90c 100644
--- a/drivers/gpu/drm/drm_mipi_dsi.c
+++ b/drivers/gpu/drm/drm_mipi_dsi.c
@@ -47,7 +47,17 @@
 
 static int mipi_dsi_device_match(struct device *dev, struct device_driver *drv)
 {
-	return of_driver_match_device(dev, drv);
+	struct mipi_dsi_device *dsi = to_mipi_dsi_device(dev);
+
+	/* attempt OF style match */
+	if (of_driver_match_device(dev, drv))
+		return 1;
+
+	/* compare DSI device and driver names */
+	if (!strcmp(dsi->name, drv->name))
+		return 1;
+
+	return 0;
 }
 
 static const struct dev_pm_ops mipi_dsi_device_pm_ops = {
@@ -129,14 +139,20 @@ static int mipi_dsi_device_add(struct mipi_dsi_device *dsi)
 	return device_add(&dsi->dev);
 }
 
+#if IS_ENABLED(CONFIG_OF)
 static struct mipi_dsi_device *
 of_mipi_dsi_device_add(struct mipi_dsi_host *host, struct device_node *node)
 {
-	struct mipi_dsi_device *dsi;
 	struct device *dev = host->dev;
+	struct mipi_dsi_device_info info = { };
 	int ret;
 	u32 reg;
 
+	if (of_modalias_node(node, info.type, sizeof(info.type)) < 0) {
+		dev_err(dev, "modalias failure on %s\n", node->full_name);
+		return ERR_PTR(-EINVAL);
+	}
+
 	ret = of_property_read_u32(node, "reg", &reg);
 	if (ret) {
 		dev_err(dev, "device node %s has no valid reg property: %d\n",
@@ -144,32 +160,111 @@ of_mipi_dsi_device_add(struct mipi_dsi_host *host, struct device_node *node)
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (reg > 3) {
-		dev_err(dev, "device node %s has invalid reg property: %u\n",
-			node->full_name, reg);
+	info.channel = reg;
+	info.node = of_node_get(node);
+
+	return mipi_dsi_device_register_full(host, &info);
+}
+#else
+static struct mipi_dsi_device *
+of_mipi_dsi_device_add(struct mipi_dsi_host *host, struct device_node *node)
+{
+	return ERR_PTR(-ENODEV);
+}
+#endif
+
+/**
+ * mipi_dsi_device_register_full - create a MIPI DSI device
+ * @host: DSI host to which this device is connected
+ * @info: pointer to template containing DSI device information
+ *
+ * Create a MIPI DSI device by using the device information provided by
+ * mipi_dsi_device_info template
+ *
+ * Returns:
+ * A pointer to the newly created MIPI DSI device, or, a pointer encoded
+ * with an error
+ */
+struct mipi_dsi_device *
+mipi_dsi_device_register_full(struct mipi_dsi_host *host,
+			      const struct mipi_dsi_device_info *info)
+{
+	struct mipi_dsi_device *dsi;
+	struct device *dev = host->dev;
+	int ret;
+
+	if (!info) {
+		dev_err(dev, "invalid mipi_dsi_device_info pointer\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (info->channel > 3) {
+		dev_err(dev, "invalid virtual channel: %u\n", info->channel);
 		return ERR_PTR(-EINVAL);
 	}
 
 	dsi = mipi_dsi_device_alloc(host);
 	if (IS_ERR(dsi)) {
-		dev_err(dev, "failed to allocate DSI device %s: %ld\n",
-			node->full_name, PTR_ERR(dsi));
+		dev_err(dev, "failed to allocate DSI device %ld\n",
+			PTR_ERR(dsi));
 		return dsi;
 	}
 
-	dsi->dev.of_node = of_node_get(node);
-	dsi->channel = reg;
+	dsi->dev.of_node = info->node;
+	dsi->channel = info->channel;
+	strlcpy(dsi->name, info->type, sizeof(dsi->name));
 
 	ret = mipi_dsi_device_add(dsi);
 	if (ret) {
-		dev_err(dev, "failed to add DSI device %s: %d\n",
-			node->full_name, ret);
+		dev_err(dev, "failed to add DSI device %d\n", ret);
 		kfree(dsi);
 		return ERR_PTR(ret);
 	}
 
 	return dsi;
 }
+EXPORT_SYMBOL(mipi_dsi_device_register_full);
+
+/**
+ * mipi_dsi_device_unregister - unregister MIPI DSI device
+ * @dsi: DSI peripheral device
+ */
+void mipi_dsi_device_unregister(struct mipi_dsi_device *dsi)
+{
+	device_unregister(&dsi->dev);
+}
+EXPORT_SYMBOL(mipi_dsi_device_unregister);
+
+static DEFINE_MUTEX(host_lock);
+static LIST_HEAD(host_list);
+
+/**
+ * of_find_mipi_dsi_host_by_node() - find the MIPI DSI host matching a
+ *				     device tree node
+ * @node: device tree node
+ *
+ * Returns:
+ * A pointer to the MIPI DSI host corresponding to @node or NULL if no
+ * such device exists (or has not been registered yet).
+ */
+struct mipi_dsi_host *of_find_mipi_dsi_host_by_node(struct device_node *node)
+{
+	struct mipi_dsi_host *host;
+
+	mutex_lock(&host_lock);
+
+	list_for_each_entry(host, &host_list, list) {
+		if (host->dev->of_node == node) {
+			mutex_unlock(&host_lock);
+			return host;
+		}
+	}
+
+	mutex_unlock(&host_lock);
+
+	return NULL;
+}
+EXPORT_SYMBOL(of_find_mipi_dsi_host_by_node);
 
 int mipi_dsi_host_register(struct mipi_dsi_host *host)
 {
@@ -182,6 +277,10 @@ int mipi_dsi_host_register(struct mipi_dsi_host *host)
 		of_mipi_dsi_device_add(host, node);
 	}
 
+	mutex_lock(&host_lock);
+	list_add_tail(&host->list, &host_list);
+	mutex_unlock(&host_lock);
+
 	return 0;
 }
 EXPORT_SYMBOL(mipi_dsi_host_register);
@@ -190,7 +289,7 @@ static int mipi_dsi_remove_device_fn(struct device *dev, void *priv)
 {
 	struct mipi_dsi_device *dsi = to_mipi_dsi_device(dev);
 
-	device_unregister(&dsi->dev);
+	mipi_dsi_device_unregister(dsi);
 
 	return 0;
 }
@@ -198,6 +297,10 @@ static int mipi_dsi_remove_device_fn(struct device *dev, void *priv)
 void mipi_dsi_host_unregister(struct mipi_dsi_host *host)
 {
 	device_for_each_child(host->dev, NULL, mipi_dsi_remove_device_fn);
+
+	mutex_lock(&host_lock);
+	list_del_init(&host->list);
+	mutex_unlock(&host_lock);
 }
 EXPORT_SYMBOL(mipi_dsi_host_unregister);
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 937a77520f58..281c6eca20a8 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -761,9 +761,9 @@ static struct page **etnaviv_gem_userptr_do_get_pages(
 
 	down_read(&mm->mmap_sem);
 	while (pinned < npages) {
-		ret = get_user_pages(task, mm, ptr, npages - pinned,
-				     !etnaviv_obj->userptr.ro, 0,
-				     pvec + pinned, NULL);
+		ret = get_user_pages_remote(task, mm, ptr, npages - pinned,
+					    !etnaviv_obj->userptr.ro, 0,
+					    pvec + pinned, NULL);
 		if (ret < 0)
 			break;
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index d13303ce530d..09198d0b5814 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -1110,8 +1110,8 @@ struct etnaviv_cmdbuf *etnaviv_gpu_cmdbuf_new(struct etnaviv_gpu *gpu, u32 size,
 	if (!cmdbuf)
 		return NULL;
 
-	cmdbuf->vaddr = dma_alloc_writecombine(gpu->dev, size, &cmdbuf->paddr,
-					       GFP_KERNEL);
+	cmdbuf->vaddr = dma_alloc_wc(gpu->dev, size, &cmdbuf->paddr,
+				     GFP_KERNEL);
 	if (!cmdbuf->vaddr) {
 		kfree(cmdbuf);
 		return NULL;
@@ -1125,8 +1125,8 @@ struct etnaviv_cmdbuf *etnaviv_gpu_cmdbuf_new(struct etnaviv_gpu *gpu, u32 size,
 
 void etnaviv_gpu_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf)
 {
-	dma_free_writecombine(cmdbuf->gpu->dev, cmdbuf->size,
-			      cmdbuf->vaddr, cmdbuf->paddr);
+	dma_free_wc(cmdbuf->gpu->dev, cmdbuf->size, cmdbuf->vaddr,
+		    cmdbuf->paddr);
 	kfree(cmdbuf);
 }
 
diff --git a/drivers/gpu/drm/gma500/cdv_intel_dp.c b/drivers/gpu/drm/gma500/cdv_intel_dp.c
index 7bb1f1aff932..c52f9adf5e04 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_dp.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_dp.c
@@ -220,7 +220,7 @@ i2c_dp_aux_prepare_bus(struct i2c_adapter *adapter)
  * FIXME: This is the old dp aux helper, gma500 is the last driver that needs to
  * be ported over to the new helper code in drm_dp_helper.c like i915 or radeon.
  */
-static int __deprecated
+static int
 i2c_dp_aux_add_bus(struct i2c_adapter *adapter)
 {
 	int error;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f1455faecbd2..b37ffea8b458 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -177,7 +177,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 		drm_clflush_virt_range(vaddr, PAGE_SIZE);
 		kunmap_atomic(src);
 
-		page_cache_release(page);
+		put_page(page);
 		vaddr += PAGE_SIZE;
 	}
 
@@ -243,7 +243,7 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
 			set_page_dirty(page);
 			if (obj->madv == I915_MADV_WILLNEED)
 				mark_page_accessed(page);
-			page_cache_release(page);
+			put_page(page);
 			vaddr += PAGE_SIZE;
 		}
 		obj->dirty = 0;
@@ -2206,7 +2206,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 		if (obj->madv == I915_MADV_WILLNEED)
 			mark_page_accessed(page);
 
-		page_cache_release(page);
+		put_page(page);
 	}
 	obj->dirty = 0;
 
@@ -2354,7 +2354,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 err_pages:
 	sg_mark_end(sg);
 	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
-		page_cache_release(sg_page_iter_page(&sg_iter));
+		put_page(sg_page_iter_page(&sg_iter));
 	sg_free_table(st);
 	kfree(st);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 4e60643ef53a..80bbe43a2e92 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -187,25 +187,20 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
 	return ret;
 }
 
-static void i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
+static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
 {
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
 	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	bool was_interruptible;
 	int ret;
 
-	mutex_lock(&dev->struct_mutex);
-	was_interruptible = dev_priv->mm.interruptible;
-	dev_priv->mm.interruptible = false;
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		return ret;
 
 	ret = i915_gem_object_set_to_gtt_domain(obj, false);
-
-	dev_priv->mm.interruptible = was_interruptible;
 	mutex_unlock(&dev->struct_mutex);
 
-	if (unlikely(ret))
-		DRM_ERROR("unable to flush buffer following CPU access; rendering may be corrupt\n");
+	return ret;
 }
 
 static const struct dma_buf_ops i915_dmabuf_ops =  {
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index 598198543dcd..a2b938ec01a7 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -34,8 +34,8 @@
  * set of these objects.
  *
  * Fences are used to detile GTT memory mappings. They're also connected to the
- * hardware frontbuffer render tracking and hence interract with frontbuffer
- * conmpression. Furthermore on older platforms fences are required for tiled
+ * hardware frontbuffer render tracking and hence interact with frontbuffer
+ * compression. Furthermore on older platforms fences are required for tiled
  * objects used by the display engine. They can also be used by the render
  * engine - they're required for blitter commands and are optional for render
  * commands. But on gen4+ both display (with the exception of fbc) and rendering
@@ -46,8 +46,8 @@
  *
  * Finally note that because fences are such a restricted resource they're
  * dynamically associated with objects. Furthermore fence state is committed to
- * the hardware lazily to avoid unecessary stalls on gen2/3. Therefore code must
- * explictly call i915_gem_object_get_fence() to synchronize fencing status
+ * the hardware lazily to avoid unnecessary stalls on gen2/3. Therefore code must
+ * explicitly call i915_gem_object_get_fence() to synchronize fencing status
  * for cpu access. Also note that some code wants an unfenced view, for those
  * cases the fence can be removed forcefully with i915_gem_object_put_fence().
  *
@@ -527,7 +527,7 @@ void i915_gem_restore_fences(struct drm_device *dev)
  * required.
  *
  * When bit 17 is XORed in, we simply refuse to tile at all.  Bit
- * 17 is not just a page offset, so as we page an objet out and back in,
+ * 17 is not just a page offset, so as we page an object out and back in,
  * individual pages in it will have different bit 17 addresses, resulting in
  * each 64 bytes being swapped with its neighbor!
  *
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index d59741492341..0f94b6c5c9cc 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -500,11 +500,11 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 
 		down_read(&mm->mmap_sem);
 		while (pinned < npages) {
-			ret = get_user_pages(work->task, mm,
-					     obj->userptr.ptr + pinned * PAGE_SIZE,
-					     npages - pinned,
-					     !obj->userptr.read_only, 0,
-					     pvec + pinned, NULL);
+			ret = get_user_pages_remote(work->task, mm,
+					obj->userptr.ptr + pinned * PAGE_SIZE,
+					npages - pinned,
+					!obj->userptr.read_only, 0,
+					pvec + pinned, NULL);
 			if (ret < 0)
 				break;
 
@@ -677,7 +677,7 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
 			set_page_dirty(page);
 
 		mark_page_accessed(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 	obj->dirty = 0;
 
diff --git a/drivers/gpu/drm/imx/dw_hdmi-imx.c b/drivers/gpu/drm/imx/dw_hdmi-imx.c
index 2a95d10e9d92..a24631fdf4ad 100644
--- a/drivers/gpu/drm/imx/dw_hdmi-imx.c
+++ b/drivers/gpu/drm/imx/dw_hdmi-imx.c
@@ -225,8 +225,6 @@ static int dw_hdmi_imx_bind(struct device *dev, struct device *master,
 	if (!iores)
 		return -ENXIO;
 
-	platform_set_drvdata(pdev, hdmi);
-
 	encoder->possible_crtcs = drm_of_find_possible_crtcs(drm, dev->of_node);
 	/*
 	 * If we failed to find the CRTC(s) which this encoder is
@@ -245,7 +243,16 @@ static int dw_hdmi_imx_bind(struct device *dev, struct device *master,
 	drm_encoder_init(drm, encoder, &dw_hdmi_imx_encoder_funcs,
 			 DRM_MODE_ENCODER_TMDS, NULL);
 
-	return dw_hdmi_bind(dev, master, data, encoder, iores, irq, plat_data);
+	ret = dw_hdmi_bind(dev, master, data, encoder, iores, irq, plat_data);
+
+	/*
+	 * If dw_hdmi_bind() fails we'll never call dw_hdmi_unbind(),
+	 * which would have called the encoder cleanup.  Do it manually.
+	 */
+	if (ret)
+		drm_encoder_cleanup(encoder);
+
+	return ret;
 }
 
 static void dw_hdmi_imx_unbind(struct device *dev, struct device *master,
diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c
index 9876e0f0c3e1..e26dcdec2aba 100644
--- a/drivers/gpu/drm/imx/imx-drm-core.c
+++ b/drivers/gpu/drm/imx/imx-drm-core.c
@@ -326,7 +326,6 @@ int imx_drm_add_crtc(struct drm_device *drm, struct drm_crtc *crtc,
 {
 	struct imx_drm_device *imxdrm = drm->dev_private;
 	struct imx_drm_crtc *imx_drm_crtc;
-	int ret;
 
 	/*
 	 * The vblank arrays are dimensioned by MAX_CRTC - we can't
@@ -351,10 +350,6 @@ int imx_drm_add_crtc(struct drm_device *drm, struct drm_crtc *crtc,
 
 	*new_crtc = imx_drm_crtc;
 
-	ret = drm_mode_crtc_set_gamma_size(imx_drm_crtc->crtc, 256);
-	if (ret)
-		goto err_register;
-
 	drm_crtc_helper_add(crtc,
 			imx_drm_crtc->imx_drm_helper_funcs.crtc_helper_funcs);
 
@@ -362,11 +357,6 @@ int imx_drm_add_crtc(struct drm_device *drm, struct drm_crtc *crtc,
 			imx_drm_crtc->imx_drm_helper_funcs.crtc_funcs, NULL);
 
 	return 0;
-
-err_register:
-	imxdrm->crtc[--imxdrm->pipes] = NULL;
-	kfree(imx_drm_crtc);
-	return ret;
 }
 EXPORT_SYMBOL_GPL(imx_drm_add_crtc);
 
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c
index 588827844f30..681ec6eb77d9 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.c
+++ b/drivers/gpu/drm/imx/ipuv3-plane.c
@@ -72,22 +72,101 @@ static inline int calc_bandwidth(int width, int height, unsigned int vref)
 int ipu_plane_set_base(struct ipu_plane *ipu_plane, struct drm_framebuffer *fb,
 		       int x, int y)
 {
-	struct drm_gem_cma_object *cma_obj;
-	unsigned long eba;
-	int active;
-
-	cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
-	if (!cma_obj) {
-		DRM_DEBUG_KMS("entry is null.\n");
-		return -EFAULT;
+	struct drm_gem_cma_object *cma_obj[3];
+	unsigned long eba, ubo, vbo;
+	int active, i;
+
+	for (i = 0; i < drm_format_num_planes(fb->pixel_format); i++) {
+		cma_obj[i] = drm_fb_cma_get_gem_obj(fb, i);
+		if (!cma_obj[i]) {
+			DRM_DEBUG_KMS("plane %d entry is null.\n", i);
+			return -EFAULT;
+		}
 	}
 
-	dev_dbg(ipu_plane->base.dev->dev, "phys = %pad, x = %d, y = %d",
-		&cma_obj->paddr, x, y);
-
-	eba = cma_obj->paddr + fb->offsets[0] +
+	eba = cma_obj[0]->paddr + fb->offsets[0] +
 	      fb->pitches[0] * y + (fb->bits_per_pixel >> 3) * x;
 
+	if (eba & 0x7) {
+		DRM_DEBUG_KMS("base address must be a multiple of 8.\n");
+		return -EINVAL;
+	}
+
+	if (fb->pitches[0] < 1 || fb->pitches[0] > 16384) {
+		DRM_DEBUG_KMS("pitches out of range.\n");
+		return -EINVAL;
+	}
+
+	if (ipu_plane->enabled && fb->pitches[0] != ipu_plane->stride[0]) {
+		DRM_DEBUG_KMS("pitches must not change while plane is enabled.\n");
+		return -EINVAL;
+	}
+
+	ipu_plane->stride[0] = fb->pitches[0];
+
+	switch (fb->pixel_format) {
+	case DRM_FORMAT_YUV420:
+	case DRM_FORMAT_YVU420:
+		/*
+		 * Multiplanar formats have to meet the following restrictions:
+		 * - The (up to) three plane addresses are EBA, EBA+UBO, EBA+VBO
+		 * - EBA, UBO and VBO are a multiple of 8
+		 * - UBO and VBO are unsigned and not larger than 0xfffff8
+		 * - Only EBA may be changed while scanout is active
+		 * - The strides of U and V planes must be identical.
+		 */
+		ubo = cma_obj[1]->paddr + fb->offsets[1] +
+		      fb->pitches[1] * y / 2 + x / 2 - eba;
+		vbo = cma_obj[2]->paddr + fb->offsets[2] +
+		      fb->pitches[2] * y / 2 + x / 2 - eba;
+
+		if ((ubo & 0x7) || (vbo & 0x7)) {
+			DRM_DEBUG_KMS("U/V buffer offsets must be a multiple of 8.\n");
+			return -EINVAL;
+		}
+
+		if ((ubo > 0xfffff8) || (vbo > 0xfffff8)) {
+			DRM_DEBUG_KMS("U/V buffer offsets must be positive and not larger than 0xfffff8.\n");
+			return -EINVAL;
+		}
+
+		if (ipu_plane->enabled && ((ipu_plane->u_offset != ubo) ||
+					   (ipu_plane->v_offset != vbo))) {
+			DRM_DEBUG_KMS("U/V buffer offsets must not change while plane is enabled.\n");
+			return -EINVAL;
+		}
+
+		if (fb->pitches[1] != fb->pitches[2]) {
+			DRM_DEBUG_KMS("U/V pitches must be identical.\n");
+			return -EINVAL;
+		}
+
+		if (fb->pitches[1] < 1 || fb->pitches[1] > 16384) {
+			DRM_DEBUG_KMS("U/V pitches out of range.\n");
+			return -EINVAL;
+		}
+
+		if (ipu_plane->enabled &&
+		    (ipu_plane->stride[1] != fb->pitches[1])) {
+			DRM_DEBUG_KMS("U/V pitches must not change while plane is enabled.\n");
+			return -EINVAL;
+		}
+
+		ipu_plane->u_offset = ubo;
+		ipu_plane->v_offset = vbo;
+		ipu_plane->stride[1] = fb->pitches[1];
+
+		dev_dbg(ipu_plane->base.dev->dev,
+			"phys = %pad %pad %pad, x = %d, y = %d",
+			&cma_obj[0]->paddr, &cma_obj[1]->paddr,
+			&cma_obj[2]->paddr, x, y);
+		break;
+	default:
+		dev_dbg(ipu_plane->base.dev->dev, "phys = %pad, x = %d, y = %d",
+			&cma_obj[0]->paddr, x, y);
+		break;
+	}
+
 	if (ipu_plane->enabled) {
 		active = ipu_idmac_get_current_buffer(ipu_plane->ipu_ch);
 		ipu_cpmem_set_buffer(ipu_plane->ipu_ch, !active, eba);
@@ -201,12 +280,6 @@ int ipu_plane_mode_set(struct ipu_plane *ipu_plane, struct drm_crtc *crtc,
 		}
 	}
 
-	ret = ipu_dmfc_init_channel(ipu_plane->dmfc, crtc_w);
-	if (ret) {
-		dev_err(dev, "initializing dmfc channel failed with %d\n", ret);
-		return ret;
-	}
-
 	ret = ipu_dmfc_alloc_bandwidth(ipu_plane->dmfc,
 			calc_bandwidth(crtc_w, crtc_h,
 				       calc_vref(mode)), 64);
@@ -215,6 +288,8 @@ int ipu_plane_mode_set(struct ipu_plane *ipu_plane, struct drm_crtc *crtc,
 		return ret;
 	}
 
+	ipu_dmfc_config_wait4eot(ipu_plane->dmfc, crtc_w);
+
 	ipu_cpmem_zero(ipu_plane->ipu_ch);
 	ipu_cpmem_set_resolution(ipu_plane->ipu_ch, src_w, src_h);
 	ret = ipu_cpmem_set_fmt(ipu_plane->ipu_ch, fb->pixel_format);
@@ -233,6 +308,18 @@ int ipu_plane_mode_set(struct ipu_plane *ipu_plane, struct drm_crtc *crtc,
 	if (interlaced)
 		ipu_cpmem_interlaced_scan(ipu_plane->ipu_ch, fb->pitches[0]);
 
+	if (fb->pixel_format == DRM_FORMAT_YUV420) {
+		ipu_cpmem_set_yuv_planar_full(ipu_plane->ipu_ch,
+					      ipu_plane->stride[1],
+					      ipu_plane->u_offset,
+					      ipu_plane->v_offset);
+	} else if (fb->pixel_format == DRM_FORMAT_YVU420) {
+		ipu_cpmem_set_yuv_planar_full(ipu_plane->ipu_ch,
+					      ipu_plane->stride[1],
+					      ipu_plane->v_offset,
+					      ipu_plane->u_offset);
+	}
+
 	ipu_plane->w = src_w;
 	ipu_plane->h = src_h;
 
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.h b/drivers/gpu/drm/imx/ipuv3-plane.h
index 3a443b413c60..4448fd4ad4eb 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.h
+++ b/drivers/gpu/drm/imx/ipuv3-plane.h
@@ -29,6 +29,10 @@ struct ipu_plane {
 	int			w;
 	int			h;
 
+	unsigned int		u_offset;
+	unsigned int		v_offset;
+	unsigned int		stride[2];
+
 	bool			enabled;
 };
 
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.h b/drivers/gpu/drm/msm/hdmi/hdmi.h
index b04a64664673..65428cf233ce 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.h
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.h
@@ -196,7 +196,7 @@ void __exit msm_hdmi_phy_driver_unregister(void);
 int msm_hdmi_pll_8960_init(struct platform_device *pdev);
 int msm_hdmi_pll_8996_init(struct platform_device *pdev);
 #else
-static inline int msm_hdmi_pll_8960_init(struct platform_device *pdev);
+static inline int msm_hdmi_pll_8960_init(struct platform_device *pdev)
 {
 	return -ENODEV;
 }
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index d52910e2c26c..c03b96709179 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -467,9 +467,6 @@ static void msm_preclose(struct drm_device *dev, struct drm_file *file)
 	struct msm_file_private *ctx = file->driver_priv;
 	struct msm_kms *kms = priv->kms;
 
-	if (kms)
-		kms->funcs->preclose(kms, file);
-
 	mutex_lock(&dev->struct_mutex);
 	if (ctx == priv->lastctx)
 		priv->lastctx = NULL;
diff --git a/drivers/gpu/drm/msm/msm_kms.h b/drivers/gpu/drm/msm/msm_kms.h
index 9bcabaada179..e32222c3d44f 100644
--- a/drivers/gpu/drm/msm/msm_kms.h
+++ b/drivers/gpu/drm/msm/msm_kms.h
@@ -55,7 +55,6 @@ struct msm_kms_funcs {
 			struct drm_encoder *slave_encoder,
 			bool is_cmd_mode);
 	/* cleanup: */
-	void (*preclose)(struct msm_kms *kms, struct drm_file *file);
 	void (*destroy)(struct msm_kms *kms);
 };
 
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h b/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h
index 16641cec18a2..b5370cb56e3c 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h
@@ -11,6 +11,7 @@ struct nvkm_device_tegra {
 
 	struct reset_control *rst;
 	struct clk *clk;
+	struct clk *clk_ref;
 	struct clk *clk_pwr;
 
 	struct regulator *vdd;
@@ -36,6 +37,10 @@ struct nvkm_device_tegra_func {
 	 * bypassed). A value of 0 means an IOMMU is never used.
 	 */
 	u8 iommu_bit;
+	/*
+	 * Whether the chip requires a reference clock
+	 */
+	bool require_ref_clk;
 };
 
 int nvkm_device_tegra_new(const struct nvkm_device_tegra_func *,
diff --git a/drivers/gpu/drm/nouveau/nouveau_platform.c b/drivers/gpu/drm/nouveau/nouveau_platform.c
index 2dfe58af12e4..4c4cc2260257 100644
--- a/drivers/gpu/drm/nouveau/nouveau_platform.c
+++ b/drivers/gpu/drm/nouveau/nouveau_platform.c
@@ -55,6 +55,11 @@ static const struct nvkm_device_tegra_func gk20a_platform_data = {
 	.iommu_bit = 34,
 };
 
+static const struct nvkm_device_tegra_func gm20b_platform_data = {
+	.iommu_bit = 34,
+	.require_ref_clk = true,
+};
+
 static const struct of_device_id nouveau_platform_match[] = {
 	{
 		.compatible = "nvidia,gk20a",
@@ -62,7 +67,7 @@ static const struct of_device_id nouveau_platform_match[] = {
 	},
 	{
 		.compatible = "nvidia,gm20b",
-		.data = &gk20a_platform_data,
+		.data = &gm20b_platform_data,
 	},
 	{ }
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
index 9afa5f3e3c1c..ec12efb4689a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
@@ -35,6 +35,11 @@ nvkm_device_tegra_power_up(struct nvkm_device_tegra *tdev)
 	ret = clk_prepare_enable(tdev->clk);
 	if (ret)
 		goto err_clk;
+	if (tdev->clk_ref) {
+		ret = clk_prepare_enable(tdev->clk_ref);
+		if (ret)
+			goto err_clk_ref;
+	}
 	ret = clk_prepare_enable(tdev->clk_pwr);
 	if (ret)
 		goto err_clk_pwr;
@@ -57,6 +62,9 @@ nvkm_device_tegra_power_up(struct nvkm_device_tegra *tdev)
 err_clamp:
 	clk_disable_unprepare(tdev->clk_pwr);
 err_clk_pwr:
+	if (tdev->clk_ref)
+		clk_disable_unprepare(tdev->clk_ref);
+err_clk_ref:
 	clk_disable_unprepare(tdev->clk);
 err_clk:
 	regulator_disable(tdev->vdd);
@@ -71,6 +79,8 @@ nvkm_device_tegra_power_down(struct nvkm_device_tegra *tdev)
 	udelay(10);
 
 	clk_disable_unprepare(tdev->clk_pwr);
+	if (tdev->clk_ref)
+		clk_disable_unprepare(tdev->clk_ref);
 	clk_disable_unprepare(tdev->clk);
 	udelay(10);
 
@@ -274,6 +284,13 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func,
 		goto free;
 	}
 
+	if (func->require_ref_clk)
+		tdev->clk_ref = devm_clk_get(&pdev->dev, "ref");
+	if (IS_ERR(tdev->clk_ref)) {
+		ret = PTR_ERR(tdev->clk_ref);
+		goto free;
+	}
+
 	tdev->clk_pwr = devm_clk_get(&pdev->dev, "pwr");
 	if (IS_ERR(tdev->clk_pwr)) {
 		ret = PTR_ERR(tdev->clk_pwr);
diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
index 9f94576c435d..de275a5be1db 100644
--- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
+++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
@@ -597,10 +597,9 @@ static int omap_dmm_remove(struct platform_device *dev)
 
 		kfree(omap_dmm->engines);
 		if (omap_dmm->refill_va)
-			dma_free_writecombine(omap_dmm->dev,
-				REFILL_BUFFER_SIZE * omap_dmm->num_engines,
-				omap_dmm->refill_va,
-				omap_dmm->refill_pa);
+			dma_free_wc(omap_dmm->dev,
+				    REFILL_BUFFER_SIZE * omap_dmm->num_engines,
+				    omap_dmm->refill_va, omap_dmm->refill_pa);
 		if (omap_dmm->dummy_page)
 			__free_page(omap_dmm->dummy_page);
 
@@ -725,9 +724,9 @@ static int omap_dmm_probe(struct platform_device *dev)
 	omap_dmm->dummy_pa = page_to_phys(omap_dmm->dummy_page);
 
 	/* alloc refill memory */
-	omap_dmm->refill_va = dma_alloc_writecombine(&dev->dev,
-				REFILL_BUFFER_SIZE * omap_dmm->num_engines,
-				&omap_dmm->refill_pa, GFP_KERNEL);
+	omap_dmm->refill_va = dma_alloc_wc(&dev->dev,
+					   REFILL_BUFFER_SIZE * omap_dmm->num_engines,
+					   &omap_dmm->refill_pa, GFP_KERNEL);
 	if (!omap_dmm->refill_va) {
 		dev_err(&dev->dev, "could not allocate refill memory\n");
 		goto fail;
diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c
index cc36a8dc9bd4..907154f5b67c 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem.c
@@ -1313,8 +1313,8 @@ void omap_gem_free_object(struct drm_gem_object *obj)
 	}
 
 	if (omap_obj->flags & OMAP_BO_MEM_DMA_API) {
-		dma_free_writecombine(dev->dev, obj->size,
-				omap_obj->vaddr, omap_obj->paddr);
+		dma_free_wc(dev->dev, obj->size, omap_obj->vaddr,
+			    omap_obj->paddr);
 	} else if (omap_obj->vaddr) {
 		vunmap(omap_obj->vaddr);
 	} else if (obj->import_attach) {
@@ -1412,9 +1412,9 @@ struct drm_gem_object *omap_gem_new(struct drm_device *dev,
 
 	/* Allocate memory if needed. */
 	if (flags & OMAP_BO_MEM_DMA_API) {
-		omap_obj->vaddr = dma_alloc_writecombine(dev->dev, size,
-							 &omap_obj->paddr,
-							 GFP_KERNEL);
+		omap_obj->vaddr = dma_alloc_wc(dev->dev, size,
+					       &omap_obj->paddr,
+					       GFP_KERNEL);
 		if (!omap_obj->vaddr)
 			goto err_release;
 	}
diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
index 3cf8aab23a39..af267c35d813 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
@@ -97,11 +97,12 @@ static int omap_gem_dmabuf_begin_cpu_access(struct dma_buf *buffer,
 	return omap_gem_get_pages(obj, &pages, true);
 }
 
-static void omap_gem_dmabuf_end_cpu_access(struct dma_buf *buffer,
-		enum dma_data_direction dir)
+static int omap_gem_dmabuf_end_cpu_access(struct dma_buf *buffer,
+					  enum dma_data_direction dir)
 {
 	struct drm_gem_object *obj = buffer->priv;
 	omap_gem_put_pages(obj);
+	return 0;
 }
 
 
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 2164c999052c..ceb20486dacf 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -847,6 +847,7 @@ static const struct drm_display_mode innolux_g121x1_l03_mode = {
 	.vsync_end = 768 + 38 + 1,
 	.vtotal = 768 + 38 + 1 + 0,
 	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
 };
 
 static const struct panel_desc innolux_g121x1_l03 = {
@@ -982,6 +983,29 @@ static const struct panel_desc lg_lb070wv8 = {
 	.bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
 };
 
+static const struct drm_display_mode lg_lp120up1_mode = {
+	.clock = 162300,
+	.hdisplay = 1920,
+	.hsync_start = 1920 + 40,
+	.hsync_end = 1920 + 40 + 40,
+	.htotal = 1920 + 40 + 40+ 80,
+	.vdisplay = 1280,
+	.vsync_start = 1280 + 4,
+	.vsync_end = 1280 + 4 + 4,
+	.vtotal = 1280 + 4 + 4 + 12,
+	.vrefresh = 60,
+};
+
+static const struct panel_desc lg_lp120up1 = {
+	.modes = &lg_lp120up1_mode,
+	.num_modes = 1,
+	.bpc = 8,
+	.size = {
+		.width = 267,
+		.height = 183,
+	},
+};
+
 static const struct drm_display_mode lg_lp129qe_mode = {
 	.clock = 285250,
 	.hdisplay = 2560,
@@ -1177,6 +1201,42 @@ static const struct panel_desc shelly_sca07010_bfn_lnn = {
 	.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
 };
 
+static const struct display_timing urt_umsh_8596md_timing = {
+	.pixelclock = { 33260000, 33260000, 33260000 },
+	.hactive = { 800, 800, 800 },
+	.hfront_porch = { 41, 41, 41 },
+	.hback_porch = { 216 - 128, 216 - 128, 216 - 128 },
+	.hsync_len = { 71, 128, 128 },
+	.vactive = { 480, 480, 480 },
+	.vfront_porch = { 10, 10, 10 },
+	.vback_porch = { 35 - 2, 35 - 2, 35 - 2 },
+	.vsync_len = { 2, 2, 2 },
+	.flags = DISPLAY_FLAGS_DE_HIGH | DISPLAY_FLAGS_PIXDATA_NEGEDGE |
+		DISPLAY_FLAGS_HSYNC_LOW | DISPLAY_FLAGS_VSYNC_LOW,
+};
+
+static const struct panel_desc urt_umsh_8596md_lvds = {
+	.timings = &urt_umsh_8596md_timing,
+	.num_timings = 1,
+	.bpc = 6,
+	.size = {
+		.width = 152,
+		.height = 91,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG,
+};
+
+static const struct panel_desc urt_umsh_8596md_parallel = {
+	.timings = &urt_umsh_8596md_timing,
+	.num_timings = 1,
+	.bpc = 6,
+	.size = {
+		.width = 152,
+		.height = 91,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
+};
+
 static const struct of_device_id platform_of_match[] = {
 	{
 		.compatible = "ampire,am800480r3tmqwa1h",
@@ -1257,6 +1317,9 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "lg,lb070wv8",
 		.data = &lg_lb070wv8,
 	}, {
+		.compatible = "lg,lp120up1",
+		.data = &lg_lp120up1,
+	}, {
 		.compatible = "lg,lp129qe",
 		.data = &lg_lp129qe,
 	}, {
@@ -1281,6 +1344,24 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "shelly,sca07010-bfn-lnn",
 		.data = &shelly_sca07010_bfn_lnn,
 	}, {
+		.compatible = "urt,umsh-8596md-t",
+		.data = &urt_umsh_8596md_parallel,
+	}, {
+		.compatible = "urt,umsh-8596md-1t",
+		.data = &urt_umsh_8596md_parallel,
+	}, {
+		.compatible = "urt,umsh-8596md-7t",
+		.data = &urt_umsh_8596md_parallel,
+	}, {
+		.compatible = "urt,umsh-8596md-11t",
+		.data = &urt_umsh_8596md_lvds,
+	}, {
+		.compatible = "urt,umsh-8596md-19t",
+		.data = &urt_umsh_8596md_lvds,
+	}, {
+		.compatible = "urt,umsh-8596md-20t",
+		.data = &urt_umsh_8596md_parallel,
+	}, {
 		/* sentinel */
 	}
 };
diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c
index ec1593a6a561..f66c33dd21a3 100644
--- a/drivers/gpu/drm/radeon/atom.c
+++ b/drivers/gpu/drm/radeon/atom.c
@@ -66,9 +66,10 @@ int atom_debug = 0;
 static int atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t * params);
 int atom_execute_table(struct atom_context *ctx, int index, uint32_t * params);
 
-static uint32_t atom_arg_mask[8] =
-    { 0xFFFFFFFF, 0xFFFF, 0xFFFF00, 0xFFFF0000, 0xFF, 0xFF00, 0xFF0000,
-0xFF000000 };
+static uint32_t atom_arg_mask[8] = {
+	0xFFFFFFFF, 0x0000FFFF, 0x00FFFF00, 0xFFFF0000,
+	0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000
+};
 static int atom_arg_shift[8] = { 0, 0, 8, 16, 0, 8, 16, 24 };
 
 static int atom_dst_to_src[8][4] = {
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index 801dd60ac192..b80b08f71cb4 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -275,13 +275,15 @@ void atombios_crtc_dpms(struct drm_crtc *crtc, int mode)
 		if (ASIC_IS_DCE3(rdev) && !ASIC_IS_DCE6(rdev))
 			atombios_enable_crtc_memreq(crtc, ATOM_ENABLE);
 		atombios_blank_crtc(crtc, ATOM_DISABLE);
-		drm_vblank_post_modeset(dev, radeon_crtc->crtc_id);
+		if (dev->num_crtcs > radeon_crtc->crtc_id)
+			drm_vblank_on(dev, radeon_crtc->crtc_id);
 		radeon_crtc_load_lut(crtc);
 		break;
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
 	case DRM_MODE_DPMS_OFF:
-		drm_vblank_pre_modeset(dev, radeon_crtc->crtc_id);
+		if (dev->num_crtcs > radeon_crtc->crtc_id)
+			drm_vblank_off(dev, radeon_crtc->crtc_id);
 		if (radeon_crtc->enabled)
 			atombios_blank_crtc(crtc, ATOM_ENABLE);
 		if (ASIC_IS_DCE3(rdev) && !ASIC_IS_DCE6(rdev))
@@ -1665,11 +1667,11 @@ int atombios_crtc_set_base(struct drm_crtc *crtc, int x, int y,
 }
 
 int atombios_crtc_set_base_atomic(struct drm_crtc *crtc,
-                                  struct drm_framebuffer *fb,
+				  struct drm_framebuffer *fb,
 				  int x, int y, enum mode_set_atomic state)
 {
-       struct drm_device *dev = crtc->dev;
-       struct radeon_device *rdev = dev->dev_private;
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
 
 	if (ASIC_IS_DCE4(rdev))
 		return dce4_crtc_do_set_base(crtc, fb, x, y, 1);
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index 6af832545bc5..afa9db1dc0e3 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -37,10 +37,10 @@
 #define DP_DPCD_SIZE DP_RECEIVER_CAP_SIZE
 
 static char *voltage_names[] = {
-        "0.4V", "0.6V", "0.8V", "1.2V"
+	"0.4V", "0.6V", "0.8V", "1.2V"
 };
 static char *pre_emph_names[] = {
-        "0dB", "3.5dB", "6dB", "9.5dB"
+	"0dB", "3.5dB", "6dB", "9.5dB"
 };
 
 /***** radeon AUX functions *****/
diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index 1603751b1164..edd05cdb0cd8 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -892,8 +892,6 @@ atombios_dig_encoder_setup2(struct drm_encoder *encoder, int action, int panel_m
 			else
 				args.v1.ucLaneNum = 4;
 
-			if (ENCODER_MODE_IS_DP(args.v1.ucEncoderMode) && (dp_clock == 270000))
-				args.v1.ucConfig |= ATOM_ENCODER_CONFIG_DPLINKRATE_2_70GHZ;
 			switch (radeon_encoder->encoder_id) {
 			case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
 				args.v1.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER1;
@@ -910,6 +908,10 @@ atombios_dig_encoder_setup2(struct drm_encoder *encoder, int action, int panel_m
 				args.v1.ucConfig |= ATOM_ENCODER_CONFIG_LINKB;
 			else
 				args.v1.ucConfig |= ATOM_ENCODER_CONFIG_LINKA;
+
+			if (ENCODER_MODE_IS_DP(args.v1.ucEncoderMode) && (dp_clock == 270000))
+				args.v1.ucConfig |= ATOM_ENCODER_CONFIG_DPLINKRATE_2_70GHZ;
+
 			break;
 		case 2:
 		case 3:
diff --git a/drivers/gpu/drm/radeon/btc_dpm.c b/drivers/gpu/drm/radeon/btc_dpm.c
index 69556f5e247e..38e5123708e7 100644
--- a/drivers/gpu/drm/radeon/btc_dpm.c
+++ b/drivers/gpu/drm/radeon/btc_dpm.c
@@ -1163,12 +1163,11 @@ u32 btc_valid_sclk[40] =
 	155000, 160000, 165000, 170000, 175000, 180000, 185000, 190000, 195000, 200000
 };
 
-static const struct radeon_blacklist_clocks btc_blacklist_clocks[] =
-{
-        { 10000, 30000, RADEON_SCLK_UP },
-        { 15000, 30000, RADEON_SCLK_UP },
-        { 20000, 30000, RADEON_SCLK_UP },
-        { 25000, 30000, RADEON_SCLK_UP }
+static const struct radeon_blacklist_clocks btc_blacklist_clocks[] = {
+	{ 10000, 30000, RADEON_SCLK_UP },
+	{ 15000, 30000, RADEON_SCLK_UP },
+	{ 20000, 30000, RADEON_SCLK_UP },
+	{ 25000, 30000, RADEON_SCLK_UP }
 };
 
 void btc_get_max_clock_from_voltage_dependency_table(struct radeon_clock_voltage_dependency_table *table,
@@ -1637,14 +1636,14 @@ static int btc_init_smc_table(struct radeon_device *rdev,
 	cypress_populate_smc_voltage_tables(rdev, table);
 
 	switch (rdev->pm.int_thermal_type) {
-        case THERMAL_TYPE_EVERGREEN:
-        case THERMAL_TYPE_EMC2103_WITH_INTERNAL:
+	case THERMAL_TYPE_EVERGREEN:
+	case THERMAL_TYPE_EMC2103_WITH_INTERNAL:
 		table->thermalProtectType = PPSMC_THERMAL_PROTECT_TYPE_INTERNAL;
 		break;
-        case THERMAL_TYPE_NONE:
+	case THERMAL_TYPE_NONE:
 		table->thermalProtectType = PPSMC_THERMAL_PROTECT_TYPE_NONE;
 		break;
-        default:
+	default:
 		table->thermalProtectType = PPSMC_THERMAL_PROTECT_TYPE_EXTERNAL;
 		break;
 	}
@@ -1860,37 +1859,37 @@ static bool btc_check_s0_mc_reg_index(u16 in_reg, u16 *out_reg)
 	case MC_SEQ_RAS_TIMING >> 2:
 		*out_reg = MC_SEQ_RAS_TIMING_LP >> 2;
 		break;
-        case MC_SEQ_CAS_TIMING >> 2:
+	case MC_SEQ_CAS_TIMING >> 2:
 		*out_reg = MC_SEQ_CAS_TIMING_LP >> 2;
 		break;
-        case MC_SEQ_MISC_TIMING >> 2:
+	case MC_SEQ_MISC_TIMING >> 2:
 		*out_reg = MC_SEQ_MISC_TIMING_LP >> 2;
 		break;
-        case MC_SEQ_MISC_TIMING2 >> 2:
+	case MC_SEQ_MISC_TIMING2 >> 2:
 		*out_reg = MC_SEQ_MISC_TIMING2_LP >> 2;
 		break;
-        case MC_SEQ_RD_CTL_D0 >> 2:
+	case MC_SEQ_RD_CTL_D0 >> 2:
 		*out_reg = MC_SEQ_RD_CTL_D0_LP >> 2;
 		break;
-        case MC_SEQ_RD_CTL_D1 >> 2:
+	case MC_SEQ_RD_CTL_D1 >> 2:
 		*out_reg = MC_SEQ_RD_CTL_D1_LP >> 2;
 		break;
-        case MC_SEQ_WR_CTL_D0 >> 2:
+	case MC_SEQ_WR_CTL_D0 >> 2:
 		*out_reg = MC_SEQ_WR_CTL_D0_LP >> 2;
 		break;
-        case MC_SEQ_WR_CTL_D1 >> 2:
+	case MC_SEQ_WR_CTL_D1 >> 2:
 		*out_reg = MC_SEQ_WR_CTL_D1_LP >> 2;
 		break;
-        case MC_PMG_CMD_EMRS >> 2:
+	case MC_PMG_CMD_EMRS >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_EMRS_LP >> 2;
 		break;
-        case MC_PMG_CMD_MRS >> 2:
+	case MC_PMG_CMD_MRS >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_MRS_LP >> 2;
 		break;
-        case MC_PMG_CMD_MRS1 >> 2:
+	case MC_PMG_CMD_MRS1 >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_MRS1_LP >> 2;
 		break;
-        default:
+	default:
 		result = false;
 		break;
 	}
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c
index 4a09947be244..35e0fc3ae8a7 100644
--- a/drivers/gpu/drm/radeon/ci_dpm.c
+++ b/drivers/gpu/drm/radeon/ci_dpm.c
@@ -192,9 +192,9 @@ static void ci_fan_ctrl_set_default_mode(struct radeon_device *rdev);
 
 static struct ci_power_info *ci_get_pi(struct radeon_device *rdev)
 {
-        struct ci_power_info *pi = rdev->pm.dpm.priv;
+	struct ci_power_info *pi = rdev->pm.dpm.priv;
 
-        return pi;
+	return pi;
 }
 
 static struct ci_ps *ci_get_ps(struct radeon_ps *rps)
@@ -1632,7 +1632,7 @@ static int ci_notify_hw_of_power_source(struct radeon_device *rdev,
 	else
 		power_limit = (u32)(cac_tdp_table->battery_power_limit * 256);
 
-        ci_set_power_limit(rdev, power_limit);
+	ci_set_power_limit(rdev, power_limit);
 
 	if (pi->caps_automatic_dc_transition) {
 		if (ac_power)
@@ -2017,9 +2017,9 @@ static void ci_enable_display_gap(struct radeon_device *rdev)
 {
 	u32 tmp = RREG32_SMC(CG_DISPLAY_GAP_CNTL);
 
-        tmp &= ~(DISP_GAP_MASK | DISP_GAP_MCHG_MASK);
-        tmp |= (DISP_GAP(R600_PM_DISPLAY_GAP_IGNORE) |
-                DISP_GAP_MCHG(R600_PM_DISPLAY_GAP_VBLANK));
+	tmp &= ~(DISP_GAP_MASK | DISP_GAP_MCHG_MASK);
+	tmp |= (DISP_GAP(R600_PM_DISPLAY_GAP_IGNORE) |
+		DISP_GAP_MCHG(R600_PM_DISPLAY_GAP_VBLANK));
 
 	WREG32_SMC(CG_DISPLAY_GAP_CNTL, tmp);
 }
@@ -2938,8 +2938,8 @@ static int ci_populate_single_memory_level(struct radeon_device *rdev,
 
 	memory_level->MinVddc = cpu_to_be32(memory_level->MinVddc * VOLTAGE_SCALE);
 	memory_level->MinVddcPhases = cpu_to_be32(memory_level->MinVddcPhases);
-        memory_level->MinVddci = cpu_to_be32(memory_level->MinVddci * VOLTAGE_SCALE);
-        memory_level->MinMvdd = cpu_to_be32(memory_level->MinMvdd * VOLTAGE_SCALE);
+	memory_level->MinVddci = cpu_to_be32(memory_level->MinVddci * VOLTAGE_SCALE);
+	memory_level->MinMvdd = cpu_to_be32(memory_level->MinMvdd * VOLTAGE_SCALE);
 
 	memory_level->MclkFrequency = cpu_to_be32(memory_level->MclkFrequency);
 	memory_level->ActivityLevel = cpu_to_be16(memory_level->ActivityLevel);
@@ -3152,7 +3152,7 @@ static int ci_calculate_sclk_params(struct radeon_device *rdev,
 
 	spll_func_cntl_3 &= ~SPLL_FB_DIV_MASK;
 	spll_func_cntl_3 |= SPLL_FB_DIV(fbdiv);
-        spll_func_cntl_3 |= SPLL_DITHEN;
+	spll_func_cntl_3 |= SPLL_DITHEN;
 
 	if (pi->caps_sclk_ss_support) {
 		struct radeon_atom_ss ss;
@@ -3229,7 +3229,7 @@ static int ci_populate_single_graphic_level(struct radeon_device *rdev,
 	graphic_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
 
 	graphic_level->Flags = cpu_to_be32(graphic_level->Flags);
-        graphic_level->MinVddc = cpu_to_be32(graphic_level->MinVddc * VOLTAGE_SCALE);
+	graphic_level->MinVddc = cpu_to_be32(graphic_level->MinVddc * VOLTAGE_SCALE);
 	graphic_level->MinVddcPhases = cpu_to_be32(graphic_level->MinVddcPhases);
 	graphic_level->SclkFrequency = cpu_to_be32(graphic_level->SclkFrequency);
 	graphic_level->ActivityLevel = cpu_to_be16(graphic_level->ActivityLevel);
@@ -4393,7 +4393,7 @@ static bool ci_check_s0_mc_reg_index(u16 in_reg, u16 *out_reg)
 		break;
 	case MC_SEQ_CAS_TIMING >> 2:
 		*out_reg = MC_SEQ_CAS_TIMING_LP >> 2;
-            break;
+		break;
 	case MC_SEQ_MISC_TIMING >> 2:
 		*out_reg = MC_SEQ_MISC_TIMING_LP >> 2;
 		break;
@@ -4625,7 +4625,7 @@ static int ci_initialize_mc_reg_table(struct radeon_device *rdev)
 	if (ret)
 		goto init_mc_done;
 
-        ret = ci_copy_vbios_mc_reg_table(table, ci_table);
+	ret = ci_copy_vbios_mc_reg_table(table, ci_table);
 	if (ret)
 		goto init_mc_done;
 
@@ -4916,7 +4916,7 @@ static int ci_set_private_data_variables_based_on_pptable(struct radeon_device *
 		allowed_mclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].clk;
 	rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.vddc =
 		allowed_sclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].v;
-        rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.vddci =
+	rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.vddci =
 		allowed_mclk_vddci_table->entries[allowed_mclk_vddci_table->count - 1].v;
 
 	return 0;
@@ -5517,7 +5517,7 @@ static int ci_parse_power_table(struct radeon_device *rdev)
 	struct _NonClockInfoArray *non_clock_info_array;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	u8 *power_state_offset;
 	struct ci_ps *ps;
@@ -5693,8 +5693,8 @@ int ci_dpm_init(struct radeon_device *rdev)
 		return ret;
 	}
 
-        pi->dll_default_on = false;
-        pi->sram_end = SMC_RAM_END;
+	pi->dll_default_on = false;
+	pi->sram_end = SMC_RAM_END;
 
 	pi->activity_target[0] = CISLAND_TARGETACTIVITY_DFLT;
 	pi->activity_target[1] = CISLAND_TARGETACTIVITY_DFLT;
@@ -5734,9 +5734,9 @@ int ci_dpm_init(struct radeon_device *rdev)
 	pi->caps_uvd_dpm = true;
 	pi->caps_vce_dpm = true;
 
-        ci_get_leakage_voltages(rdev);
-        ci_patch_dependency_tables_with_leakage(rdev);
-        ci_set_private_data_variables_based_on_pptable(rdev);
+	ci_get_leakage_voltages(rdev);
+	ci_patch_dependency_tables_with_leakage(rdev);
+	ci_set_private_data_variables_based_on_pptable(rdev);
 
 	rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries =
 		kzalloc(4 * sizeof(struct radeon_clock_voltage_dependency_entry), GFP_KERNEL);
@@ -5839,7 +5839,7 @@ int ci_dpm_init(struct radeon_device *rdev)
 			pi->vddci_control = CISLANDS_VOLTAGE_CONTROL_BY_SVID2;
 		else
 			rdev->pm.dpm.platform_caps &= ~ATOM_PP_PLATFORM_CAP_VDDCI_CONTROL;
-        }
+	}
 
 	if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_MVDDCONTROL) {
 		if (radeon_atom_is_voltage_gpio(rdev, VOLTAGE_TYPE_MVDDC, VOLTAGE_OBJ_GPIO_LUT))
@@ -5860,7 +5860,7 @@ int ci_dpm_init(struct radeon_device *rdev)
 #endif
 
 	if (atom_parse_data_header(rdev->mode_info.atom_context, index, &size,
-                                   &frev, &crev, &data_offset)) {
+				   &frev, &crev, &data_offset)) {
 		pi->caps_sclk_ss_support = true;
 		pi->caps_mclk_ss_support = true;
 		pi->dynamic_ss = true;
diff --git a/drivers/gpu/drm/radeon/ci_smc.c b/drivers/gpu/drm/radeon/ci_smc.c
index 35c6f648ba04..24760ee3063e 100644
--- a/drivers/gpu/drm/radeon/ci_smc.c
+++ b/drivers/gpu/drm/radeon/ci_smc.c
@@ -194,11 +194,11 @@ PPSMC_Result ci_wait_for_smc_inactive(struct radeon_device *rdev)
 		return PPSMC_Result_OK;
 
 	for (i = 0; i < rdev->usec_timeout; i++) {
-                tmp = RREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0);
-                if ((tmp & CKEN) == 0)
+		tmp = RREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0);
+		if ((tmp & CKEN) == 0)
 			break;
-                udelay(1);
-        }
+		udelay(1);
+	}
 
 	return PPSMC_Result_OK;
 }
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 06001400ce8b..8ac82df2efde 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -1712,7 +1712,7 @@ static void cik_init_golden_registers(struct radeon_device *rdev)
  */
 u32 cik_get_xclk(struct radeon_device *rdev)
 {
-        u32 reference_clock = rdev->clock.spll.reference_freq;
+	u32 reference_clock = rdev->clock.spll.reference_freq;
 
 	if (rdev->flags & RADEON_IS_IGP) {
 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
@@ -2343,9 +2343,13 @@ out:
  */
 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
 {
-	const u32 num_tile_mode_states = 32;
-	const u32 num_secondary_tile_mode_states = 16;
-	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
+	u32 *tile = rdev->config.cik.tile_mode_array;
+	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
+	const u32 num_tile_mode_states =
+			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
+	const u32 num_secondary_tile_mode_states =
+			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
+	u32 reg_offset, split_equal_to_row_size;
 	u32 num_pipe_configs;
 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
 		rdev->config.cik.max_shader_engines;
@@ -2367,1032 +2371,669 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev)
 	if (num_pipe_configs > 8)
 		num_pipe_configs = 16;
 
-	if (num_pipe_configs == 16) {
-		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
-				break;
-			case 1:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
-				break;
-			case 2:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
-				break;
-			case 3:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
-				break;
-			case 4:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 5:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-				break;
-			case 6:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
-				break;
-			case 7:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 8:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
-				break;
-			case 9:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
-				break;
-			case 10:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 11:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 12:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 13:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
-				break;
-			case 14:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 16:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 17:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 27:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
-				break;
-			case 28:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 29:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 30:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
-		}
-		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 1:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 2:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 3:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 4:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 5:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 6:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 8:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 9:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 10:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 11:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 12:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 13:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 14:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
-		}
-	} else if (num_pipe_configs == 8) {
-		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
-				break;
-			case 1:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
-				break;
-			case 2:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
-				break;
-			case 3:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
-				break;
-			case 4:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 5:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-				break;
-			case 6:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
-				break;
-			case 7:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 8:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
-				break;
-			case 9:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
-				break;
-			case 10:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 11:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 12:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 13:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
-				break;
-			case 14:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 16:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 17:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 27:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
-				break;
-			case 28:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 29:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 30:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
-		}
-		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 1:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 2:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 3:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 4:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 5:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 6:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 8:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 9:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 10:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 11:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 12:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 13:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 14:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
-		}
-	} else if (num_pipe_configs == 4) {
+	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+		tile[reg_offset] = 0;
+	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+		macrotile[reg_offset] = 0;
+
+	switch(num_pipe_configs) {
+	case 16:
+		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
+		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
+		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
+		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
+		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
+		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
+		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
+		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
+		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+
+		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+			   NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+			   NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			   NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			   NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			   NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			   NUM_BANKS(ADDR_SURF_4_BANK));
+		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			   NUM_BANKS(ADDR_SURF_2_BANK));
+		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+			   NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+			   NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			    NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			    NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			    NUM_BANKS(ADDR_SURF_4_BANK));
+		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			    NUM_BANKS(ADDR_SURF_2_BANK));
+		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			    NUM_BANKS(ADDR_SURF_2_BANK));
+
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
+		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
+		break;
+
+	case 8:
+		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
+		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
+		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
+		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
+		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
+		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
+		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
+		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
+		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+
+		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK));
+		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK));
+		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK));
+		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK));
+
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
+		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
+		break;
+
+	case 4:
 		if (num_rbs == 4) {
-			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-				switch (reg_offset) {
-				case 0:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
-					break;
-				case 1:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
-					break;
-				case 2:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
-					break;
-				case 3:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
-					break;
-				case 4:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 TILE_SPLIT(split_equal_to_row_size));
-					break;
-				case 5:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-					break;
-				case 6:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
-					break;
-				case 7:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 TILE_SPLIT(split_equal_to_row_size));
-					break;
-				case 8:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
-					break;
-				case 9:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
-					break;
-				case 10:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 11:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 12:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 13:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
-					break;
-				case 14:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 16:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 17:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 27:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
-					break;
-				case 28:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 29:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 30:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				default:
-					gb_tile_moden = 0;
-					break;
-				}
-				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
-				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
-			}
+		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
+		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
+		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
+		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
+		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
+		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
+		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
+		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
+		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+
 		} else if (num_rbs < 4) {
-			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-				switch (reg_offset) {
-				case 0:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
-					break;
-				case 1:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
-					break;
-				case 2:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
-					break;
-				case 3:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
-					break;
-				case 4:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 TILE_SPLIT(split_equal_to_row_size));
-					break;
-				case 5:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-					break;
-				case 6:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
-					break;
-				case 7:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 TILE_SPLIT(split_equal_to_row_size));
-					break;
-				case 8:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
-					break;
-				case 9:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
-					break;
-				case 10:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 11:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 12:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 13:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
-					break;
-				case 14:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 16:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 17:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 27:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
-					break;
-				case 28:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 29:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 30:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				default:
-					gb_tile_moden = 0;
-					break;
-				}
-				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
-				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
-			}
-		}
-		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 1:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 2:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 3:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 4:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 5:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 6:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 8:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 9:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 10:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 11:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 12:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 13:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 14:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
-		}
-	} else if (num_pipe_configs == 2) {
-		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
-				break;
-			case 1:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
-				break;
-			case 2:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
-				break;
-			case 3:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
-				break;
-			case 4:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 5:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-				break;
-			case 6:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
-				break;
-			case 7:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 8:
-				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
-						PIPE_CONFIG(ADDR_SURF_P2);
-				break;
-			case 9:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2));
-				break;
-			case 10:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 11:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 12:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 13:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
-				break;
-			case 14:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 16:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 17:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 27:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2));
-				break;
-			case 28:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 29:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 30:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
-		}
-		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 1:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 2:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 3:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 4:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 5:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 6:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 8:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 9:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 10:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 11:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 12:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 13:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 14:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
+		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
+		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
+		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
+		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
+		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
+		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
+		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
+		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
+		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
 		}
-	} else
+
+		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK));
+		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK));
+
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
+		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
+		break;
+
+	case 2:
+		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
+		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
+		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
+		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
+		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
+		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+			   PIPE_CONFIG(ADDR_SURF_P2);
+		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P2));
+		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P2));
+		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+
+		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
+		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
+		break;
+
+	default:
 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
+	}
 }
 
 /**
@@ -9709,13 +9350,13 @@ uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
 	mutex_lock(&rdev->gpu_clock_mutex);
 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
-	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
 	mutex_unlock(&rdev->gpu_clock_mutex);
 	return clock;
 }
 
 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
-                              u32 cntl_reg, u32 status_reg)
+			     u32 cntl_reg, u32 status_reg)
 {
 	int r, i;
 	struct atom_clock_dividers dividers;
diff --git a/drivers/gpu/drm/radeon/cypress_dpm.c b/drivers/gpu/drm/radeon/cypress_dpm.c
index ca058589ddef..a4edd0702718 100644
--- a/drivers/gpu/drm/radeon/cypress_dpm.c
+++ b/drivers/gpu/drm/radeon/cypress_dpm.c
@@ -1620,14 +1620,14 @@ static int cypress_init_smc_table(struct radeon_device *rdev,
 	cypress_populate_smc_voltage_tables(rdev, table);
 
 	switch (rdev->pm.int_thermal_type) {
-        case THERMAL_TYPE_EVERGREEN:
-        case THERMAL_TYPE_EMC2103_WITH_INTERNAL:
+	case THERMAL_TYPE_EVERGREEN:
+	case THERMAL_TYPE_EMC2103_WITH_INTERNAL:
 		table->thermalProtectType = PPSMC_THERMAL_PROTECT_TYPE_INTERNAL;
 		break;
-        case THERMAL_TYPE_NONE:
+	case THERMAL_TYPE_NONE:
 		table->thermalProtectType = PPSMC_THERMAL_PROTECT_TYPE_NONE;
 		break;
-        default:
+	default:
 		table->thermalProtectType = PPSMC_THERMAL_PROTECT_TYPE_EXTERNAL;
 		break;
 	}
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 2ad462896896..76c4bdf21b20 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -1140,7 +1140,7 @@ static int sumo_set_uvd_clock(struct radeon_device *rdev, u32 clock,
 	int r, i;
 	struct atom_clock_dividers dividers;
 
-        r = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
 					   clock, false, &dividers);
 	if (r)
 		return r;
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 46f87d4aaf31..9e93205eb9e4 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -1816,8 +1816,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 		}
 
 		offset = reloc->gpu_offset +
-		         (idx_value & 0xfffffff0) +
-		         ((u64)(tmp & 0xff) << 32);
+			 (idx_value & 0xfffffff0) +
+			 ((u64)(tmp & 0xff) << 32);
 
 		ib[idx + 0] = offset;
 		ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
@@ -1862,8 +1862,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 		}
 
 		offset = reloc->gpu_offset +
-		         idx_value +
-		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
+			 idx_value +
+			 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
 
 		ib[idx+0] = offset;
 		ib[idx+1] = upper_32_bits(offset) & 0xff;
@@ -1897,8 +1897,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 		}
 
 		offset = reloc->gpu_offset +
-		         idx_value +
-		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
+			 idx_value +
+			 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
 
 		ib[idx+0] = offset;
 		ib[idx+1] = upper_32_bits(offset) & 0xff;
@@ -1925,8 +1925,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 		}
 
 		offset = reloc->gpu_offset +
-		         radeon_get_ib_value(p, idx+1) +
-		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+			 radeon_get_ib_value(p, idx+1) +
+			 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
 
 		ib[idx+1] = offset;
 		ib[idx+2] = upper_32_bits(offset) & 0xff;
@@ -2098,8 +2098,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 			}
 
 			offset = reloc->gpu_offset +
-			         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
-			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+				 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
+				 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
 
 			ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc);
 			ib[idx+2] = upper_32_bits(offset) & 0xff;
@@ -2239,8 +2239,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 				return -EINVAL;
 			}
 			offset = reloc->gpu_offset +
-			         (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
-			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+				 (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
+				 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
 
 			ib[idx+1] = offset & 0xfffffff8;
 			ib[idx+2] = upper_32_bits(offset) & 0xff;
@@ -2261,8 +2261,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 		}
 
 		offset = reloc->gpu_offset +
-		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
-		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+			 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
+			 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
 
 		ib[idx+1] = offset & 0xfffffffc;
 		ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
@@ -2283,8 +2283,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 		}
 
 		offset = reloc->gpu_offset +
-		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
-		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+			 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
+			 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
 
 		ib[idx+1] = offset & 0xfffffffc;
 		ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c
index 3cf04a2f44bb..f766c967a284 100644
--- a/drivers/gpu/drm/radeon/evergreen_hdmi.c
+++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c
@@ -206,7 +206,7 @@ void evergreen_hdmi_write_sad_regs(struct drm_encoder *encoder,
  * build a AVI Info Frame
  */
 void evergreen_set_avi_packet(struct radeon_device *rdev, u32 offset,
-    unsigned char *buffer, size_t size)
+			      unsigned char *buffer, size_t size)
 {
 	uint8_t *frame = buffer + 3;
 
diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c
index 2d71da448487..d0240743a17c 100644
--- a/drivers/gpu/drm/radeon/kv_dpm.c
+++ b/drivers/gpu/drm/radeon/kv_dpm.c
@@ -2640,7 +2640,7 @@ static int kv_parse_power_table(struct radeon_device *rdev)
 	struct _NonClockInfoArray *non_clock_info_array;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	u8 *power_state_offset;
 	struct kv_ps *ps;
@@ -2738,7 +2738,7 @@ int kv_dpm_init(struct radeon_device *rdev)
 	for (i = 0; i < SUMO_MAX_HARDWARE_POWERLEVELS; i++)
 		pi->at[i] = TRINITY_AT_DFLT;
 
-        pi->sram_end = SMC_RAM_END;
+	pi->sram_end = SMC_RAM_END;
 
 	/* Enabling nb dpm on an asrock system prevents dpm from working */
 	if (rdev->pdev->subsystem_vendor == 0x1849)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 158872eb78e4..b88d63c9be99 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1257,7 +1257,7 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 		tmp = RREG32_CG(CG_CGTT_LOCAL_0);
 		tmp &= ~0x00380000;
 		WREG32_CG(CG_CGTT_LOCAL_0, tmp);
-                tmp = RREG32_CG(CG_CGTT_LOCAL_1);
+		tmp = RREG32_CG(CG_CGTT_LOCAL_1);
 		tmp &= ~0x0e000000;
 		WREG32_CG(CG_CGTT_LOCAL_1, tmp);
 	}
@@ -2634,7 +2634,7 @@ int tn_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
 	struct atom_clock_dividers dividers;
 	int r, i;
 
-        r = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
 					   ecclk, false, &dividers);
 	if (r)
 		return r;
diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c
index c3d531a1114b..4a601f990562 100644
--- a/drivers/gpu/drm/radeon/ni_dpm.c
+++ b/drivers/gpu/drm/radeon/ni_dpm.c
@@ -725,9 +725,9 @@ extern int ni_mc_load_microcode(struct radeon_device *rdev);
 
 struct ni_power_info *ni_get_pi(struct radeon_device *rdev)
 {
-        struct ni_power_info *pi = rdev->pm.dpm.priv;
+	struct ni_power_info *pi = rdev->pm.dpm.priv;
 
-        return pi;
+	return pi;
 }
 
 struct ni_ps *ni_get_ps(struct radeon_ps *rps)
@@ -1096,9 +1096,9 @@ static void ni_stop_smc(struct radeon_device *rdev)
 
 static int ni_process_firmware_header(struct radeon_device *rdev)
 {
-        struct rv7xx_power_info *pi = rv770_get_pi(rdev);
-        struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
-        struct ni_power_info *ni_pi = ni_get_pi(rdev);
+	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
+	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
+	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 	u32 tmp;
 	int ret;
 
@@ -1202,14 +1202,14 @@ static int ni_enter_ulp_state(struct radeon_device *rdev)
 	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
 
 	if (pi->gfx_clock_gating) {
-                WREG32_P(SCLK_PWRMGT_CNTL, 0, ~DYN_GFX_CLK_OFF_EN);
+		WREG32_P(SCLK_PWRMGT_CNTL, 0, ~DYN_GFX_CLK_OFF_EN);
 		WREG32_P(SCLK_PWRMGT_CNTL, GFX_CLK_FORCE_ON, ~GFX_CLK_FORCE_ON);
-                WREG32_P(SCLK_PWRMGT_CNTL, 0, ~GFX_CLK_FORCE_ON);
+		WREG32_P(SCLK_PWRMGT_CNTL, 0, ~GFX_CLK_FORCE_ON);
 		RREG32(GB_ADDR_CONFIG);
-        }
+	}
 
 	WREG32_P(SMC_MSG, HOST_SMC_MSG(PPSMC_MSG_SwitchToMinimumPower),
-                 ~HOST_SMC_MSG_MASK);
+		 ~HOST_SMC_MSG_MASK);
 
 	udelay(25000);
 
@@ -1321,12 +1321,12 @@ static void ni_populate_mvdd_value(struct radeon_device *rdev,
 				   u32 mclk,
 				   NISLANDS_SMC_VOLTAGE_VALUE *voltage)
 {
-        struct rv7xx_power_info *pi = rv770_get_pi(rdev);
+	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
 	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
 
 	if (!pi->mvdd_control) {
 		voltage->index = eg_pi->mvdd_high_index;
-                voltage->value = cpu_to_be16(MVDD_HIGH_VALUE);
+		voltage->value = cpu_to_be16(MVDD_HIGH_VALUE);
 		return;
 	}
 
@@ -1510,47 +1510,47 @@ int ni_copy_and_switch_arb_sets(struct radeon_device *rdev,
 	u32 mc_cg_config;
 
 	switch (arb_freq_src) {
-        case MC_CG_ARB_FREQ_F0:
+	case MC_CG_ARB_FREQ_F0:
 		mc_arb_dram_timing  = RREG32(MC_ARB_DRAM_TIMING);
 		mc_arb_dram_timing2 = RREG32(MC_ARB_DRAM_TIMING2);
 		burst_time = (RREG32(MC_ARB_BURST_TIME) & STATE0_MASK) >> STATE0_SHIFT;
 		break;
-        case MC_CG_ARB_FREQ_F1:
+	case MC_CG_ARB_FREQ_F1:
 		mc_arb_dram_timing  = RREG32(MC_ARB_DRAM_TIMING_1);
 		mc_arb_dram_timing2 = RREG32(MC_ARB_DRAM_TIMING2_1);
 		burst_time = (RREG32(MC_ARB_BURST_TIME) & STATE1_MASK) >> STATE1_SHIFT;
 		break;
-        case MC_CG_ARB_FREQ_F2:
+	case MC_CG_ARB_FREQ_F2:
 		mc_arb_dram_timing  = RREG32(MC_ARB_DRAM_TIMING_2);
 		mc_arb_dram_timing2 = RREG32(MC_ARB_DRAM_TIMING2_2);
 		burst_time = (RREG32(MC_ARB_BURST_TIME) & STATE2_MASK) >> STATE2_SHIFT;
 		break;
-        case MC_CG_ARB_FREQ_F3:
+	case MC_CG_ARB_FREQ_F3:
 		mc_arb_dram_timing  = RREG32(MC_ARB_DRAM_TIMING_3);
 		mc_arb_dram_timing2 = RREG32(MC_ARB_DRAM_TIMING2_3);
 		burst_time = (RREG32(MC_ARB_BURST_TIME) & STATE3_MASK) >> STATE3_SHIFT;
 		break;
-        default:
+	default:
 		return -EINVAL;
 	}
 
 	switch (arb_freq_dest) {
-        case MC_CG_ARB_FREQ_F0:
+	case MC_CG_ARB_FREQ_F0:
 		WREG32(MC_ARB_DRAM_TIMING, mc_arb_dram_timing);
 		WREG32(MC_ARB_DRAM_TIMING2, mc_arb_dram_timing2);
 		WREG32_P(MC_ARB_BURST_TIME, STATE0(burst_time), ~STATE0_MASK);
 		break;
-        case MC_CG_ARB_FREQ_F1:
+	case MC_CG_ARB_FREQ_F1:
 		WREG32(MC_ARB_DRAM_TIMING_1, mc_arb_dram_timing);
 		WREG32(MC_ARB_DRAM_TIMING2_1, mc_arb_dram_timing2);
 		WREG32_P(MC_ARB_BURST_TIME, STATE1(burst_time), ~STATE1_MASK);
 		break;
-        case MC_CG_ARB_FREQ_F2:
+	case MC_CG_ARB_FREQ_F2:
 		WREG32(MC_ARB_DRAM_TIMING_2, mc_arb_dram_timing);
 		WREG32(MC_ARB_DRAM_TIMING2_2, mc_arb_dram_timing2);
 		WREG32_P(MC_ARB_BURST_TIME, STATE2(burst_time), ~STATE2_MASK);
 		break;
-        case MC_CG_ARB_FREQ_F3:
+	case MC_CG_ARB_FREQ_F3:
 		WREG32(MC_ARB_DRAM_TIMING_3, mc_arb_dram_timing);
 		WREG32(MC_ARB_DRAM_TIMING2_3, mc_arb_dram_timing2);
 		WREG32_P(MC_ARB_BURST_TIME, STATE3(burst_time), ~STATE3_MASK);
@@ -1621,9 +1621,7 @@ static int ni_populate_memory_timing_parameters(struct radeon_device *rdev,
 		(u8)rv770_calculate_memory_refresh_rate(rdev, pl->sclk);
 
 
-	radeon_atom_set_engine_dram_timings(rdev,
-                                            pl->sclk,
-                                            pl->mclk);
+	radeon_atom_set_engine_dram_timings(rdev, pl->sclk, pl->mclk);
 
 	dram_timing = RREG32(MC_ARB_DRAM_TIMING);
 	dram_timing2 = RREG32(MC_ARB_DRAM_TIMING2);
@@ -1867,9 +1865,9 @@ static int ni_populate_smc_acpi_state(struct radeon_device *rdev,
 
 	mpll_ad_func_cntl_2 |= BIAS_GEN_PDNB | RESET_EN;
 
-        if (pi->mem_gddr5)
-                mpll_dq_func_cntl &= ~PDNB;
-        mpll_dq_func_cntl_2 |= BIAS_GEN_PDNB | RESET_EN | BYPASS;
+	if (pi->mem_gddr5)
+		mpll_dq_func_cntl &= ~PDNB;
+	mpll_dq_func_cntl_2 |= BIAS_GEN_PDNB | RESET_EN | BYPASS;
 
 
 	mclk_pwrmgt_cntl |= (MRDCKA0_RESET |
@@ -1891,15 +1889,15 @@ static int ni_populate_smc_acpi_state(struct radeon_device *rdev,
 			      MRDCKD1_PDNB);
 
 	dll_cntl |= (MRDCKA0_BYPASS |
-                     MRDCKA1_BYPASS |
-                     MRDCKB0_BYPASS |
-                     MRDCKB1_BYPASS |
-                     MRDCKC0_BYPASS |
-                     MRDCKC1_BYPASS |
-                     MRDCKD0_BYPASS |
-                     MRDCKD1_BYPASS);
-
-        spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK;
+		     MRDCKA1_BYPASS |
+		     MRDCKB0_BYPASS |
+		     MRDCKB1_BYPASS |
+		     MRDCKC0_BYPASS |
+		     MRDCKC1_BYPASS |
+		     MRDCKD0_BYPASS |
+		     MRDCKD1_BYPASS);
+
+	spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK;
 	spll_func_cntl_2 |= SCLK_MUX_SEL(4);
 
 	table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL = cpu_to_be32(mpll_ad_func_cntl);
@@ -2089,7 +2087,7 @@ static int ni_populate_sclk_value(struct radeon_device *rdev,
 
 static int ni_init_smc_spll_table(struct radeon_device *rdev)
 {
-        struct rv7xx_power_info *pi = rv770_get_pi(rdev);
+	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
 	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 	SMC_NISLANDS_SPLL_DIV_TABLE *spll_table;
 	NISLANDS_SMC_SCLK_VALUE sclk_params;
@@ -2311,8 +2309,8 @@ static int ni_convert_power_level_to_smc(struct radeon_device *rdev,
 					 NISLANDS_SMC_HW_PERFORMANCE_LEVEL *level)
 {
 	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
-        struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
-        struct ni_power_info *ni_pi = ni_get_pi(rdev);
+	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
+	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 	int ret;
 	bool dll_state_on;
 	u16 std_vddc;
@@ -2391,8 +2389,8 @@ static int ni_populate_smc_t(struct radeon_device *rdev,
 			     struct radeon_ps *radeon_state,
 			     NISLANDS_SMC_SWSTATE *smc_state)
 {
-        struct rv7xx_power_info *pi = rv770_get_pi(rdev);
-        struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
+	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
+	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
 	struct ni_ps *state = ni_get_ps(radeon_state);
 	u32 a_t;
 	u32 t_l, t_h;
@@ -2451,8 +2449,8 @@ static int ni_populate_power_containment_values(struct radeon_device *rdev,
 						struct radeon_ps *radeon_state,
 						NISLANDS_SMC_SWSTATE *smc_state)
 {
-        struct rv7xx_power_info *pi = rv770_get_pi(rdev);
-        struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
+	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
+	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
 	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 	struct ni_ps *state = ni_get_ps(radeon_state);
 	u32 prev_sclk;
@@ -2595,7 +2593,7 @@ static int ni_enable_power_containment(struct radeon_device *rdev,
 				       struct radeon_ps *radeon_new_state,
 				       bool enable)
 {
-        struct ni_power_info *ni_pi = ni_get_pi(rdev);
+	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 	PPSMC_Result smc_result;
 	int ret = 0;
 
@@ -2625,7 +2623,7 @@ static int ni_convert_power_state_to_smc(struct radeon_device *rdev,
 					 struct radeon_ps *radeon_state,
 					 NISLANDS_SMC_SWSTATE *smc_state)
 {
-        struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
+	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
 	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 	struct ni_ps *state = ni_get_ps(radeon_state);
 	int i, ret;
@@ -2770,46 +2768,46 @@ static bool ni_check_s0_mc_reg_index(u16 in_reg, u16 *out_reg)
 	bool result = true;
 
 	switch (in_reg) {
-        case  MC_SEQ_RAS_TIMING >> 2:
+	case  MC_SEQ_RAS_TIMING >> 2:
 		*out_reg = MC_SEQ_RAS_TIMING_LP >> 2;
 		break;
-        case MC_SEQ_CAS_TIMING >> 2:
+	case MC_SEQ_CAS_TIMING >> 2:
 		*out_reg = MC_SEQ_CAS_TIMING_LP >> 2;
 		break;
-        case MC_SEQ_MISC_TIMING >> 2:
+	case MC_SEQ_MISC_TIMING >> 2:
 		*out_reg = MC_SEQ_MISC_TIMING_LP >> 2;
 		break;
-        case MC_SEQ_MISC_TIMING2 >> 2:
+	case MC_SEQ_MISC_TIMING2 >> 2:
 		*out_reg = MC_SEQ_MISC_TIMING2_LP >> 2;
 		break;
-        case MC_SEQ_RD_CTL_D0 >> 2:
+	case MC_SEQ_RD_CTL_D0 >> 2:
 		*out_reg = MC_SEQ_RD_CTL_D0_LP >> 2;
 		break;
-        case MC_SEQ_RD_CTL_D1 >> 2:
+	case MC_SEQ_RD_CTL_D1 >> 2:
 		*out_reg = MC_SEQ_RD_CTL_D1_LP >> 2;
 		break;
-        case MC_SEQ_WR_CTL_D0 >> 2:
+	case MC_SEQ_WR_CTL_D0 >> 2:
 		*out_reg = MC_SEQ_WR_CTL_D0_LP >> 2;
 		break;
-        case MC_SEQ_WR_CTL_D1 >> 2:
+	case MC_SEQ_WR_CTL_D1 >> 2:
 		*out_reg = MC_SEQ_WR_CTL_D1_LP >> 2;
 		break;
-        case MC_PMG_CMD_EMRS >> 2:
+	case MC_PMG_CMD_EMRS >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_EMRS_LP >> 2;
 		break;
-        case MC_PMG_CMD_MRS >> 2:
+	case MC_PMG_CMD_MRS >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_MRS_LP >> 2;
 		break;
-        case MC_PMG_CMD_MRS1 >> 2:
+	case MC_PMG_CMD_MRS1 >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_MRS1_LP >> 2;
 		break;
-        case MC_SEQ_PMG_TIMING >> 2:
+	case MC_SEQ_PMG_TIMING >> 2:
 		*out_reg = MC_SEQ_PMG_TIMING_LP >> 2;
 		break;
-        case MC_PMG_CMD_MRS2 >> 2:
+	case MC_PMG_CMD_MRS2 >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_MRS2_LP >> 2;
 		break;
-        default:
+	default:
 		result = false;
 		break;
 	}
@@ -2876,9 +2874,9 @@ static int ni_initialize_mc_reg_table(struct radeon_device *rdev)
 	struct ni_mc_reg_table *ni_table = &ni_pi->mc_reg_table;
 	u8 module_index = rv770_get_memory_module_index(rdev);
 
-        table = kzalloc(sizeof(struct atom_mc_reg_table), GFP_KERNEL);
-        if (!table)
-                return -ENOMEM;
+	table = kzalloc(sizeof(struct atom_mc_reg_table), GFP_KERNEL);
+	if (!table)
+		return -ENOMEM;
 
 	WREG32(MC_SEQ_RAS_TIMING_LP, RREG32(MC_SEQ_RAS_TIMING));
 	WREG32(MC_SEQ_CAS_TIMING_LP, RREG32(MC_SEQ_CAS_TIMING));
@@ -2896,25 +2894,25 @@ static int ni_initialize_mc_reg_table(struct radeon_device *rdev)
 
 	ret = radeon_atom_init_mc_reg_table(rdev, module_index, table);
 
-        if (ret)
-                goto init_mc_done;
+	if (ret)
+		goto init_mc_done;
 
 	ret = ni_copy_vbios_mc_reg_table(table, ni_table);
 
-        if (ret)
-                goto init_mc_done;
+	if (ret)
+		goto init_mc_done;
 
 	ni_set_s0_mc_reg_index(ni_table);
 
 	ret = ni_set_mc_special_registers(rdev, ni_table);
 
-        if (ret)
-                goto init_mc_done;
+	if (ret)
+		goto init_mc_done;
 
 	ni_set_valid_flag(ni_table);
 
 init_mc_done:
-        kfree(table);
+	kfree(table);
 
 	return ret;
 }
@@ -2994,7 +2992,7 @@ static int ni_populate_mc_reg_table(struct radeon_device *rdev,
 {
 	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
 	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
-        struct ni_power_info *ni_pi = ni_get_pi(rdev);
+	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 	struct ni_ps *boot_state = ni_get_ps(radeon_boot_state);
 	SMC_NIslands_MCRegisters *mc_reg_table = &ni_pi->smc_mc_reg_table;
 
@@ -3025,7 +3023,7 @@ static int ni_upload_mc_reg_table(struct radeon_device *rdev,
 {
 	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
 	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
-        struct ni_power_info *ni_pi = ni_get_pi(rdev);
+	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 	struct ni_ps *ni_new_state = ni_get_ps(radeon_new_state);
 	SMC_NIslands_MCRegisters *mc_reg_table = &ni_pi->smc_mc_reg_table;
 	u16 address;
@@ -3142,7 +3140,7 @@ static int ni_initialize_smc_cac_tables(struct radeon_device *rdev)
 	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 	PP_NIslands_CACTABLES *cac_tables = NULL;
 	int i, ret;
-        u32 reg;
+	u32 reg;
 
 	if (ni_pi->enable_cac == false)
 		return 0;
@@ -3422,13 +3420,13 @@ static int ni_pcie_performance_request(struct radeon_device *rdev,
 	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
 
 	if ((perf_req == PCIE_PERF_REQ_PECI_GEN1) ||
-            (perf_req == PCIE_PERF_REQ_PECI_GEN2)) {
+	    (perf_req == PCIE_PERF_REQ_PECI_GEN2)) {
 		if (eg_pi->pcie_performance_request_registered == false)
 			radeon_acpi_pcie_notify_device_ready(rdev);
 		eg_pi->pcie_performance_request_registered = true;
 		return radeon_acpi_pcie_performance_request(rdev, perf_req, advertise);
 	} else if ((perf_req == PCIE_PERF_REQ_REMOVE_REGISTRY) &&
-                   eg_pi->pcie_performance_request_registered) {
+		    eg_pi->pcie_performance_request_registered) {
 		eg_pi->pcie_performance_request_registered = false;
 		return radeon_acpi_pcie_performance_request(rdev, perf_req, advertise);
 	}
@@ -3441,12 +3439,12 @@ static int ni_advertise_gen2_capability(struct radeon_device *rdev)
 	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
 	u32 tmp;
 
-        tmp = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+	tmp = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
 
-        if ((tmp & LC_OTHER_SIDE_EVER_SENT_GEN2) &&
-            (tmp & LC_OTHER_SIDE_SUPPORTS_GEN2))
-                pi->pcie_gen2 = true;
-        else
+	if ((tmp & LC_OTHER_SIDE_EVER_SENT_GEN2) &&
+	    (tmp & LC_OTHER_SIDE_SUPPORTS_GEN2))
+		pi->pcie_gen2 = true;
+	else
 		pi->pcie_gen2 = false;
 
 	if (!pi->pcie_gen2)
@@ -3458,8 +3456,8 @@ static int ni_advertise_gen2_capability(struct radeon_device *rdev)
 static void ni_enable_bif_dynamic_pcie_gen2(struct radeon_device *rdev,
 					    bool enable)
 {
-        struct rv7xx_power_info *pi = rv770_get_pi(rdev);
-        u32 tmp, bif;
+	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
+	u32 tmp, bif;
 
 	tmp = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
 
@@ -3502,7 +3500,7 @@ static void ni_enable_dynamic_pcie_gen2(struct radeon_device *rdev,
 	if (enable)
 		WREG32_P(GENERAL_PWRMGT, ENABLE_GEN2PCIE, ~ENABLE_GEN2PCIE);
 	else
-                WREG32_P(GENERAL_PWRMGT, 0, ~ENABLE_GEN2PCIE);
+		WREG32_P(GENERAL_PWRMGT, 0, ~ENABLE_GEN2PCIE);
 }
 
 void ni_set_uvd_clock_before_set_eng_clock(struct radeon_device *rdev,
@@ -3563,7 +3561,7 @@ void ni_update_current_ps(struct radeon_device *rdev,
 {
 	struct ni_ps *new_ps = ni_get_ps(rps);
 	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
-        struct ni_power_info *ni_pi = ni_get_pi(rdev);
+	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 
 	eg_pi->current_rps = *rps;
 	ni_pi->current_ps = *new_ps;
@@ -3575,7 +3573,7 @@ void ni_update_requested_ps(struct radeon_device *rdev,
 {
 	struct ni_ps *new_ps = ni_get_ps(rps);
 	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
-        struct ni_power_info *ni_pi = ni_get_pi(rdev);
+	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 
 	eg_pi->requested_rps = *rps;
 	ni_pi->requested_ps = *new_ps;
@@ -3591,8 +3589,8 @@ int ni_dpm_enable(struct radeon_device *rdev)
 
 	if (pi->gfx_clock_gating)
 		ni_cg_clockgating_default(rdev);
-        if (btc_dpm_enabled(rdev))
-                return -EINVAL;
+	if (btc_dpm_enabled(rdev))
+		return -EINVAL;
 	if (pi->mg_clock_gating)
 		ni_mg_clockgating_default(rdev);
 	if (eg_pi->ls_clock_gating)
@@ -3991,7 +3989,7 @@ static int ni_parse_power_table(struct radeon_device *rdev)
 	union pplib_clock_info *clock_info;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	struct ni_ps *ps;
 
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index ed121042247f..f86ab695ee8f 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -235,8 +235,8 @@ int r600_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
 		fb_div |= 1;
 
 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
-        if (r)
-                return r;
+	if (r)
+		return r;
 
 	/* assert PLL_RESET */
 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
@@ -1490,7 +1490,7 @@ static int r600_mc_init(struct radeon_device *rdev)
 					rdev->fastfb_working = true;
 				}
 			}
-  		}
+		}
 	}
 
 	radeon_update_bandwidth_info(rdev);
@@ -4574,7 +4574,7 @@ uint64_t r600_get_gpu_clock_counter(struct radeon_device *rdev)
 	mutex_lock(&rdev->gpu_clock_mutex);
 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
-	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
 	mutex_unlock(&rdev->gpu_clock_mutex);
 	return clock;
 }
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 2f36fa1576e0..b69c8de35bd3 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -1671,8 +1671,8 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 		}
 
 		offset = reloc->gpu_offset +
-		         (idx_value & 0xfffffff0) +
-		         ((u64)(tmp & 0xff) << 32);
+			 (idx_value & 0xfffffff0) +
+			 ((u64)(tmp & 0xff) << 32);
 
 		ib[idx + 0] = offset;
 		ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
@@ -1712,8 +1712,8 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 		}
 
 		offset = reloc->gpu_offset +
-		         idx_value +
-		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
+			 idx_value +
+			 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
 
 		ib[idx+0] = offset;
 		ib[idx+1] = upper_32_bits(offset) & 0xff;
@@ -1764,8 +1764,8 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 			}
 
 			offset = reloc->gpu_offset +
-			         (radeon_get_ib_value(p, idx+1) & 0xfffffff0) +
-			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+				 (radeon_get_ib_value(p, idx+1) & 0xfffffff0) +
+				 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
 
 			ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffff0);
 			ib[idx+2] = upper_32_bits(offset) & 0xff;
@@ -1876,8 +1876,8 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 				return -EINVAL;
 			}
 			offset = reloc->gpu_offset +
-			         (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
-			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+				 (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
+				 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
 
 			ib[idx+1] = offset & 0xfffffff8;
 			ib[idx+2] = upper_32_bits(offset) & 0xff;
@@ -1898,8 +1898,8 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 		}
 
 		offset = reloc->gpu_offset +
-		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
-		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+			 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
+			 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
 
 		ib[idx+1] = offset & 0xfffffffc;
 		ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c
index fa2154493cf1..6a4b020dd0b4 100644
--- a/drivers/gpu/drm/radeon/r600_dpm.c
+++ b/drivers/gpu/drm/radeon/r600_dpm.c
@@ -844,7 +844,7 @@ int r600_get_platform_caps(struct radeon_device *rdev)
 	struct radeon_mode_info *mode_info = &rdev->mode_info;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 
 	if (!atom_parse_data_header(mode_info->atom_context, index, NULL,
@@ -874,7 +874,7 @@ int r600_parse_extended_power_table(struct radeon_device *rdev)
 	union fan_info *fan_info;
 	ATOM_PPLIB_Clock_Voltage_Dependency_Table *dep_table;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	int ret, i;
 
@@ -1070,7 +1070,7 @@ int r600_parse_extended_power_table(struct radeon_device *rdev)
 			ext_hdr->usVCETableOffset) {
 			VCEClockInfoArray *array = (VCEClockInfoArray *)
 				(mode_info->atom_context->bios + data_offset +
-                                 le16_to_cpu(ext_hdr->usVCETableOffset) + 1);
+				 le16_to_cpu(ext_hdr->usVCETableOffset) + 1);
 			ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table *limits =
 				(ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table *)
 				(mode_info->atom_context->bios + data_offset +
diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c
index e85894ade95c..e82a99cb2459 100644
--- a/drivers/gpu/drm/radeon/r600_hdmi.c
+++ b/drivers/gpu/drm/radeon/r600_hdmi.c
@@ -215,7 +215,7 @@ void r600_hdmi_update_acr(struct drm_encoder *encoder, long offset,
  * build a HDMI Video Info Frame
  */
 void r600_set_avi_packet(struct radeon_device *rdev, u32 offset,
-    unsigned char *buffer, size_t size)
+			 unsigned char *buffer, size_t size)
 {
 	uint8_t *frame = buffer + 3;
 
@@ -312,7 +312,7 @@ void r600_hdmi_audio_workaround(struct drm_encoder *encoder)
 }
 
 void r600_hdmi_audio_set_dto(struct radeon_device *rdev,
-    struct radeon_crtc *crtc, unsigned int clock)
+			     struct radeon_crtc *crtc, unsigned int clock)
 {
 	struct radeon_encoder *radeon_encoder;
 	struct radeon_encoder_atom_dig *dig;
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index de9a2ffcf5f7..f8097a0e7a79 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -2095,7 +2095,7 @@ static int radeon_atombios_parse_power_table_1_3(struct radeon_device *rdev)
 	struct radeon_i2c_bus_rec i2c_bus;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 
 	if (!atom_parse_data_header(mode_info->atom_context, index, NULL,
@@ -2575,7 +2575,7 @@ static int radeon_atombios_parse_power_table_4_5(struct radeon_device *rdev)
 	bool valid;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 
 	if (!atom_parse_data_header(mode_info->atom_context, index, NULL,
@@ -2666,7 +2666,7 @@ static int radeon_atombios_parse_power_table_6(struct radeon_device *rdev)
 	bool valid;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	u8 *power_state_offset;
 
diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
index 56482e35d43e..fd8c4d317e60 100644
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -62,6 +62,10 @@ bool radeon_has_atpx(void) {
 	return radeon_atpx_priv.atpx_detected;
 }
 
+bool radeon_has_atpx_dgpu_power_cntl(void) {
+	return radeon_atpx_priv.atpx.functions.power_cntl;
+}
+
 /**
  * radeon_atpx_call - call an ATPX method
  *
@@ -141,10 +145,6 @@ static void radeon_atpx_parse_functions(struct radeon_atpx_functions *f, u32 mas
  */
 static int radeon_atpx_validate(struct radeon_atpx *atpx)
 {
-	/* make sure required functions are enabled */
-	/* dGPU power control is required */
-	atpx->functions.power_cntl = true;
-
 	if (atpx->functions.px_params) {
 		union acpi_object *info;
 		struct atpx_px_params output;
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index a9b01bcf7d0a..432480ff9d22 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -34,7 +34,6 @@
 #include <asm/machdep.h>
 #include <asm/pmac_feature.h>
 #include <asm/prom.h>
-#include <asm/pci-bridge.h>
 #endif /* CONFIG_PPC_PMAC */
 
 /* from radeon_legacy_encoder.c */
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 4197ca1bb1e4..4fd1a961012d 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -103,6 +103,12 @@ static const char radeon_family_name[][16] = {
 	"LAST",
 };
 
+#if defined(CONFIG_VGA_SWITCHEROO)
+bool radeon_has_atpx_dgpu_power_cntl(void);
+#else
+static inline bool radeon_has_atpx_dgpu_power_cntl(void) { return false; }
+#endif
+
 #define RADEON_PX_QUIRK_DISABLE_PX  (1 << 0)
 #define RADEON_PX_QUIRK_LONG_WAKEUP (1 << 1)
 
@@ -1155,9 +1161,9 @@ static void radeon_check_arguments(struct radeon_device *rdev)
 		radeon_vm_size = 4;
 	}
 
-       /*
-        * Max GPUVM size for Cayman, SI and CI are 40 bits.
-        */
+	/*
+	 * Max GPUVM size for Cayman, SI and CI are 40 bits.
+	 */
 	if (radeon_vm_size > 1024) {
 		dev_warn(rdev->dev, "VM size (%d) too large, max is 1TB\n",
 			 radeon_vm_size);
@@ -1433,7 +1439,7 @@ int radeon_device_init(struct radeon_device *rdev,
 	 * ignore it */
 	vga_client_register(rdev->pdev, rdev, NULL, radeon_vga_set_decode);
 
-	if (rdev->flags & RADEON_IS_PX)
+	if ((rdev->flags & RADEON_IS_PX) && radeon_has_atpx_dgpu_power_cntl())
 		runtime = true;
 	vga_switcheroo_register_client(rdev->pdev, &radeon_switcheroo_ops, runtime);
 	if (runtime)
@@ -1895,7 +1901,7 @@ int radeon_debugfs_add_files(struct radeon_device *rdev,
 	if (i > RADEON_DEBUGFS_MAX_COMPONENTS) {
 		DRM_ERROR("Reached maximum number of debugfs components.\n");
 		DRM_ERROR("Report so we increase "
-		          "RADEON_DEBUGFS_MAX_COMPONENTS.\n");
+			  "RADEON_DEBUGFS_MAX_COMPONENTS.\n");
 		return -EINVAL;
 	}
 	rdev->debugfs[rdev->debugfs_count].files = files;
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index e29096b2fa6b..fcc7483d3f7b 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -407,7 +407,7 @@ static void radeon_flip_work_func(struct work_struct *__work)
 	unsigned repcnt = 4;
 	struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
 
-        down_read(&rdev->exclusive_lock);
+	down_read(&rdev->exclusive_lock);
 	if (work->fence) {
 		struct radeon_fence *fence;
 
@@ -919,7 +919,7 @@ static void avivo_reduce_ratio(unsigned *nom, unsigned *den,
 	*den /= tmp;
 
 	/* make sure nominator is large enough */
-        if (*nom < nom_min) {
+	if (*nom < nom_min) {
 		tmp = DIV_ROUND_UP(nom_min, *nom);
 		*nom *= tmp;
 		*den *= tmp;
@@ -959,7 +959,7 @@ static void avivo_get_fb_ref_div(unsigned nom, unsigned den, unsigned post_div,
 	*fb_div = DIV_ROUND_CLOSEST(nom * *ref_div * post_div, den);
 
 	/* limit fb divider to its maximum */
-        if (*fb_div > fb_div_max) {
+	if (*fb_div > fb_div_max) {
 		*ref_div = DIV_ROUND_CLOSEST(*ref_div * fb_div_max, *fb_div);
 		*fb_div = fb_div_max;
 	}
@@ -1683,10 +1683,8 @@ int radeon_modeset_init(struct radeon_device *rdev)
 	/* setup afmt */
 	radeon_afmt_init(rdev);
 
-	if (!list_empty(&rdev->ddev->mode_config.connector_list)) {
-		radeon_fbdev_init(rdev);
-		drm_kms_helper_poll_init(rdev->ddev);
-	}
+	radeon_fbdev_init(rdev);
+	drm_kms_helper_poll_init(rdev->ddev);
 
 	/* do pm late init */
 	ret = radeon_pm_late_init(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_dp_mst.c b/drivers/gpu/drm/radeon/radeon_dp_mst.c
index df7a1719c841..43cffb526b0c 100644
--- a/drivers/gpu/drm/radeon/radeon_dp_mst.c
+++ b/drivers/gpu/drm/radeon/radeon_dp_mst.c
@@ -510,6 +510,7 @@ static bool radeon_mst_mode_fixup(struct drm_encoder *encoder,
 {
 	struct radeon_encoder_mst *mst_enc;
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct radeon_connector_atom_dig *dig_connector;
 	int bpp = 24;
 
 	mst_enc = radeon_encoder->enc_priv;
@@ -523,22 +524,11 @@ static bool radeon_mst_mode_fixup(struct drm_encoder *encoder,
 
 
 	drm_mode_set_crtcinfo(adjusted_mode, 0);
-	{
-	  struct radeon_connector_atom_dig *dig_connector;
-	  int ret;
-
-	  dig_connector = mst_enc->connector->con_priv;
-	  ret = radeon_dp_get_dp_link_config(&mst_enc->connector->base,
-					     dig_connector->dpcd, adjusted_mode->clock,
-					     &dig_connector->dp_lane_count,
-					     &dig_connector->dp_clock);
-	  if (ret) {
-		  dig_connector->dp_lane_count = 0;
-		  dig_connector->dp_clock = 0;
-	  }
-	  DRM_DEBUG_KMS("dig clock %p %d %d\n", dig_connector,
-			dig_connector->dp_lane_count, dig_connector->dp_clock);
-	}
+	dig_connector = mst_enc->connector->con_priv;
+	dig_connector->dp_lane_count = drm_dp_max_lane_count(dig_connector->dpcd);
+	dig_connector->dp_clock = drm_dp_max_link_rate(dig_connector->dpcd);
+	DRM_DEBUG_KMS("dig clock %p %d %d\n", dig_connector,
+		      dig_connector->dp_lane_count, dig_connector->dp_clock);
 	return true;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index d2e628eea53d..0e3143acb565 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -38,9 +38,9 @@
 #include <linux/vga_switcheroo.h>
 
 /* object hierarchy -
-   this contains a helper + a radeon fb
-   the helper contains a pointer to radeon framebuffer baseclass.
-*/
+ * this contains a helper + a radeon fb
+ * the helper contains a pointer to radeon framebuffer baseclass.
+ */
 struct radeon_fbdev {
 	struct drm_fb_helper helper;
 	struct radeon_framebuffer rfb;
@@ -292,7 +292,8 @@ out_unref:
 
 void radeon_fb_output_poll_changed(struct radeon_device *rdev)
 {
-	drm_fb_helper_hotplug_event(&rdev->mode_info.rfbdev->helper);
+	if (rdev->mode_info.rfbdev)
+		drm_fb_helper_hotplug_event(&rdev->mode_info.rfbdev->helper);
 }
 
 static int radeon_fbdev_destroy(struct drm_device *dev, struct radeon_fbdev *rfbdev)
@@ -325,6 +326,10 @@ int radeon_fbdev_init(struct radeon_device *rdev)
 	int bpp_sel = 32;
 	int ret;
 
+	/* don't enable fbdev if no connectors */
+	if (list_empty(&rdev->ddev->mode_config.connector_list))
+		return 0;
+
 	/* select 8 bpp console on RN50 or 16MB cards */
 	if (ASIC_IS_RN50(rdev) || rdev->mc.real_vram_size <= (32*1024*1024))
 		bpp_sel = 8;
@@ -377,11 +382,15 @@ void radeon_fbdev_fini(struct radeon_device *rdev)
 
 void radeon_fbdev_set_suspend(struct radeon_device *rdev, int state)
 {
-	fb_set_suspend(rdev->mode_info.rfbdev->helper.fbdev, state);
+	if (rdev->mode_info.rfbdev)
+		fb_set_suspend(rdev->mode_info.rfbdev->helper.fbdev, state);
 }
 
 bool radeon_fbdev_robj_is_fb(struct radeon_device *rdev, struct radeon_bo *robj)
 {
+	if (!rdev->mode_info.rfbdev)
+		return false;
+
 	if (robj == gem_to_radeon_bo(rdev->mode_info.rfbdev->rfb.obj))
 		return true;
 	return false;
@@ -389,12 +398,14 @@ bool radeon_fbdev_robj_is_fb(struct radeon_device *rdev, struct radeon_bo *robj)
 
 void radeon_fb_add_connector(struct radeon_device *rdev, struct drm_connector *connector)
 {
-	drm_fb_helper_add_one_connector(&rdev->mode_info.rfbdev->helper, connector);
+	if (rdev->mode_info.rfbdev)
+		drm_fb_helper_add_one_connector(&rdev->mode_info.rfbdev->helper, connector);
 }
 
 void radeon_fb_remove_connector(struct radeon_device *rdev, struct drm_connector *connector)
 {
-	drm_fb_helper_remove_one_connector(&rdev->mode_info.rfbdev->helper, connector);
+	if (rdev->mode_info.rfbdev)
+		drm_fb_helper_remove_one_connector(&rdev->mode_info.rfbdev->helper, connector);
 }
 
 void radeon_fbdev_restore_mode(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c
index c39ce1f05703..92ce0e533bc0 100644
--- a/drivers/gpu/drm/radeon/radeon_ib.c
+++ b/drivers/gpu/drm/radeon/radeon_ib.c
@@ -274,7 +274,7 @@ int radeon_ib_ring_tests(struct radeon_device *rdev)
 			if (i == RADEON_RING_TYPE_GFX_INDEX) {
 				/* oh, oh, that's really bad */
 				DRM_ERROR("radeon: failed testing IB on GFX ring (%d).\n", r);
-		                rdev->accel_working = false;
+				rdev->accel_working = false;
 				return r;
 
 			} else {
@@ -304,7 +304,7 @@ static int radeon_debugfs_sa_info(struct seq_file *m, void *data)
 }
 
 static struct drm_info_list radeon_debugfs_sa_list[] = {
-        {"radeon_sa_info", &radeon_debugfs_sa_info, 0, NULL},
+	{"radeon_sa_info", &radeon_debugfs_sa_info, 0, NULL},
 };
 
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c
index 979f3bf65f2c..1e9304d1c88f 100644
--- a/drivers/gpu/drm/radeon/radeon_irq_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c
@@ -291,6 +291,8 @@ int radeon_irq_kms_init(struct radeon_device *rdev)
 	if (r) {
 		return r;
 	}
+	rdev->ddev->vblank_disable_allowed = true;
+
 	/* enable msi */
 	rdev->msi_enabled = 0;
 
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
index 32b338ff436b..478d4099b0d0 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
@@ -331,13 +331,15 @@ static void radeon_crtc_dpms(struct drm_crtc *crtc, int mode)
 									 RADEON_CRTC_DISP_REQ_EN_B));
 			WREG32_P(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl, ~(mask | crtc_ext_cntl));
 		}
-		drm_vblank_post_modeset(dev, radeon_crtc->crtc_id);
+		if (dev->num_crtcs > radeon_crtc->crtc_id)
+			drm_vblank_on(dev, radeon_crtc->crtc_id);
 		radeon_crtc_load_lut(crtc);
 		break;
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
 	case DRM_MODE_DPMS_OFF:
-		drm_vblank_pre_modeset(dev, radeon_crtc->crtc_id);
+		if (dev->num_crtcs > radeon_crtc->crtc_id)
+			drm_vblank_off(dev, radeon_crtc->crtc_id);
 		if (radeon_crtc->crtc_id)
 			WREG32_P(RADEON_CRTC2_GEN_CNTL, mask, ~(RADEON_CRTC2_EN | mask));
 		else {
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
index 88dc973fb209..868c3ba2efaa 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
@@ -818,52 +818,52 @@ static void radeon_legacy_tmds_int_mode_set(struct drm_encoder *encoder,
 	tmds_transmitter_cntl = RREG32(RADEON_TMDS_TRANSMITTER_CNTL) &
 		~(RADEON_TMDS_TRANSMITTER_PLLRST);
 
-    if (rdev->family == CHIP_R200 ||
-	rdev->family == CHIP_R100 ||
-	ASIC_IS_R300(rdev))
-	    tmds_transmitter_cntl &= ~(RADEON_TMDS_TRANSMITTER_PLLEN);
-    else /* RV chips got this bit reversed */
-	    tmds_transmitter_cntl |= RADEON_TMDS_TRANSMITTER_PLLEN;
-
-    fp_gen_cntl = (RREG32(RADEON_FP_GEN_CNTL) |
-		   (RADEON_FP_CRTC_DONT_SHADOW_VPAR |
-		    RADEON_FP_CRTC_DONT_SHADOW_HEND));
-
-    fp_gen_cntl &= ~(RADEON_FP_FPON | RADEON_FP_TMDS_EN);
-
-    fp_gen_cntl &= ~(RADEON_FP_RMX_HVSYNC_CONTROL_EN |
-		     RADEON_FP_DFP_SYNC_SEL |
-		     RADEON_FP_CRT_SYNC_SEL |
-		     RADEON_FP_CRTC_LOCK_8DOT |
-		     RADEON_FP_USE_SHADOW_EN |
-		     RADEON_FP_CRTC_USE_SHADOW_VEND |
-		     RADEON_FP_CRT_SYNC_ALT);
-
-    if (1) /*  FIXME rgbBits == 8 */
-	    fp_gen_cntl |= RADEON_FP_PANEL_FORMAT;  /* 24 bit format */
-    else
-	    fp_gen_cntl &= ~RADEON_FP_PANEL_FORMAT;/* 18 bit format */
-
-    if (radeon_crtc->crtc_id == 0) {
-	    if (ASIC_IS_R300(rdev) || rdev->family == CHIP_R200) {
-		    fp_gen_cntl &= ~R200_FP_SOURCE_SEL_MASK;
-		    if (radeon_encoder->rmx_type != RMX_OFF)
-			    fp_gen_cntl |= R200_FP_SOURCE_SEL_RMX;
-		    else
-			    fp_gen_cntl |= R200_FP_SOURCE_SEL_CRTC1;
-	    } else
-		    fp_gen_cntl &= ~RADEON_FP_SEL_CRTC2;
-    } else {
-	    if (ASIC_IS_R300(rdev) || rdev->family == CHIP_R200) {
-		    fp_gen_cntl &= ~R200_FP_SOURCE_SEL_MASK;
-		    fp_gen_cntl |= R200_FP_SOURCE_SEL_CRTC2;
-	    } else
-		    fp_gen_cntl |= RADEON_FP_SEL_CRTC2;
-    }
-
-    WREG32(RADEON_TMDS_PLL_CNTL, tmds_pll_cntl);
-    WREG32(RADEON_TMDS_TRANSMITTER_CNTL, tmds_transmitter_cntl);
-    WREG32(RADEON_FP_GEN_CNTL, fp_gen_cntl);
+	if (rdev->family == CHIP_R200 ||
+	    rdev->family == CHIP_R100 ||
+	    ASIC_IS_R300(rdev))
+		tmds_transmitter_cntl &= ~(RADEON_TMDS_TRANSMITTER_PLLEN);
+	else /* RV chips got this bit reversed */
+		tmds_transmitter_cntl |= RADEON_TMDS_TRANSMITTER_PLLEN;
+
+	fp_gen_cntl = (RREG32(RADEON_FP_GEN_CNTL) |
+		      (RADEON_FP_CRTC_DONT_SHADOW_VPAR |
+		       RADEON_FP_CRTC_DONT_SHADOW_HEND));
+
+	fp_gen_cntl &= ~(RADEON_FP_FPON | RADEON_FP_TMDS_EN);
+
+	fp_gen_cntl &= ~(RADEON_FP_RMX_HVSYNC_CONTROL_EN |
+			 RADEON_FP_DFP_SYNC_SEL |
+			 RADEON_FP_CRT_SYNC_SEL |
+			 RADEON_FP_CRTC_LOCK_8DOT |
+			 RADEON_FP_USE_SHADOW_EN |
+			 RADEON_FP_CRTC_USE_SHADOW_VEND |
+			 RADEON_FP_CRT_SYNC_ALT);
+
+	if (1) /*  FIXME rgbBits == 8 */
+		fp_gen_cntl |= RADEON_FP_PANEL_FORMAT;  /* 24 bit format */
+	else
+		fp_gen_cntl &= ~RADEON_FP_PANEL_FORMAT;/* 18 bit format */
+
+	if (radeon_crtc->crtc_id == 0) {
+		if (ASIC_IS_R300(rdev) || rdev->family == CHIP_R200) {
+			fp_gen_cntl &= ~R200_FP_SOURCE_SEL_MASK;
+			if (radeon_encoder->rmx_type != RMX_OFF)
+				fp_gen_cntl |= R200_FP_SOURCE_SEL_RMX;
+			else
+				fp_gen_cntl |= R200_FP_SOURCE_SEL_CRTC1;
+		} else
+			fp_gen_cntl &= ~RADEON_FP_SEL_CRTC2;
+	} else {
+		if (ASIC_IS_R300(rdev) || rdev->family == CHIP_R200) {
+			fp_gen_cntl &= ~R200_FP_SOURCE_SEL_MASK;
+			fp_gen_cntl |= R200_FP_SOURCE_SEL_CRTC2;
+		} else
+			fp_gen_cntl |= RADEON_FP_SEL_CRTC2;
+	}
+
+	WREG32(RADEON_TMDS_PLL_CNTL, tmds_pll_cntl);
+	WREG32(RADEON_TMDS_TRANSMITTER_CNTL, tmds_transmitter_cntl);
+	WREG32(RADEON_FP_GEN_CNTL, fp_gen_cntl);
 
 	if (rdev->is_atom_bios)
 		radeon_atombios_encoder_crtc_scratch_regs(encoder, radeon_crtc->crtc_id);
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index fb6ad143873f..2d901bf28a94 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -214,8 +214,8 @@ int radeon_bo_create(struct radeon_device *rdev,
 	INIT_LIST_HEAD(&bo->list);
 	INIT_LIST_HEAD(&bo->va);
 	bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM |
-	                               RADEON_GEM_DOMAIN_GTT |
-	                               RADEON_GEM_DOMAIN_CPU);
+				       RADEON_GEM_DOMAIN_GTT |
+				       RADEON_GEM_DOMAIN_CPU);
 
 	bo->flags = flags;
 	/* PCI GART is always snooped */
@@ -799,6 +799,10 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 	if ((offset + size) <= rdev->mc.visible_vram_size)
 		return 0;
 
+	/* Can't move a pinned BO to visible VRAM */
+	if (rbo->pin_count > 0)
+		return -EINVAL;
+
 	/* hurrah the memory is not visible ! */
 	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
 	lpfn =	rdev->mc.visible_vram_size >> PAGE_SHIFT;
@@ -848,7 +852,7 @@ int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
  *
  */
 void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
-                     bool shared)
+		     bool shared)
 {
 	struct reservation_object *resv = bo->tbo.resv;
 
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 7a98823bacd1..38226d925a5b 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -79,7 +79,7 @@ void radeon_pm_acpi_event_handler(struct radeon_device *rdev)
 				radeon_dpm_enable_bapm(rdev, rdev->pm.dpm.ac_power);
 		}
 		mutex_unlock(&rdev->pm.mutex);
-        } else if (rdev->pm.pm_method == PM_METHOD_PROFILE) {
+	} else if (rdev->pm.pm_method == PM_METHOD_PROFILE) {
 		if (rdev->pm.profile == PM_PROFILE_AUTO) {
 			mutex_lock(&rdev->pm.mutex);
 			radeon_pm_update_profile(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c
index e6ad54cdfa62..b0eb28e8fb73 100644
--- a/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -56,7 +56,7 @@ int radeon_semaphore_create(struct radeon_device *rdev,
 }
 
 bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ridx,
-			          struct radeon_semaphore *semaphore)
+				  struct radeon_semaphore *semaphore)
 {
 	struct radeon_ring *ring = &rdev->ring[ridx];
 
@@ -73,7 +73,7 @@ bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ridx,
 }
 
 bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ridx,
-			        struct radeon_semaphore *semaphore)
+				struct radeon_semaphore *semaphore)
 {
 	struct radeon_ring *ring = &rdev->ring[ridx];
 
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index e06ac546a90f..7dddfdce85e6 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -397,9 +397,15 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
 			struct ttm_mem_reg *new_mem)
 {
 	struct radeon_device *rdev;
+	struct radeon_bo *rbo;
 	struct ttm_mem_reg *old_mem = &bo->mem;
 	int r;
 
+	/* Can't move a pinned BO */
+	rbo = container_of(bo, struct radeon_bo, tbo);
+	if (WARN_ON_ONCE(rbo->pin_count > 0))
+		return -EINVAL;
+
 	rdev = radeon_get_rdev(bo->bdev);
 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
 		radeon_move_null(bo, new_mem);
@@ -554,8 +560,7 @@ static int radeon_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
 		struct page **pages = ttm->pages + pinned;
 
-		r = get_user_pages(current, current->mm, userptr, num_pages,
-				   write, 0, pages, NULL);
+		r = get_user_pages(userptr, num_pages, write, 0, pages, NULL);
 		if (r < 0)
 			goto release_pages;
 
@@ -610,7 +615,7 @@ static void radeon_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 			set_page_dirty(page);
 
 		mark_page_accessed(page);
-		page_cache_release(page);
+		put_page(page);
 	}
 
 	sg_free_table(ttm->sg);
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index 6edcb5485092..6fe9e4e76284 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -722,9 +722,11 @@ static int radeon_uvd_send_msg(struct radeon_device *rdev,
 	return r;
 }
 
-/* multiple fence commands without any stream commands in between can
-   crash the vcpu so just try to emmit a dummy create/destroy msg to
-   avoid this */
+/*
+ * multiple fence commands without any stream commands in between can
+ * crash the vcpu so just try to emit a dummy create/destroy msg to
+ * avoid this
+ */
 int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
 			      uint32_t handle, struct radeon_fence **fence)
 {
diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c
index 566a1a01f6d1..c1c619facb47 100644
--- a/drivers/gpu/drm/radeon/radeon_vce.c
+++ b/drivers/gpu/drm/radeon/radeon_vce.c
@@ -166,7 +166,7 @@ int radeon_vce_init(struct radeon_device *rdev)
 	for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) {
 		atomic_set(&rdev->vce.handles[i], 0);
 		rdev->vce.filp[i] = NULL;
-        }
+	}
 
 	return 0;
 }
@@ -389,7 +389,7 @@ int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring,
 
 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r) {
-	        DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 	}
 
 	if (fence)
@@ -446,7 +446,7 @@ int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring,
 
 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r) {
-	        DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 	}
 
 	if (fence)
@@ -769,18 +769,18 @@ int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	radeon_ring_unlock_commit(rdev, ring, false);
 
 	for (i = 0; i < rdev->usec_timeout; i++) {
-	        if (vce_v1_0_get_rptr(rdev, ring) != rptr)
-	                break;
-	        DRM_UDELAY(1);
+		if (vce_v1_0_get_rptr(rdev, ring) != rptr)
+			break;
+		DRM_UDELAY(1);
 	}
 
 	if (i < rdev->usec_timeout) {
-	        DRM_INFO("ring test on %d succeeded in %d usecs\n",
-	                 ring->idx, i);
+		DRM_INFO("ring test on %d succeeded in %d usecs\n",
+			 ring->idx, i);
 	} else {
-	        DRM_ERROR("radeon: ring %d test failed\n",
-	                  ring->idx);
-	        r = -ETIMEDOUT;
+		DRM_ERROR("radeon: ring %d test failed\n",
+			 ring->idx);
+		r = -ETIMEDOUT;
 	}
 
 	return r;
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index 3979632b9225..a1358748cea5 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -611,15 +611,16 @@ uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
  */
 static uint32_t radeon_vm_page_flags(uint32_t flags)
 {
-        uint32_t hw_flags = 0;
-        hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
-        hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
-        hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
-        if (flags & RADEON_VM_PAGE_SYSTEM) {
-                hw_flags |= R600_PTE_SYSTEM;
-                hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
-        }
-        return hw_flags;
+	uint32_t hw_flags = 0;
+
+	hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
+	hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
+	hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
+	if (flags & RADEON_VM_PAGE_SYSTEM) {
+		hw_flags |= R600_PTE_SYSTEM;
+		hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
+	}
+	return hw_flags;
 }
 
 /**
diff --git a/drivers/gpu/drm/radeon/rs780_dpm.c b/drivers/gpu/drm/radeon/rs780_dpm.c
index cb0afe78abed..94b48fc1e266 100644
--- a/drivers/gpu/drm/radeon/rs780_dpm.c
+++ b/drivers/gpu/drm/radeon/rs780_dpm.c
@@ -795,7 +795,7 @@ static int rs780_parse_power_table(struct radeon_device *rdev)
 	union pplib_clock_info *clock_info;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	struct igp_ps *ps;
 
diff --git a/drivers/gpu/drm/radeon/rv6xx_dpm.c b/drivers/gpu/drm/radeon/rv6xx_dpm.c
index 97e5a6f1ce58..25e29303b119 100644
--- a/drivers/gpu/drm/radeon/rv6xx_dpm.c
+++ b/drivers/gpu/drm/radeon/rv6xx_dpm.c
@@ -209,7 +209,7 @@ static struct rv6xx_sclk_stepping rv6xx_next_vco_step(struct radeon_device *rdev
 
 static bool rv6xx_can_step_post_div(struct radeon_device *rdev,
 				    struct rv6xx_sclk_stepping *cur,
-                                    struct rv6xx_sclk_stepping *target)
+				    struct rv6xx_sclk_stepping *target)
 {
 	return (cur->post_divider > target->post_divider) &&
 		((cur->vco_frequency * target->post_divider) <=
@@ -239,7 +239,7 @@ static bool rv6xx_reached_stepping_target(struct radeon_device *rdev,
 
 static void rv6xx_generate_steps(struct radeon_device *rdev,
 				 u32 low, u32 high,
-                                 u32 start_index, u8 *end_index)
+				 u32 start_index, u8 *end_index)
 {
 	struct rv6xx_sclk_stepping cur;
 	struct rv6xx_sclk_stepping target;
@@ -1356,23 +1356,23 @@ static void rv6xx_set_dpm_event_sources(struct radeon_device *rdev, u32 sources)
 	enum radeon_dpm_event_src dpm_event_src;
 
 	switch (sources) {
-        case 0:
-        default:
+	case 0:
+	default:
 		want_thermal_protection = false;
 		break;
-        case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL):
+	case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL):
 		want_thermal_protection = true;
 		dpm_event_src = RADEON_DPM_EVENT_SRC_DIGITAL;
 		break;
 
-        case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL):
+	case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL):
 		want_thermal_protection = true;
 		dpm_event_src = RADEON_DPM_EVENT_SRC_EXTERNAL;
 		break;
 
-        case ((1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL) |
+	case ((1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL) |
 	      (1 << RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL)):
-		want_thermal_protection = true;
+			want_thermal_protection = true;
 		dpm_event_src = RADEON_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL;
 		break;
 	}
@@ -1879,7 +1879,7 @@ static int rv6xx_parse_power_table(struct radeon_device *rdev)
 	union pplib_clock_info *clock_info;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	struct rv6xx_ps *ps;
 
diff --git a/drivers/gpu/drm/radeon/rv740_dpm.c b/drivers/gpu/drm/radeon/rv740_dpm.c
index c4c8da501da8..4b850824fe06 100644
--- a/drivers/gpu/drm/radeon/rv740_dpm.c
+++ b/drivers/gpu/drm/radeon/rv740_dpm.c
@@ -36,28 +36,28 @@ u32 rv740_get_decoded_reference_divider(u32 encoded_ref)
 	u32 ref = 0;
 
 	switch (encoded_ref) {
-        case 0:
+	case 0:
 		ref = 1;
 		break;
-        case 16:
+	case 16:
 		ref = 2;
 		break;
-        case 17:
+	case 17:
 		ref = 3;
 		break;
-        case 18:
+	case 18:
 		ref = 2;
 		break;
-        case 19:
+	case 19:
 		ref = 3;
 		break;
-        case 20:
+	case 20:
 		ref = 4;
 		break;
-        case 21:
+	case 21:
 		ref = 5;
 		break;
-        default:
+	default:
 		DRM_ERROR("Invalid encoded Reference Divider\n");
 		ref = 0;
 		break;
diff --git a/drivers/gpu/drm/radeon/rv770_dpm.c b/drivers/gpu/drm/radeon/rv770_dpm.c
index e830c8935db0..a010decf59af 100644
--- a/drivers/gpu/drm/radeon/rv770_dpm.c
+++ b/drivers/gpu/drm/radeon/rv770_dpm.c
@@ -345,27 +345,27 @@ static int rv770_encode_yclk_post_div(u32 postdiv, u32 *encoded_postdiv)
 	int ret = 0;
 
 	switch (postdiv) {
-        case 1:
+	case 1:
 		*encoded_postdiv = 0;
 		break;
-        case 2:
+	case 2:
 		*encoded_postdiv = 1;
 		break;
-        case 4:
+	case 4:
 		*encoded_postdiv = 2;
 		break;
-        case 8:
+	case 8:
 		*encoded_postdiv = 3;
 		break;
-        case 16:
+	case 16:
 		*encoded_postdiv = 4;
 		break;
-        default:
+	default:
 		ret = -EINVAL;
 		break;
 	}
 
-    return ret;
+	return ret;
 }
 
 u32 rv770_map_clkf_to_ibias(struct radeon_device *rdev, u32 clkf)
@@ -1175,15 +1175,15 @@ static int rv770_init_smc_table(struct radeon_device *rdev,
 	rv770_populate_smc_mvdd_table(rdev, table);
 
 	switch (rdev->pm.int_thermal_type) {
-        case THERMAL_TYPE_RV770:
-        case THERMAL_TYPE_ADT7473_WITH_INTERNAL:
+	case THERMAL_TYPE_RV770:
+	case THERMAL_TYPE_ADT7473_WITH_INTERNAL:
 		table->thermalProtectType = PPSMC_THERMAL_PROTECT_TYPE_INTERNAL;
 		break;
-        case THERMAL_TYPE_NONE:
+	case THERMAL_TYPE_NONE:
 		table->thermalProtectType = PPSMC_THERMAL_PROTECT_TYPE_NONE;
 		break;
-        case THERMAL_TYPE_EXTERNAL_GPIO:
-        default:
+	case THERMAL_TYPE_EXTERNAL_GPIO:
+	default:
 		table->thermalProtectType = PPSMC_THERMAL_PROTECT_TYPE_EXTERNAL;
 		break;
 	}
@@ -1567,18 +1567,18 @@ void rv770_reset_smio_status(struct radeon_device *rdev)
 	sw_smio_index =
 		(RREG32(GENERAL_PWRMGT) & SW_SMIO_INDEX_MASK) >> SW_SMIO_INDEX_SHIFT;
 	switch (sw_smio_index) {
-        case 3:
+	case 3:
 		vid_smio_cntl = RREG32(S3_VID_LOWER_SMIO_CNTL);
 		break;
-        case 2:
+	case 2:
 		vid_smio_cntl = RREG32(S2_VID_LOWER_SMIO_CNTL);
 		break;
-        case 1:
+	case 1:
 		vid_smio_cntl = RREG32(S1_VID_LOWER_SMIO_CNTL);
 		break;
-        case 0:
+	case 0:
 		return;
-        default:
+	default:
 		vid_smio_cntl = pi->s0_vid_lower_smio_cntl;
 		break;
 	}
@@ -1817,21 +1817,21 @@ static void rv770_set_dpm_event_sources(struct radeon_device *rdev, u32 sources)
 	enum radeon_dpm_event_src dpm_event_src;
 
 	switch (sources) {
-        case 0:
-        default:
+	case 0:
+	default:
 		want_thermal_protection = false;
 		break;
-        case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL):
+	case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL):
 		want_thermal_protection = true;
 		dpm_event_src = RADEON_DPM_EVENT_SRC_DIGITAL;
 		break;
 
-        case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL):
+	case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL):
 		want_thermal_protection = true;
 		dpm_event_src = RADEON_DPM_EVENT_SRC_EXTERNAL;
 		break;
 
-        case ((1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL) |
+	case ((1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL) |
 	      (1 << RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL)):
 		want_thermal_protection = true;
 		dpm_event_src = RADEON_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL;
@@ -2273,7 +2273,7 @@ int rv7xx_parse_power_table(struct radeon_device *rdev)
 	union pplib_clock_info *clock_info;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	struct rv7xx_ps *ps;
 
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index f878d6962da5..ae21550fe767 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -1307,7 +1307,7 @@ int si_get_allowed_info_register(struct radeon_device *rdev,
  */
 u32 si_get_xclk(struct radeon_device *rdev)
 {
-        u32 reference_clock = rdev->clock.spll.reference_freq;
+	u32 reference_clock = rdev->clock.spll.reference_freq;
 	u32 tmp;
 
 	tmp = RREG32(CG_CLKPIN_CNTL_2);
@@ -2442,8 +2442,10 @@ void dce6_bandwidth_update(struct radeon_device *rdev)
  */
 static void si_tiling_mode_table_init(struct radeon_device *rdev)
 {
-	const u32 num_tile_mode_states = 32;
-	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
+	u32 *tile = rdev->config.si.tile_mode_array;
+	const u32 num_tile_mode_states =
+			ARRAY_SIZE(rdev->config.si.tile_mode_array);
+	u32 reg_offset, split_equal_to_row_size;
 
 	switch (rdev->config.si.mem_row_size_in_kb) {
 	case 1:
@@ -2458,491 +2460,442 @@ static void si_tiling_mode_table_init(struct radeon_device *rdev)
 		break;
 	}
 
-	if ((rdev->family == CHIP_TAHITI) ||
-	    (rdev->family == CHIP_PITCAIRN)) {
-		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:  /* non-AA compressed depth or any compressed stencil */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 1:  /* 2xAA/4xAA compressed depth only */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 2:  /* 8xAA compressed depth only */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(split_equal_to_row_size) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(split_equal_to_row_size) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
-				break;
-			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(split_equal_to_row_size) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 8:  /* 1D and 1D Array Surfaces */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
-						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 9:  /* Displayable maps. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 10:  /* Display 8bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 11:  /* Display 16bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 12:  /* Display 32bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
-				break;
-			case 13:  /* Thin. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 14:  /* Thin 8 bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
-				break;
-			case 15:  /* Thin 16 bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
-				break;
-			case 16:  /* Thin 32 bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
-				break;
-			case 17:  /* Thin 64 bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(split_equal_to_row_size) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
-				break;
-			case 21:  /* 8 bpp PRT. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 22:  /* 16 bpp PRT */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
-				break;
-			case 23:  /* 32 bpp PRT */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 24:  /* 64 bpp PRT */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 25:  /* 128 bpp PRT */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 NUM_BANKS(ADDR_SURF_8_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
-		}
-	} else if ((rdev->family == CHIP_VERDE) ||
-		   (rdev->family == CHIP_OLAND) ||
-		   (rdev->family == CHIP_HAINAN)) {
-		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:  /* non-AA compressed depth or any compressed stencil */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
-				break;
-			case 1:  /* 2xAA/4xAA compressed depth only */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
-				break;
-			case 2:  /* 8xAA compressed depth only */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
-				break;
-			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
-				break;
-			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(split_equal_to_row_size) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(split_equal_to_row_size) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(split_equal_to_row_size) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
-				break;
-			case 8:  /* 1D and 1D Array Surfaces */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
-						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 9:  /* Displayable maps. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 10:  /* Display 8bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
-				break;
-			case 11:  /* Display 16bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 12:  /* Display 32bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 13:  /* Thin. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 14:  /* Thin 8 bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 15:  /* Thin 16 bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 16:  /* Thin 32 bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 17:  /* Thin 64 bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(split_equal_to_row_size) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 21:  /* 8 bpp PRT. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 22:  /* 16 bpp PRT */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
-				break;
-			case 23:  /* 32 bpp PRT */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 24:  /* 64 bpp PRT */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 25:  /* 128 bpp PRT */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 NUM_BANKS(ADDR_SURF_8_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
-		}
-	} else
+	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+		tile[reg_offset] = 0;
+
+	switch(rdev->family) {
+	case CHIP_TAHITI:
+	case CHIP_PITCAIRN:
+		/* non-AA compressed depth or any compressed stencil */
+		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 2xAA/4xAA compressed depth only */
+		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 8xAA compressed depth only */
+		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
+		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
+		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
+		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(split_equal_to_row_size) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
+		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(split_equal_to_row_size) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
+		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(split_equal_to_row_size) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 1D and 1D Array Surfaces */
+		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Displayable maps. */
+		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Display 8bpp. */
+		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Display 16bpp. */
+		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Display 32bpp. */
+		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+		/* Thin. */
+		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Thin 8 bpp. */
+		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+		/* Thin 16 bpp. */
+		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+		/* Thin 32 bpp. */
+		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+		/* Thin 64 bpp. */
+		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(split_equal_to_row_size) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+		/* 8 bpp PRT. */
+		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 16 bpp PRT */
+		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+		/* 32 bpp PRT */
+		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 64 bpp PRT */
+		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 128 bpp PRT */
+		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+			   NUM_BANKS(ADDR_SURF_8_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
+		break;
+
+	case CHIP_VERDE:
+	case CHIP_OLAND:
+	case CHIP_HAINAN:
+		/* non-AA compressed depth or any compressed stencil */
+		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+		/* 2xAA/4xAA compressed depth only */
+		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+		/* 8xAA compressed depth only */
+		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
+		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
+		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
+		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(split_equal_to_row_size) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
+		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(split_equal_to_row_size) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
+		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(split_equal_to_row_size) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+		/* 1D and 1D Array Surfaces */
+		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Displayable maps. */
+		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Display 8bpp. */
+		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+		/* Display 16bpp. */
+		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Display 32bpp. */
+		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Thin. */
+		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Thin 8 bpp. */
+		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Thin 16 bpp. */
+		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Thin 32 bpp. */
+		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Thin 64 bpp. */
+		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(split_equal_to_row_size) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 8 bpp PRT. */
+		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 16 bpp PRT */
+		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+		/* 32 bpp PRT */
+		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 64 bpp PRT */
+		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 128 bpp PRT */
+		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+			   NUM_BANKS(ADDR_SURF_8_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
+		break;
+
+	default:
 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
+	}
 }
 
 static void si_select_se_sh(struct radeon_device *rdev,
@@ -7314,7 +7267,7 @@ uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
 	mutex_lock(&rdev->gpu_clock_mutex);
 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
-	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
 	mutex_unlock(&rdev->gpu_clock_mutex);
 	return clock;
 }
@@ -7775,33 +7728,33 @@ static void si_program_aspm(struct radeon_device *rdev)
 
 int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
 {
-        unsigned i;
+	unsigned i;
 
-        /* make sure VCEPLL_CTLREQ is deasserted */
-        WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
+	/* make sure VCEPLL_CTLREQ is deasserted */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
 
-        mdelay(10);
+	mdelay(10);
 
-        /* assert UPLL_CTLREQ */
-        WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
+	/* assert UPLL_CTLREQ */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
 
-        /* wait for CTLACK and CTLACK2 to get asserted */
-        for (i = 0; i < 100; ++i) {
-                uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
-                if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
-                        break;
-                mdelay(10);
-        }
+	/* wait for CTLACK and CTLACK2 to get asserted */
+	for (i = 0; i < 100; ++i) {
+		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
+		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
+			break;
+		mdelay(10);
+	}
 
-        /* deassert UPLL_CTLREQ */
-        WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
+	/* deassert UPLL_CTLREQ */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
 
-        if (i == 100) {
-                DRM_ERROR("Timeout setting UVD clocks!\n");
-                return -ETIMEDOUT;
-        }
+	if (i == 100) {
+		DRM_ERROR("Timeout setting UVD clocks!\n");
+		return -ETIMEDOUT;
+	}
 
-        return 0;
+	return 0;
 }
 
 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index a82b891ae1fe..af4df81c4e0c 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -499,7 +499,7 @@ static const struct si_cac_config_reg lcac_pitcairn[] =
 
 static const struct si_cac_config_reg cac_override_pitcairn[] =
 {
-    { 0xFFFFFFFF }
+	{ 0xFFFFFFFF }
 };
 
 static const struct si_powertune_data powertune_data_pitcairn =
@@ -991,7 +991,7 @@ static const struct si_cac_config_reg lcac_cape_verde[] =
 
 static const struct si_cac_config_reg cac_override_cape_verde[] =
 {
-    { 0xFFFFFFFF }
+	{ 0xFFFFFFFF }
 };
 
 static const struct si_powertune_data powertune_data_cape_verde =
@@ -1762,9 +1762,9 @@ static void si_fan_ctrl_set_default_mode(struct radeon_device *rdev);
 
 static struct si_power_info *si_get_pi(struct radeon_device *rdev)
 {
-        struct si_power_info *pi = rdev->pm.dpm.priv;
+	struct si_power_info *pi = rdev->pm.dpm.priv;
 
-        return pi;
+	return pi;
 }
 
 static void si_calculate_leakage_for_v_and_t_formula(const struct ni_leakage_coeffients *coeff,
@@ -2926,9 +2926,11 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = {
 	/* PITCAIRN - https://bugs.freedesktop.org/show_bug.cgi?id=76490 */
 	{ PCI_VENDOR_ID_ATI, 0x6810, 0x1462, 0x3036, 0, 120000 },
 	{ PCI_VENDOR_ID_ATI, 0x6811, 0x174b, 0xe271, 0, 120000 },
+	{ PCI_VENDOR_ID_ATI, 0x6811, 0x174b, 0x2015, 0, 120000 },
 	{ PCI_VENDOR_ID_ATI, 0x6810, 0x174b, 0xe271, 85000, 90000 },
 	{ PCI_VENDOR_ID_ATI, 0x6811, 0x1462, 0x2015, 0, 120000 },
 	{ PCI_VENDOR_ID_ATI, 0x6811, 0x1043, 0x2015, 0, 120000 },
+	{ PCI_VENDOR_ID_ATI, 0x6811, 0x148c, 0x2015, 0, 120000 },
 	{ 0, 0, 0, 0 },
 };
 
@@ -3008,6 +3010,10 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
 		}
 		++p;
 	}
+	/* limit mclk on all R7 370 parts for stability */
+	if (rdev->pdev->device == 0x6811 &&
+	    rdev->pdev->revision == 0x81)
+		max_mclk = 120000;
 
 	if (rps->vce_active) {
 		rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk;
@@ -3150,9 +3156,9 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
 		}
 	}
 
-        for (i = 0; i < ps->performance_level_count; i++)
-                btc_adjust_clock_combinations(rdev, max_limits,
-                                              &ps->performance_levels[i]);
+	for (i = 0; i < ps->performance_level_count; i++)
+		btc_adjust_clock_combinations(rdev, max_limits,
+					      &ps->performance_levels[i]);
 
 	for (i = 0; i < ps->performance_level_count; i++) {
 		if (ps->performance_levels[i].vddc < min_vce_voltage)
@@ -3291,7 +3297,7 @@ static void si_set_dpm_event_sources(struct radeon_device *rdev, u32 sources)
 	case 0:
 	default:
 		want_thermal_protection = false;
-                break;
+		break;
 	case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL):
 		want_thermal_protection = true;
 		dpm_event_src = RADEON_DPM_EVENT_SRC_DIGITAL;
@@ -3493,7 +3499,7 @@ static int si_process_firmware_header(struct radeon_device *rdev)
 	if (ret)
 		return ret;
 
-        si_pi->state_table_start = tmp;
+	si_pi->state_table_start = tmp;
 
 	ret = si_read_smc_sram_dword(rdev,
 				     SISLANDS_SMC_FIRMWARE_HEADER_LOCATION +
@@ -3652,7 +3658,7 @@ static void si_program_response_times(struct radeon_device *rdev)
 	si_write_smc_soft_register(rdev, SI_SMC_SOFT_REGISTER_mvdd_chg_time, 1);
 
 	voltage_response_time = (u32)rdev->pm.dpm.voltage_response_time;
-        backbias_response_time = (u32)rdev->pm.dpm.backbias_response_time;
+	backbias_response_time = (u32)rdev->pm.dpm.backbias_response_time;
 
 	if (voltage_response_time == 0)
 		voltage_response_time = 1000;
@@ -3760,7 +3766,7 @@ static void si_setup_bsp(struct radeon_device *rdev)
 			       &pi->pbsu);
 
 
-        pi->dsp = BSP(pi->bsp) | BSU(pi->bsu);
+	pi->dsp = BSP(pi->bsp) | BSU(pi->bsu);
 	pi->psp = BSP(pi->pbsp) | BSU(pi->pbsu);
 
 	WREG32(CG_BSP, pi->dsp);
@@ -4308,7 +4314,7 @@ static int si_populate_memory_timing_parameters(struct radeon_device *rdev,
 
 	radeon_atom_set_engine_dram_timings(rdev,
 					    pl->sclk,
-                                            pl->mclk);
+					    pl->mclk);
 
 	dram_timing  = RREG32(MC_ARB_DRAM_TIMING);
 	dram_timing2 = RREG32(MC_ARB_DRAM_TIMING2);
@@ -4343,7 +4349,7 @@ static int si_do_program_memory_timing_parameters(struct radeon_device *rdev,
 					   si_pi->sram_end);
 		if (ret)
 			break;
-        }
+	}
 
 	return ret;
 }
@@ -4821,9 +4827,9 @@ static int si_calculate_sclk_params(struct radeon_device *rdev,
 	spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK;
 	spll_func_cntl_2 |= SCLK_MUX_SEL(2);
 
-        spll_func_cntl_3 &= ~SPLL_FB_DIV_MASK;
-        spll_func_cntl_3 |= SPLL_FB_DIV(fbdiv);
-        spll_func_cntl_3 |= SPLL_DITHEN;
+	spll_func_cntl_3 &= ~SPLL_FB_DIV_MASK;
+	spll_func_cntl_3 |= SPLL_FB_DIV(fbdiv);
+	spll_func_cntl_3 |= SPLL_DITHEN;
 
 	if (pi->sclk_ss) {
 		struct radeon_atom_ss ss;
@@ -4930,15 +4936,15 @@ static int si_populate_mclk_value(struct radeon_device *rdev,
 		tmp = freq_nom / reference_clock;
 		tmp = tmp * tmp;
 		if (radeon_atombios_get_asic_ss_info(rdev, &ss,
-                                                     ASIC_INTERNAL_MEMORY_SS, freq_nom)) {
+						     ASIC_INTERNAL_MEMORY_SS, freq_nom)) {
 			u32 clks = reference_clock * 5 / ss.rate;
 			u32 clkv = (u32)((((131 * ss.percentage * ss.rate) / 100) * tmp) / freq_nom);
 
-                        mpll_ss1 &= ~CLKV_MASK;
-                        mpll_ss1 |= CLKV(clkv);
+			mpll_ss1 &= ~CLKV_MASK;
+			mpll_ss1 |= CLKV(clkv);
 
-                        mpll_ss2 &= ~CLKS_MASK;
-                        mpll_ss2 |= CLKS(clks);
+			mpll_ss2 &= ~CLKS_MASK;
+			mpll_ss2 |= CLKS(clks);
 		}
 	}
 
@@ -5265,7 +5271,7 @@ static int si_convert_power_state_to_smc(struct radeon_device *rdev,
 		ni_pi->enable_power_containment = false;
 
 	ret = si_populate_sq_ramping_values(rdev, radeon_state, smc_state);
-        if (ret)
+	if (ret)
 		ni_pi->enable_sq_ramping = false;
 
 	return si_populate_smc_t(rdev, radeon_state, smc_state);
@@ -5436,46 +5442,46 @@ static bool si_check_s0_mc_reg_index(u16 in_reg, u16 *out_reg)
 	case  MC_SEQ_RAS_TIMING >> 2:
 		*out_reg = MC_SEQ_RAS_TIMING_LP >> 2;
 		break;
-        case MC_SEQ_CAS_TIMING >> 2:
+	case MC_SEQ_CAS_TIMING >> 2:
 		*out_reg = MC_SEQ_CAS_TIMING_LP >> 2;
 		break;
-        case MC_SEQ_MISC_TIMING >> 2:
+	case MC_SEQ_MISC_TIMING >> 2:
 		*out_reg = MC_SEQ_MISC_TIMING_LP >> 2;
 		break;
-        case MC_SEQ_MISC_TIMING2 >> 2:
+	case MC_SEQ_MISC_TIMING2 >> 2:
 		*out_reg = MC_SEQ_MISC_TIMING2_LP >> 2;
 		break;
-        case MC_SEQ_RD_CTL_D0 >> 2:
+	case MC_SEQ_RD_CTL_D0 >> 2:
 		*out_reg = MC_SEQ_RD_CTL_D0_LP >> 2;
 		break;
-        case MC_SEQ_RD_CTL_D1 >> 2:
+	case MC_SEQ_RD_CTL_D1 >> 2:
 		*out_reg = MC_SEQ_RD_CTL_D1_LP >> 2;
 		break;
-        case MC_SEQ_WR_CTL_D0 >> 2:
+	case MC_SEQ_WR_CTL_D0 >> 2:
 		*out_reg = MC_SEQ_WR_CTL_D0_LP >> 2;
 		break;
-        case MC_SEQ_WR_CTL_D1 >> 2:
+	case MC_SEQ_WR_CTL_D1 >> 2:
 		*out_reg = MC_SEQ_WR_CTL_D1_LP >> 2;
 		break;
-        case MC_PMG_CMD_EMRS >> 2:
+	case MC_PMG_CMD_EMRS >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_EMRS_LP >> 2;
 		break;
-        case MC_PMG_CMD_MRS >> 2:
+	case MC_PMG_CMD_MRS >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_MRS_LP >> 2;
 		break;
-        case MC_PMG_CMD_MRS1 >> 2:
+	case MC_PMG_CMD_MRS1 >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_MRS1_LP >> 2;
 		break;
-        case MC_SEQ_PMG_TIMING >> 2:
+	case MC_SEQ_PMG_TIMING >> 2:
 		*out_reg = MC_SEQ_PMG_TIMING_LP >> 2;
 		break;
-        case MC_PMG_CMD_MRS2 >> 2:
+	case MC_PMG_CMD_MRS2 >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_MRS2_LP >> 2;
 		break;
-        case MC_SEQ_WR_CTL_2 >> 2:
+	case MC_SEQ_WR_CTL_2 >> 2:
 		*out_reg = MC_SEQ_WR_CTL_2_LP >> 2;
 		break;
-        default:
+	default:
 		result = false;
 		break;
 	}
@@ -5562,19 +5568,19 @@ static int si_initialize_mc_reg_table(struct radeon_device *rdev)
 	WREG32(MC_SEQ_PMG_CMD_MRS2_LP, RREG32(MC_PMG_CMD_MRS2));
 	WREG32(MC_SEQ_WR_CTL_2_LP, RREG32(MC_SEQ_WR_CTL_2));
 
-        ret = radeon_atom_init_mc_reg_table(rdev, module_index, table);
-        if (ret)
-                goto init_mc_done;
+	ret = radeon_atom_init_mc_reg_table(rdev, module_index, table);
+	if (ret)
+		goto init_mc_done;
 
-        ret = si_copy_vbios_mc_reg_table(table, si_table);
-        if (ret)
-                goto init_mc_done;
+	ret = si_copy_vbios_mc_reg_table(table, si_table);
+	if (ret)
+		goto init_mc_done;
 
 	si_set_s0_mc_reg_index(si_table);
 
 	ret = si_set_mc_special_registers(rdev, si_table);
-        if (ret)
-                goto init_mc_done;
+	if (ret)
+		goto init_mc_done;
 
 	si_set_valid_flag(si_table);
 
@@ -5715,10 +5721,10 @@ static int si_upload_mc_reg_table(struct radeon_device *rdev,
 
 static void si_enable_voltage_control(struct radeon_device *rdev, bool enable)
 {
-        if (enable)
-                WREG32_P(GENERAL_PWRMGT, VOLT_PWRMGT_EN, ~VOLT_PWRMGT_EN);
-        else
-                WREG32_P(GENERAL_PWRMGT, 0, ~VOLT_PWRMGT_EN);
+	if (enable)
+		WREG32_P(GENERAL_PWRMGT, VOLT_PWRMGT_EN, ~VOLT_PWRMGT_EN);
+	else
+		WREG32_P(GENERAL_PWRMGT, 0, ~VOLT_PWRMGT_EN);
 }
 
 static enum radeon_pcie_gen si_get_maximum_link_speed(struct radeon_device *rdev,
@@ -6820,7 +6826,7 @@ static int si_parse_power_table(struct radeon_device *rdev)
 	struct _NonClockInfoArray *non_clock_info_array;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	u8 *power_state_offset;
 	struct ni_ps *ps;
diff --git a/drivers/gpu/drm/radeon/sumo_dpm.c b/drivers/gpu/drm/radeon/sumo_dpm.c
index cd0862809adf..f0d5c1724f55 100644
--- a/drivers/gpu/drm/radeon/sumo_dpm.c
+++ b/drivers/gpu/drm/radeon/sumo_dpm.c
@@ -787,8 +787,8 @@ static void sumo_program_acpi_power_level(struct radeon_device *rdev)
 	struct atom_clock_dividers dividers;
 	int ret;
 
-        ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
-                                             pi->acpi_pl.sclk,
+	ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+					     pi->acpi_pl.sclk,
 					     false, &dividers);
 	if (ret)
 		return;
@@ -1462,7 +1462,7 @@ static int sumo_parse_power_table(struct radeon_device *rdev)
 	struct _NonClockInfoArray *non_clock_info_array;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	u8 *power_state_offset;
 	struct sumo_ps *ps;
diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c
index d34bfcdab9be..6730367ac228 100644
--- a/drivers/gpu/drm/radeon/trinity_dpm.c
+++ b/drivers/gpu/drm/radeon/trinity_dpm.c
@@ -369,8 +369,8 @@ static void trinity_gfx_powergating_initialize(struct radeon_device *rdev)
 	int ret;
 	u32 hw_rev = (RREG32(HW_REV) & ATI_REV_ID_MASK) >> ATI_REV_ID_SHIFT;
 
-        ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
-                                             25000, false, &dividers);
+	ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+					     25000, false, &dividers);
 	if (ret)
 		return;
 
@@ -587,8 +587,8 @@ static void trinity_set_divider_value(struct radeon_device *rdev,
 	u32 value;
 	u32 ix = index * TRINITY_SIZEOF_DPM_STATE_TABLE;
 
-        ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
-                                             sclk, false, &dividers);
+	ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+					     sclk, false, &dividers);
 	if (ret)
 		return;
 
@@ -597,8 +597,8 @@ static void trinity_set_divider_value(struct radeon_device *rdev,
 	value |= CLK_DIVIDER(dividers.post_div);
 	WREG32_SMC(SMU_SCLK_DPM_STATE_0_CNTL_0 + ix, value);
 
-        ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
-                                             sclk/2, false, &dividers);
+	ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+					     sclk/2, false, &dividers);
 	if (ret)
 		return;
 
@@ -1045,14 +1045,14 @@ static int trinity_set_thermal_temperature_range(struct radeon_device *rdev,
 	int low_temp = 0 * 1000;
 	int high_temp = 255 * 1000;
 
-        if (low_temp < min_temp)
+	if (low_temp < min_temp)
 		low_temp = min_temp;
-        if (high_temp > max_temp)
+	if (high_temp > max_temp)
 		high_temp = max_temp;
-        if (high_temp < low_temp) {
+	if (high_temp < low_temp) {
 		DRM_ERROR("invalid thermal range: %d - %d\n", low_temp, high_temp);
-                return -EINVAL;
-        }
+		return -EINVAL;
+	}
 
 	WREG32_P(CG_THERMAL_INT_CTRL, DIG_THERM_INTH(49 + (high_temp / 1000)), ~DIG_THERM_INTH_MASK);
 	WREG32_P(CG_THERMAL_INT_CTRL, DIG_THERM_INTL(49 + (low_temp / 1000)), ~DIG_THERM_INTL_MASK);
@@ -1737,7 +1737,7 @@ static int trinity_parse_power_table(struct radeon_device *rdev)
 	struct _NonClockInfoArray *non_clock_info_array;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	u8 *power_state_offset;
 	struct sumo_ps *ps;
diff --git a/drivers/gpu/drm/radeon/vce_v2_0.c b/drivers/gpu/drm/radeon/vce_v2_0.c
index cdeaab7c7b1e..fce214482e72 100644
--- a/drivers/gpu/drm/radeon/vce_v2_0.c
+++ b/drivers/gpu/drm/radeon/vce_v2_0.c
@@ -53,7 +53,7 @@ static void vce_v2_0_set_sw_cg(struct radeon_device *rdev, bool gated)
 		WREG32(VCE_UENC_REG_CLOCK_GATING, tmp);
 
 		WREG32(VCE_CGTT_CLK_OVERRIDE, 0);
-    } else {
+	} else {
 		tmp = RREG32(VCE_CLOCK_GATING_B);
 		tmp |= 0xe7;
 		tmp &= ~0xe70000;
diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
index 3d3cf2f8891e..d5cfef75fc80 100644
--- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
@@ -271,8 +271,6 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master,
 	if (!iores)
 		return -ENXIO;
 
-	platform_set_drvdata(pdev, hdmi);
-
 	encoder->possible_crtcs = drm_of_find_possible_crtcs(drm, dev->of_node);
 	/*
 	 * If we failed to find the CRTC(s) which this encoder is
@@ -293,7 +291,16 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master,
 	drm_encoder_init(drm, encoder, &dw_hdmi_rockchip_encoder_funcs,
 			 DRM_MODE_ENCODER_TMDS, NULL);
 
-	return dw_hdmi_bind(dev, master, data, encoder, iores, irq, plat_data);
+	ret = dw_hdmi_bind(dev, master, data, encoder, iores, irq, plat_data);
+
+	/*
+	 * If dw_hdmi_bind() fails we'll never call dw_hdmi_unbind(),
+	 * which would have called the encoder cleanup.  Do it manually.
+	 */
+	if (ret)
+		drm_encoder_cleanup(encoder);
+
+	return ret;
 }
 
 static void dw_hdmi_rockchip_unbind(struct device *dev, struct device *master,
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index 896da09e49ee..f556a8f4fde6 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -251,6 +251,27 @@ static int rockchip_drm_unload(struct drm_device *drm_dev)
 	return 0;
 }
 
+static void rockchip_drm_crtc_cancel_pending_vblank(struct drm_crtc *crtc,
+						    struct drm_file *file_priv)
+{
+	struct rockchip_drm_private *priv = crtc->dev->dev_private;
+	int pipe = drm_crtc_index(crtc);
+
+	if (pipe < ROCKCHIP_MAX_CRTC &&
+	    priv->crtc_funcs[pipe] &&
+	    priv->crtc_funcs[pipe]->cancel_pending_vblank)
+		priv->crtc_funcs[pipe]->cancel_pending_vblank(crtc, file_priv);
+}
+
+static void rockchip_drm_preclose(struct drm_device *dev,
+				  struct drm_file *file_priv)
+{
+	struct drm_crtc *crtc;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
+		rockchip_drm_crtc_cancel_pending_vblank(crtc, file_priv);
+}
+
 void rockchip_drm_lastclose(struct drm_device *dev)
 {
 	struct rockchip_drm_private *priv = dev->dev_private;
@@ -281,6 +302,7 @@ static struct drm_driver rockchip_drm_driver = {
 				  DRIVER_PRIME | DRIVER_ATOMIC,
 	.load			= rockchip_drm_load,
 	.unload			= rockchip_drm_unload,
+	.preclose		= rockchip_drm_preclose,
 	.lastclose		= rockchip_drm_lastclose,
 	.get_vblank_counter	= drm_vblank_no_hw_counter,
 	.enable_vblank		= rockchip_drm_crtc_enable_vblank,
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
index 3529f692edb8..00d17d71aa4c 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
@@ -40,6 +40,7 @@ struct rockchip_crtc_funcs {
 	int (*enable_vblank)(struct drm_crtc *crtc);
 	void (*disable_vblank)(struct drm_crtc *crtc);
 	void (*wait_for_update)(struct drm_crtc *crtc);
+	void (*cancel_pending_vblank)(struct drm_crtc *crtc, struct drm_file *file_priv);
 };
 
 struct rockchip_atomic_commit {
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index fd370548d7d7..a619f120f801 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -499,10 +499,25 @@ err_disable_hclk:
 static void vop_crtc_disable(struct drm_crtc *crtc)
 {
 	struct vop *vop = to_vop(crtc);
+	int i;
 
 	if (!vop->is_enabled)
 		return;
 
+	/*
+	 * We need to make sure that all windows are disabled before we
+	 * disable that crtc. Otherwise we might try to scan from a destroyed
+	 * buffer later.
+	 */
+	for (i = 0; i < vop->data->win_size; i++) {
+		struct vop_win *vop_win = &vop->win[i];
+		const struct vop_win_data *win = vop_win->data;
+
+		spin_lock(&vop->reg_lock);
+		VOP_WIN_SET(vop, win, enable, 0);
+		spin_unlock(&vop->reg_lock);
+	}
+
 	drm_crtc_vblank_off(crtc);
 
 	/*
@@ -549,6 +564,7 @@ static int vop_plane_atomic_check(struct drm_plane *plane,
 			   struct drm_plane_state *state)
 {
 	struct drm_crtc *crtc = state->crtc;
+	struct drm_crtc_state *crtc_state;
 	struct drm_framebuffer *fb = state->fb;
 	struct vop_win *vop_win = to_vop_win(plane);
 	struct vop_plane_state *vop_plane_state = to_vop_plane_state(state);
@@ -563,12 +579,13 @@ static int vop_plane_atomic_check(struct drm_plane *plane,
 	int max_scale = win->phy->scl ? FRAC_16_16(8, 1) :
 					DRM_PLANE_HELPER_NO_SCALING;
 
-	crtc = crtc ? crtc : plane->state->crtc;
-	/*
-	 * Both crtc or plane->state->crtc can be null.
-	 */
 	if (!crtc || !fb)
 		goto out_disable;
+
+	crtc_state = drm_atomic_get_existing_crtc_state(state->state, crtc);
+	if (WARN_ON(!crtc_state))
+		return -EINVAL;
+
 	src->x1 = state->src_x;
 	src->y1 = state->src_y;
 	src->x2 = state->src_x + state->src_w;
@@ -580,8 +597,8 @@ static int vop_plane_atomic_check(struct drm_plane *plane,
 
 	clip.x1 = 0;
 	clip.y1 = 0;
-	clip.x2 = crtc->mode.hdisplay;
-	clip.y2 = crtc->mode.vdisplay;
+	clip.x2 = crtc_state->adjusted_mode.hdisplay;
+	clip.y2 = crtc_state->adjusted_mode.vdisplay;
 
 	ret = drm_plane_helper_check_update(plane, crtc, state->fb,
 					    src, dest, &clip,
@@ -873,10 +890,30 @@ static void vop_crtc_wait_for_update(struct drm_crtc *crtc)
 	WARN_ON(!wait_for_completion_timeout(&vop->wait_update_complete, 100));
 }
 
+static void vop_crtc_cancel_pending_vblank(struct drm_crtc *crtc,
+					   struct drm_file *file_priv)
+{
+	struct drm_device *drm = crtc->dev;
+	struct vop *vop = to_vop(crtc);
+	struct drm_pending_vblank_event *e;
+	unsigned long flags;
+
+	spin_lock_irqsave(&drm->event_lock, flags);
+	e = vop->event;
+	if (e && e->base.file_priv == file_priv) {
+		vop->event = NULL;
+
+		e->base.destroy(&e->base);
+		file_priv->event_space += sizeof(e->event);
+	}
+	spin_unlock_irqrestore(&drm->event_lock, flags);
+}
+
 static const struct rockchip_crtc_funcs private_crtc_funcs = {
 	.enable_vblank = vop_crtc_enable_vblank,
 	.disable_vblank = vop_crtc_disable_vblank,
 	.wait_for_update = vop_crtc_wait_for_update,
+	.cancel_pending_vblank = vop_crtc_cancel_pending_vblank,
 };
 
 static bool vop_crtc_mode_fixup(struct drm_crtc *crtc,
@@ -885,9 +922,6 @@ static bool vop_crtc_mode_fixup(struct drm_crtc *crtc,
 {
 	struct vop *vop = to_vop(crtc);
 
-	if (adjusted_mode->htotal == 0 || adjusted_mode->vtotal == 0)
-		return false;
-
 	adjusted_mode->clock =
 		clk_round_rate(vop->dclk, mode->clock * 1000) / 1000;
 
@@ -1108,7 +1142,7 @@ static int vop_create_crtc(struct vop *vop)
 	const struct vop_data *vop_data = vop->data;
 	struct device *dev = vop->dev;
 	struct drm_device *drm_dev = vop->drm_dev;
-	struct drm_plane *primary = NULL, *cursor = NULL, *plane;
+	struct drm_plane *primary = NULL, *cursor = NULL, *plane, *tmp;
 	struct drm_crtc *crtc = &vop->crtc;
 	struct device_node *port;
 	int ret;
@@ -1148,7 +1182,7 @@ static int vop_create_crtc(struct vop *vop)
 	ret = drm_crtc_init_with_planes(drm_dev, crtc, primary, cursor,
 					&vop_crtc_funcs, NULL);
 	if (ret)
-		return ret;
+		goto err_cleanup_planes;
 
 	drm_crtc_helper_add(crtc, &vop_crtc_helper_funcs);
 
@@ -1181,6 +1215,7 @@ static int vop_create_crtc(struct vop *vop)
 	if (!port) {
 		DRM_ERROR("no port node found in %s\n",
 			  dev->of_node->full_name);
+		ret = -ENOENT;
 		goto err_cleanup_crtc;
 	}
 
@@ -1194,7 +1229,8 @@ static int vop_create_crtc(struct vop *vop)
 err_cleanup_crtc:
 	drm_crtc_cleanup(crtc);
 err_cleanup_planes:
-	list_for_each_entry(plane, &drm_dev->mode_config.plane_list, head)
+	list_for_each_entry_safe(plane, tmp, &drm_dev->mode_config.plane_list,
+				 head)
 		drm_plane_cleanup(plane);
 	return ret;
 }
@@ -1202,9 +1238,28 @@ err_cleanup_planes:
 static void vop_destroy_crtc(struct vop *vop)
 {
 	struct drm_crtc *crtc = &vop->crtc;
+	struct drm_device *drm_dev = vop->drm_dev;
+	struct drm_plane *plane, *tmp;
 
 	rockchip_unregister_crtc_funcs(crtc);
 	of_node_put(crtc->port);
+
+	/*
+	 * We need to cleanup the planes now.  Why?
+	 *
+	 * The planes are "&vop->win[i].base".  That means the memory is
+	 * all part of the big "struct vop" chunk of memory.  That memory
+	 * was devm allocated and associated with this component.  We need to
+	 * free it ourselves before vop_unbind() finishes.
+	 */
+	list_for_each_entry_safe(plane, tmp, &drm_dev->mode_config.plane_list,
+				 head)
+		vop_plane_destroy(plane);
+
+	/*
+	 * Destroy CRTC after vop_plane_destroy() since vop_disable_plane()
+	 * references the CRTC.
+	 */
 	drm_crtc_cleanup(crtc);
 }
 
diff --git a/drivers/gpu/drm/sti/sti_awg_utils.c b/drivers/gpu/drm/sti/sti_awg_utils.c
index 00d0698be9d3..a516eb869f6f 100644
--- a/drivers/gpu/drm/sti/sti_awg_utils.c
+++ b/drivers/gpu/drm/sti/sti_awg_utils.c
@@ -7,6 +7,7 @@
 #include "sti_awg_utils.h"
 
 #define AWG_OPCODE_OFFSET 10
+#define AWG_MAX_ARG       0x3ff
 
 enum opcode {
 	SET,
@@ -34,6 +35,8 @@ static int awg_generate_instr(enum opcode opcode,
 	/* skip, repeat and replay arg should not exceed 1023.
 	 * If user wants to exceed this value, the instruction should be
 	 * duplicate and arg should be adjust for each duplicated instruction.
+	 *
+	 * mux_sel is used in case of SAV/EAV synchronization.
 	 */
 
 	while (arg_tmp > 0) {
@@ -65,7 +68,7 @@ static int awg_generate_instr(enum opcode opcode,
 
 			mux = 0;
 			data_enable = 0;
-			arg &= (0x3ff);
+			arg &= AWG_MAX_ARG;
 			break;
 		case REPEAT:
 		case REPLAY:
@@ -76,13 +79,13 @@ static int awg_generate_instr(enum opcode opcode,
 
 			mux = 0;
 			data_enable = 0;
-			arg &= (0x3ff);
+			arg &= AWG_MAX_ARG;
 			break;
 		case JUMP:
 			mux = 0;
 			data_enable = 0;
 			arg |= 0x40; /* for jump instruction 7th bit is 1 */
-			arg &= 0x3ff;
+			arg &= AWG_MAX_ARG;
 			break;
 		case STOP:
 			arg = 0;
@@ -110,68 +113,75 @@ static int awg_generate_instr(enum opcode opcode,
 	return 0;
 }
 
-int sti_awg_generate_code_data_enable_mode(
+static int awg_generate_line_signal(
 		struct awg_code_generation_params *fwparams,
 		struct awg_timing *timing)
 {
 	long int val;
-	long int data_en;
 	int ret = 0;
 
-	if (timing->trailing_lines > 0) {
-		/* skip trailing lines */
-		val = timing->blanking_level;
-		data_en = 0;
-		ret |= awg_generate_instr(RPLSET, val, 0, data_en, fwparams);
-
-		val = timing->trailing_lines - 1;
-		data_en = 0;
-		ret |= awg_generate_instr(REPLAY, val, 0, data_en, fwparams);
-	}
-
 	if (timing->trailing_pixels > 0) {
 		/* skip trailing pixel */
 		val = timing->blanking_level;
-		data_en = 0;
-		ret |= awg_generate_instr(RPLSET, val, 0, data_en, fwparams);
+		ret |= awg_generate_instr(RPLSET, val, 0, 0, fwparams);
 
 		val = timing->trailing_pixels - 1;
-		data_en = 0;
-		ret |= awg_generate_instr(SKIP, val, 0, data_en, fwparams);
+		ret |= awg_generate_instr(SKIP, val, 0, 0, fwparams);
 	}
 
 	/* set DE signal high */
 	val = timing->blanking_level;
-	data_en = 1;
 	ret |= awg_generate_instr((timing->trailing_pixels > 0) ? SET : RPLSET,
-			val, 0, data_en, fwparams);
+			val, 0, 1, fwparams);
 
 	if (timing->blanking_pixels > 0) {
 		/* skip the number of active pixel */
 		val = timing->active_pixels - 1;
-		data_en = 1;
-		ret |= awg_generate_instr(SKIP, val, 0, data_en, fwparams);
+		ret |= awg_generate_instr(SKIP, val, 0, 1, fwparams);
 
 		/* set DE signal low */
 		val = timing->blanking_level;
-		data_en = 0;
-		ret |= awg_generate_instr(SET, val, 0, data_en, fwparams);
+		ret |= awg_generate_instr(SET, val, 0, 0, fwparams);
+	}
+
+	return ret;
+}
+
+int sti_awg_generate_code_data_enable_mode(
+		struct awg_code_generation_params *fwparams,
+		struct awg_timing *timing)
+{
+	long int val, tmp_val;
+	int ret = 0;
+
+	if (timing->trailing_lines > 0) {
+		/* skip trailing lines */
+		val = timing->blanking_level;
+		ret |= awg_generate_instr(RPLSET, val, 0, 0, fwparams);
+
+		val = timing->trailing_lines - 1;
+		ret |= awg_generate_instr(REPLAY, val, 0, 0, fwparams);
 	}
 
-	/* replay the sequence as many active lines defined */
-	val = timing->active_lines - 1;
-	data_en = 0;
-	ret |= awg_generate_instr(REPLAY, val, 0, data_en, fwparams);
+	tmp_val = timing->active_lines - 1;
+
+	while (tmp_val > 0) {
+		/* generate DE signal for each line */
+		ret |= awg_generate_line_signal(fwparams, timing);
+		/* replay the sequence as many active lines defined */
+		ret |= awg_generate_instr(REPLAY,
+					  min_t(int, AWG_MAX_ARG, tmp_val),
+					  0, 0, fwparams);
+		tmp_val -= AWG_MAX_ARG;
+	}
 
 	if (timing->blanking_lines > 0) {
 		/* skip blanking lines */
 		val = timing->blanking_level;
-		data_en = 0;
-		ret |= awg_generate_instr(RPLSET, val, 0, data_en, fwparams);
+		ret |= awg_generate_instr(RPLSET, val, 0, 0, fwparams);
 
 		val = timing->blanking_lines - 1;
-		data_en = 0;
-		ret |= awg_generate_instr(REPLAY, val, 0, data_en, fwparams);
+		ret |= awg_generate_instr(REPLAY, val, 0, 0, fwparams);
 	}
 
 	return ret;
diff --git a/drivers/gpu/drm/sti/sti_compositor.c b/drivers/gpu/drm/sti/sti_compositor.c
index afed2171beb9..3d2fa3ab33df 100644
--- a/drivers/gpu/drm/sti/sti_compositor.c
+++ b/drivers/gpu/drm/sti/sti_compositor.c
@@ -75,13 +75,13 @@ static int sti_compositor_bind(struct device *dev,
 		switch (desc[i].type) {
 		case STI_VID_SUBDEV:
 			compo->vid[vid_id++] =
-			    sti_vid_create(compo->dev, desc[i].id,
+			    sti_vid_create(compo->dev, drm_dev, desc[i].id,
 					   compo->regs + desc[i].offset);
 			break;
 		case STI_MIXER_MAIN_SUBDEV:
 		case STI_MIXER_AUX_SUBDEV:
 			compo->mixer[mixer_id++] =
-			    sti_mixer_create(compo->dev, desc[i].id,
+			    sti_mixer_create(compo->dev, drm_dev, desc[i].id,
 					     compo->regs + desc[i].offset);
 			break;
 		case STI_GPD_SUBDEV:
diff --git a/drivers/gpu/drm/sti/sti_crtc.c b/drivers/gpu/drm/sti/sti_crtc.c
index e04deedabd4a..505620c7c2c8 100644
--- a/drivers/gpu/drm/sti/sti_crtc.c
+++ b/drivers/gpu/drm/sti/sti_crtc.c
@@ -51,6 +51,15 @@ static void sti_crtc_disabling(struct drm_crtc *crtc)
 	mixer->status = STI_MIXER_DISABLING;
 }
 
+static bool sti_crtc_mode_fixup(struct drm_crtc *crtc,
+				const struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode)
+{
+	/* always accept the mode; only refresh adjusted_mode's crtc info */
+	drm_mode_set_crtcinfo(adjusted_mode, CRTC_INTERLACE_HALVE_V);
+	return true;
+}
+
 static int
 sti_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode)
 {
@@ -221,6 +230,7 @@ static void sti_crtc_atomic_flush(struct drm_crtc *crtc,
 static const struct drm_crtc_helper_funcs sti_crtc_helper_funcs = {
 	.enable = sti_crtc_enable,
 	.disable = sti_crtc_disabling,
+	.mode_fixup = sti_crtc_mode_fixup,
 	.mode_set = drm_helper_crtc_mode_set,
 	.mode_set_nofb = sti_crtc_mode_set_nofb,
 	.mode_set_base = drm_helper_crtc_mode_set_base,
diff --git a/drivers/gpu/drm/sti/sti_cursor.c b/drivers/gpu/drm/sti/sti_cursor.c
index 807863106b8d..3abb400151ac 100644
--- a/drivers/gpu/drm/sti/sti_cursor.c
+++ b/drivers/gpu/drm/sti/sti_cursor.c
@@ -5,12 +5,10 @@
  *          for STMicroelectronics.
  * License terms:  GNU General Public License (GPL), version 2
  */
-#include <drm/drmP.h>
 
-#include <drm/drm_atomic_helper.h>
+#include <drm/drm_atomic.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
-#include <drm/drm_plane_helper.h>
 
 #include "sti_compositor.h"
 #include "sti_cursor.h"
@@ -74,6 +72,82 @@ static const uint32_t cursor_supported_formats[] = {
 
 #define to_sti_cursor(x) container_of(x, struct sti_cursor, plane)
 
+#define DBGFS_DUMP(reg) seq_printf(s, "\n  %-25s 0x%08X", #reg, \
+				   readl(cursor->regs + reg))
+
+static void cursor_dbg_vpo(struct seq_file *s, u32 val)
+{
+	seq_printf(s, "\txdo:%4d\tydo:%4d", val & 0x0FFF, (val >> 16) & 0x0FFF);
+}
+
+static void cursor_dbg_size(struct seq_file *s, u32 val)
+{
+	seq_printf(s, "\t%d x %d", val & 0x07FF, (val >> 16) & 0x07FF);
+}
+
+static void cursor_dbg_pml(struct seq_file *s,
+			   struct sti_cursor *cursor, u32 val)
+{
+	if (cursor->pixmap.paddr == val)
+		seq_printf(s, "\tVirt @: %p", cursor->pixmap.base);
+}
+
+static void cursor_dbg_cml(struct seq_file *s,
+			   struct sti_cursor *cursor, u32 val)
+{
+	if (cursor->clut_paddr == val)
+		seq_printf(s, "\tVirt @: %p", cursor->clut);
+}
+
+static int cursor_dbg_show(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct sti_cursor *cursor = (struct sti_cursor *)node->info_ent->data;
+	struct drm_device *dev = node->minor->dev;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	seq_printf(s, "%s: (vaddr = 0x%p)",
+		   sti_plane_to_str(&cursor->plane), cursor->regs);
+
+	DBGFS_DUMP(CUR_CTL);
+	DBGFS_DUMP(CUR_VPO);
+	cursor_dbg_vpo(s, readl(cursor->regs + CUR_VPO));
+	DBGFS_DUMP(CUR_PML);
+	cursor_dbg_pml(s, cursor, readl(cursor->regs + CUR_PML));
+	DBGFS_DUMP(CUR_PMP);
+	DBGFS_DUMP(CUR_SIZE);
+	cursor_dbg_size(s, readl(cursor->regs + CUR_SIZE));
+	DBGFS_DUMP(CUR_CML);
+	cursor_dbg_cml(s, cursor, readl(cursor->regs + CUR_CML));
+	DBGFS_DUMP(CUR_AWS);
+	DBGFS_DUMP(CUR_AWE);
+	seq_puts(s, "\n");
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+static struct drm_info_list cursor_debugfs_files[] = {
+	{ "cursor", cursor_dbg_show, 0, NULL },
+};
+
+static int cursor_debugfs_init(struct sti_cursor *cursor,
+			       struct drm_minor *minor)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(cursor_debugfs_files); i++)
+		cursor_debugfs_files[i].data = cursor;
+	/* entries now carry @cursor for cursor_dbg_show(); register them */
+	return drm_debugfs_create_files(cursor_debugfs_files,
+					ARRAY_SIZE(cursor_debugfs_files),
+					minor->debugfs_root, minor);
+}
+
 static void sti_cursor_argb8888_to_clut8(struct sti_cursor *cursor, u32 *src)
 {
 	u8  *dst = cursor->pixmap.base;
@@ -110,35 +184,31 @@ static void sti_cursor_init(struct sti_cursor *cursor)
 						  (b * 5);
 }
 
-static void sti_cursor_atomic_update(struct drm_plane *drm_plane,
-				     struct drm_plane_state *oldstate)
+static int sti_cursor_atomic_check(struct drm_plane *drm_plane,
+				   struct drm_plane_state *state)
 {
-	struct drm_plane_state *state = drm_plane->state;
 	struct sti_plane *plane = to_sti_plane(drm_plane);
 	struct sti_cursor *cursor = to_sti_cursor(plane);
 	struct drm_crtc *crtc = state->crtc;
-	struct sti_mixer *mixer = to_sti_mixer(crtc);
 	struct drm_framebuffer *fb = state->fb;
-	struct drm_display_mode *mode = &crtc->mode;
-	int dst_x = state->crtc_x;
-	int dst_y = state->crtc_y;
-	int dst_w = clamp_val(state->crtc_w, 0, mode->crtc_hdisplay - dst_x);
-	int dst_h = clamp_val(state->crtc_h, 0, mode->crtc_vdisplay - dst_y);
+	struct drm_crtc_state *crtc_state;
+	struct drm_display_mode *mode;
+	int dst_x, dst_y, dst_w, dst_h;
+	int src_w, src_h;
+
+	/* no need for further checks if the plane is being disabled */
+	if (!crtc || !fb)
+		return 0;
+
+	crtc_state = drm_atomic_get_crtc_state(state->state, crtc);
+	mode = &crtc_state->mode;
+	dst_x = state->crtc_x;
+	dst_y = state->crtc_y;
+	dst_w = clamp_val(state->crtc_w, 0, mode->crtc_hdisplay - dst_x);
+	dst_h = clamp_val(state->crtc_h, 0, mode->crtc_vdisplay - dst_y);
 	/* src_x are in 16.16 format */
-	int src_w = state->src_w >> 16;
-	int src_h = state->src_h >> 16;
-	bool first_prepare = plane->status == STI_PLANE_DISABLED ? true : false;
-	struct drm_gem_cma_object *cma_obj;
-	u32 y, x;
-	u32 val;
-
-	DRM_DEBUG_KMS("CRTC:%d (%s) drm plane:%d (%s)\n",
-		      crtc->base.id, sti_mixer_to_str(mixer),
-		      drm_plane->base.id, sti_plane_to_str(plane));
-	DRM_DEBUG_KMS("(%dx%d)@(%d,%d)\n", dst_w, dst_h, dst_x, dst_y);
-
-	dev_dbg(cursor->dev, "%s %s\n", __func__,
-		sti_plane_to_str(plane));
+	src_w = state->src_w >> 16;
+	src_h = state->src_h >> 16;
 
 	if (src_w < STI_CURS_MIN_SIZE ||
 	    src_h < STI_CURS_MIN_SIZE ||
@@ -146,7 +216,7 @@ static void sti_cursor_atomic_update(struct drm_plane *drm_plane,
 	    src_h > STI_CURS_MAX_SIZE) {
 		DRM_ERROR("Invalid cursor size (%dx%d)\n",
 				src_w, src_h);
-		return;
+		return -EINVAL;
 	}
 
 	/* If the cursor size has changed, re-allocated the pixmap */
@@ -157,29 +227,57 @@ static void sti_cursor_atomic_update(struct drm_plane *drm_plane,
 		cursor->height = src_h;
 
 		if (cursor->pixmap.base)
-			dma_free_writecombine(cursor->dev,
-					      cursor->pixmap.size,
-					      cursor->pixmap.base,
-					      cursor->pixmap.paddr);
+			dma_free_wc(cursor->dev, cursor->pixmap.size,
+				    cursor->pixmap.base, cursor->pixmap.paddr);
 
 		cursor->pixmap.size = cursor->width * cursor->height;
 
-		cursor->pixmap.base = dma_alloc_writecombine(cursor->dev,
-							cursor->pixmap.size,
-							&cursor->pixmap.paddr,
-							GFP_KERNEL | GFP_DMA);
+		cursor->pixmap.base = dma_alloc_wc(cursor->dev,
+						   cursor->pixmap.size,
+						   &cursor->pixmap.paddr,
+						   GFP_KERNEL | GFP_DMA);
 		if (!cursor->pixmap.base) {
 			DRM_ERROR("Failed to allocate memory for pixmap\n");
-			return;
+			return -EINVAL;
 		}
 	}
 
-	cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
-	if (!cma_obj) {
+	if (!drm_fb_cma_get_gem_obj(fb, 0)) {
 		DRM_ERROR("Can't get CMA GEM object for fb\n");
-		return;
+		return -EINVAL;
 	}
 
+	DRM_DEBUG_KMS("CRTC:%d (%s) drm plane:%d (%s)\n",
+		      crtc->base.id, sti_mixer_to_str(to_sti_mixer(crtc)),
+		      drm_plane->base.id, sti_plane_to_str(plane));
+	DRM_DEBUG_KMS("(%dx%d)@(%d,%d)\n", dst_w, dst_h, dst_x, dst_y);
+
+	return 0;
+}
+
+static void sti_cursor_atomic_update(struct drm_plane *drm_plane,
+				     struct drm_plane_state *oldstate)
+{
+	struct drm_plane_state *state = drm_plane->state;
+	struct sti_plane *plane = to_sti_plane(drm_plane);
+	struct sti_cursor *cursor = to_sti_cursor(plane);
+	struct drm_crtc *crtc = state->crtc;
+	struct drm_framebuffer *fb = state->fb;
+	struct drm_display_mode *mode;
+	int dst_x, dst_y;
+	struct drm_gem_cma_object *cma_obj;
+	u32 y, x;
+	u32 val;
+
+	if (!crtc || !fb)
+		return;
+
+	mode = &crtc->mode;
+	dst_x = state->crtc_x;
+	dst_y = state->crtc_y;
+
+	cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
+
 	/* Convert ARGB8888 to CLUT8 */
 	sti_cursor_argb8888_to_clut8(cursor, (u32 *)cma_obj->vaddr);
 
@@ -193,21 +291,21 @@ static void sti_cursor_atomic_update(struct drm_plane *drm_plane,
 	val = y << 16 | x;
 	writel(val, cursor->regs + CUR_AWE);
 
-	if (first_prepare) {
-		/* Set and fetch CLUT */
-		writel(cursor->clut_paddr, cursor->regs + CUR_CML);
-		writel(CUR_CTL_CLUT_UPDATE, cursor->regs + CUR_CTL);
-	}
-
 	/* Set memory location, size, and position */
 	writel(cursor->pixmap.paddr, cursor->regs + CUR_PML);
 	writel(cursor->width, cursor->regs + CUR_PMP);
 	writel(cursor->height << 16 | cursor->width, cursor->regs + CUR_SIZE);
 
 	y = sti_vtg_get_line_number(*mode, dst_y);
-	x = sti_vtg_get_pixel_number(*mode, dst_y);
+	x = sti_vtg_get_pixel_number(*mode, dst_x);
 	writel((y << 16) | x, cursor->regs + CUR_VPO);
 
+	/* Set and fetch CLUT */
+	writel(cursor->clut_paddr, cursor->regs + CUR_CML);
+	writel(CUR_CTL_CLUT_UPDATE, cursor->regs + CUR_CTL);
+
+	sti_plane_update_fps(plane, true, false);
+
 	plane->status = STI_PLANE_UPDATED;
 }
 
@@ -215,7 +313,6 @@ static void sti_cursor_atomic_disable(struct drm_plane *drm_plane,
 				      struct drm_plane_state *oldstate)
 {
 	struct sti_plane *plane = to_sti_plane(drm_plane);
-	struct sti_mixer *mixer = to_sti_mixer(drm_plane->crtc);
 
 	if (!drm_plane->crtc) {
 		DRM_DEBUG_DRIVER("drm plane:%d not enabled\n",
@@ -224,13 +321,15 @@ static void sti_cursor_atomic_disable(struct drm_plane *drm_plane,
 	}
 
 	DRM_DEBUG_DRIVER("CRTC:%d (%s) drm plane:%d (%s)\n",
-			 drm_plane->crtc->base.id, sti_mixer_to_str(mixer),
+			 drm_plane->crtc->base.id,
+			 sti_mixer_to_str(to_sti_mixer(drm_plane->crtc)),
 			 drm_plane->base.id, sti_plane_to_str(plane));
 
 	plane->status = STI_PLANE_DISABLING;
 }
 
 static const struct drm_plane_helper_funcs sti_cursor_helpers_funcs = {
+	.atomic_check = sti_cursor_atomic_check,
 	.atomic_update = sti_cursor_atomic_update,
 	.atomic_disable = sti_cursor_atomic_disable,
 };
@@ -252,8 +351,8 @@ struct drm_plane *sti_cursor_create(struct drm_device *drm_dev,
 
 	/* Allocate clut buffer */
 	size = 0x100 * sizeof(unsigned short);
-	cursor->clut = dma_alloc_writecombine(dev, size, &cursor->clut_paddr,
-					      GFP_KERNEL | GFP_DMA);
+	cursor->clut = dma_alloc_wc(dev, size, &cursor->clut_paddr,
+				    GFP_KERNEL | GFP_DMA);
 
 	if (!cursor->clut) {
 		DRM_ERROR("Failed to allocate memory for cursor clut\n");
@@ -283,10 +382,13 @@ struct drm_plane *sti_cursor_create(struct drm_device *drm_dev,
 
 	sti_plane_init_property(&cursor->plane, DRM_PLANE_TYPE_CURSOR);
 
+	if (cursor_debugfs_init(cursor, drm_dev->primary))
+		DRM_ERROR("CURSOR debugfs setup failed\n");
+
 	return &cursor->plane.drm_plane;
 
 err_plane:
-	dma_free_writecombine(dev, size, cursor->clut, cursor->clut_paddr);
+	dma_free_wc(dev, size, cursor->clut, cursor->clut_paddr);
 err_clut:
 	devm_kfree(dev, cursor);
 	return NULL;
diff --git a/drivers/gpu/drm/sti/sti_drv.c b/drivers/gpu/drm/sti/sti_drv.c
index 506b5626f3ed..6bd6abaa5a70 100644
--- a/drivers/gpu/drm/sti/sti_drv.c
+++ b/drivers/gpu/drm/sti/sti_drv.c
@@ -20,6 +20,7 @@
 
 #include "sti_crtc.h"
 #include "sti_drv.h"
+#include "sti_plane.h"
 
 #define DRIVER_NAME	"sti"
 #define DRIVER_DESC	"STMicroelectronics SoC DRM"
@@ -30,6 +31,130 @@
 #define STI_MAX_FB_HEIGHT	4096
 #define STI_MAX_FB_WIDTH	4096
 
+static int sti_drm_fps_get(void *data, u64 *val)
+{
+	struct drm_device *drm_dev = data;
+	struct drm_plane *p;
+	unsigned int i = 0;
+
+	*val = 0;	/* bit i <- fps output flag of the i-th listed plane */
+	list_for_each_entry(p, &drm_dev->mode_config.plane_list, head) {
+		struct sti_plane *plane = to_sti_plane(p);
+		/* widen before shifting so the shift is done in 64 bits */
+		*val |= (u64)plane->fps_info.output << i;
+		i++;
+	}
+
+	return 0;
+}
+
+static int sti_drm_fps_set(void *data, u64 val)
+{
+	struct drm_device *drm_dev = data;
+	struct drm_plane *p;
+	unsigned int i = 0;
+
+	list_for_each_entry(p, &drm_dev->mode_config.plane_list, head) {
+		struct sti_plane *plane = to_sti_plane(p);
+		/* bit i of @val sets the fps output flag of the i-th plane */
+		plane->fps_info.output = (val >> i) & 1;
+		i++;
+	}
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(sti_drm_fps_fops,
+			sti_drm_fps_get, sti_drm_fps_set, "%llu\n");
+
+static int sti_drm_fps_dbg_show(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_plane *p;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	list_for_each_entry(p, &dev->mode_config.plane_list, head) {
+		struct sti_plane *plane = to_sti_plane(p);
+
+		seq_printf(s, "%s%s\n",
+			   plane->fps_info.fps_str,
+			   plane->fps_info.fips_str);
+	}
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+static struct drm_info_list sti_drm_dbg_list[] = {
+	{"fps_get", sti_drm_fps_dbg_show, 0},
+};
+
+static int sti_drm_debugfs_create(struct dentry *root,
+				  struct drm_minor *minor,
+				  const char *name,
+				  const struct file_operations *fops)
+{
+	struct drm_device *dev = minor->dev;
+	struct drm_info_node *node;
+	struct dentry *ent;
+	/* failure may be reported as NULL or as an ERR_PTR: catch both */
+	ent = debugfs_create_file(name, S_IRUGO | S_IWUSR, root, dev, fops);
+	if (IS_ERR_OR_NULL(ent))
+		return ent ? PTR_ERR(ent) : -ENOMEM;
+
+	node = kmalloc(sizeof(*node), GFP_KERNEL);
+	if (!node) {
+		debugfs_remove(ent);
+		return -ENOMEM;
+	}
+	/* info_ent carries @fops; sti_drm_dbg_cleanup() passes it back */
+	node->minor = minor;
+	node->dent = ent;
+	node->info_ent = (void *)fops;
+
+	mutex_lock(&minor->debugfs_lock);
+	list_add(&node->list, &minor->debugfs_list);
+	mutex_unlock(&minor->debugfs_lock);
+
+	return 0;
+}
+
+static int sti_drm_dbg_init(struct drm_minor *minor)
+{
+	int ret;
+
+	ret = drm_debugfs_create_files(sti_drm_dbg_list,
+				       ARRAY_SIZE(sti_drm_dbg_list),
+				       minor->debugfs_root, minor);
+	if (ret)
+		goto err;
+
+	ret = sti_drm_debugfs_create(minor->debugfs_root, minor, "fps_show",
+				     &sti_drm_fps_fops);
+	if (ret)
+		goto err;
+
+	DRM_INFO("%s: debugfs installed\n", DRIVER_NAME);
+	return 0;
+err:
+	DRM_ERROR("%s: cannot install debugfs\n", DRIVER_NAME);
+	return ret;
+}
+
+void sti_drm_dbg_cleanup(struct drm_minor *minor)
+{
+	drm_debugfs_remove_files(sti_drm_dbg_list,
+				 ARRAY_SIZE(sti_drm_dbg_list), minor);
+	/* "fps_show" was registered with its fops pointer as info_ent */
+	drm_debugfs_remove_files((struct drm_info_list *)&sti_drm_fps_fops,
+				 1, minor);
+}
+
 static void sti_atomic_schedule(struct sti_private *private,
 				struct drm_atomic_state *state)
 {
@@ -181,18 +306,9 @@ static const struct file_operations sti_driver_fops = {
 	.release = drm_release,
 };
 
-static struct dma_buf *sti_gem_prime_export(struct drm_device *dev,
-					    struct drm_gem_object *obj,
-					    int flags)
-{
-	/* we want to be able to write in mmapped buffer */
-	flags |= O_RDWR;
-	return drm_gem_prime_export(dev, obj, flags);
-}
-
 static struct drm_driver sti_driver = {
 	.driver_features = DRIVER_HAVE_IRQ | DRIVER_MODESET |
-	    DRIVER_GEM | DRIVER_PRIME,
+	    DRIVER_GEM | DRIVER_PRIME | DRIVER_ATOMIC,
 	.load = sti_load,
 	.gem_free_object = drm_gem_cma_free_object,
 	.gem_vm_ops = &drm_gem_cma_vm_ops,
@@ -207,7 +323,7 @@ static struct drm_driver sti_driver = {
 
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
-	.gem_prime_export = sti_gem_prime_export,
+	.gem_prime_export = drm_gem_prime_export,
 	.gem_prime_import = drm_gem_prime_import,
 	.gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table,
 	.gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
@@ -215,6 +331,9 @@ static struct drm_driver sti_driver = {
 	.gem_prime_vunmap = drm_gem_cma_prime_vunmap,
 	.gem_prime_mmap = drm_gem_cma_prime_mmap,
 
+	.debugfs_init = sti_drm_dbg_init,
+	.debugfs_cleanup = sti_drm_dbg_cleanup,
+
 	.name = DRIVER_NAME,
 	.desc = DRIVER_DESC,
 	.date = DRIVER_DATE,
diff --git a/drivers/gpu/drm/sti/sti_dvo.c b/drivers/gpu/drm/sti/sti_dvo.c
index 45cbe2bf7dd6..25f76632002c 100644
--- a/drivers/gpu/drm/sti/sti_dvo.c
+++ b/drivers/gpu/drm/sti/sti_dvo.c
@@ -6,6 +6,7 @@
 
 #include <linux/clk.h>
 #include <linux/component.h>
+#include <linux/debugfs.h>
 #include <linux/module.h>
 #include <linux/of_gpio.h>
 #include <linux/platform_device.h>
@@ -156,6 +157,69 @@ static void dvo_awg_configure(struct sti_dvo *dvo, u32 *awg_ram_code, int nb)
 	writel(DVO_AWG_CTRL_EN, dvo->regs + DVO_AWG_DIGSYNC_CTRL);
 }
 
+#define DBGFS_DUMP(reg) seq_printf(s, "\n  %-25s 0x%08X", #reg, \
+				   readl(dvo->regs + reg))
+
+static void dvo_dbg_awg_microcode(struct seq_file *s, void __iomem *reg)
+{
+	unsigned int i;
+
+	seq_puts(s, "\n\n");
+	seq_puts(s, "  DVO AWG microcode:");
+	for (i = 0; i < AWG_MAX_INST; i++) {
+		if (i % 8 == 0)	/* new row every 8 words, prefixed by index */
+			seq_printf(s, "\n  %04X:", i);
+		seq_printf(s, " %04X", readl(reg + i * 4));
+	}
+}
+
+static int dvo_dbg_show(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct sti_dvo *dvo = (struct sti_dvo *)node->info_ent->data;
+	struct drm_device *dev = node->minor->dev;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	seq_printf(s, "DVO: (vaddr = 0x%p)", dvo->regs);
+	DBGFS_DUMP(DVO_AWG_DIGSYNC_CTRL);
+	DBGFS_DUMP(DVO_DOF_CFG);
+	DBGFS_DUMP(DVO_LUT_PROG_LOW);
+	DBGFS_DUMP(DVO_LUT_PROG_MID);
+	DBGFS_DUMP(DVO_LUT_PROG_HIGH);
+	dvo_dbg_awg_microcode(s, dvo->regs + DVO_DIGSYNC_INSTR_I);
+	seq_puts(s, "\n");
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+static struct drm_info_list dvo_debugfs_files[] = {
+	{ "dvo", dvo_dbg_show, 0, NULL },
+};
+
+static void dvo_debugfs_exit(struct sti_dvo *dvo, struct drm_minor *minor)
+{
+	drm_debugfs_remove_files(dvo_debugfs_files,
+				 ARRAY_SIZE(dvo_debugfs_files),
+				 minor);
+}
+
+static int dvo_debugfs_init(struct sti_dvo *dvo, struct drm_minor *minor)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(dvo_debugfs_files); i++)
+		dvo_debugfs_files[i].data = dvo;
+
+	return drm_debugfs_create_files(dvo_debugfs_files,
+					ARRAY_SIZE(dvo_debugfs_files),
+					minor->debugfs_root, minor);
+}
+
 static void sti_dvo_disable(struct drm_bridge *bridge)
 {
 	struct sti_dvo *dvo = bridge->driver_private;
@@ -345,12 +409,14 @@ sti_dvo_connector_detect(struct drm_connector *connector, bool force)
 
 	DRM_DEBUG_DRIVER("\n");
 
-	if (!dvo->panel)
+	if (!dvo->panel) {
 		dvo->panel = of_drm_find_panel(dvo->panel_node);
+		if (dvo->panel)
+			drm_panel_attach(dvo->panel, connector);
+	}
 
 	if (dvo->panel)
-		if (!drm_panel_attach(dvo->panel, connector))
-			return connector_status_connected;
+		return connector_status_connected;
 
 	return connector_status_disconnected;
 }
@@ -453,6 +519,9 @@ static int sti_dvo_bind(struct device *dev, struct device *master, void *data)
 		goto err_sysfs;
 	}
 
+	if (dvo_debugfs_init(dvo, drm_dev->primary))
+		DRM_ERROR("DVO debugfs setup failed\n");
+
 	return 0;
 
 err_sysfs:
@@ -467,6 +536,9 @@ static void sti_dvo_unbind(struct device *dev,
 			   struct device *master, void *data)
 {
 	struct sti_dvo *dvo = dev_get_drvdata(dev);
+	struct drm_device *drm_dev = data;
+
+	dvo_debugfs_exit(dvo, drm_dev->primary);
 
 	drm_bridge_remove(dvo->bridge);
 }
diff --git a/drivers/gpu/drm/sti/sti_gdp.c b/drivers/gpu/drm/sti/sti_gdp.c
index f9a1d92c9d95..ff3d3e7e7704 100644
--- a/drivers/gpu/drm/sti/sti_gdp.c
+++ b/drivers/gpu/drm/sti/sti_gdp.c
@@ -6,9 +6,7 @@
  * License terms:  GNU General Public License (GPL), version 2
  */
 
-#include <linux/clk.h>
-#include <linux/dma-mapping.h>
-
+#include <drm/drm_atomic.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
 
@@ -32,10 +30,23 @@
 #define GDP_ABGR8888    (GDP_ARGB8888 | BIGNOTLITTLE | ALPHASWITCH)
 #define GDP_ARGB1555    0x06
 #define GDP_ARGB4444    0x07
-#define GDP_CLUT8       0x0B
-#define GDP_YCBR888     0x10
-#define GDP_YCBR422R    0x12
-#define GDP_AYCBR8888   0x15
+
+#define GDP2STR(fmt) { GDP_ ## fmt, #fmt }
+
+static struct gdp_format_to_str {
+	int format;
+	char name[20];
+} gdp_format_to_str[] = {
+		GDP2STR(RGB565),
+		GDP2STR(RGB888),
+		GDP2STR(RGB888_32),
+		GDP2STR(XBGR8888),
+		GDP2STR(ARGB8565),
+		GDP2STR(ARGB8888),
+		GDP2STR(ABGR8888),
+		GDP2STR(ARGB1555),
+		GDP2STR(ARGB4444)
+		};
 
 #define GAM_GDP_CTL_OFFSET      0x00
 #define GAM_GDP_AGC_OFFSET      0x04
@@ -97,6 +108,7 @@ struct sti_gdp_node_list {
  * @vtg_field_nb:       callback for VTG FIELD (top or bottom) notification
  * @is_curr_top:        true if the current node processed is the top field
  * @node_list:          array of node list
+ * @vtg:                registered vtg
  */
 struct sti_gdp {
 	struct sti_plane plane;
@@ -108,6 +120,7 @@ struct sti_gdp {
 	struct notifier_block vtg_field_nb;
 	bool is_curr_top;
 	struct sti_gdp_node_list node_list[GDP_NODE_NB_BANK];
+	struct sti_vtg *vtg;
 };
 
 #define to_sti_gdp(x) container_of(x, struct sti_gdp, plane)
@@ -121,12 +134,224 @@ static const uint32_t gdp_supported_formats[] = {
 	DRM_FORMAT_ARGB1555,
 	DRM_FORMAT_RGB565,
 	DRM_FORMAT_RGB888,
-	DRM_FORMAT_AYUV,
-	DRM_FORMAT_YUV444,
-	DRM_FORMAT_VYUY,
-	DRM_FORMAT_C8,
 };
 
+#define DBGFS_DUMP(reg) seq_printf(s, "\n  %-25s 0x%08X", #reg, \
+				   readl(gdp->regs + reg ## _OFFSET))
+
+static void gdp_dbg_ctl(struct seq_file *s, int val)
+{
+	int i;
+	/* names go through seq_puts(): they are data, not format strings */
+	seq_puts(s, "\tColor:");
+	for (i = 0; i < ARRAY_SIZE(gdp_format_to_str); i++) {
+		if (gdp_format_to_str[i].format == (val & 0x1F)) {
+			seq_puts(s, gdp_format_to_str[i].name);
+			break;
+		}
+	}
+	if (i == ARRAY_SIZE(gdp_format_to_str))	/* no table entry matched */
+		seq_puts(s, "<UNKNOWN>");
+
+	seq_printf(s, "\tWaitNextVsync:%d", val & WAIT_NEXT_VSYNC ? 1 : 0);
+}
+
+static void gdp_dbg_vpo(struct seq_file *s, int val)
+{
+	seq_printf(s, "\txdo:%4d\tydo:%4d", val & 0xFFFF, (val >> 16) & 0xFFFF);
+}
+
+static void gdp_dbg_vps(struct seq_file *s, int val)
+{
+	seq_printf(s, "\txds:%4d\tyds:%4d", val & 0xFFFF, (val >> 16) & 0xFFFF);
+}
+
+static void gdp_dbg_size(struct seq_file *s, int val)
+{
+	seq_printf(s, "\t%d x %d", val & 0xFFFF, (val >> 16) & 0xFFFF);
+}
+
+static void gdp_dbg_nvn(struct seq_file *s, struct sti_gdp *gdp, int val)
+{
+	void *base = NULL;
+	unsigned int i;
+
+	for (i = 0; i < GDP_NODE_NB_BANK; i++) {
+		if (gdp->node_list[i].top_field_paddr == val) {
+			base = gdp->node_list[i].top_field;
+			break;
+		}
+		if (gdp->node_list[i].btm_field_paddr == val) {
+			base = gdp->node_list[i].btm_field;
+			break;
+		}
+	}
+	/* print only when @val matches the paddr of one of our own nodes */
+	if (base)
+		seq_printf(s, "\tVirt @: %p", base);
+}
+
+static void gdp_dbg_ppt(struct seq_file *s, int val)
+{
+	if (val & GAM_GDP_PPT_IGNORE)
+		seq_puts(s, "\tNot displayed on mixer!");
+}
+
+static void gdp_dbg_mst(struct seq_file *s, int val)
+{
+	if (val & 1)
+		seq_puts(s, "\tBUFFER UNDERFLOW!");
+}
+
+static int gdp_dbg_show(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct sti_gdp *gdp = (struct sti_gdp *)node->info_ent->data;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_plane *drm_plane = &gdp->plane.drm_plane;
+	struct drm_crtc *crtc = drm_plane->crtc;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	seq_printf(s, "%s: (vaddr = 0x%p)",
+		   sti_plane_to_str(&gdp->plane), gdp->regs);
+
+	DBGFS_DUMP(GAM_GDP_CTL);
+	gdp_dbg_ctl(s, readl(gdp->regs + GAM_GDP_CTL_OFFSET));
+	DBGFS_DUMP(GAM_GDP_AGC);
+	DBGFS_DUMP(GAM_GDP_VPO);
+	gdp_dbg_vpo(s, readl(gdp->regs + GAM_GDP_VPO_OFFSET));
+	DBGFS_DUMP(GAM_GDP_VPS);
+	gdp_dbg_vps(s, readl(gdp->regs + GAM_GDP_VPS_OFFSET));
+	DBGFS_DUMP(GAM_GDP_PML);
+	DBGFS_DUMP(GAM_GDP_PMP);
+	DBGFS_DUMP(GAM_GDP_SIZE);
+	gdp_dbg_size(s, readl(gdp->regs + GAM_GDP_SIZE_OFFSET));
+	DBGFS_DUMP(GAM_GDP_NVN);
+	gdp_dbg_nvn(s, gdp, readl(gdp->regs + GAM_GDP_NVN_OFFSET));
+	DBGFS_DUMP(GAM_GDP_KEY1);
+	DBGFS_DUMP(GAM_GDP_KEY2);
+	DBGFS_DUMP(GAM_GDP_PPT);
+	gdp_dbg_ppt(s, readl(gdp->regs + GAM_GDP_PPT_OFFSET));
+	DBGFS_DUMP(GAM_GDP_CML);
+	DBGFS_DUMP(GAM_GDP_MST);
+	gdp_dbg_mst(s, readl(gdp->regs + GAM_GDP_MST_OFFSET));
+
+	seq_puts(s, "\n\n");
+	if (!crtc)
+		seq_puts(s, "  Not connected to any DRM CRTC\n");
+	else
+		seq_printf(s, "  Connected to DRM CRTC #%d (%s)\n",
+			   crtc->base.id, sti_mixer_to_str(to_sti_mixer(crtc)));
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+static void gdp_node_dump_node(struct seq_file *s, struct sti_gdp_node *node)
+{
+	seq_printf(s, "\t@:0x%p", node);
+	seq_printf(s, "\n\tCTL  0x%08X", node->gam_gdp_ctl);
+	gdp_dbg_ctl(s, node->gam_gdp_ctl);
+	seq_printf(s, "\n\tAGC  0x%08X", node->gam_gdp_agc);
+	seq_printf(s, "\n\tVPO  0x%08X", node->gam_gdp_vpo);
+	gdp_dbg_vpo(s, node->gam_gdp_vpo);
+	seq_printf(s, "\n\tVPS  0x%08X", node->gam_gdp_vps);
+	gdp_dbg_vps(s, node->gam_gdp_vps);
+	seq_printf(s, "\n\tPML  0x%08X", node->gam_gdp_pml);
+	seq_printf(s, "\n\tPMP  0x%08X", node->gam_gdp_pmp);
+	seq_printf(s, "\n\tSIZE 0x%08X", node->gam_gdp_size);
+	gdp_dbg_size(s, node->gam_gdp_size);
+	seq_printf(s, "\n\tNVN  0x%08X", node->gam_gdp_nvn);
+	seq_printf(s, "\n\tKEY1 0x%08X", node->gam_gdp_key1);
+	seq_printf(s, "\n\tKEY2 0x%08X", node->gam_gdp_key2);
+	seq_printf(s, "\n\tPPT  0x%08X", node->gam_gdp_ppt);
+	gdp_dbg_ppt(s, node->gam_gdp_ppt);
+	seq_printf(s, "\n\tCML  0x%08X", node->gam_gdp_cml);
+	seq_puts(s, "\n");
+}
+
+static int gdp_node_dbg_show(struct seq_file *s, void *arg)
+{
+	struct drm_info_node *node = s->private;
+	struct sti_gdp *gdp = (struct sti_gdp *)node->info_ent->data;
+	struct drm_device *dev = node->minor->dev;
+	unsigned int b;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	for (b = 0; b < GDP_NODE_NB_BANK; b++) {
+		seq_printf(s, "\n%s[%d].top", sti_plane_to_str(&gdp->plane), b);
+		gdp_node_dump_node(s, gdp->node_list[b].top_field);
+		seq_printf(s, "\n%s[%d].btm", sti_plane_to_str(&gdp->plane), b);
+		gdp_node_dump_node(s, gdp->node_list[b].btm_field);
+	}
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+static struct drm_info_list gdp0_debugfs_files[] = {
+	{ "gdp0", gdp_dbg_show, 0, NULL },
+	{ "gdp0_node", gdp_node_dbg_show, 0, NULL },
+};
+
+static struct drm_info_list gdp1_debugfs_files[] = {
+	{ "gdp1", gdp_dbg_show, 0, NULL },
+	{ "gdp1_node", gdp_node_dbg_show, 0, NULL },
+};
+
+static struct drm_info_list gdp2_debugfs_files[] = {
+	{ "gdp2", gdp_dbg_show, 0, NULL },
+	{ "gdp2_node", gdp_node_dbg_show, 0, NULL },
+};
+
+static struct drm_info_list gdp3_debugfs_files[] = {
+	{ "gdp3", gdp_dbg_show, 0, NULL },
+	{ "gdp3_node", gdp_node_dbg_show, 0, NULL },
+};
+
+static int gdp_debugfs_init(struct sti_gdp *gdp, struct drm_minor *minor)
+{
+	unsigned int i;
+	struct drm_info_list *gdp_debugfs_files;
+	int nb_files;
+
+	switch (gdp->plane.desc) {	/* one static file table per GDP */
+	case STI_GDP_0:
+		gdp_debugfs_files = gdp0_debugfs_files;
+		nb_files = ARRAY_SIZE(gdp0_debugfs_files);
+		break;
+	case STI_GDP_1:
+		gdp_debugfs_files = gdp1_debugfs_files;
+		nb_files = ARRAY_SIZE(gdp1_debugfs_files);
+		break;
+	case STI_GDP_2:
+		gdp_debugfs_files = gdp2_debugfs_files;
+		nb_files = ARRAY_SIZE(gdp2_debugfs_files);
+		break;
+	case STI_GDP_3:
+		gdp_debugfs_files = gdp3_debugfs_files;
+		nb_files = ARRAY_SIZE(gdp3_debugfs_files);
+		break;
+	default:
+		return -EINVAL;
+	}
+	/* hand this gdp to the show callbacks through the .data field */
+	for (i = 0; i < nb_files; i++)
+		gdp_debugfs_files[i].data = gdp;
+
+	return drm_debugfs_create_files(gdp_debugfs_files,
+					nb_files,
+					minor->debugfs_root, minor);
+}
+
 static int sti_gdp_fourcc2format(int fourcc)
 {
 	switch (fourcc) {
@@ -146,14 +371,6 @@ static int sti_gdp_fourcc2format(int fourcc)
 		return GDP_RGB565;
 	case DRM_FORMAT_RGB888:
 		return GDP_RGB888;
-	case DRM_FORMAT_AYUV:
-		return GDP_AYCBR8888;
-	case DRM_FORMAT_YUV444:
-		return GDP_YCBR888;
-	case DRM_FORMAT_VYUY:
-		return GDP_YCBR422R;
-	case DRM_FORMAT_C8:
-		return GDP_CLUT8;
 	}
 	return -1;
 }
@@ -163,7 +380,6 @@ static int sti_gdp_get_alpharange(int format)
 	switch (format) {
 	case GDP_ARGB8565:
 	case GDP_ARGB8888:
-	case GDP_AYCBR8888:
 	case GDP_ABGR8888:
 		return GAM_GDP_ALPHARANGE_255;
 	}
@@ -240,9 +456,6 @@ end:
  */
 static void sti_gdp_disable(struct sti_gdp *gdp)
 {
-	struct drm_plane *drm_plane = &gdp->plane.drm_plane;
-	struct sti_mixer *mixer = to_sti_mixer(drm_plane->crtc);
-	struct sti_compositor *compo = dev_get_drvdata(gdp->dev);
 	unsigned int i;
 
 	DRM_DEBUG_DRIVER("%s\n", sti_plane_to_str(&gdp->plane));
@@ -253,8 +466,7 @@ static void sti_gdp_disable(struct sti_gdp *gdp)
 		gdp->node_list[i].btm_field->gam_gdp_ppt |= GAM_GDP_PPT_IGNORE;
 	}
 
-	if (sti_vtg_unregister_client(mixer->id == STI_MIXER_MAIN ?
-			compo->vtg_main : compo->vtg_aux, &gdp->vtg_field_nb))
+	if (sti_vtg_unregister_client(gdp->vtg, &gdp->vtg_field_nb))
 		DRM_DEBUG_DRIVER("Warning: cannot unregister VTG notifier\n");
 
 	if (gdp->clk_pix)
@@ -312,8 +524,7 @@ static void sti_gdp_init(struct sti_gdp *gdp)
 	/* Allocate all the nodes within a single memory page */
 	size = sizeof(struct sti_gdp_node) *
 	    GDP_NODE_PER_FIELD * GDP_NODE_NB_BANK;
-	base = dma_alloc_writecombine(gdp->dev,
-				      size, &dma_addr, GFP_KERNEL | GFP_DMA);
+	base = dma_alloc_wc(gdp->dev, size, &dma_addr, GFP_KERNEL | GFP_DMA);
 
 	if (!base) {
 		DRM_ERROR("Failed to allocate memory for GDP node\n");
@@ -380,20 +591,140 @@ static void sti_gdp_init(struct sti_gdp *gdp)
 	}
 }
 
-static void sti_gdp_atomic_update(struct drm_plane *drm_plane,
-				  struct drm_plane_state *oldstate)
+/**
+ * sti_gdp_get_dst
+ * @dev: device
+ * @dst: requested destination size
+ * @src: source size
+ *
+ * A destination larger than the source is clamped to the source size;
+ * a smaller or equal one is returned unchanged.
+ *
+ * RETURNS:
+ * cropped / clamped destination size
+ */
+static int sti_gdp_get_dst(struct device *dev, int dst, int src)
+{
+	if (dst > src) {
+		dev_dbg(dev, "WARNING: GDP scale not supported, will clamp\n");
+		return src;
+	}
+
+	if (dst < src)
+		dev_dbg(dev, "WARNING: GDP scale not supported, will crop\n");
+
+	return dst;
+}
+
+static int sti_gdp_atomic_check(struct drm_plane *drm_plane,
+				struct drm_plane_state *state)
 {
-	struct drm_plane_state *state = drm_plane->state;
 	struct sti_plane *plane = to_sti_plane(drm_plane);
 	struct sti_gdp *gdp = to_sti_gdp(plane);
 	struct drm_crtc *crtc = state->crtc;
 	struct sti_compositor *compo = dev_get_drvdata(gdp->dev);
 	struct drm_framebuffer *fb =  state->fb;
 	bool first_prepare = plane->status == STI_PLANE_DISABLED ? true : false;
+	struct drm_crtc_state *crtc_state;
 	struct sti_mixer *mixer;
 	struct drm_display_mode *mode;
 	int dst_x, dst_y, dst_w, dst_h;
 	int src_x, src_y, src_w, src_h;
+	int format;
+
+	/* no need for further checks if the plane is being disabled */
+	if (!crtc || !fb)
+		return 0;
+
+	mixer = to_sti_mixer(crtc);
+	crtc_state = drm_atomic_get_crtc_state(state->state, crtc);
+	mode = &crtc_state->mode;
+	dst_x = state->crtc_x;
+	dst_y = state->crtc_y;
+	dst_w = clamp_val(state->crtc_w, 0, mode->crtc_hdisplay - dst_x);
+	dst_h = clamp_val(state->crtc_h, 0, mode->crtc_vdisplay - dst_y);
+	/* src_x are in 16.16 format */
+	src_x = state->src_x >> 16;
+	src_y = state->src_y >> 16;
+	src_w = clamp_val(state->src_w >> 16, 0, GAM_GDP_SIZE_MAX);
+	src_h = clamp_val(state->src_h >> 16, 0, GAM_GDP_SIZE_MAX);
+
+	format = sti_gdp_fourcc2format(fb->pixel_format);
+	if (format == -1) {
+		DRM_ERROR("Format not supported by GDP %.4s\n",
+			  (char *)&fb->pixel_format);
+		return -EINVAL;
+	}
+
+	if (!drm_fb_cma_get_gem_obj(fb, 0)) {
+		DRM_ERROR("Can't get CMA GEM object for fb\n");
+		return -EINVAL;
+	}
+
+	if (first_prepare) {
+		/* Register gdp callback */
+		gdp->vtg = mixer->id == STI_MIXER_MAIN ?
+					compo->vtg_main : compo->vtg_aux;
+		if (sti_vtg_register_client(gdp->vtg,
+					    &gdp->vtg_field_nb, crtc)) {
+			DRM_ERROR("Cannot register VTG notifier\n");
+			return -EINVAL;
+		}
+
+		/* Set and enable gdp clock */
+		if (gdp->clk_pix) {
+			struct clk *clkp;
+			int rate = mode->clock * 1000;
+			int res;
+
+			/*
+			 * According to the mixer used, the gdp pixel clock
+			 * should have a different parent clock.
+			 */
+			if (mixer->id == STI_MIXER_MAIN)
+				clkp = gdp->clk_main_parent;
+			else
+				clkp = gdp->clk_aux_parent;
+
+			if (clkp)
+				clk_set_parent(gdp->clk_pix, clkp);
+
+			res = clk_set_rate(gdp->clk_pix, rate);
+			if (res < 0) {
+				DRM_ERROR("Cannot set rate (%dHz) for gdp\n",
+					  rate);
+				return -EINVAL;
+			}
+
+			if (clk_prepare_enable(gdp->clk_pix)) {
+				DRM_ERROR("Failed to prepare/enable gdp\n");
+				return -EINVAL;
+			}
+		}
+	}
+
+	DRM_DEBUG_KMS("CRTC:%d (%s) drm plane:%d (%s)\n",
+		      crtc->base.id, sti_mixer_to_str(mixer),
+		      drm_plane->base.id, sti_plane_to_str(plane));
+	DRM_DEBUG_KMS("%s dst=(%dx%d)@(%d,%d) - src=(%dx%d)@(%d,%d)\n",
+		      sti_plane_to_str(plane),
+		      dst_w, dst_h, dst_x, dst_y,
+		      src_w, src_h, src_x, src_y);
+
+	return 0;
+}
+
+static void sti_gdp_atomic_update(struct drm_plane *drm_plane,
+				  struct drm_plane_state *oldstate)
+{
+	struct drm_plane_state *state = drm_plane->state;
+	struct sti_plane *plane = to_sti_plane(drm_plane);
+	struct sti_gdp *gdp = to_sti_gdp(plane);
+	struct drm_crtc *crtc = state->crtc;
+	struct drm_framebuffer *fb =  state->fb;
+	struct drm_display_mode *mode;
+	int dst_x, dst_y, dst_w, dst_h;
+	int src_x, src_y, src_w, src_h;
 	struct drm_gem_cma_object *cma_obj;
 	struct sti_gdp_node_list *list;
 	struct sti_gdp_node_list *curr_list;
@@ -403,13 +734,10 @@ static void sti_gdp_atomic_update(struct drm_plane *drm_plane,
 	int format;
 	unsigned int depth, bpp;
 	u32 ydo, xdo, yds, xds;
-	int res;
 
-	/* Manage the case where crtc is null (disabled) */
-	if (!crtc)
+	if (!crtc || !fb)
 		return;
 
-	mixer = to_sti_mixer(crtc);
 	mode = &crtc->mode;
 	dst_x = state->crtc_x;
 	dst_y = state->crtc_y;
@@ -418,16 +746,8 @@ static void sti_gdp_atomic_update(struct drm_plane *drm_plane,
 	/* src_x are in 16.16 format */
 	src_x = state->src_x >> 16;
 	src_y = state->src_y >> 16;
-	src_w = state->src_w >> 16;
-	src_h = state->src_h >> 16;
-
-	DRM_DEBUG_KMS("CRTC:%d (%s) drm plane:%d (%s)\n",
-		      crtc->base.id, sti_mixer_to_str(mixer),
-		      drm_plane->base.id, sti_plane_to_str(plane));
-	DRM_DEBUG_KMS("%s dst=(%dx%d)@(%d,%d) - src=(%dx%d)@(%d,%d)\n",
-		      sti_plane_to_str(plane),
-		      dst_w, dst_h, dst_x, dst_y,
-		      src_w, src_h, src_x, src_y);
+	src_w = clamp_val(state->src_w >> 16, 0, GAM_GDP_SIZE_MAX);
+	src_h = clamp_val(state->src_h >> 16, 0, GAM_GDP_SIZE_MAX);
 
 	list = sti_gdp_get_free_nodes(gdp);
 	top_field = list->top_field;
@@ -440,20 +760,11 @@ static void sti_gdp_atomic_update(struct drm_plane *drm_plane,
 	top_field->gam_gdp_agc = GAM_GDP_AGC_FULL_RANGE;
 	top_field->gam_gdp_ctl = WAIT_NEXT_VSYNC;
 	format = sti_gdp_fourcc2format(fb->pixel_format);
-	if (format == -1) {
-		DRM_ERROR("Format not supported by GDP %.4s\n",
-			  (char *)&fb->pixel_format);
-		return;
-	}
 	top_field->gam_gdp_ctl |= format;
 	top_field->gam_gdp_ctl |= sti_gdp_get_alpharange(format);
 	top_field->gam_gdp_ppt &= ~GAM_GDP_PPT_IGNORE;
 
 	cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
-	if (!cma_obj) {
-		DRM_ERROR("Can't get CMA GEM object for fb\n");
-		return;
-	}
 
 	DRM_DEBUG_DRIVER("drm FB:%d format:%.4s phys@:0x%lx\n", fb->base.id,
 			 (char *)&fb->pixel_format,
@@ -465,12 +776,9 @@ static void sti_gdp_atomic_update(struct drm_plane *drm_plane,
 	top_field->gam_gdp_pml += src_x * (bpp >> 3);
 	top_field->gam_gdp_pml += src_y * fb->pitches[0];
 
-	/* input parameters */
-	top_field->gam_gdp_pmp = fb->pitches[0];
-	top_field->gam_gdp_size = clamp_val(src_h, 0, GAM_GDP_SIZE_MAX) << 16 |
-				  clamp_val(src_w, 0, GAM_GDP_SIZE_MAX);
-
-	/* output parameters */
+	/* output parameters (clamped / cropped) */
+	dst_w = sti_gdp_get_dst(gdp->dev, dst_w, src_w);
+	dst_h = sti_gdp_get_dst(gdp->dev, dst_h, src_h);
 	ydo = sti_vtg_get_line_number(*mode, dst_y);
 	yds = sti_vtg_get_line_number(*mode, dst_y + dst_h - 1);
 	xdo = sti_vtg_get_pixel_number(*mode, dst_x);
@@ -478,6 +786,11 @@ static void sti_gdp_atomic_update(struct drm_plane *drm_plane,
 	top_field->gam_gdp_vpo = (ydo << 16) | xdo;
 	top_field->gam_gdp_vps = (yds << 16) | xds;
 
+	/* input parameters */
+	src_w = dst_w;
+	top_field->gam_gdp_pmp = fb->pitches[0];
+	top_field->gam_gdp_size = src_h << 16 | src_w;
+
 	/* Same content and chained together */
 	memcpy(btm_field, top_field, sizeof(*btm_field));
 	top_field->gam_gdp_nvn = list->btm_field_paddr;
@@ -488,44 +801,6 @@ static void sti_gdp_atomic_update(struct drm_plane *drm_plane,
 		btm_field->gam_gdp_pml = top_field->gam_gdp_pml +
 					 fb->pitches[0];
 
-	if (first_prepare) {
-		/* Register gdp callback */
-		if (sti_vtg_register_client(mixer->id == STI_MIXER_MAIN ?
-				compo->vtg_main : compo->vtg_aux,
-				&gdp->vtg_field_nb, crtc)) {
-			DRM_ERROR("Cannot register VTG notifier\n");
-			return;
-		}
-
-		/* Set and enable gdp clock */
-		if (gdp->clk_pix) {
-			struct clk *clkp;
-			int rate = mode->clock * 1000;
-
-			/* According to the mixer used, the gdp pixel clock
-			 * should have a different parent clock. */
-			if (mixer->id == STI_MIXER_MAIN)
-				clkp = gdp->clk_main_parent;
-			else
-				clkp = gdp->clk_aux_parent;
-
-			if (clkp)
-				clk_set_parent(gdp->clk_pix, clkp);
-
-			res = clk_set_rate(gdp->clk_pix, rate);
-			if (res < 0) {
-				DRM_ERROR("Cannot set rate (%dHz) for gdp\n",
-					  rate);
-				return;
-			}
-
-			if (clk_prepare_enable(gdp->clk_pix)) {
-				DRM_ERROR("Failed to prepare/enable gdp\n");
-				return;
-			}
-		}
-	}
-
 	/* Update the NVN field of the 'right' field of the current GDP node
 	 * (being used by the HW) with the address of the updated ('free') top
 	 * field GDP node.
@@ -574,6 +849,8 @@ static void sti_gdp_atomic_update(struct drm_plane *drm_plane,
 	}
 
 end:
+	sti_plane_update_fps(plane, true, false);
+
 	plane->status = STI_PLANE_UPDATED;
 }
 
@@ -581,7 +858,6 @@ static void sti_gdp_atomic_disable(struct drm_plane *drm_plane,
 				   struct drm_plane_state *oldstate)
 {
 	struct sti_plane *plane = to_sti_plane(drm_plane);
-	struct sti_mixer *mixer = to_sti_mixer(drm_plane->crtc);
 
 	if (!drm_plane->crtc) {
 		DRM_DEBUG_DRIVER("drm plane:%d not enabled\n",
@@ -590,13 +866,15 @@ static void sti_gdp_atomic_disable(struct drm_plane *drm_plane,
 	}
 
 	DRM_DEBUG_DRIVER("CRTC:%d (%s) drm plane:%d (%s)\n",
-			 drm_plane->crtc->base.id, sti_mixer_to_str(mixer),
+			 drm_plane->crtc->base.id,
+			 sti_mixer_to_str(to_sti_mixer(drm_plane->crtc)),
 			 drm_plane->base.id, sti_plane_to_str(plane));
 
 	plane->status = STI_PLANE_DISABLING;
 }
 
 static const struct drm_plane_helper_funcs sti_gdp_helpers_funcs = {
+	.atomic_check = sti_gdp_atomic_check,
 	.atomic_update = sti_gdp_atomic_update,
 	.atomic_disable = sti_gdp_atomic_disable,
 };
@@ -640,6 +918,9 @@ struct drm_plane *sti_gdp_create(struct drm_device *drm_dev,
 
 	sti_plane_init_property(&gdp->plane, type);
 
+	if (gdp_debugfs_init(gdp, drm_dev->primary))
+		DRM_ERROR("GDP debugfs setup failed\n");
+
 	return &gdp->plane.drm_plane;
 
 err:
diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c
index 49cce833f2c8..ec0d017eaf1a 100644
--- a/drivers/gpu/drm/sti/sti_hda.c
+++ b/drivers/gpu/drm/sti/sti_hda.c
@@ -326,6 +326,103 @@ static void hda_enable_hd_dacs(struct sti_hda *hda, bool enable)
 	}
 }
 
+#define DBGFS_DUMP(reg) seq_printf(s, "\n  %-25s 0x%08X", #reg, \
+				   readl(hda->regs + reg))
+
+static void hda_dbg_cfg(struct seq_file *s, int val)
+{
+	seq_puts(s, "\tAWG ");
+	seq_puts(s, val & CFG_AWG_ASYNC_EN ? "enabled" : "disabled");
+}
+
+static void hda_dbg_awg_microcode(struct seq_file *s, void __iomem *reg)
+{
+	unsigned int i;
+
+	seq_puts(s, "\n\n");
+	seq_puts(s, "  HDA AWG microcode:");
+	for (i = 0; i < AWG_MAX_INST; i++) {
+		if (i % 8 == 0)
+			seq_printf(s, "\n  %04X:", i);
+		seq_printf(s, " %04X", readl(reg + i * 4));
+	}
+}
+
+/* Dump the video DACs control register and tell if the HD DACs are off. */
+static void hda_dbg_video_dacs_ctrl(struct seq_file *s, void __iomem *reg)
+{
+	u32 val = readl(reg);
+	u32 mask;
+
+	/* Go through unsigned long: a pointer may be wider than a u32 */
+	switch ((unsigned long)reg & VIDEO_DACS_CONTROL_MASK) {
+	case VIDEO_DACS_CONTROL_SYSCFG2535:
+		mask = DAC_CFG_HD_OFF_MASK;
+		break;
+	case VIDEO_DACS_CONTROL_SYSCFG5072:
+		mask = DAC_CFG_HD_HZUVW_OFF_MASK;
+		break;
+	default:
+		DRM_DEBUG_DRIVER("Warning: DACS ctrl register not supported!");
+		return;
+	}
+
+	seq_printf(s, "\n\n  %-25s 0x%08X", "VIDEO_DACS_CONTROL", val);
+	seq_printf(s, "\tHD DACs %s", val & mask ? "disabled" : "enabled");
+}
+
+static int hda_dbg_show(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct sti_hda *hda = (struct sti_hda *)node->info_ent->data;
+	struct drm_device *dev = node->minor->dev;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	seq_printf(s, "HD Analog: (vaddr = 0x%p)", hda->regs);
+	DBGFS_DUMP(HDA_ANA_CFG);
+	hda_dbg_cfg(s, readl(hda->regs + HDA_ANA_CFG));
+	DBGFS_DUMP(HDA_ANA_SCALE_CTRL_Y);
+	DBGFS_DUMP(HDA_ANA_SCALE_CTRL_CB);
+	DBGFS_DUMP(HDA_ANA_SCALE_CTRL_CR);
+	DBGFS_DUMP(HDA_ANA_ANC_CTRL);
+	DBGFS_DUMP(HDA_ANA_SRC_Y_CFG);
+	DBGFS_DUMP(HDA_ANA_SRC_C_CFG);
+	hda_dbg_awg_microcode(s, hda->regs + HDA_SYNC_AWGI);
+	if (hda->video_dacs_ctrl)
+		hda_dbg_video_dacs_ctrl(s, hda->video_dacs_ctrl);
+	seq_puts(s, "\n");
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+static struct drm_info_list hda_debugfs_files[] = {
+	{ "hda", hda_dbg_show, 0, NULL },
+};
+
+static void hda_debugfs_exit(struct sti_hda *hda, struct drm_minor *minor)
+{
+	drm_debugfs_remove_files(hda_debugfs_files,
+				 ARRAY_SIZE(hda_debugfs_files),
+				 minor);
+}
+
+static int hda_debugfs_init(struct sti_hda *hda, struct drm_minor *minor)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(hda_debugfs_files); i++)
+		hda_debugfs_files[i].data = hda;
+
+	return drm_debugfs_create_files(hda_debugfs_files,
+					ARRAY_SIZE(hda_debugfs_files),
+					minor->debugfs_root, minor);
+}
+
 /**
  * Configure AWG, writing instructions
  *
@@ -685,6 +782,12 @@ static int sti_hda_bind(struct device *dev, struct device *master, void *data)
 		goto err_sysfs;
 	}
 
+	/* force to disable hd dacs at startup */
+	hda_enable_hd_dacs(hda, false);
+
+	if (hda_debugfs_init(hda, drm_dev->primary))
+		DRM_ERROR("HDA debugfs setup failed\n");
+
 	return 0;
 
 err_sysfs:
@@ -697,7 +800,10 @@ err_connector:
 static void sti_hda_unbind(struct device *dev,
 		struct device *master, void *data)
 {
-	/* do nothing */
+	struct sti_hda *hda = dev_get_drvdata(dev);
+	struct drm_device *drm_dev = data;
+
+	hda_debugfs_exit(hda, drm_dev->primary);
 }
 
 static const struct component_ops sti_hda_ops = {
diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c
index cd501563c0cc..6ef0715bd5b9 100644
--- a/drivers/gpu/drm/sti/sti_hdmi.c
+++ b/drivers/gpu/drm/sti/sti_hdmi.c
@@ -6,6 +6,7 @@
 
 #include <linux/clk.h>
 #include <linux/component.h>
+#include <linux/debugfs.h>
 #include <linux/hdmi.h>
 #include <linux/module.h>
 #include <linux/of_gpio.h>
@@ -51,9 +52,18 @@
 #define HDMI_SW_DI_2_PKT_WORD4          0x0614
 #define HDMI_SW_DI_2_PKT_WORD5          0x0618
 #define HDMI_SW_DI_2_PKT_WORD6          0x061C
+#define HDMI_SW_DI_3_HEAD_WORD          0x0620
+#define HDMI_SW_DI_3_PKT_WORD0          0x0624
+#define HDMI_SW_DI_3_PKT_WORD1          0x0628
+#define HDMI_SW_DI_3_PKT_WORD2          0x062C
+#define HDMI_SW_DI_3_PKT_WORD3          0x0630
+#define HDMI_SW_DI_3_PKT_WORD4          0x0634
+#define HDMI_SW_DI_3_PKT_WORD5          0x0638
+#define HDMI_SW_DI_3_PKT_WORD6          0x063C
 
 #define HDMI_IFRAME_SLOT_AVI            1
 #define HDMI_IFRAME_SLOT_AUDIO          2
+#define HDMI_IFRAME_SLOT_VENDOR         3
 
 #define  XCAT(prefix, x, suffix)        prefix ## x ## suffix
 #define  HDMI_SW_DI_N_HEAD_WORD(x)      XCAT(HDMI_SW_DI_, x, _HEAD_WORD)
@@ -65,6 +75,8 @@
 #define  HDMI_SW_DI_N_PKT_WORD5(x)      XCAT(HDMI_SW_DI_, x, _PKT_WORD5)
 #define  HDMI_SW_DI_N_PKT_WORD6(x)      XCAT(HDMI_SW_DI_, x, _PKT_WORD6)
 
+#define HDMI_SW_DI_MAX_WORD             7
+
 #define HDMI_IFRAME_DISABLED            0x0
 #define HDMI_IFRAME_SINGLE_SHOT         0x1
 #define HDMI_IFRAME_FIELD               0x2
@@ -117,6 +129,8 @@ struct sti_hdmi_connector {
 	struct drm_connector drm_connector;
 	struct drm_encoder *encoder;
 	struct sti_hdmi *hdmi;
+	struct drm_property *colorspace_property;
+	struct drm_property *hdmi_mode_property;
 };
 
 #define to_sti_hdmi_connector(x) \
@@ -217,8 +231,10 @@ static void hdmi_config(struct sti_hdmi *hdmi)
 	/* Clear overrun and underrun fifo */
 	conf = HDMI_CFG_FIFO_OVERRUN_CLR | HDMI_CFG_FIFO_UNDERRUN_CLR;
 
-	/* Enable HDMI mode not DVI */
-	conf |= HDMI_CFG_HDMI_NOT_DVI | HDMI_CFG_ESS_NOT_OESS;
+	/* Select encryption type and the framing mode */
+	conf |= HDMI_CFG_ESS_NOT_OESS;
+	if (hdmi->hdmi_mode == HDMI_MODE_HDMI)
+		conf |= HDMI_CFG_HDMI_NOT_DVI;
 
 	/* Enable sink term detection */
 	conf |= HDMI_CFG_SINK_TERM_DET_EN;
@@ -241,6 +257,47 @@ static void hdmi_config(struct sti_hdmi *hdmi)
 	hdmi_write(hdmi, conf, HDMI_CFG);
 }
 
+/*
+ * Helper to reset an info frame: stop its transmission, then clear the
+ * header word and every packet word of the slot.
+ *
+ * @hdmi: pointer on the hdmi internal structure
+ * @slot: infoframe to reset
+ */
+static void hdmi_infoframe_reset(struct sti_hdmi *hdmi, u32 slot)
+{
+	u32 val, i;
+	u32 head_offset, pack_offset;
+
+	switch (slot) {
+	case HDMI_IFRAME_SLOT_AVI:
+		head_offset = HDMI_SW_DI_N_HEAD_WORD(HDMI_IFRAME_SLOT_AVI);
+		pack_offset = HDMI_SW_DI_N_PKT_WORD0(HDMI_IFRAME_SLOT_AVI);
+		break;
+	case HDMI_IFRAME_SLOT_AUDIO:
+		head_offset = HDMI_SW_DI_N_HEAD_WORD(HDMI_IFRAME_SLOT_AUDIO);
+		pack_offset = HDMI_SW_DI_N_PKT_WORD0(HDMI_IFRAME_SLOT_AUDIO);
+		break;
+	case HDMI_IFRAME_SLOT_VENDOR:
+		head_offset = HDMI_SW_DI_N_HEAD_WORD(HDMI_IFRAME_SLOT_VENDOR);
+		pack_offset = HDMI_SW_DI_N_PKT_WORD0(HDMI_IFRAME_SLOT_VENDOR);
+		break;
+	default:
+		DRM_ERROR("unsupported infoframe slot: %#x\n", slot);
+		return;
+	}
+
+	/* Disable transmission for the selected slot */
+	val = hdmi_read(hdmi, HDMI_SW_DI_CFG);
+	val &= ~HDMI_IFRAME_CFG_DI_N(HDMI_IFRAME_MASK, slot);
+	hdmi_write(hdmi, val, HDMI_SW_DI_CFG);
+
+	/* Reset the header, then all HDMI_SW_DI_MAX_WORD 32-bit packet words */
+	hdmi_write(hdmi, 0x0, head_offset);
+	for (i = 0; i < HDMI_SW_DI_MAX_WORD; i++)
+		hdmi_write(hdmi, 0x0, pack_offset + i * sizeof(u32));
+}
+
 /**
  * Helper to concatenate infoframe in 32 bits word
  *
@@ -266,12 +323,13 @@ static inline unsigned int hdmi_infoframe_subpack(const u8 *ptr, size_t size)
  * @data: infoframe to write
  * @size: size to write
  */
-static void hdmi_infoframe_write_infopack(struct sti_hdmi *hdmi, const u8 *data)
+static void hdmi_infoframe_write_infopack(struct sti_hdmi *hdmi,
+					  const u8 *data,
+					  size_t size)
 {
 	const u8 *ptr = data;
 	u32 val, slot, mode, i;
 	u32 head_offset, pack_offset;
-	size_t size;
 
 	switch (*ptr) {
 	case HDMI_INFOFRAME_TYPE_AVI:
@@ -279,17 +337,19 @@ static void hdmi_infoframe_write_infopack(struct sti_hdmi *hdmi, const u8 *data)
 		mode = HDMI_IFRAME_FIELD;
 		head_offset = HDMI_SW_DI_N_HEAD_WORD(HDMI_IFRAME_SLOT_AVI);
 		pack_offset = HDMI_SW_DI_N_PKT_WORD0(HDMI_IFRAME_SLOT_AVI);
-		size = HDMI_AVI_INFOFRAME_SIZE;
 		break;
-
 	case HDMI_INFOFRAME_TYPE_AUDIO:
 		slot = HDMI_IFRAME_SLOT_AUDIO;
 		mode = HDMI_IFRAME_FRAME;
 		head_offset = HDMI_SW_DI_N_HEAD_WORD(HDMI_IFRAME_SLOT_AUDIO);
 		pack_offset = HDMI_SW_DI_N_PKT_WORD0(HDMI_IFRAME_SLOT_AUDIO);
-		size = HDMI_AUDIO_INFOFRAME_SIZE;
 		break;
-
+	case HDMI_INFOFRAME_TYPE_VENDOR:
+		slot = HDMI_IFRAME_SLOT_VENDOR;
+		mode = HDMI_IFRAME_FRAME;
+		head_offset = HDMI_SW_DI_N_HEAD_WORD(HDMI_IFRAME_SLOT_VENDOR);
+		pack_offset = HDMI_SW_DI_N_PKT_WORD0(HDMI_IFRAME_SLOT_VENDOR);
+		break;
 	default:
 		DRM_ERROR("unsupported infoframe type: %#x\n", *ptr);
 		return;
@@ -308,8 +368,9 @@ static void hdmi_infoframe_write_infopack(struct sti_hdmi *hdmi, const u8 *data)
 	/*
 	 * Each subpack contains 4 bytes
 	 * The First Bytes of the first subpacket must contain the checksum
-	 * Packet size in increase by one.
+	 * Packet size is increased by one.
 	 */
+	size = size - HDMI_INFOFRAME_HEADER_SIZE + 1;
 	for (i = 0; i < size; i += sizeof(u32)) {
 		size_t num;
 
@@ -321,7 +382,7 @@ static void hdmi_infoframe_write_infopack(struct sti_hdmi *hdmi, const u8 *data)
 
 	/* Enable transmission slot for updated infoframe */
 	val = hdmi_read(hdmi, HDMI_SW_DI_CFG);
-	val |= HDMI_IFRAME_CFG_DI_N(HDMI_IFRAME_FIELD, slot);
+	val |= HDMI_IFRAME_CFG_DI_N(mode, slot);
 	hdmi_write(hdmi, val, HDMI_SW_DI_CFG);
 }
 
@@ -352,7 +413,7 @@ static int hdmi_avi_infoframe_config(struct sti_hdmi *hdmi)
 	}
 
 	/* fixed infoframe configuration not linked to the mode */
-	infoframe.colorspace = HDMI_COLORSPACE_RGB;
+	infoframe.colorspace = hdmi->colorspace;
 	infoframe.quantization_range = HDMI_QUANTIZATION_RANGE_DEFAULT;
 	infoframe.colorimetry = HDMI_COLORIMETRY_NONE;
 
@@ -362,7 +423,7 @@ static int hdmi_avi_infoframe_config(struct sti_hdmi *hdmi)
 		return ret;
 	}
 
-	hdmi_infoframe_write_infopack(hdmi, buffer);
+	hdmi_infoframe_write_infopack(hdmi, buffer, ret);
 
 	return 0;
 }
@@ -398,7 +459,49 @@ static int hdmi_audio_infoframe_config(struct sti_hdmi *hdmi)
 		return ret;
 	}
 
-	hdmi_infoframe_write_infopack(hdmi, buffer);
+	hdmi_infoframe_write_infopack(hdmi, buffer, ret);
+
+	return 0;
+}
+
+/*
+ * Prepare and configure the VS infoframe
+ *
+ * Vendor Specific infoframes are transmitted once per frame and
+ * contain vendor specific information.
+ *
+ * @hdmi: pointer on the hdmi internal structure
+ *
+ * Return negative value if error occurs
+ */
+#define HDMI_VENDOR_INFOFRAME_MAX_SIZE 6
+static int hdmi_vendor_infoframe_config(struct sti_hdmi *hdmi)
+{
+	struct drm_display_mode *mode = &hdmi->mode;
+	struct hdmi_vendor_infoframe infoframe;
+	u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_VENDOR_INFOFRAME_MAX_SIZE];
+	int ret;
+
+	DRM_DEBUG_DRIVER("\n");
+
+	ret = drm_hdmi_vendor_infoframe_from_display_mode(&infoframe, mode);
+	if (ret < 0) {
+		/*
+		 * Getting here does not mean the vendor infoframe failed.
+		 * It only tells us that a vendor infoframe is not needed
+		 * for the selected mode: only 4k or stereoscopic 3D modes
+		 * require a vendor infoframe. So simply return 0.
+		 */
+		return 0;
+	}
+
+	ret = hdmi_vendor_infoframe_pack(&infoframe, buffer, sizeof(buffer));
+	if (ret < 0) {
+		DRM_ERROR("failed to pack VS infoframe: %d\n", ret);
+		return ret;
+	}
+
+	hdmi_infoframe_write_infopack(hdmi, buffer, ret);
 
 	return 0;
 }
@@ -448,6 +551,172 @@ static void hdmi_swreset(struct sti_hdmi *hdmi)
 	clk_disable_unprepare(hdmi->clk_audio);
 }
 
+#define DBGFS_PRINT_STR(str1, str2) seq_printf(s, "%-24s %s\n", str1, str2)
+#define DBGFS_PRINT_INT(str1, int2) seq_printf(s, "%-24s %d\n", str1, int2)
+#define DBGFS_DUMP(str, reg) seq_printf(s, "%s  %-25s 0x%08X", str, #reg, \
+					hdmi_read(hdmi, reg))
+#define DBGFS_DUMP_DI(reg, slot) DBGFS_DUMP("\n", reg(slot))
+
+static void hdmi_dbg_cfg(struct seq_file *s, int val)
+{
+	int tmp;
+
+	seq_puts(s, "\t");
+	tmp = val & HDMI_CFG_HDMI_NOT_DVI;
+	DBGFS_PRINT_STR("mode:", tmp ? "HDMI" : "DVI");
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = val & HDMI_CFG_HDCP_EN;
+	DBGFS_PRINT_STR("HDCP:", tmp ? "enable" : "disable");
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = val & HDMI_CFG_ESS_NOT_OESS;
+	DBGFS_PRINT_STR("HDCP mode:", tmp ? "ESS enable" : "OESS enable");
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = val & HDMI_CFG_SINK_TERM_DET_EN;
+	DBGFS_PRINT_STR("Sink term detection:", tmp ? "enable" : "disable");
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = val & HDMI_CFG_H_SYNC_POL_NEG;
+	DBGFS_PRINT_STR("Hsync polarity:", tmp ? "inverted" : "normal");
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = val & HDMI_CFG_V_SYNC_POL_NEG;
+	DBGFS_PRINT_STR("Vsync polarity:", tmp ? "inverted" : "normal");
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = val & HDMI_CFG_422_EN;
+	DBGFS_PRINT_STR("YUV422 format:", tmp ? "enable" : "disable");
+}
+
+static void hdmi_dbg_sta(struct seq_file *s, int val)
+{
+	int tmp;
+
+	seq_puts(s, "\t");
+	tmp = (val & HDMI_STA_DLL_LCK);
+	DBGFS_PRINT_STR("pll:", tmp ? "locked" : "not locked");
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = (val & HDMI_STA_HOT_PLUG);
+	DBGFS_PRINT_STR("hdmi cable:", tmp ? "connected" : "not connected");
+}
+
+static void hdmi_dbg_sw_di_cfg(struct seq_file *s, int val)
+{
+	int tmp;
+	char *const en_di[] = {"no transmission",
+			       "single transmission",
+			       "once every field",
+			       "once every frame"};
+
+	seq_puts(s, "\t");
+	tmp = (val & HDMI_IFRAME_CFG_DI_N(HDMI_IFRAME_MASK, 1));
+	DBGFS_PRINT_STR("Data island 1:", en_di[tmp]);
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = (val & HDMI_IFRAME_CFG_DI_N(HDMI_IFRAME_MASK, 2)) >> 4;
+	DBGFS_PRINT_STR("Data island 2:", en_di[tmp]);
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = (val & HDMI_IFRAME_CFG_DI_N(HDMI_IFRAME_MASK, 3)) >> 8;
+	DBGFS_PRINT_STR("Data island 3:", en_di[tmp]);
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = (val & HDMI_IFRAME_CFG_DI_N(HDMI_IFRAME_MASK, 4)) >> 12;
+	DBGFS_PRINT_STR("Data island 4:", en_di[tmp]);
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = (val & HDMI_IFRAME_CFG_DI_N(HDMI_IFRAME_MASK, 5)) >> 16;
+	DBGFS_PRINT_STR("Data island 5:", en_di[tmp]);
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = (val & HDMI_IFRAME_CFG_DI_N(HDMI_IFRAME_MASK, 6)) >> 20;
+	DBGFS_PRINT_STR("Data island 6:", en_di[tmp]);
+}
+
+static int hdmi_dbg_show(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct sti_hdmi *hdmi = (struct sti_hdmi *)node->info_ent->data;
+	struct drm_device *dev = node->minor->dev;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	seq_printf(s, "HDMI: (vaddr = 0x%p)", hdmi->regs);
+	DBGFS_DUMP("\n", HDMI_CFG);
+	hdmi_dbg_cfg(s, hdmi_read(hdmi, HDMI_CFG));
+	DBGFS_DUMP("", HDMI_INT_EN);
+	DBGFS_DUMP("\n", HDMI_STA);
+	hdmi_dbg_sta(s, hdmi_read(hdmi, HDMI_STA));
+	DBGFS_DUMP("", HDMI_ACTIVE_VID_XMIN);
+	seq_puts(s, "\t");
+	DBGFS_PRINT_INT("Xmin:", hdmi_read(hdmi, HDMI_ACTIVE_VID_XMIN));
+	DBGFS_DUMP("", HDMI_ACTIVE_VID_XMAX);
+	seq_puts(s, "\t");
+	DBGFS_PRINT_INT("Xmax:", hdmi_read(hdmi, HDMI_ACTIVE_VID_XMAX));
+	DBGFS_DUMP("", HDMI_ACTIVE_VID_YMIN);
+	seq_puts(s, "\t");
+	DBGFS_PRINT_INT("Ymin:", hdmi_read(hdmi, HDMI_ACTIVE_VID_YMIN));
+	DBGFS_DUMP("", HDMI_ACTIVE_VID_YMAX);
+	seq_puts(s, "\t");
+	DBGFS_PRINT_INT("Ymax:", hdmi_read(hdmi, HDMI_ACTIVE_VID_YMAX));
+	DBGFS_DUMP("", HDMI_SW_DI_CFG);
+	hdmi_dbg_sw_di_cfg(s, hdmi_read(hdmi, HDMI_SW_DI_CFG));
+
+	seq_printf(s, "\n AVI Infoframe (Data Island slot N=%d):",
+		   HDMI_IFRAME_SLOT_AVI);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_HEAD_WORD, HDMI_IFRAME_SLOT_AVI);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD0, HDMI_IFRAME_SLOT_AVI);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD1, HDMI_IFRAME_SLOT_AVI);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD2, HDMI_IFRAME_SLOT_AVI);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD3, HDMI_IFRAME_SLOT_AVI);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD4, HDMI_IFRAME_SLOT_AVI);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD5, HDMI_IFRAME_SLOT_AVI);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD6, HDMI_IFRAME_SLOT_AVI);
+	seq_puts(s, "\n");
+	seq_printf(s, "\n AUDIO Infoframe (Data Island slot N=%d):",
+		   HDMI_IFRAME_SLOT_AUDIO);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_HEAD_WORD, HDMI_IFRAME_SLOT_AUDIO);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD0, HDMI_IFRAME_SLOT_AUDIO);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD1, HDMI_IFRAME_SLOT_AUDIO);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD2, HDMI_IFRAME_SLOT_AUDIO);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD3, HDMI_IFRAME_SLOT_AUDIO);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD4, HDMI_IFRAME_SLOT_AUDIO);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD5, HDMI_IFRAME_SLOT_AUDIO);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD6, HDMI_IFRAME_SLOT_AUDIO);
+	seq_puts(s, "\n");
+	seq_printf(s, "\n VENDOR SPECIFIC Infoframe (Data Island slot N=%d):",
+		   HDMI_IFRAME_SLOT_VENDOR);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_HEAD_WORD, HDMI_IFRAME_SLOT_VENDOR);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD0, HDMI_IFRAME_SLOT_VENDOR);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD1, HDMI_IFRAME_SLOT_VENDOR);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD2, HDMI_IFRAME_SLOT_VENDOR);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD3, HDMI_IFRAME_SLOT_VENDOR);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD4, HDMI_IFRAME_SLOT_VENDOR);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD5, HDMI_IFRAME_SLOT_VENDOR);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD6, HDMI_IFRAME_SLOT_VENDOR);
+	seq_puts(s, "\n");
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+static struct drm_info_list hdmi_debugfs_files[] = {
+	{ "hdmi", hdmi_dbg_show, 0, NULL },
+};
+
+static void hdmi_debugfs_exit(struct sti_hdmi *hdmi, struct drm_minor *minor)
+{
+	drm_debugfs_remove_files(hdmi_debugfs_files,
+				 ARRAY_SIZE(hdmi_debugfs_files),
+				 minor);
+}
+
+static int hdmi_debugfs_init(struct sti_hdmi *hdmi, struct drm_minor *minor)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(hdmi_debugfs_files); i++)
+		hdmi_debugfs_files[i].data = hdmi;
+
+	return drm_debugfs_create_files(hdmi_debugfs_files,
+					ARRAY_SIZE(hdmi_debugfs_files),
+					minor->debugfs_root, minor);
+}
+
 static void sti_hdmi_disable(struct drm_bridge *bridge)
 {
 	struct sti_hdmi *hdmi = bridge->driver_private;
@@ -468,6 +737,11 @@ static void sti_hdmi_disable(struct drm_bridge *bridge)
 	/* Stop the phy */
 	hdmi->phy_ops->stop(hdmi);
 
+	/* Reset info frame transmission */
+	hdmi_infoframe_reset(hdmi, HDMI_IFRAME_SLOT_AVI);
+	hdmi_infoframe_reset(hdmi, HDMI_IFRAME_SLOT_AUDIO);
+	hdmi_infoframe_reset(hdmi, HDMI_IFRAME_SLOT_VENDOR);
+
 	/* Set the default channel data to be a dark red */
 	hdmi_write(hdmi, 0x0000, HDMI_DFLT_CHL0_DAT);
 	hdmi_write(hdmi, 0x0000, HDMI_DFLT_CHL1_DAT);
@@ -523,6 +797,10 @@ static void sti_hdmi_pre_enable(struct drm_bridge *bridge)
 	if (hdmi_audio_infoframe_config(hdmi))
 		DRM_ERROR("Unable to configure AUDIO infoframe\n");
 
+	/* Program VS infoframe */
+	if (hdmi_vendor_infoframe_config(hdmi))
+		DRM_ERROR("Unable to configure VS infoframe\n");
+
 	/* Sw reset */
 	hdmi_swreset(hdmi);
 }
@@ -664,12 +942,97 @@ static void sti_hdmi_connector_destroy(struct drm_connector *connector)
 	kfree(hdmi_connector);
 }
 
+/*
+ * Create the "colorspace" and "hdmi_mode" enum properties and attach them
+ * to @connector, seeding the sti_hdmi state with the default values.
+ *
+ * If a property cannot be created, an error is logged and the function
+ * returns immediately; a property handled after the failing one is then
+ * neither created nor attached.
+ */
+static void sti_hdmi_connector_init_property(struct drm_device *drm_dev,
+					     struct drm_connector *connector)
+{
+	struct sti_hdmi_connector *hdmi_connector
+		= to_sti_hdmi_connector(connector);
+	struct sti_hdmi *hdmi = hdmi_connector->hdmi;
+	struct drm_property *prop;
+
+	/* colorspace property */
+	hdmi->colorspace = DEFAULT_COLORSPACE_MODE;
+	prop = drm_property_create_enum(drm_dev, 0, "colorspace",
+					colorspace_mode_names,
+					ARRAY_SIZE(colorspace_mode_names));
+	if (!prop) {
+		DRM_ERROR("fails to create colorspace property\n");
+		return;
+	}
+	hdmi_connector->colorspace_property = prop;
+	drm_object_attach_property(&connector->base, prop, hdmi->colorspace);
+
+	/* hdmi_mode property */
+	hdmi->hdmi_mode = DEFAULT_HDMI_MODE;
+	prop = drm_property_create_enum(drm_dev, 0, "hdmi_mode",
+					hdmi_mode_names,
+					ARRAY_SIZE(hdmi_mode_names));
+	if (!prop) {
+		/* Report the property that actually failed */
+		DRM_ERROR("fails to create hdmi_mode property\n");
+		return;
+	}
+	hdmi_connector->hdmi_mode_property = prop;
+	drm_object_attach_property(&connector->base, prop, hdmi->hdmi_mode);
+}
+
+/*
+ * atomic_set_property hook: store @val for the "colorspace" or "hdmi_mode"
+ * property in the sti_hdmi state.
+ *
+ * Returns 0 on success, -EINVAL (after logging) for any other property.
+ */
+static int
+sti_hdmi_connector_set_property(struct drm_connector *connector,
+				struct drm_connector_state *state,
+				struct drm_property *property,
+				uint64_t val)
+{
+	struct sti_hdmi_connector *conn = to_sti_hdmi_connector(connector);
+	struct sti_hdmi *hdmi = conn->hdmi;
+
+	if (property == conn->colorspace_property) {
+		hdmi->colorspace = val;
+	} else if (property == conn->hdmi_mode_property) {
+		hdmi->hdmi_mode = val;
+	} else {
+		DRM_ERROR("failed to set hdmi connector property\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * atomic_get_property hook: report through @val the value currently stored
+ * in the sti_hdmi state for the "colorspace" or "hdmi_mode" property.
+ *
+ * Returns 0 on success, -EINVAL (after logging) for any other property.
+ */
+static int
+sti_hdmi_connector_get_property(struct drm_connector *connector,
+				const struct drm_connector_state *state,
+				struct drm_property *property,
+				uint64_t *val)
+{
+	struct sti_hdmi_connector *conn = to_sti_hdmi_connector(connector);
+	struct sti_hdmi *hdmi = conn->hdmi;
+
+	if (property == conn->colorspace_property) {
+		*val = hdmi->colorspace;
+	} else if (property == conn->hdmi_mode_property) {
+		*val = hdmi->hdmi_mode;
+	} else {
+		DRM_ERROR("failed to get hdmi connector property\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * HDMI connector callbacks.  Legacy set_property requests go through the
+ * atomic helper; the driver's own properties are served by the
+ * atomic_set_property / atomic_get_property hooks.
+ */
 static const struct drm_connector_funcs sti_hdmi_connector_funcs = {
 	.dpms = drm_atomic_helper_connector_dpms,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.detect = sti_hdmi_connector_detect,
 	.destroy = sti_hdmi_connector_destroy,
 	.reset = drm_atomic_helper_connector_reset,
+	.set_property = drm_atomic_helper_connector_set_property,
+	.atomic_set_property = sti_hdmi_connector_set_property,
+	.atomic_get_property = sti_hdmi_connector_get_property,
 	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
@@ -729,6 +1092,9 @@ static int sti_hdmi_bind(struct device *dev, struct device *master, void *data)
 	drm_connector_helper_add(drm_connector,
 			&sti_hdmi_connector_helper_funcs);
 
+	/* initialise property */
+	sti_hdmi_connector_init_property(drm_dev, drm_connector);
+
 	err = drm_connector_register(drm_connector);
 	if (err)
 		goto err_connector;
@@ -742,6 +1108,9 @@ static int sti_hdmi_bind(struct device *dev, struct device *master, void *data)
 	/* Enable default interrupts */
 	hdmi_write(hdmi, HDMI_DEFAULT_INT, HDMI_INT_EN);
 
+	if (hdmi_debugfs_init(hdmi, drm_dev->primary))
+		DRM_ERROR("HDMI debugfs setup failed\n");
+
 	return 0;
 
 err_sysfs:
@@ -755,7 +1124,10 @@ err_connector:
+/*
+ * Component unbind callback: removes the HDMI debugfs entries from the
+ * primary minor of the DRM device passed in @data.
+ */
 static void sti_hdmi_unbind(struct device *dev,
 		struct device *master, void *data)
 {
-	/* do nothing */
+	struct sti_hdmi *hdmi = dev_get_drvdata(dev);
+	struct drm_device *drm_dev = data;
+
+	hdmi_debugfs_exit(hdmi, drm_dev->primary);
 }
 
 static const struct component_ops sti_hdmi_ops = {
diff --git a/drivers/gpu/drm/sti/sti_hdmi.h b/drivers/gpu/drm/sti/sti_hdmi.h
index 3d22390e1f3b..ef3a94583bbd 100644
--- a/drivers/gpu/drm/sti/sti_hdmi.h
+++ b/drivers/gpu/drm/sti/sti_hdmi.h
@@ -7,15 +7,14 @@
 #ifndef _STI_HDMI_H_
 #define _STI_HDMI_H_
 
+#include <linux/hdmi.h>
 #include <linux/platform_device.h>
 
 #include <drm/drmP.h>
 
 #define HDMI_STA           0x0010
 #define HDMI_STA_DLL_LCK   BIT(5)
-
-#define HDMI_STA_HOT_PLUG_SHIFT 4
-#define HDMI_STA_HOT_PLUG	(1 << HDMI_STA_HOT_PLUG_SHIFT)
+#define HDMI_STA_HOT_PLUG  BIT(4)
 
 struct sti_hdmi;
 
@@ -24,6 +23,27 @@ struct hdmi_phy_ops {
 	void (*stop)(struct sti_hdmi *hdmi);
 };
 
+/* values for the framing mode property */
+enum sti_hdmi_modes {
+	HDMI_MODE_HDMI,	/* listed as "hdmi" in hdmi_mode_names */
+	HDMI_MODE_DVI,	/* listed as "dvi" in hdmi_mode_names */
+};
+
+/*
+ * Value/name pairs for the framing mode enum property.
+ * NOTE(review): this table is defined in a header, so each file including
+ * it gets its own copy - consider moving it next to its user.
+ */
+static const struct drm_prop_enum_list hdmi_mode_names[] = {
+	{ HDMI_MODE_HDMI, "hdmi" },
+	{ HDMI_MODE_DVI, "dvi" },
+};
+
+#define DEFAULT_HDMI_MODE HDMI_MODE_HDMI
+
+/*
+ * Value/name pairs for the colorspace enum property, keyed by the
+ * HDMI_COLORSPACE_* values.
+ * NOTE(review): this table is defined in a header, so each file including
+ * it gets its own copy - consider moving it next to its user.
+ */
+static const struct drm_prop_enum_list colorspace_mode_names[] = {
+	{ HDMI_COLORSPACE_RGB, "rgb" },
+	{ HDMI_COLORSPACE_YUV422, "yuv422" },
+	{ HDMI_COLORSPACE_YUV444, "yuv444" },
+};
+
+#define DEFAULT_COLORSPACE_MODE HDMI_COLORSPACE_RGB
+
 /**
  * STI hdmi structure
  *
@@ -44,6 +64,9 @@ struct hdmi_phy_ops {
  * @wait_event: wait event
  * @event_received: wait event status
  * @reset: reset control of the hdmi phy
+ * @ddc_adapt: i2c ddc adapter
+ * @colorspace: current colorspace selected
+ * @hdmi_mode: select framing for HDMI or DVI
  */
 struct sti_hdmi {
 	struct device dev;
@@ -64,6 +87,8 @@ struct sti_hdmi {
 	bool event_received;
 	struct reset_control *reset;
 	struct i2c_adapter *ddc_adapt;
+	enum hdmi_colorspace colorspace;
+	enum sti_hdmi_modes hdmi_mode;
 };
 
 u32 hdmi_read(struct sti_hdmi *hdmi, int offset);
diff --git a/drivers/gpu/drm/sti/sti_hqvdp.c b/drivers/gpu/drm/sti/sti_hqvdp.c
index 43861b52261d..e05b0dc523ff 100644
--- a/drivers/gpu/drm/sti/sti_hqvdp.c
+++ b/drivers/gpu/drm/sti/sti_hqvdp.c
@@ -4,14 +4,11 @@
  * License terms:  GNU General Public License (GPL), version 2
  */
 
-#include <linux/clk.h>
 #include <linux/component.h>
+#include <linux/err.h>
 #include <linux/firmware.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
 #include <linux/reset.h>
 
-#include <drm/drmP.h>
+#include <drm/drm_atomic.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
 
@@ -329,8 +326,6 @@ struct sti_hqvdp_cmd {
  * @reset:             reset control
  * @vtg_nb:            notifier to handle VTG Vsync
  * @btm_field_pending: is there any bottom field (interlaced frame) to display
- * @curr_field_count:  number of field updates
- * @last_field_count:  number of field updates since last fps measure
  * @hqvdp_cmd:         buffer of commands
  * @hqvdp_cmd_paddr:   physical address of hqvdp_cmd
  * @vtg:               vtg for main data path
@@ -346,10 +341,8 @@ struct sti_hqvdp {
 	struct reset_control *reset;
 	struct notifier_block vtg_nb;
 	bool btm_field_pending;
-	unsigned int curr_field_count;
-	unsigned int last_field_count;
 	void *hqvdp_cmd;
-	dma_addr_t hqvdp_cmd_paddr;
+	u32 hqvdp_cmd_paddr;
 	struct sti_vtg *vtg;
 	bool xp70_initialized;
 };
@@ -372,8 +365,8 @@ static const uint32_t hqvdp_supported_formats[] = {
  */
 static int sti_hqvdp_get_free_cmd(struct sti_hqvdp *hqvdp)
 {
-	int curr_cmd, next_cmd;
-	dma_addr_t cmd = hqvdp->hqvdp_cmd_paddr;
+	u32 curr_cmd, next_cmd;
+	u32 cmd = hqvdp->hqvdp_cmd_paddr;
 	int i;
 
 	curr_cmd = readl(hqvdp->regs + HQVDP_MBX_CURRENT_CMD);
@@ -400,8 +393,8 @@ static int sti_hqvdp_get_free_cmd(struct sti_hqvdp *hqvdp)
  */
 static int sti_hqvdp_get_curr_cmd(struct sti_hqvdp *hqvdp)
 {
-	int curr_cmd;
-	dma_addr_t cmd = hqvdp->hqvdp_cmd_paddr;
+	u32 curr_cmd;
+	u32 cmd = hqvdp->hqvdp_cmd_paddr;
 	unsigned int i;
 
 	curr_cmd = readl(hqvdp->regs + HQVDP_MBX_CURRENT_CMD);
@@ -417,6 +410,246 @@ static int sti_hqvdp_get_curr_cmd(struct sti_hqvdp *hqvdp)
 }
 
 /**
+ * sti_hqvdp_get_next_cmd
+ * @hqvdp: hqvdp structure
+ *
+ * Look for the next hqvdp_cmd that will be used by the FW.
+ *
+ * RETURNS:
+ *  the offset of the next command that will be used.
+ * -1 in error cases
+ */
+static int sti_hqvdp_get_next_cmd(struct sti_hqvdp *hqvdp)
+{
+	/*
+	 * Compare as u32 on both sides, like the register itself and
+	 * hqvdp_cmd_paddr: a signed int would be sign-extended against a
+	 * 64-bit dma_addr_t and never match an address >= 0x80000000.
+	 */
+	u32 next_cmd;
+	u32 cmd = hqvdp->hqvdp_cmd_paddr;
+	unsigned int i;
+
+	next_cmd = readl(hqvdp->regs + HQVDP_MBX_NEXT_CMD);
+
+	/* Walk the NB_VDP_CMD command slots looking for the one posted */
+	for (i = 0; i < NB_VDP_CMD; i++) {
+		if (cmd == next_cmd)
+			return i * sizeof(struct sti_hqvdp_cmd);
+
+		cmd += sizeof(struct sti_hqvdp_cmd);
+	}
+
+	return -1;
+}
+
+#define DBGFS_DUMP(reg) seq_printf(s, "\n  %-25s 0x%08X", #reg, \
+				   readl(hqvdp->regs + reg))
+
+/*
+ * Name the coefficient table @coef was taken from, by comparing its first
+ * 16 bytes with each known table in turn.  Returns "<UNKNOWN>" when none
+ * of them matches.
+ */
+static const char *hqvdp_dbg_get_lut(u32 *coef)
+{
+	static const struct {
+		const void *table;
+		const char *label;
+	} known_luts[] = {
+		{ coef_lut_a_legacy, "LUT A" },
+		{ coef_lut_b, "LUT B" },
+		{ coef_lut_c_y_legacy, "LUT C Y" },
+		{ coef_lut_c_c_legacy, "LUT C C" },
+		{ coef_lut_d_y_legacy, "LUT D Y" },
+		{ coef_lut_d_c_legacy, "LUT D C" },
+		{ coef_lut_e_y_legacy, "LUT E Y" },
+		{ coef_lut_e_c_legacy, "LUT E C" },
+		{ coef_lut_f_y_legacy, "LUT F Y" },
+		{ coef_lut_f_c_legacy, "LUT F C" },
+	};
+	unsigned int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(known_luts); idx++) {
+		if (memcmp(coef, known_luts[idx].table, 16) == 0)
+			return known_luts[idx].label;
+	}
+
+	return "<UNKNOWN>";
+}
+
+/*
+ * Print the integer scaling ratio between @src and @dst for one axis.
+ * A zero size on either side (e.g. a command slot that was never filled)
+ * is reported as "<UNKNOWN>" instead of dividing by zero.
+ */
+static void hqvdp_dbg_dump_scale(struct seq_file *s, const char *name,
+				 int src, int dst)
+{
+	seq_printf(s, "\n\t %-20s", name);
+	if (!src || !dst)
+		seq_puts(s, " <UNKNOWN>");
+	else if (dst > src)
+		seq_printf(s, " %d/1", dst / src);
+	else
+		seq_printf(s, " 1/%d", src / dst);
+}
+
+/*
+ * Dump the TOP, HVSRC and CSDI parts of the command @c into @s: raw
+ * values plus a decoded form for the configs, the sizes, the coefficient
+ * tables and the scaling ratios.
+ */
+static void hqvdp_dbg_dump_cmd(struct seq_file *s, struct sti_hqvdp_cmd *c)
+{
+	int src_w, src_h, dst_w, dst_h;
+
+	seq_puts(s, "\n\tTOP:");
+	seq_printf(s, "\n\t %-20s 0x%08X", "Config", c->top.config);
+	switch (c->top.config) {
+	case TOP_CONFIG_PROGRESSIVE:
+		seq_puts(s, "\tProgressive");
+		break;
+	case TOP_CONFIG_INTER_TOP:
+		seq_puts(s, "\tInterlaced, top field");
+		break;
+	case TOP_CONFIG_INTER_BTM:
+		seq_puts(s, "\tInterlaced, bottom field");
+		break;
+	default:
+		seq_puts(s, "\t<UNKNOWN>");
+		break;
+	}
+
+	seq_printf(s, "\n\t %-20s 0x%08X", "MemFormat", c->top.mem_format);
+	seq_printf(s, "\n\t %-20s 0x%08X", "CurrentY", c->top.current_luma);
+	seq_printf(s, "\n\t %-20s 0x%08X", "CurrentC", c->top.current_chroma);
+	seq_printf(s, "\n\t %-20s 0x%08X", "YSrcPitch", c->top.luma_src_pitch);
+	seq_printf(s, "\n\t %-20s 0x%08X", "CSrcPitch",
+		   c->top.chroma_src_pitch);
+	seq_printf(s, "\n\t %-20s 0x%08X", "InputFrameSize",
+		   c->top.input_frame_size);
+	/* Sizes are packed as height << 16 | width */
+	seq_printf(s, "\t%dx%d",
+		   c->top.input_frame_size & 0x0000FFFF,
+		   c->top.input_frame_size >> 16);
+	seq_printf(s, "\n\t %-20s 0x%08X", "InputViewportSize",
+		   c->top.input_viewport_size);
+	src_w = c->top.input_viewport_size & 0x0000FFFF;
+	src_h = c->top.input_viewport_size >> 16;
+	seq_printf(s, "\t%dx%d", src_w, src_h);
+
+	seq_puts(s, "\n\tHVSRC:");
+	seq_printf(s, "\n\t %-20s 0x%08X", "OutputPictureSize",
+		   c->hvsrc.output_picture_size);
+	dst_w = c->hvsrc.output_picture_size & 0x0000FFFF;
+	dst_h = c->hvsrc.output_picture_size >> 16;
+	seq_printf(s, "\t%dx%d", dst_w, dst_h);
+	seq_printf(s, "\n\t %-20s 0x%08X", "ParamCtrl", c->hvsrc.param_ctrl);
+
+	seq_printf(s, "\n\t %-20s %s", "yh_coef",
+		   hqvdp_dbg_get_lut(c->hvsrc.yh_coef));
+	seq_printf(s, "\n\t %-20s %s", "ch_coef",
+		   hqvdp_dbg_get_lut(c->hvsrc.ch_coef));
+	seq_printf(s, "\n\t %-20s %s", "yv_coef",
+		   hqvdp_dbg_get_lut(c->hvsrc.yv_coef));
+	seq_printf(s, "\n\t %-20s %s", "cv_coef",
+		   hqvdp_dbg_get_lut(c->hvsrc.cv_coef));
+
+	hqvdp_dbg_dump_scale(s, "ScaleH", src_w, dst_w);
+	hqvdp_dbg_dump_scale(s, "ScaleV", src_h, dst_h);
+
+	seq_puts(s, "\n\tCSDI:");
+	seq_printf(s, "\n\t %-20s 0x%08X\t", "Config", c->csdi.config);
+	switch (c->csdi.config) {
+	case CSDI_CONFIG_PROG:
+		seq_puts(s, "Bypass");
+		break;
+	case CSDI_CONFIG_INTER_DIR:
+		seq_puts(s, "Deinterlace, directional");
+		break;
+	default:
+		seq_puts(s, "<UNKNOWN>");
+		break;
+	}
+
+	seq_printf(s, "\n\t %-20s 0x%08X", "Config2", c->csdi.config2);
+	seq_printf(s, "\n\t %-20s 0x%08X", "DcdiConfig", c->csdi.dcdi_config);
+}
+
+/*
+ * debugfs show callback for the "hqvdp" entry.
+ *
+ * Dumps the mailbox registers with a short decoding of the firmware
+ * state, reset, fetch and vsync bits, then the commands designated by the
+ * CURRENT_CMD and NEXT_CMD registers when they match a command slot.
+ * The whole dump runs with dev->struct_mutex held.
+ *
+ * Returns 0, or the error from mutex_lock_interruptible().
+ */
+static int hqvdp_dbg_show(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct sti_hqvdp *hqvdp = (struct sti_hqvdp *)node->info_ent->data;
+	struct drm_device *dev = node->minor->dev;
+	int cmd, cmd_offset, infoxp70;
+	void *virt;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	seq_printf(s, "%s: (vaddr = 0x%p)",
+		   sti_plane_to_str(&hqvdp->plane), hqvdp->regs);
+
+	DBGFS_DUMP(HQVDP_MBX_IRQ_TO_XP70);
+	DBGFS_DUMP(HQVDP_MBX_INFO_HOST);
+	DBGFS_DUMP(HQVDP_MBX_IRQ_TO_HOST);
+	DBGFS_DUMP(HQVDP_MBX_INFO_XP70);
+	/* The register is read again to decode it; only one state is shown */
+	infoxp70 = readl(hqvdp->regs + HQVDP_MBX_INFO_XP70);
+	seq_puts(s, "\tFirmware state: ");
+	if (infoxp70 & INFO_XP70_FW_READY)
+		seq_puts(s, "idle and ready");
+	else if (infoxp70 & INFO_XP70_FW_PROCESSING)
+		seq_puts(s, "processing a picture");
+	else if (infoxp70 & INFO_XP70_FW_INITQUEUES)
+		seq_puts(s, "programming queues");
+	else
+		seq_puts(s, "NOT READY");
+
+	DBGFS_DUMP(HQVDP_MBX_SW_RESET_CTRL);
+	DBGFS_DUMP(HQVDP_MBX_STARTUP_CTRL1);
+	if (readl(hqvdp->regs + HQVDP_MBX_STARTUP_CTRL1)
+					& STARTUP_CTRL1_RST_DONE)
+		seq_puts(s, "\tReset is done");
+	else
+		seq_puts(s, "\tReset is NOT done");
+	DBGFS_DUMP(HQVDP_MBX_STARTUP_CTRL2);
+	if (readl(hqvdp->regs + HQVDP_MBX_STARTUP_CTRL2)
+					& STARTUP_CTRL2_FETCH_EN)
+		seq_puts(s, "\tFetch is enabled");
+	else
+		seq_puts(s, "\tFetch is NOT enabled");
+	DBGFS_DUMP(HQVDP_MBX_GP_STATUS);
+	DBGFS_DUMP(HQVDP_MBX_NEXT_CMD);
+	DBGFS_DUMP(HQVDP_MBX_CURRENT_CMD);
+	DBGFS_DUMP(HQVDP_MBX_SOFT_VSYNC);
+	/* Both low bits clear is reported as hardware vsync */
+	if (!(readl(hqvdp->regs + HQVDP_MBX_SOFT_VSYNC) & 3))
+		seq_puts(s, "\tHW Vsync");
+	else
+		seq_puts(s, "\tSW Vsync ?!?!");
+
+	/* Last command */
+	cmd = readl(hqvdp->regs + HQVDP_MBX_CURRENT_CMD);
+	cmd_offset = sti_hqvdp_get_curr_cmd(hqvdp);
+	if (cmd_offset == -1) {
+		seq_puts(s, "\n\n  Last command: unknown");
+	} else {
+		/* cmd_offset is a byte offset into the command buffer */
+		virt = hqvdp->hqvdp_cmd + cmd_offset;
+		seq_printf(s, "\n\n  Last command: address @ 0x%x (0x%p)",
+			   cmd, virt);
+		hqvdp_dbg_dump_cmd(s, (struct sti_hqvdp_cmd *)virt);
+	}
+
+	/* Next command */
+	cmd = readl(hqvdp->regs + HQVDP_MBX_NEXT_CMD);
+	cmd_offset = sti_hqvdp_get_next_cmd(hqvdp);
+	if (cmd_offset == -1) {
+		seq_puts(s, "\n\n  Next command: unknown");
+	} else {
+		virt = hqvdp->hqvdp_cmd + cmd_offset;
+		seq_printf(s, "\n\n  Next command address: @ 0x%x (0x%p)",
+			   cmd, virt);
+		hqvdp_dbg_dump_cmd(s, (struct sti_hqvdp_cmd *)virt);
+	}
+
+	seq_puts(s, "\n");
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+/* debugfs entries of the HQVDP plane; .data is filled in by the init helper */
+static struct drm_info_list hqvdp_debugfs_files[] = {
+	{
+		.name = "hqvdp",
+		.show = hqvdp_dbg_show,
+	},
+};
+
+/*
+ * Point each entry of hqvdp_debugfs_files at @hqvdp, then register the
+ * entries under the debugfs root of @minor.
+ *
+ * Returns the value of drm_debugfs_create_files().
+ */
+static int hqvdp_debugfs_init(struct sti_hqvdp *hqvdp, struct drm_minor *minor)
+{
+	const int nb_files = ARRAY_SIZE(hqvdp_debugfs_files);
+	struct drm_info_list *entry;
+
+	for (entry = hqvdp_debugfs_files;
+	     entry < hqvdp_debugfs_files + nb_files; entry++)
+		entry->data = hqvdp;
+
+	return drm_debugfs_create_files(hqvdp_debugfs_files, nb_files,
+					minor->debugfs_root, minor);
+}
+
+/**
  * sti_hqvdp_update_hvsrc
  * @orient: horizontal or vertical
  * @scale:  scaling/zoom factor
@@ -580,7 +813,7 @@ int sti_hqvdp_vtg_cb(struct notifier_block *nb, unsigned long evt, void *data)
 		btm_cmd_offset = sti_hqvdp_get_free_cmd(hqvdp);
 		top_cmd_offest = sti_hqvdp_get_curr_cmd(hqvdp);
 		if ((btm_cmd_offset == -1) || (top_cmd_offest == -1)) {
-			DRM_ERROR("Cannot get cmds, skip btm field\n");
+			DRM_DEBUG_DRIVER("Warning: no cmd, will skip field\n");
 			return -EBUSY;
 		}
 
@@ -599,11 +832,12 @@ int sti_hqvdp_vtg_cb(struct notifier_block *nb, unsigned long evt, void *data)
 		writel(hqvdp->hqvdp_cmd_paddr + btm_cmd_offset,
 				hqvdp->regs + HQVDP_MBX_NEXT_CMD);
 
-		hqvdp->curr_field_count++;
 		hqvdp->btm_field_pending = false;
 
 		dev_dbg(hqvdp->dev, "%s Posted command:0x%x\n",
 				__func__, hqvdp->hqvdp_cmd_paddr);
+
+		sti_plane_update_fps(&hqvdp->plane, false, true);
 	}
 
 	return 0;
@@ -612,19 +846,21 @@ int sti_hqvdp_vtg_cb(struct notifier_block *nb, unsigned long evt, void *data)
+/*
+ * Install the VTG notifier callback and allocate the zero-filled buffer
+ * holding NB_VDP_CMD commands.  On allocation failure an error is logged
+ * and the function returns without reporting it to the caller.
+ */
 static void sti_hqvdp_init(struct sti_hqvdp *hqvdp)
 {
 	int size;
+	dma_addr_t dma_addr;
 
 	hqvdp->vtg_nb.notifier_call = sti_hqvdp_vtg_cb;
 
 	/* Allocate memory for the VDP commands */
 	size = NB_VDP_CMD * sizeof(struct sti_hqvdp_cmd);
-	hqvdp->hqvdp_cmd = dma_alloc_writecombine(hqvdp->dev, size,
-					 &hqvdp->hqvdp_cmd_paddr,
-					 GFP_KERNEL | GFP_DMA);
+	hqvdp->hqvdp_cmd = dma_alloc_wc(hqvdp->dev, size,
+					&dma_addr,
+					GFP_KERNEL | GFP_DMA);
 	if (!hqvdp->hqvdp_cmd) {
 		DRM_ERROR("Failed to allocate memory for VDP cmd\n");
 		return;
 	}
 
+	/* NOTE(review): the DMA address is truncated to 32 bits here */
+	hqvdp->hqvdp_cmd_paddr = (u32)dma_addr;
 	memset(hqvdp->hqvdp_cmd, 0, size);
 }
 
@@ -670,7 +906,7 @@ static void sti_hqvdp_start_xp70(struct sti_hqvdp *hqvdp)
 	DRM_DEBUG_DRIVER("\n");
 
 	if (hqvdp->xp70_initialized) {
-		DRM_INFO("HQVDP XP70 already initialized\n");
+		DRM_DEBUG_DRIVER("HQVDP XP70 already initialized\n");
 		return;
 	}
 
@@ -775,53 +1011,131 @@ out:
 	release_firmware(firmware);
 }
 
-static void sti_hqvdp_atomic_update(struct drm_plane *drm_plane,
-				    struct drm_plane_state *oldstate)
+/*
+ * Validate the new plane @state: scaling must be within the hardware
+ * capabilities, the framebuffer must be backed by a CMA GEM object and the
+ * source/destination sizes must lie within the MIN/MAX limits.
+ *
+ * When the plane is currently disabled, this also starts the XP70
+ * coprocessor, enables the main pixel clock and registers the VTG
+ * notifier.
+ * NOTE(review): these side effects run from the check phase, so they also
+ * happen for a state that is checked but never committed - confirm this
+ * is intended.
+ *
+ * Returns 0 on success (or when the plane is being disabled), a negative
+ * error code otherwise.
+ */
+static int sti_hqvdp_atomic_check(struct drm_plane *drm_plane,
+				  struct drm_plane_state *state)
 {
-	struct drm_plane_state *state = drm_plane->state;
 	struct sti_plane *plane = to_sti_plane(drm_plane);
 	struct sti_hqvdp *hqvdp = to_sti_hqvdp(plane);
 	struct drm_crtc *crtc = state->crtc;
-	struct sti_mixer *mixer = to_sti_mixer(crtc);
 	struct drm_framebuffer *fb = state->fb;
-	struct drm_display_mode *mode = &crtc->mode;
-	int dst_x = state->crtc_x;
-	int dst_y = state->crtc_y;
-	int dst_w = clamp_val(state->crtc_w, 0, mode->crtc_hdisplay - dst_x);
-	int dst_h = clamp_val(state->crtc_h, 0, mode->crtc_vdisplay - dst_y);
-	/* src_x are in 16.16 format */
-	int src_x = state->src_x >> 16;
-	int src_y = state->src_y >> 16;
-	int src_w = state->src_w >> 16;
-	int src_h = state->src_h >> 16;
 	bool first_prepare = plane->status == STI_PLANE_DISABLED ? true : false;
-	struct drm_gem_cma_object *cma_obj;
-	struct sti_hqvdp_cmd *cmd;
-	int scale_h, scale_v;
-	int cmd_offset;
+	struct drm_crtc_state *crtc_state;
+	struct drm_display_mode *mode;
+	int dst_x, dst_y, dst_w, dst_h;
+	int src_x, src_y, src_w, src_h;
+
+	/* no need for further checks if the plane is being disabled */
+	if (!crtc || !fb)
+		return 0;
+
+	/* The lookup can fail and then returns an ERR_PTR, not NULL */
+	crtc_state = drm_atomic_get_crtc_state(state->state, crtc);
+	if (IS_ERR(crtc_state))
+		return PTR_ERR(crtc_state);
+
+	mode = &crtc_state->mode;
+	dst_x = state->crtc_x;
+	dst_y = state->crtc_y;
+	dst_w = clamp_val(state->crtc_w, 0, mode->crtc_hdisplay - dst_x);
+	dst_h = clamp_val(state->crtc_h, 0, mode->crtc_vdisplay - dst_y);
+	/* src_x are in 16.16 format */
+	src_x = state->src_x >> 16;
+	src_y = state->src_y >> 16;
+	src_w = state->src_w >> 16;
+	src_h = state->src_h >> 16;
+
+	if (!sti_hqvdp_check_hw_scaling(hqvdp, mode,
+					src_w, src_h,
+					dst_w, dst_h)) {
+		DRM_ERROR("Scaling beyond HW capabilities\n");
+		return -EINVAL;
+	}
+
+	if (!drm_fb_cma_get_gem_obj(fb, 0)) {
+		DRM_ERROR("Can't get CMA GEM object for fb\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Input / output size
+	 * Align to upper even value
+	 */
+	dst_w = ALIGN(dst_w, 2);
+	dst_h = ALIGN(dst_h, 2);
+
+	if ((src_w > MAX_WIDTH) || (src_w < MIN_WIDTH) ||
+	    (src_h > MAX_HEIGHT) || (src_h < MIN_HEIGHT) ||
+	    (dst_w > MAX_WIDTH) || (dst_w < MIN_WIDTH) ||
+	    (dst_h > MAX_HEIGHT) || (dst_h < MIN_HEIGHT)) {
+		DRM_ERROR("Invalid in/out size %dx%d -> %dx%d\n",
+			  src_w, src_h,
+			  dst_w, dst_h);
+		return -EINVAL;
+	}
+
+	if (first_prepare) {
+		/* Start HQVDP XP70 coprocessor */
+		sti_hqvdp_start_xp70(hqvdp);
+
+		/* Prevent VTG shutdown */
+		if (clk_prepare_enable(hqvdp->clk_pix_main)) {
+			DRM_ERROR("Failed to prepare/enable pix main clk\n");
+			return -EINVAL;
+		}
+
+		/* Register VTG Vsync callback to handle bottom fields */
+		if (sti_vtg_register_client(hqvdp->vtg,
+					    &hqvdp->vtg_nb,
+					    crtc)) {
+			DRM_ERROR("Cannot register VTG notifier\n");
+			/* Do not leave the clock enabled on failure */
+			clk_disable_unprepare(hqvdp->clk_pix_main);
+			return -EINVAL;
+		}
+	}
 
 	DRM_DEBUG_KMS("CRTC:%d (%s) drm plane:%d (%s)\n",
-		      crtc->base.id, sti_mixer_to_str(mixer),
+		      crtc->base.id, sti_mixer_to_str(to_sti_mixer(crtc)),
 		      drm_plane->base.id, sti_plane_to_str(plane));
 	DRM_DEBUG_KMS("%s dst=(%dx%d)@(%d,%d) - src=(%dx%d)@(%d,%d)\n",
 		      sti_plane_to_str(plane),
 		      dst_w, dst_h, dst_x, dst_y,
 		      src_w, src_h, src_x, src_y);
 
+	return 0;
+}
+
+static void sti_hqvdp_atomic_update(struct drm_plane *drm_plane,
+				    struct drm_plane_state *oldstate)
+{
+	struct drm_plane_state *state = drm_plane->state;
+	struct sti_plane *plane = to_sti_plane(drm_plane);
+	struct sti_hqvdp *hqvdp = to_sti_hqvdp(plane);
+	struct drm_crtc *crtc = state->crtc;
+	struct drm_framebuffer *fb = state->fb;
+	struct drm_display_mode *mode;
+	int dst_x, dst_y, dst_w, dst_h;
+	int src_x, src_y, src_w, src_h;
+	struct drm_gem_cma_object *cma_obj;
+	struct sti_hqvdp_cmd *cmd;
+	int scale_h, scale_v;
+	int cmd_offset;
+
+	if (!crtc || !fb)
+		return;
+
+	mode = &crtc->mode;
+	dst_x = state->crtc_x;
+	dst_y = state->crtc_y;
+	dst_w = clamp_val(state->crtc_w, 0, mode->crtc_hdisplay - dst_x);
+	dst_h = clamp_val(state->crtc_h, 0, mode->crtc_vdisplay - dst_y);
+	/* src_x are in 16.16 format */
+	src_x = state->src_x >> 16;
+	src_y = state->src_y >> 16;
+	src_w = state->src_w >> 16;
+	src_h = state->src_h >> 16;
+
 	cmd_offset = sti_hqvdp_get_free_cmd(hqvdp);
 	if (cmd_offset == -1) {
-		DRM_ERROR("No available hqvdp_cmd now\n");
+		DRM_DEBUG_DRIVER("Warning: no cmd, will skip frame\n");
 		return;
 	}
 	cmd = hqvdp->hqvdp_cmd + cmd_offset;
 
-	if (!sti_hqvdp_check_hw_scaling(hqvdp, mode,
-					src_w, src_h,
-					dst_w, dst_h)) {
-		DRM_ERROR("Scaling beyond HW capabilities\n");
-		return;
-	}
-
 	/* Static parameters, defaulting to progressive mode */
 	cmd->top.config = TOP_CONFIG_PROGRESSIVE;
 	cmd->top.mem_format = TOP_MEM_FORMAT_DFLT;
@@ -836,10 +1150,6 @@ static void sti_hqvdp_atomic_update(struct drm_plane *drm_plane,
 	cmd->iqi.pxf_conf = IQI_PXF_CONF_DFLT;
 
 	cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
-	if (!cma_obj) {
-		DRM_ERROR("Can't get CMA GEM object for fb\n");
-		return;
-	}
 
 	DRM_DEBUG_DRIVER("drm FB:%d format:%.4s phys@:0x%lx\n", fb->base.id,
 			 (char *)&fb->pixel_format,
@@ -860,16 +1170,6 @@ static void sti_hqvdp_atomic_update(struct drm_plane *drm_plane,
 	dst_w = ALIGN(dst_w, 2);
 	dst_h = ALIGN(dst_h, 2);
 
-	if ((src_w > MAX_WIDTH) || (src_w < MIN_WIDTH) ||
-	    (src_h > MAX_HEIGHT) || (src_h < MIN_HEIGHT) ||
-	    (dst_w > MAX_WIDTH) || (dst_w < MIN_WIDTH) ||
-	    (dst_h > MAX_HEIGHT) || (dst_h < MIN_HEIGHT)) {
-		DRM_ERROR("Invalid in/out size %dx%d -> %dx%d\n",
-			  src_w, src_h,
-			  dst_w, dst_h);
-		return;
-	}
-
 	cmd->top.input_viewport_size = src_h << 16 | src_w;
 	cmd->top.input_frame_size = src_h << 16 | src_w;
 	cmd->hvsrc.output_picture_size = dst_h << 16 | dst_w;
@@ -900,30 +1200,9 @@ static void sti_hqvdp_atomic_update(struct drm_plane *drm_plane,
 	scale_v = SCALE_FACTOR * dst_h / src_h;
 	sti_hqvdp_update_hvsrc(HVSRC_VERT, scale_v, &cmd->hvsrc);
 
-	if (first_prepare) {
-		/* Start HQVDP XP70 coprocessor */
-		sti_hqvdp_start_xp70(hqvdp);
-
-		/* Prevent VTG shutdown */
-		if (clk_prepare_enable(hqvdp->clk_pix_main)) {
-			DRM_ERROR("Failed to prepare/enable pix main clk\n");
-			return;
-		}
-
-		/* Register VTG Vsync callback to handle bottom fields */
-		if (sti_vtg_register_client(hqvdp->vtg,
-					    &hqvdp->vtg_nb,
-					    crtc)) {
-			DRM_ERROR("Cannot register VTG notifier\n");
-			return;
-		}
-	}
-
 	writel(hqvdp->hqvdp_cmd_paddr + cmd_offset,
 	       hqvdp->regs + HQVDP_MBX_NEXT_CMD);
 
-	hqvdp->curr_field_count++;
-
 	/* Interlaced : get ready to display the bottom field at next Vsync */
 	if (fb->flags & DRM_MODE_FB_INTERLACED)
 		hqvdp->btm_field_pending = true;
@@ -931,6 +1210,8 @@ static void sti_hqvdp_atomic_update(struct drm_plane *drm_plane,
 	dev_dbg(hqvdp->dev, "%s Posted command:0x%x\n",
 		__func__, hqvdp->hqvdp_cmd_paddr + cmd_offset);
 
+	sti_plane_update_fps(plane, true, true);
+
 	plane->status = STI_PLANE_UPDATED;
 }
 
@@ -938,7 +1219,6 @@ static void sti_hqvdp_atomic_disable(struct drm_plane *drm_plane,
 				     struct drm_plane_state *oldstate)
 {
 	struct sti_plane *plane = to_sti_plane(drm_plane);
-	struct sti_mixer *mixer = to_sti_mixer(drm_plane->crtc);
 
 	if (!drm_plane->crtc) {
 		DRM_DEBUG_DRIVER("drm plane:%d not enabled\n",
@@ -947,13 +1227,15 @@ static void sti_hqvdp_atomic_disable(struct drm_plane *drm_plane,
 	}
 
 	DRM_DEBUG_DRIVER("CRTC:%d (%s) drm plane:%d (%s)\n",
-			 drm_plane->crtc->base.id, sti_mixer_to_str(mixer),
+			 drm_plane->crtc->base.id,
+			 sti_mixer_to_str(to_sti_mixer(drm_plane->crtc)),
 			 drm_plane->base.id, sti_plane_to_str(plane));
 
 	plane->status = STI_PLANE_DISABLING;
 }
 
+/* Plane helper callbacks: validation, command programming and disabling */
 static const struct drm_plane_helper_funcs sti_hqvdp_helpers_funcs = {
+	.atomic_check = sti_hqvdp_atomic_check,
 	.atomic_update = sti_hqvdp_atomic_update,
 	.atomic_disable = sti_hqvdp_atomic_disable,
 };
@@ -983,6 +1265,9 @@ static struct drm_plane *sti_hqvdp_create(struct drm_device *drm_dev,
 
 	sti_plane_init_property(&hqvdp->plane, DRM_PLANE_TYPE_OVERLAY);
 
+	if (hqvdp_debugfs_init(hqvdp, drm_dev->primary))
+		DRM_ERROR("HQVDP debugfs setup failed\n");
+
 	return &hqvdp->plane.drm_plane;
 }
 
diff --git a/drivers/gpu/drm/sti/sti_mixer.c b/drivers/gpu/drm/sti/sti_mixer.c
index 49db835dce03..e7425c38fc93 100644
--- a/drivers/gpu/drm/sti/sti_mixer.c
+++ b/drivers/gpu/drm/sti/sti_mixer.c
@@ -75,6 +75,145 @@ static inline void sti_mixer_reg_write(struct sti_mixer *mixer,
 	writel(val, mixer->regs + reg_id);
 }
 
+#define DBGFS_DUMP(reg) seq_printf(s, "\n  %-25s 0x%08X", #reg, \
+				   sti_mixer_reg_read(mixer, reg))
+
+/*
+ * Decode the enable bits of @val: one name per set bit among bits 0..6,
+ * "CURS" for bit 9, or "Nothing" when none of them is set.
+ */
+static void mixer_dbg_ctl(struct seq_file *s, int val)
+{
+	static const char *const layer_names[] = {
+		"BKG", "VID0", "VID1", "GDP0", "GDP1", "GDP2", "GDP3",
+	};
+	bool any_enabled = false;
+	unsigned int bit;
+
+	seq_puts(s, "\tEnabled: ");
+	for (bit = 0; bit < ARRAY_SIZE(layer_names); bit++) {
+		if ((val >> bit) & 1) {
+			seq_printf(s, "%s ", layer_names[bit]);
+			any_enabled = true;
+		}
+	}
+
+	/* Two bits are skipped after the seven layer bits */
+	if ((val >> 9) & 1) {
+		seq_puts(s, "CURS ");
+		any_enabled = true;
+	}
+
+	if (!any_enabled)
+		seq_puts(s, "Nothing");
+}
+
+/*
+ * Decode the depth order held in @val: one 3-bit layer id per depth
+ * level, lowest bits first, printed as a " < " separated list.
+ * An id that is not recognised is printed as "---".
+ */
+static void mixer_dbg_crb(struct seq_file *s, int val)
+{
+	int level;
+
+	seq_puts(s, "\tDepth: ");
+	for (level = 0; level < GAM_MIXER_NB_DEPTH_LEVEL; level++) {
+		const char *layer;
+
+		switch (val & GAM_DEPTH_MASK_ID) {
+		case GAM_DEPTH_VID0_ID:
+			layer = "VID0";
+			break;
+		case GAM_DEPTH_VID1_ID:
+			layer = "VID1";
+			break;
+		case GAM_DEPTH_GDP0_ID:
+			layer = "GDP0";
+			break;
+		case GAM_DEPTH_GDP1_ID:
+			layer = "GDP1";
+			break;
+		case GAM_DEPTH_GDP2_ID:
+			layer = "GDP2";
+			break;
+		case GAM_DEPTH_GDP3_ID:
+			layer = "GDP3";
+			break;
+		default:
+			layer = "---";
+			break;
+		}
+		seq_puts(s, layer);
+
+		/* No separator after the last level */
+		if (level != GAM_MIXER_NB_DEPTH_LEVEL - 1)
+			seq_puts(s, " < ");
+		val >>= 3;
+	}
+}
+
+/*
+ * Append the seven 32-bit words that follow the one at @addr, each
+ * prefixed with '-'.  The word at @addr itself is not printed here.
+ */
+static void mixer_dbg_mxn(struct seq_file *s, void *addr)
+{
+	unsigned int word;
+
+	for (word = 1; word <= 7; word++)
+		seq_printf(s, "-0x%08X", (int)readl(addr + word * 4));
+}
+
+/*
+ * debugfs show callback shared by the "mixer_main" and "mixer_aux"
+ * entries; the mixer to dump comes from the entry's data pointer.
+ *
+ * Dumps the mixer registers, decoding GAM_MIXER_CTL and GAM_MIXER_CRB,
+ * with dev->struct_mutex held.
+ *
+ * Returns 0, or the error from mutex_lock_interruptible().
+ */
+static int mixer_dbg_show(struct seq_file *s, void *arg)
+{
+	struct drm_info_node *node = s->private;
+	struct sti_mixer *mixer = (struct sti_mixer *)node->info_ent->data;
+	struct drm_device *dev = node->minor->dev;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	seq_printf(s, "%s: (vaddr = 0x%p)",
+		   sti_mixer_to_str(mixer), mixer->regs);
+
+	DBGFS_DUMP(GAM_MIXER_CTL);
+	mixer_dbg_ctl(s, sti_mixer_reg_read(mixer, GAM_MIXER_CTL));
+	DBGFS_DUMP(GAM_MIXER_BKC);
+	DBGFS_DUMP(GAM_MIXER_BCO);
+	DBGFS_DUMP(GAM_MIXER_BCS);
+	DBGFS_DUMP(GAM_MIXER_AVO);
+	DBGFS_DUMP(GAM_MIXER_AVS);
+	DBGFS_DUMP(GAM_MIXER_CRB);
+	mixer_dbg_crb(s, sti_mixer_reg_read(mixer, GAM_MIXER_CRB));
+	DBGFS_DUMP(GAM_MIXER_ACT);
+	DBGFS_DUMP(GAM_MIXER_MBP);
+	/* The first MX0 word comes from DBGFS_DUMP, the rest from the helper */
+	DBGFS_DUMP(GAM_MIXER_MX0);
+	mixer_dbg_mxn(s, mixer->regs + GAM_MIXER_MX0);
+	seq_puts(s, "\n");
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+/* debugfs entries used for the main mixer; .data is filled in at init */
+static struct drm_info_list mixer0_debugfs_files[] = {
+	{
+		.name = "mixer_main",
+		.show = mixer_dbg_show,
+	},
+};
+
+/* debugfs entries used for the aux mixer; .data is filled in at init */
+static struct drm_info_list mixer1_debugfs_files[] = {
+	{
+		.name = "mixer_aux",
+		.show = mixer_dbg_show,
+	},
+};
+
+/*
+ * Pick the debugfs entry list matching the mixer id, point each entry at
+ * @mixer and register the list under the debugfs root of @minor.
+ *
+ * Returns -EINVAL for an id that is neither STI_MIXER_MAIN nor
+ * STI_MIXER_AUX, otherwise the value of drm_debugfs_create_files().
+ */
+static int mixer_debugfs_init(struct sti_mixer *mixer, struct drm_minor *minor)
+{
+	struct drm_info_list *files;
+	int count, idx;
+
+	if (mixer->id == STI_MIXER_MAIN) {
+		files = mixer0_debugfs_files;
+		count = ARRAY_SIZE(mixer0_debugfs_files);
+	} else if (mixer->id == STI_MIXER_AUX) {
+		files = mixer1_debugfs_files;
+		count = ARRAY_SIZE(mixer1_debugfs_files);
+	} else {
+		return -EINVAL;
+	}
+
+	for (idx = 0; idx < count; idx++)
+		files[idx].data = mixer;
+
+	return drm_debugfs_create_files(files, count,
+					minor->debugfs_root, minor);
+}
+
 void sti_mixer_set_background_status(struct sti_mixer *mixer, bool enable)
 {
 	u32 val = sti_mixer_reg_read(mixer, GAM_MIXER_CTL);
@@ -237,7 +376,9 @@ void sti_mixer_set_matrix(struct sti_mixer *mixer)
 				    mixerColorSpaceMatIdentity[i]);
 }
 
-struct sti_mixer *sti_mixer_create(struct device *dev, int id,
+struct sti_mixer *sti_mixer_create(struct device *dev,
+				   struct drm_device *drm_dev,
+				   int id,
 				   void __iomem *baseaddr)
 {
 	struct sti_mixer *mixer = devm_kzalloc(dev, sizeof(*mixer), GFP_KERNEL);
@@ -258,5 +399,8 @@ struct sti_mixer *sti_mixer_create(struct device *dev, int id,
 	DRM_DEBUG_DRIVER("%s created. Regs=%p\n",
 			 sti_mixer_to_str(mixer), mixer->regs);
 
+	if (mixer_debugfs_init(mixer, drm_dev->primary))
+		DRM_ERROR("MIXER debugfs setup failed\n");
+
 	return mixer;
 }
diff --git a/drivers/gpu/drm/sti/sti_mixer.h b/drivers/gpu/drm/sti/sti_mixer.h
index efb1a9a5ba86..6f35fc086873 100644
--- a/drivers/gpu/drm/sti/sti_mixer.h
+++ b/drivers/gpu/drm/sti/sti_mixer.h
@@ -42,7 +42,9 @@ struct sti_mixer {
 
 const char *sti_mixer_to_str(struct sti_mixer *mixer);
 
-struct sti_mixer *sti_mixer_create(struct device *dev, int id,
+struct sti_mixer *sti_mixer_create(struct device *dev,
+				   struct drm_device *drm_dev,
+				   int id,
 				   void __iomem *baseaddr);
 
 int sti_mixer_set_plane_status(struct sti_mixer *mixer,
diff --git a/drivers/gpu/drm/sti/sti_plane.c b/drivers/gpu/drm/sti/sti_plane.c
index 2e5c751910c5..f10c98d3f012 100644
--- a/drivers/gpu/drm/sti/sti_plane.c
+++ b/drivers/gpu/drm/sti/sti_plane.c
@@ -43,6 +43,69 @@ const char *sti_plane_to_str(struct sti_plane *plane)
 	}
 }
 
+#define STI_FPS_INTERVAL_MS     3000
+
+/*
+ * Return @lhs - @rhs in milliseconds.  The result is truncated to 32 bits
+ * before being returned as an int.
+ */
+static int sti_plane_timespec_ms_diff(struct timespec lhs, struct timespec rhs)
+{
+	struct timespec delta = timespec_sub(lhs, rhs);
+	u64 elapsed = (u64)timespec_to_ns(&delta);
+
+	/* do_div() divides in place: elapsed goes from ns to ms */
+	do_div(elapsed, NSEC_PER_MSEC);
+
+	return (u32)elapsed;
+}
+
+/*
+ * Account for a new field and/or frame on @plane and, at most once per
+ * STI_FPS_INTERVAL_MS, refresh the frame-rate and field-rate strings kept
+ * in plane->fps_info.  The strings are also logged when fps_info.output
+ * is set.
+ *
+ * @new_frame: a new frame was posted; rates are only computed when true
+ * @new_field: a new field was posted
+ */
+void sti_plane_update_fps(struct sti_plane *plane,
+			  bool new_frame,
+			  bool new_field)
+{
+	struct sti_fps_info *info = &plane->fps_info;
+	int frame_rate, field_rate, elapsed_ms, frame_delta, field_delta;
+	struct timespec ts_now;
+
+	getrawmonotonic(&ts_now);
+
+	if (new_field)
+		info->curr_field_counter++;
+
+	/* Rates are only evaluated when a whole new frame comes in */
+	if (!new_frame)
+		return;
+
+	info->curr_frame_counter++;
+	elapsed_ms = sti_plane_timespec_ms_diff(ts_now, info->last_timestamp);
+	frame_delta = info->curr_frame_counter - info->last_frame_counter;
+
+	if (frame_delta <= 0 || elapsed_ms < STI_FPS_INTERVAL_MS)
+		return;
+
+	info->last_timestamp = ts_now;
+	info->last_frame_counter = info->curr_frame_counter;
+
+	/* Rates are kept in thousandths to print three decimals */
+	frame_rate = (frame_delta * 1000000) / elapsed_ms;
+	snprintf(info->fps_str, FPS_LENGTH, "%-6s @ %d.%.3d fps",
+		 sti_plane_to_str(plane), frame_rate / 1000,
+		 frame_rate % 1000);
+
+	if (!info->curr_field_counter) {
+		/* No field was ever counted: leave the field string empty */
+		info->fips_str[0] = '\0';
+	} else {
+		field_delta = info->curr_field_counter -
+			      info->last_field_counter;
+		info->last_field_counter = info->curr_field_counter;
+		field_rate = (field_delta * 1000000) / elapsed_ms;
+		snprintf(info->fips_str,
+			 FPS_LENGTH, " - %d.%.3d field/sec",
+			 field_rate / 1000, field_rate % 1000);
+	}
+
+	if (info->output)
+		DRM_INFO("%s%s\n", info->fps_str, info->fips_str);
+}
+
 static void sti_plane_destroy(struct drm_plane *drm_plane)
 {
 	DRM_DEBUG_DRIVER("\n");
diff --git a/drivers/gpu/drm/sti/sti_plane.h b/drivers/gpu/drm/sti/sti_plane.h
index 86f1e6fc81b9..c50a3b9f5d37 100644
--- a/drivers/gpu/drm/sti/sti_plane.h
+++ b/drivers/gpu/drm/sti/sti_plane.h
@@ -50,6 +50,18 @@ enum sti_plane_status {
 	STI_PLANE_DISABLED,
 };
 
+#define FPS_LENGTH 64
+struct sti_fps_info {
+	bool output;
+	unsigned int curr_frame_counter;
+	unsigned int last_frame_counter;
+	unsigned int curr_field_counter;
+	unsigned int last_field_counter;
+	struct timespec last_timestamp;
+	char fps_str[FPS_LENGTH];
+	char fips_str[FPS_LENGTH];
+};
+
 /**
  * STI plane structure
  *
@@ -57,15 +69,20 @@ enum sti_plane_status {
  * @desc:               plane type & id
  * @status:             to know the status of the plane
  * @zorder:             plane z-order
+ * @fps_info:           frames per second info
  */
 struct sti_plane {
 	struct drm_plane drm_plane;
 	enum sti_plane_desc desc;
 	enum sti_plane_status status;
 	int zorder;
+	struct sti_fps_info fps_info;
 };
 
 const char *sti_plane_to_str(struct sti_plane *plane);
+void sti_plane_update_fps(struct sti_plane *plane,
+			  bool new_frame,
+			  bool new_field);
 void sti_plane_init_property(struct sti_plane *plane,
 			     enum drm_plane_type type);
 #endif
diff --git a/drivers/gpu/drm/sti/sti_tvout.c b/drivers/gpu/drm/sti/sti_tvout.c
index 24a3735b88fd..2c99016443e5 100644
--- a/drivers/gpu/drm/sti/sti_tvout.c
+++ b/drivers/gpu/drm/sti/sti_tvout.c
@@ -17,6 +17,7 @@
 #include <drm/drm_crtc_helper.h>
 
 #include "sti_crtc.h"
+#include "sti_vtg.h"
 
 /* glue registers */
 #define TVO_CSC_MAIN_M0                  0x000
@@ -85,19 +86,7 @@
 #define TVO_VIP_SEL_INPUT_BYPASSED       1
 
 #define TVO_SYNC_MAIN_VTG_SET_REF        0x00
-#define TVO_SYNC_MAIN_VTG_SET_1          0x01
-#define TVO_SYNC_MAIN_VTG_SET_2          0x02
-#define TVO_SYNC_MAIN_VTG_SET_3          0x03
-#define TVO_SYNC_MAIN_VTG_SET_4          0x04
-#define TVO_SYNC_MAIN_VTG_SET_5          0x05
-#define TVO_SYNC_MAIN_VTG_SET_6          0x06
 #define TVO_SYNC_AUX_VTG_SET_REF         0x10
-#define TVO_SYNC_AUX_VTG_SET_1           0x11
-#define TVO_SYNC_AUX_VTG_SET_2           0x12
-#define TVO_SYNC_AUX_VTG_SET_3           0x13
-#define TVO_SYNC_AUX_VTG_SET_4           0x14
-#define TVO_SYNC_AUX_VTG_SET_5           0x15
-#define TVO_SYNC_AUX_VTG_SET_6           0x16
 
 #define TVO_SYNC_HD_DCS_SHIFT            8
 
@@ -106,6 +95,8 @@
 
 #define ENCODER_CRTC_MASK                (BIT(0) | BIT(1))
 
+#define TVO_MIN_HD_HEIGHT                720
+
 /* enum listing the supported output data format */
 enum sti_tvout_video_out_type {
 	STI_TVOUT_VIDEO_OUT_RGB,
@@ -269,6 +260,31 @@ static void tvout_vip_set_in_vid_fmt(struct sti_tvout *tvout,
 }
 
 /**
+ * Set preformatter matrix
+ *
+ * @tvout: tvout structure
+ * @mode: display mode structure
+ */
+static void tvout_preformatter_set_matrix(struct sti_tvout *tvout,
+					  struct drm_display_mode *mode)
+{
+	unsigned int i;
+	const u32 *pf_matrix;
+
+	if (mode->vdisplay >= TVO_MIN_HD_HEIGHT)
+		pf_matrix = rgb_to_ycbcr_709;
+	else
+		pf_matrix = rgb_to_ycbcr_601;
+
+	for (i = 0; i < 8; i++) {
+		tvout_write(tvout, *(pf_matrix + i),
+			    TVO_CSC_MAIN_M0 + (i * 4));
+		tvout_write(tvout, *(pf_matrix + i),
+			    TVO_CSC_AUX_M0 + (i * 4));
+	}
+}
+
+/**
  * Start VIP block for DVO output
  *
  * @tvout: pointer on tvout structure
@@ -280,24 +296,26 @@ static void tvout_dvo_start(struct sti_tvout *tvout, bool main_path)
 	struct device_node *node = tvout->dev->of_node;
 	bool sel_input_logic_inverted = false;
 	u32 tvo_in_vid_format;
-	int val;
+	int val, tmp;
 
 	dev_dbg(tvout->dev, "%s\n", __func__);
 
 	if (main_path) {
 		DRM_DEBUG_DRIVER("main vip for DVO\n");
-		/* Select the input sync for dvo = VTG set 4 */
-		val  = TVO_SYNC_MAIN_VTG_SET_4 << TVO_SYNC_DVO_PAD_VSYNC_SHIFT;
-		val |= TVO_SYNC_MAIN_VTG_SET_4 << TVO_SYNC_DVO_PAD_HSYNC_SHIFT;
-		val |= TVO_SYNC_MAIN_VTG_SET_4;
+		/* Select the input sync for dvo */
+		tmp = TVO_SYNC_MAIN_VTG_SET_REF | VTG_SYNC_ID_DVO;
+		val  = tmp << TVO_SYNC_DVO_PAD_VSYNC_SHIFT;
+		val |= tmp << TVO_SYNC_DVO_PAD_HSYNC_SHIFT;
+		val |= tmp;
 		tvout_write(tvout, val, TVO_DVO_SYNC_SEL);
 		tvo_in_vid_format = TVO_MAIN_IN_VID_FORMAT;
 	} else {
 		DRM_DEBUG_DRIVER("aux vip for DVO\n");
-		/* Select the input sync for dvo = VTG set 4 */
-		val  = TVO_SYNC_AUX_VTG_SET_4 << TVO_SYNC_DVO_PAD_VSYNC_SHIFT;
-		val |= TVO_SYNC_AUX_VTG_SET_4 << TVO_SYNC_DVO_PAD_HSYNC_SHIFT;
-		val |= TVO_SYNC_AUX_VTG_SET_4;
+		/* Select the input sync for dvo */
+		tmp = TVO_SYNC_AUX_VTG_SET_REF | VTG_SYNC_ID_DVO;
+		val  = tmp << TVO_SYNC_DVO_PAD_VSYNC_SHIFT;
+		val |= tmp << TVO_SYNC_DVO_PAD_HSYNC_SHIFT;
+		val |= tmp;
 		tvout_write(tvout, val, TVO_DVO_SYNC_SEL);
 		tvo_in_vid_format = TVO_AUX_IN_VID_FORMAT;
 	}
@@ -308,9 +326,8 @@ static void tvout_dvo_start(struct sti_tvout *tvout, bool main_path)
 				  TVO_VIP_REORDER_Y_G_SEL,
 				  TVO_VIP_REORDER_CB_B_SEL);
 
-	/* Set clipping mode (Limited range RGB/Y) */
-	tvout_vip_set_clip_mode(tvout, TVO_VIP_DVO,
-				TVO_VIP_CLIP_LIMITED_RANGE_RGB_Y);
+	/* Set clipping mode */
+	tvout_vip_set_clip_mode(tvout, TVO_VIP_DVO, TVO_VIP_CLIP_DISABLED);
 
 	/* Set round mode (rounded to 8-bit per component) */
 	tvout_vip_set_rnd(tvout, TVO_VIP_DVO, TVO_VIP_RND_8BIT_ROUNDED);
@@ -345,13 +362,17 @@ static void tvout_hdmi_start(struct sti_tvout *tvout, bool main_path)
 
 	if (main_path) {
 		DRM_DEBUG_DRIVER("main vip for hdmi\n");
-		/* select the input sync for hdmi = VTG set 1 */
-		tvout_write(tvout, TVO_SYNC_MAIN_VTG_SET_1, TVO_HDMI_SYNC_SEL);
+		/* select the input sync for hdmi */
+		tvout_write(tvout,
+			    TVO_SYNC_MAIN_VTG_SET_REF | VTG_SYNC_ID_HDMI,
+			    TVO_HDMI_SYNC_SEL);
 		tvo_in_vid_format = TVO_MAIN_IN_VID_FORMAT;
 	} else {
 		DRM_DEBUG_DRIVER("aux vip for hdmi\n");
-		/* select the input sync for hdmi = VTG set 1 */
-		tvout_write(tvout, TVO_SYNC_AUX_VTG_SET_1, TVO_HDMI_SYNC_SEL);
+		/* select the input sync for hdmi */
+		tvout_write(tvout,
+			    TVO_SYNC_AUX_VTG_SET_REF | VTG_SYNC_ID_HDMI,
+			    TVO_HDMI_SYNC_SEL);
 		tvo_in_vid_format = TVO_AUX_IN_VID_FORMAT;
 	}
 
@@ -361,9 +382,8 @@ static void tvout_hdmi_start(struct sti_tvout *tvout, bool main_path)
 				  TVO_VIP_REORDER_Y_G_SEL,
 				  TVO_VIP_REORDER_CB_B_SEL);
 
-	/* set clipping mode (Limited range RGB/Y) */
-	tvout_vip_set_clip_mode(tvout, TVO_VIP_HDMI,
-			TVO_VIP_CLIP_LIMITED_RANGE_RGB_Y);
+	/* set clipping mode */
+	tvout_vip_set_clip_mode(tvout, TVO_VIP_HDMI, TVO_VIP_CLIP_DISABLED);
 
 	/* set round mode (rounded to 8-bit per component) */
 	tvout_vip_set_rnd(tvout, TVO_VIP_HDMI, TVO_VIP_RND_8BIT_ROUNDED);
@@ -397,13 +417,19 @@ static void tvout_hda_start(struct sti_tvout *tvout, bool main_path)
 	dev_dbg(tvout->dev, "%s\n", __func__);
 
 	if (main_path) {
-		val = TVO_SYNC_MAIN_VTG_SET_2 << TVO_SYNC_HD_DCS_SHIFT;
-		val |= TVO_SYNC_MAIN_VTG_SET_3;
+		DRM_DEBUG_DRIVER("main vip for HDF\n");
+		/* Select the input sync for HD analog and HD DCS */
+		val  = TVO_SYNC_MAIN_VTG_SET_REF | VTG_SYNC_ID_HDDCS;
+		val  = val << TVO_SYNC_HD_DCS_SHIFT;
+		val |= TVO_SYNC_MAIN_VTG_SET_REF | VTG_SYNC_ID_HDF;
 		tvout_write(tvout, val, TVO_HD_SYNC_SEL);
 		tvo_in_vid_format = TVO_MAIN_IN_VID_FORMAT;
 	} else {
-		val = TVO_SYNC_AUX_VTG_SET_2 << TVO_SYNC_HD_DCS_SHIFT;
-		val |= TVO_SYNC_AUX_VTG_SET_3;
+		DRM_DEBUG_DRIVER("aux vip for HDF\n");
+		/* Select the input sync for HD analog and HD DCS */
+		val  = TVO_SYNC_AUX_VTG_SET_REF | VTG_SYNC_ID_HDDCS;
+		val  = val << TVO_SYNC_HD_DCS_SHIFT;
+		val |= TVO_SYNC_AUX_VTG_SET_REF | VTG_SYNC_ID_HDF;
 		tvout_write(tvout, val, TVO_HD_SYNC_SEL);
 		tvo_in_vid_format = TVO_AUX_IN_VID_FORMAT;
 	}
@@ -414,8 +440,8 @@ static void tvout_hda_start(struct sti_tvout *tvout, bool main_path)
 				  TVO_VIP_REORDER_Y_G_SEL,
 				  TVO_VIP_REORDER_CB_B_SEL);
 
-	/* set clipping mode (EAV/SAV clipping) */
-	tvout_vip_set_clip_mode(tvout, TVO_VIP_HDF, TVO_VIP_CLIP_EAV_SAV);
+	/* set clipping mode */
+	tvout_vip_set_clip_mode(tvout, TVO_VIP_HDF, TVO_VIP_CLIP_DISABLED);
 
 	/* set round mode (rounded to 10-bit per component) */
 	tvout_vip_set_rnd(tvout, TVO_VIP_HDF, TVO_VIP_RND_10BIT_ROUNDED);
@@ -436,6 +462,157 @@ static void tvout_hda_start(struct sti_tvout *tvout, bool main_path)
 	tvout_write(tvout, 0, TVO_HD_DAC_CFG_OFF);
 }
 
+#define DBGFS_DUMP(reg) seq_printf(s, "\n  %-25s 0x%08X", #reg, \
+				   readl(tvout->regs + reg))
+
+static void tvout_dbg_vip(struct seq_file *s, int val)
+{
+	int r, g, b, tmp, mask;
+	char *const reorder[] = {"Y_G", "Cb_B", "Cr_R"};
+	char *const clipping[] = {"No", "EAV/SAV", "Limited range RGB/Y",
+				  "Limited range Cb/Cr", "decided by register"};
+	char *const round[] = {"8-bit", "10-bit", "12-bit"};
+	char *const input_sel[] = {"Main (color matrix enabled)",
+				   "Main (color matrix by-passed)",
+				   "", "", "", "", "", "",
+				   "Aux (color matrix enabled)",
+				   "Aux (color matrix by-passed)",
+				   "", "", "", "", "", "Force value"};
+
+	seq_puts(s, "\t");
+	mask = TVO_VIP_REORDER_MASK << TVO_VIP_REORDER_R_SHIFT;
+	r = (val & mask) >> TVO_VIP_REORDER_R_SHIFT;
+	mask = TVO_VIP_REORDER_MASK << TVO_VIP_REORDER_G_SHIFT;
+	g = (val & mask) >> TVO_VIP_REORDER_G_SHIFT;
+	mask = TVO_VIP_REORDER_MASK << TVO_VIP_REORDER_B_SHIFT;
+	b = (val & mask) >> TVO_VIP_REORDER_B_SHIFT;
+	seq_printf(s, "%-24s %s->%s %s->%s %s->%s\n", "Reorder:",
+		   reorder[r], reorder[TVO_VIP_REORDER_CR_R_SEL],
+		   reorder[g], reorder[TVO_VIP_REORDER_Y_G_SEL],
+		   reorder[b], reorder[TVO_VIP_REORDER_CB_B_SEL]);
+	seq_puts(s, "\t\t\t\t\t");
+	mask = TVO_VIP_CLIP_MASK << TVO_VIP_CLIP_SHIFT;
+	tmp = (val & mask) >> TVO_VIP_CLIP_SHIFT;
+	seq_printf(s, "%-24s %s\n", "Clipping:", clipping[tmp]);
+	seq_puts(s, "\t\t\t\t\t");
+	mask = TVO_VIP_RND_MASK << TVO_VIP_RND_SHIFT;
+	tmp = (val & mask) >> TVO_VIP_RND_SHIFT;
+	seq_printf(s, "%-24s input data rounded to %s per component\n",
+		   "Round:", round[tmp]);
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = (val & TVO_VIP_SEL_INPUT_MASK);
+	seq_printf(s, "%-24s %s", "Input selection:", input_sel[tmp]);
+}
+
+static void tvout_dbg_hd_dac_cfg(struct seq_file *s, int val)
+{
+	seq_printf(s, "\t%-24s %s", "HD DAC:",
+		   val & 1 ? "disabled" : "enabled");
+}
+
+static int tvout_dbg_show(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct sti_tvout *tvout = (struct sti_tvout *)node->info_ent->data;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_crtc *crtc;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	seq_printf(s, "TVOUT: (vaddr = 0x%p)", tvout->regs);
+
+	seq_puts(s, "\n\n  HDMI encoder: ");
+	crtc = tvout->hdmi->crtc;
+	if (crtc) {
+		seq_printf(s, "connected to %s path",
+			   sti_crtc_is_main(crtc) ? "main" : "aux");
+		DBGFS_DUMP(TVO_HDMI_SYNC_SEL);
+		DBGFS_DUMP(TVO_VIP_HDMI);
+		tvout_dbg_vip(s, readl(tvout->regs + TVO_VIP_HDMI));
+	} else {
+		seq_puts(s, "disabled");
+	}
+
+	seq_puts(s, "\n\n  DVO encoder: ");
+	crtc = tvout->dvo->crtc;
+	if (crtc) {
+		seq_printf(s, "connected to %s path",
+			   sti_crtc_is_main(crtc) ? "main" : "aux");
+		DBGFS_DUMP(TVO_DVO_SYNC_SEL);
+		DBGFS_DUMP(TVO_DVO_CONFIG);
+		DBGFS_DUMP(TVO_VIP_DVO);
+		tvout_dbg_vip(s, readl(tvout->regs + TVO_VIP_DVO));
+	} else {
+		seq_puts(s, "disabled");
+	}
+
+	seq_puts(s, "\n\n  HDA encoder: ");
+	crtc = tvout->hda->crtc;
+	if (crtc) {
+		seq_printf(s, "connected to %s path",
+			   sti_crtc_is_main(crtc) ? "main" : "aux");
+		DBGFS_DUMP(TVO_HD_SYNC_SEL);
+		DBGFS_DUMP(TVO_HD_DAC_CFG_OFF);
+		tvout_dbg_hd_dac_cfg(s,
+				     readl(tvout->regs + TVO_HD_DAC_CFG_OFF));
+		DBGFS_DUMP(TVO_VIP_HDF);
+		tvout_dbg_vip(s, readl(tvout->regs + TVO_VIP_HDF));
+	} else {
+		seq_puts(s, "disabled");
+	}
+
+	seq_puts(s, "\n\n  main path configuration");
+	DBGFS_DUMP(TVO_CSC_MAIN_M0);
+	DBGFS_DUMP(TVO_CSC_MAIN_M1);
+	DBGFS_DUMP(TVO_CSC_MAIN_M2);
+	DBGFS_DUMP(TVO_CSC_MAIN_M3);
+	DBGFS_DUMP(TVO_CSC_MAIN_M4);
+	DBGFS_DUMP(TVO_CSC_MAIN_M5);
+	DBGFS_DUMP(TVO_CSC_MAIN_M6);
+	DBGFS_DUMP(TVO_CSC_MAIN_M7);
+	DBGFS_DUMP(TVO_MAIN_IN_VID_FORMAT);
+
+	seq_puts(s, "\n\n  auxiliary path configuration");
+	DBGFS_DUMP(TVO_CSC_AUX_M0);
+	DBGFS_DUMP(TVO_CSC_AUX_M2);
+	DBGFS_DUMP(TVO_CSC_AUX_M3);
+	DBGFS_DUMP(TVO_CSC_AUX_M4);
+	DBGFS_DUMP(TVO_CSC_AUX_M5);
+	DBGFS_DUMP(TVO_CSC_AUX_M6);
+	DBGFS_DUMP(TVO_CSC_AUX_M7);
+	DBGFS_DUMP(TVO_AUX_IN_VID_FORMAT);
+	seq_puts(s, "\n");
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+static struct drm_info_list tvout_debugfs_files[] = {
+	{ "tvout", tvout_dbg_show, 0, NULL },
+};
+
+static void tvout_debugfs_exit(struct sti_tvout *tvout, struct drm_minor *minor)
+{
+	drm_debugfs_remove_files(tvout_debugfs_files,
+				 ARRAY_SIZE(tvout_debugfs_files),
+				 minor);
+}
+
+static int tvout_debugfs_init(struct sti_tvout *tvout, struct drm_minor *minor)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(tvout_debugfs_files); i++)
+		tvout_debugfs_files[i].data = tvout;
+
+	return drm_debugfs_create_files(tvout_debugfs_files,
+					ARRAY_SIZE(tvout_debugfs_files),
+					minor->debugfs_root, minor);
+}
+
 static void sti_tvout_encoder_dpms(struct drm_encoder *encoder, int mode)
 {
 }
@@ -446,10 +623,6 @@ static void sti_tvout_encoder_mode_set(struct drm_encoder *encoder,
 {
 }
 
-static void sti_tvout_encoder_prepare(struct drm_encoder *encoder)
-{
-}
-
 static void sti_tvout_encoder_destroy(struct drm_encoder *encoder)
 {
 	struct sti_tvout_encoder *sti_encoder = to_sti_tvout_encoder(encoder);
@@ -462,10 +635,12 @@ static const struct drm_encoder_funcs sti_tvout_encoder_funcs = {
 	.destroy = sti_tvout_encoder_destroy,
 };
 
-static void sti_dvo_encoder_commit(struct drm_encoder *encoder)
+static void sti_dvo_encoder_enable(struct drm_encoder *encoder)
 {
 	struct sti_tvout *tvout = to_sti_tvout(encoder);
 
+	tvout_preformatter_set_matrix(tvout, &encoder->crtc->mode);
+
 	tvout_dvo_start(tvout, sti_crtc_is_main(encoder->crtc));
 }
 
@@ -480,8 +655,7 @@ static void sti_dvo_encoder_disable(struct drm_encoder *encoder)
 static const struct drm_encoder_helper_funcs sti_dvo_encoder_helper_funcs = {
 	.dpms = sti_tvout_encoder_dpms,
 	.mode_set = sti_tvout_encoder_mode_set,
-	.prepare = sti_tvout_encoder_prepare,
-	.commit = sti_dvo_encoder_commit,
+	.enable = sti_dvo_encoder_enable,
 	.disable = sti_dvo_encoder_disable,
 };
 
@@ -512,10 +686,12 @@ sti_tvout_create_dvo_encoder(struct drm_device *dev,
 	return drm_encoder;
 }
 
-static void sti_hda_encoder_commit(struct drm_encoder *encoder)
+static void sti_hda_encoder_enable(struct drm_encoder *encoder)
 {
 	struct sti_tvout *tvout = to_sti_tvout(encoder);
 
+	tvout_preformatter_set_matrix(tvout, &encoder->crtc->mode);
+
 	tvout_hda_start(tvout, sti_crtc_is_main(encoder->crtc));
 }
 
@@ -533,8 +709,7 @@ static void sti_hda_encoder_disable(struct drm_encoder *encoder)
 static const struct drm_encoder_helper_funcs sti_hda_encoder_helper_funcs = {
 	.dpms = sti_tvout_encoder_dpms,
 	.mode_set = sti_tvout_encoder_mode_set,
-	.prepare = sti_tvout_encoder_prepare,
-	.commit = sti_hda_encoder_commit,
+	.commit = sti_hda_encoder_enable,
 	.disable = sti_hda_encoder_disable,
 };
 
@@ -563,10 +738,12 @@ static struct drm_encoder *sti_tvout_create_hda_encoder(struct drm_device *dev,
 	return drm_encoder;
 }
 
-static void sti_hdmi_encoder_commit(struct drm_encoder *encoder)
+static void sti_hdmi_encoder_enable(struct drm_encoder *encoder)
 {
 	struct sti_tvout *tvout = to_sti_tvout(encoder);
 
+	tvout_preformatter_set_matrix(tvout, &encoder->crtc->mode);
+
 	tvout_hdmi_start(tvout, sti_crtc_is_main(encoder->crtc));
 }
 
@@ -581,8 +758,7 @@ static void sti_hdmi_encoder_disable(struct drm_encoder *encoder)
 static const struct drm_encoder_helper_funcs sti_hdmi_encoder_helper_funcs = {
 	.dpms = sti_tvout_encoder_dpms,
 	.mode_set = sti_tvout_encoder_mode_set,
-	.prepare = sti_tvout_encoder_prepare,
-	.commit = sti_hdmi_encoder_commit,
+	.commit = sti_hdmi_encoder_enable,
 	.disable = sti_hdmi_encoder_disable,
 };
 
@@ -628,26 +804,24 @@ static void sti_tvout_destroy_encoders(struct sti_tvout *tvout)
 	if (tvout->hda)
 		drm_encoder_cleanup(tvout->hda);
 	tvout->hda = NULL;
+
+	if (tvout->dvo)
+		drm_encoder_cleanup(tvout->dvo);
+	tvout->dvo = NULL;
 }
 
 static int sti_tvout_bind(struct device *dev, struct device *master, void *data)
 {
 	struct sti_tvout *tvout = dev_get_drvdata(dev);
 	struct drm_device *drm_dev = data;
-	unsigned int i;
 
 	tvout->drm_dev = drm_dev;
 
-	/* set preformatter matrix */
-	for (i = 0; i < 8; i++) {
-		tvout_write(tvout, rgb_to_ycbcr_601[i],
-			TVO_CSC_MAIN_M0 + (i * 4));
-		tvout_write(tvout, rgb_to_ycbcr_601[i],
-			TVO_CSC_AUX_M0 + (i * 4));
-	}
-
 	sti_tvout_create_encoders(drm_dev, tvout);
 
+	if (tvout_debugfs_init(tvout, drm_dev->primary))
+		DRM_ERROR("TVOUT debugfs setup failed\n");
+
 	return 0;
 }
 
@@ -655,8 +829,11 @@ static void sti_tvout_unbind(struct device *dev, struct device *master,
 	void *data)
 {
 	struct sti_tvout *tvout = dev_get_drvdata(dev);
+	struct drm_device *drm_dev = data;
 
 	sti_tvout_destroy_encoders(tvout);
+
+	tvout_debugfs_exit(tvout, drm_dev->primary);
 }
 
 static const struct component_ops sti_tvout_ops = {
diff --git a/drivers/gpu/drm/sti/sti_vid.c b/drivers/gpu/drm/sti/sti_vid.c
index a8254cc362a1..5a2c5dc3687b 100644
--- a/drivers/gpu/drm/sti/sti_vid.c
+++ b/drivers/gpu/drm/sti/sti_vid.c
@@ -42,6 +42,104 @@
 #define VID_MPR1_BT709          0x0AC50000
 #define VID_MPR2_BT709          0x07150545
 #define VID_MPR3_BT709          0x00000AE8
+/* YCbCr to RGB BT601:
+ * R = Y+1.3711Cr
+ * G = Y-0.6992Cr-0.3359Cb
+ * B = Y+1.7344Cb
+ */
+#define VID_MPR0_BT601          0x0A800000
+#define VID_MPR1_BT601          0x0AAF0000
+#define VID_MPR2_BT601          0x094E0754
+#define VID_MPR3_BT601          0x00000ADD
+
+#define VID_MIN_HD_HEIGHT       720
+
+#define DBGFS_DUMP(reg) seq_printf(s, "\n  %-25s 0x%08X", #reg, \
+				   readl(vid->regs + reg))
+
+static void vid_dbg_ctl(struct seq_file *s, int val)
+{
+	val = val >> 30;
+	seq_puts(s, "\t");
+
+	if (!(val & 1))
+		seq_puts(s, "NOT ");
+	seq_puts(s, "ignored on main mixer - ");
+
+	if (!(val & 2))
+		seq_puts(s, "NOT ");
+	seq_puts(s, "ignored on aux mixer");
+}
+
+static void vid_dbg_vpo(struct seq_file *s, int val)
+{
+	seq_printf(s, "\txdo:%4d\tydo:%4d", val & 0x0FFF, (val >> 16) & 0x0FFF);
+}
+
+static void vid_dbg_vps(struct seq_file *s, int val)
+{
+	seq_printf(s, "\txds:%4d\tyds:%4d", val & 0x0FFF, (val >> 16) & 0x0FFF);
+}
+
+static void vid_dbg_mst(struct seq_file *s, int val)
+{
+	if (val & 1)
+		seq_puts(s, "\tBUFFER UNDERFLOW!");
+}
+
+static int vid_dbg_show(struct seq_file *s, void *arg)
+{
+	struct drm_info_node *node = s->private;
+	struct sti_vid *vid = (struct sti_vid *)node->info_ent->data;
+	struct drm_device *dev = node->minor->dev;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	seq_printf(s, "VID: (vaddr= 0x%p)", vid->regs);
+
+	DBGFS_DUMP(VID_CTL);
+	vid_dbg_ctl(s, readl(vid->regs + VID_CTL));
+	DBGFS_DUMP(VID_ALP);
+	DBGFS_DUMP(VID_CLF);
+	DBGFS_DUMP(VID_VPO);
+	vid_dbg_vpo(s, readl(vid->regs + VID_VPO));
+	DBGFS_DUMP(VID_VPS);
+	vid_dbg_vps(s, readl(vid->regs + VID_VPS));
+	DBGFS_DUMP(VID_KEY1);
+	DBGFS_DUMP(VID_KEY2);
+	DBGFS_DUMP(VID_MPR0);
+	DBGFS_DUMP(VID_MPR1);
+	DBGFS_DUMP(VID_MPR2);
+	DBGFS_DUMP(VID_MPR3);
+	DBGFS_DUMP(VID_MST);
+	vid_dbg_mst(s, readl(vid->regs + VID_MST));
+	DBGFS_DUMP(VID_BC);
+	DBGFS_DUMP(VID_TINT);
+	DBGFS_DUMP(VID_CSAT);
+	seq_puts(s, "\n");
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+static struct drm_info_list vid_debugfs_files[] = {
+	{ "vid", vid_dbg_show, 0, NULL },
+};
+
+static int vid_debugfs_init(struct sti_vid *vid, struct drm_minor *minor)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(vid_debugfs_files); i++)
+		vid_debugfs_files[i].data = vid;
+
+	return drm_debugfs_create_files(vid_debugfs_files,
+					ARRAY_SIZE(vid_debugfs_files),
+					minor->debugfs_root, minor);
+}
 
 void sti_vid_commit(struct sti_vid *vid,
 		    struct drm_plane_state *state)
@@ -52,6 +150,7 @@ void sti_vid_commit(struct sti_vid *vid,
 	int dst_y = state->crtc_y;
 	int dst_w = clamp_val(state->crtc_w, 0, mode->crtc_hdisplay - dst_x);
 	int dst_h = clamp_val(state->crtc_h, 0, mode->crtc_vdisplay - dst_y);
+	int src_h = state->src_h >> 16;
 	u32 val, ydo, xdo, yds, xds;
 
 	/* Input / output size
@@ -71,6 +170,19 @@ void sti_vid_commit(struct sti_vid *vid,
 
 	writel((ydo << 16) | xdo, vid->regs + VID_VPO);
 	writel((yds << 16) | xds, vid->regs + VID_VPS);
+
+	/* Color conversion parameters */
+	if (src_h >= VID_MIN_HD_HEIGHT) {
+		writel(VID_MPR0_BT709, vid->regs + VID_MPR0);
+		writel(VID_MPR1_BT709, vid->regs + VID_MPR1);
+		writel(VID_MPR2_BT709, vid->regs + VID_MPR2);
+		writel(VID_MPR3_BT709, vid->regs + VID_MPR3);
+	} else {
+		writel(VID_MPR0_BT601, vid->regs + VID_MPR0);
+		writel(VID_MPR1_BT601, vid->regs + VID_MPR1);
+		writel(VID_MPR2_BT601, vid->regs + VID_MPR2);
+		writel(VID_MPR3_BT601, vid->regs + VID_MPR3);
+	}
 }
 
 void sti_vid_disable(struct sti_vid *vid)
@@ -91,20 +203,14 @@ static void sti_vid_init(struct sti_vid *vid)
 	/* Opaque */
 	writel(VID_ALP_OPAQUE, vid->regs + VID_ALP);
 
-	/* Color conversion parameters */
-	writel(VID_MPR0_BT709, vid->regs + VID_MPR0);
-	writel(VID_MPR1_BT709, vid->regs + VID_MPR1);
-	writel(VID_MPR2_BT709, vid->regs + VID_MPR2);
-	writel(VID_MPR3_BT709, vid->regs + VID_MPR3);
-
 	/* Brightness, contrast, tint, saturation */
 	writel(VID_BC_DFLT, vid->regs + VID_BC);
 	writel(VID_TINT_DFLT, vid->regs + VID_TINT);
 	writel(VID_CSAT_DFLT, vid->regs + VID_CSAT);
 }
 
-struct sti_vid *sti_vid_create(struct device *dev, int id,
-			       void __iomem *baseaddr)
+struct sti_vid *sti_vid_create(struct device *dev, struct drm_device *drm_dev,
+			       int id, void __iomem *baseaddr)
 {
 	struct sti_vid *vid;
 
@@ -120,5 +226,8 @@ struct sti_vid *sti_vid_create(struct device *dev, int id,
 
 	sti_vid_init(vid);
 
+	if (vid_debugfs_init(vid, drm_dev->primary))
+		DRM_ERROR("VID debugfs setup failed\n");
+
 	return vid;
 }
diff --git a/drivers/gpu/drm/sti/sti_vid.h b/drivers/gpu/drm/sti/sti_vid.h
index 5dea4791f1d6..6c842344f3d8 100644
--- a/drivers/gpu/drm/sti/sti_vid.h
+++ b/drivers/gpu/drm/sti/sti_vid.h
@@ -23,7 +23,7 @@ struct sti_vid {
 void sti_vid_commit(struct sti_vid *vid,
 		    struct drm_plane_state *state);
 void sti_vid_disable(struct sti_vid *vid);
-struct sti_vid *sti_vid_create(struct device *dev, int id,
-			       void __iomem *baseaddr);
+struct sti_vid *sti_vid_create(struct device *dev, struct drm_device *drm_dev,
+			       int id, void __iomem *baseaddr);
 
 #endif
diff --git a/drivers/gpu/drm/sti/sti_vtg.c b/drivers/gpu/drm/sti/sti_vtg.c
index d56630c60039..32c7986b63ab 100644
--- a/drivers/gpu/drm/sti/sti_vtg.c
+++ b/drivers/gpu/drm/sti/sti_vtg.c
@@ -15,8 +15,8 @@
 
 #include "sti_vtg.h"
 
-#define VTG_TYPE_MASTER         0
-#define VTG_TYPE_SLAVE_BY_EXT0  1
+#define VTG_MODE_MASTER         0
+#define VTG_MODE_SLAVE_BY_EXT0  1
 
 /* registers offset */
 #define VTG_MODE            0x0000
@@ -64,6 +64,9 @@
 /* Delay introduced by the HDMI in nb of pixel */
 #define HDMI_DELAY          (5)
 
+/* Delay introduced by the DVO in nb of pixel */
+#define DVO_DELAY           (2)
+
 /* delay introduced by the Arbitrary Waveform Generator in nb of pixels */
 #define AWG_DELAY_HD        (-9)
 #define AWG_DELAY_ED        (-8)
@@ -71,13 +74,61 @@
 
 LIST_HEAD(vtg_lookup);
 
+/*
+ * STI VTG register offset structure
+ *
+ *@h_hd:     stores the VTG_H_HD_x     register offset
+ *@top_v_vd: stores the VTG_TOP_V_VD_x register offset
+ *@bot_v_vd: stores the VTG_BOT_V_VD_x register offset
+ *@top_v_hd: stores the VTG_TOP_V_HD_x register offset
+ *@bot_v_hd: stores the VTG_BOT_V_HD_x register offset
+ */
+struct sti_vtg_regs_offs {
+	u32 h_hd;
+	u32 top_v_vd;
+	u32 bot_v_vd;
+	u32 top_v_hd;
+	u32 bot_v_hd;
+};
+
+#define VTG_MAX_SYNC_OUTPUT 4
+static const struct sti_vtg_regs_offs vtg_regs_offs[VTG_MAX_SYNC_OUTPUT] = {
+	{ VTG_H_HD_1,
+	  VTG_TOP_V_VD_1, VTG_BOT_V_VD_1, VTG_TOP_V_HD_1, VTG_BOT_V_HD_1 },
+	{ VTG_H_HD_2,
+	  VTG_TOP_V_VD_2, VTG_BOT_V_VD_2, VTG_TOP_V_HD_2, VTG_BOT_V_HD_2 },
+	{ VTG_H_HD_3,
+	  VTG_TOP_V_VD_3, VTG_BOT_V_VD_3, VTG_TOP_V_HD_3, VTG_BOT_V_HD_3 },
+	{ VTG_H_HD_4,
+	  VTG_TOP_V_VD_4, VTG_BOT_V_VD_4, VTG_TOP_V_HD_4, VTG_BOT_V_HD_4 }
+};
+
+/*
+ * STI VTG synchronisation parameters structure
+ *
+ *@hsync: sample number falling and rising edge
+ *@vsync_line_top: vertical top field line number falling and rising edge
+ *@vsync_line_bot: vertical bottom field line number falling and rising edge
+ *@vsync_off_top: vertical top field sample number rising and falling edge
+ *@vsync_off_bot: vertical bottom field sample number rising and falling edge
+ */
+struct sti_vtg_sync_params {
+	u32 hsync;
+	u32 vsync_line_top;
+	u32 vsync_line_bot;
+	u32 vsync_off_top;
+	u32 vsync_off_bot;
+};
+
 /**
  * STI VTG structure
  *
  * @dev: pointer to device driver
- * @data: data associated to the device
+ * @np: device node
+ * @regs: register mapping
+ * @sync_params: synchronisation parameters used to generate timings
  * @irq: VTG irq
- * @type: VTG type (main or aux)
+ * @irq_status: store the IRQ status value
  * @notifier_list: notifier callback
  * @crtc: the CRTC for vblank event
  * @slave: slave vtg
@@ -87,6 +138,7 @@ struct sti_vtg {
 	struct device *dev;
 	struct device_node *np;
 	void __iomem *regs;
+	struct sti_vtg_sync_params sync_params[VTG_MAX_SYNC_OUTPUT];
 	int irq;
 	u32 irq_status;
 	struct raw_notifier_head notifier_list;
@@ -146,13 +198,69 @@ static void vtg_set_output_window(void __iomem *regs,
 	writel(video_bottom_field_stop, regs + VTG_VID_BFS);
 }
 
+static void vtg_set_hsync_vsync_pos(struct sti_vtg_sync_params *sync,
+				    int delay,
+				    const struct drm_display_mode *mode)
+{
+	long clocksperline, start, stop;
+	u32 risesync_top, fallsync_top;
+	u32 risesync_offs_top, fallsync_offs_top;
+
+	clocksperline = mode->htotal;
+
+	/* Get the hsync position */
+	start = 0;
+	stop = mode->hsync_end - mode->hsync_start;
+
+	start += delay;
+	stop  += delay;
+
+	if (start < 0)
+		start += clocksperline;
+	else if (start >= clocksperline)
+		start -= clocksperline;
+
+	if (stop < 0)
+		stop += clocksperline;
+	else if (stop >= clocksperline)
+		stop -= clocksperline;
+
+	sync->hsync = (stop << 16) | start;
+
+	/* Get the vsync position */
+	if (delay >= 0) {
+		risesync_top = 1;
+		fallsync_top = risesync_top;
+		fallsync_top += mode->vsync_end - mode->vsync_start;
+
+		fallsync_offs_top = (u32)delay;
+		risesync_offs_top = (u32)delay;
+	} else {
+		risesync_top = mode->vtotal;
+		fallsync_top = mode->vsync_end - mode->vsync_start;
+
+		fallsync_offs_top = clocksperline + delay;
+		risesync_offs_top = clocksperline + delay;
+	}
+
+	sync->vsync_line_top = (fallsync_top << 16) | risesync_top;
+	sync->vsync_off_top = (fallsync_offs_top << 16) | risesync_offs_top;
+
+	/* Only progressive supported for now */
+	sync->vsync_line_bot = sync->vsync_line_top;
+	sync->vsync_off_bot = sync->vsync_off_top;
+}
+
 static void vtg_set_mode(struct sti_vtg *vtg,
-			 int type, const struct drm_display_mode *mode)
+			 int type,
+			 struct sti_vtg_sync_params *sync,
+			 const struct drm_display_mode *mode)
 {
-	u32 tmp;
+	unsigned int i;
 
 	if (vtg->slave)
-		vtg_set_mode(vtg->slave, VTG_TYPE_SLAVE_BY_EXT0, mode);
+		vtg_set_mode(vtg->slave, VTG_MODE_SLAVE_BY_EXT0,
+			     vtg->sync_params, mode);
 
 	/* Set the number of clock cycles per line */
 	writel(mode->htotal, vtg->regs + VTG_CLKLN);
@@ -163,57 +271,31 @@ static void vtg_set_mode(struct sti_vtg *vtg,
 	/* Program output window */
 	vtg_set_output_window(vtg->regs, mode);
 
-	/* prepare VTG set 1 for HDMI */
-	tmp = (mode->hsync_end - mode->hsync_start + HDMI_DELAY) << 16;
-	tmp |= HDMI_DELAY;
-	writel(tmp, vtg->regs + VTG_H_HD_1);
-
-	tmp = (mode->vsync_end - mode->vsync_start + 1) << 16;
-	tmp |= 1;
-	writel(tmp, vtg->regs + VTG_TOP_V_VD_1);
-	writel(tmp, vtg->regs + VTG_BOT_V_VD_1);
-
-	tmp = HDMI_DELAY << 16;
-	tmp |= HDMI_DELAY;
-	writel(tmp, vtg->regs + VTG_TOP_V_HD_1);
-	writel(tmp, vtg->regs + VTG_BOT_V_HD_1);
-
-	/* prepare VTG set 2 for for HD DCS */
-	tmp = (mode->hsync_end - mode->hsync_start) << 16;
-	writel(tmp, vtg->regs + VTG_H_HD_2);
-
-	tmp = (mode->vsync_end - mode->vsync_start + 1) << 16;
-	tmp |= 1;
-	writel(tmp, vtg->regs + VTG_TOP_V_VD_2);
-	writel(tmp, vtg->regs + VTG_BOT_V_VD_2);
-	writel(0, vtg->regs + VTG_TOP_V_HD_2);
-	writel(0, vtg->regs + VTG_BOT_V_HD_2);
-
-	/* prepare VTG set 3 for HD Analog in HD mode */
-	tmp = (mode->hsync_end - mode->hsync_start + AWG_DELAY_HD) << 16;
-	tmp |= mode->htotal + AWG_DELAY_HD;
-	writel(tmp, vtg->regs + VTG_H_HD_3);
-
-	tmp = (mode->vsync_end - mode->vsync_start) << 16;
-	tmp |= mode->vtotal;
-	writel(tmp, vtg->regs + VTG_TOP_V_VD_3);
-	writel(tmp, vtg->regs + VTG_BOT_V_VD_3);
-
-	tmp = (mode->htotal + AWG_DELAY_HD) << 16;
-	tmp |= mode->htotal + AWG_DELAY_HD;
-	writel(tmp, vtg->regs + VTG_TOP_V_HD_3);
-	writel(tmp, vtg->regs + VTG_BOT_V_HD_3);
-
-	/* Prepare VTG set 4 for DVO */
-	tmp = (mode->hsync_end - mode->hsync_start) << 16;
-	writel(tmp, vtg->regs + VTG_H_HD_4);
-
-	tmp = (mode->vsync_end - mode->vsync_start + 1) << 16;
-	tmp |= 1;
-	writel(tmp, vtg->regs + VTG_TOP_V_VD_4);
-	writel(tmp, vtg->regs + VTG_BOT_V_VD_4);
-	writel(0, vtg->regs + VTG_TOP_V_HD_4);
-	writel(0, vtg->regs + VTG_BOT_V_HD_4);
+	/* Set hsync and vsync position for HDMI */
+	vtg_set_hsync_vsync_pos(&sync[VTG_SYNC_ID_HDMI - 1], HDMI_DELAY, mode);
+
+	/* Set hsync and vsync position for HD DCS */
+	vtg_set_hsync_vsync_pos(&sync[VTG_SYNC_ID_HDDCS - 1], 0, mode);
+
+	/* Set hsync and vsync position for HDF */
+	vtg_set_hsync_vsync_pos(&sync[VTG_SYNC_ID_HDF - 1], AWG_DELAY_HD, mode);
+
+	/* Set hsync and vsync position for DVO */
+	vtg_set_hsync_vsync_pos(&sync[VTG_SYNC_ID_DVO - 1], DVO_DELAY, mode);
+
+	/* Program the sync outputs */
+	for (i = 0; i < VTG_MAX_SYNC_OUTPUT ; i++) {
+		writel(sync[i].hsync,
+		       vtg->regs + vtg_regs_offs[i].h_hd);
+		writel(sync[i].vsync_line_top,
+		       vtg->regs + vtg_regs_offs[i].top_v_vd);
+		writel(sync[i].vsync_line_bot,
+		       vtg->regs + vtg_regs_offs[i].bot_v_vd);
+		writel(sync[i].vsync_off_top,
+		       vtg->regs + vtg_regs_offs[i].top_v_hd);
+		writel(sync[i].vsync_off_bot,
+		       vtg->regs + vtg_regs_offs[i].bot_v_hd);
+	}
 
 	/* mode */
 	writel(type, vtg->regs + VTG_MODE);
@@ -231,7 +313,7 @@ void sti_vtg_set_config(struct sti_vtg *vtg,
 		const struct drm_display_mode *mode)
 {
 	/* write configuration */
-	vtg_set_mode(vtg, VTG_TYPE_MASTER, mode);
+	vtg_set_mode(vtg, VTG_MODE_MASTER, vtg->sync_params, mode);
 
 	vtg_reset(vtg);
 
diff --git a/drivers/gpu/drm/sti/sti_vtg.h b/drivers/gpu/drm/sti/sti_vtg.h
index cd2439f89d05..f1dcdf9c2342 100644
--- a/drivers/gpu/drm/sti/sti_vtg.h
+++ b/drivers/gpu/drm/sti/sti_vtg.h
@@ -10,6 +10,11 @@
 #define VTG_TOP_FIELD_EVENT     1
 #define VTG_BOTTOM_FIELD_EVENT  2
 
+#define VTG_SYNC_ID_HDMI        1
+#define VTG_SYNC_ID_HDDCS       2
+#define VTG_SYNC_ID_HDF         3
+#define VTG_SYNC_ID_DVO         4
+
 struct sti_vtg;
 struct drm_display_mode;
 struct notifier_block;
diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index 33add93b4ed9..3b0d8c392b70 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -175,8 +175,7 @@ static void tegra_bo_free(struct drm_device *drm, struct tegra_bo *bo)
 		sg_free_table(bo->sgt);
 		kfree(bo->sgt);
 	} else if (bo->vaddr) {
-		dma_free_writecombine(drm->dev, bo->gem.size, bo->vaddr,
-				      bo->paddr);
+		dma_free_wc(drm->dev, bo->gem.size, bo->vaddr, bo->paddr);
 	}
 }
 
@@ -233,8 +232,8 @@ static int tegra_bo_alloc(struct drm_device *drm, struct tegra_bo *bo)
 	} else {
 		size_t size = bo->gem.size;
 
-		bo->vaddr = dma_alloc_writecombine(drm->dev, size, &bo->paddr,
-						   GFP_KERNEL | __GFP_NOWARN);
+		bo->vaddr = dma_alloc_wc(drm->dev, size, &bo->paddr,
+					 GFP_KERNEL | __GFP_NOWARN);
 		if (!bo->vaddr) {
 			dev_err(drm->dev,
 				"failed to allocate buffer of size %zu\n",
@@ -472,8 +471,8 @@ int tegra_drm_mmap(struct file *file, struct vm_area_struct *vma)
 		vma->vm_flags &= ~VM_PFNMAP;
 		vma->vm_pgoff = 0;
 
-		ret = dma_mmap_writecombine(gem->dev->dev, vma, bo->vaddr,
-					    bo->paddr, gem->size);
+		ret = dma_mmap_wc(gem->dev->dev, vma, bo->vaddr, bo->paddr,
+				  gem->size);
 		if (ret) {
 			drm_gem_vm_close(vma);
 			return ret;
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 4e19d0f9cc30..077ae9b2865d 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -311,7 +311,7 @@ int ttm_tt_swapin(struct ttm_tt *ttm)
 			goto out_err;
 
 		copy_highpage(to_page, from_page);
-		page_cache_release(from_page);
+		put_page(from_page);
 	}
 
 	if (!(ttm->page_flags & TTM_PAGE_FLAG_PERSISTENT_SWAP))
@@ -361,7 +361,7 @@ int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistent_swap_storage)
 		copy_highpage(to_page, from_page);
 		set_page_dirty(to_page);
 		mark_page_accessed(to_page);
-		page_cache_release(to_page);
+		put_page(to_page);
 	}
 
 	ttm_tt_unpopulate(ttm);
diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c
index c427499133d6..fd1eb9d03f0b 100644
--- a/drivers/gpu/drm/udl/udl_fb.c
+++ b/drivers/gpu/drm/udl/udl_fb.c
@@ -423,8 +423,8 @@ static int udl_user_framebuffer_dirty(struct drm_framebuffer *fb,
 	}
 
 	if (ufb->obj->base.import_attach) {
-		dma_buf_end_cpu_access(ufb->obj->base.import_attach->dmabuf,
-				       DMA_FROM_DEVICE);
+		ret = dma_buf_end_cpu_access(ufb->obj->base.import_attach->dmabuf,
+					     DMA_FROM_DEVICE);
 	}
 
  unlock:
@@ -536,7 +536,7 @@ static int udlfb_create(struct drm_fb_helper *helper,
 out_destroy_fbi:
 	drm_fb_helper_release_fbi(helper);
 out_gfree:
-	drm_gem_object_unreference(&ufbdev->ufb.obj->base);
+	drm_gem_object_unreference_unlocked(&ufbdev->ufb.obj->base);
 out:
 	return ret;
 }
diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c
index 2a0a784ab6ee..d7528e0d8442 100644
--- a/drivers/gpu/drm/udl/udl_gem.c
+++ b/drivers/gpu/drm/udl/udl_gem.c
@@ -52,7 +52,7 @@ udl_gem_create(struct drm_file *file,
 		return ret;
 	}
 
-	drm_gem_object_unreference(&obj->base);
+	drm_gem_object_unreference_unlocked(&obj->base);
 	*handle_p = handle;
 	return 0;
 }
diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index ac8eafea6361..9807bc9d296e 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -398,9 +398,8 @@ int vc4_mmap(struct file *filp, struct vm_area_struct *vma)
 	vma->vm_flags &= ~VM_PFNMAP;
 	vma->vm_pgoff = 0;
 
-	ret = dma_mmap_writecombine(bo->base.base.dev->dev, vma,
-				    bo->base.vaddr, bo->base.paddr,
-				    vma->vm_end - vma->vm_start);
+	ret = dma_mmap_wc(bo->base.base.dev->dev, vma, bo->base.vaddr,
+			  bo->base.paddr, vma->vm_end - vma->vm_start);
 	if (ret)
 		drm_gem_vm_close(vma);
 
diff --git a/drivers/gpu/drm/via/via_dmablit.c b/drivers/gpu/drm/via/via_dmablit.c
index d0cbd5ecd7f0..7e2a12c4fed2 100644
--- a/drivers/gpu/drm/via/via_dmablit.c
+++ b/drivers/gpu/drm/via/via_dmablit.c
@@ -188,7 +188,7 @@ via_free_sg_info(struct pci_dev *pdev, drm_via_sg_info_t *vsg)
 			if (NULL != (page = vsg->pages[i])) {
 				if (!PageReserved(page) && (DMA_FROM_DEVICE == vsg->direction))
 					SetPageDirty(page);
-				page_cache_release(page);
+				put_page(page);
 			}
 		}
 	case dr_via_pages_alloc:
@@ -239,8 +239,7 @@ via_lock_all_dma_pages(drm_via_sg_info_t *vsg,  drm_via_dmablit_t *xfer)
 	if (NULL == vsg->pages)
 		return -ENOMEM;
 	down_read(&current->mm->mmap_sem);
-	ret = get_user_pages(current, current->mm,
-			     (unsigned long)xfer->mem_addr,
+	ret = get_user_pages((unsigned long)xfer->mem_addr,
 			     vsg->num_pages,
 			     (vsg->direction == DMA_FROM_DEVICE),
 			     0, vsg->pages, NULL);
diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
index 58704f0a4607..531d22025fec 100644
--- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
+++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
@@ -25,6 +25,8 @@
  *
  **************************************************************************/
 
+#include <linux/kernel.h>
+
 #ifdef __KERNEL__
 
 #include <drm/vmwgfx_drm.h>
@@ -36,7 +38,6 @@
 #define ARRAY_SIZE(_A) (sizeof(_A) / sizeof((_A)[0]))
 #endif /* ARRAY_SIZE */
 
-#define DIV_ROUND_UP(x, y)  (((x) + (y) - 1) / (y))
 #define max_t(type, x, y)  ((x) > (y) ? (x) : (y))
 #define surf_size_struct SVGA3dSize
 #define u32 uint32
@@ -987,12 +988,12 @@ svga3dsurface_get_size_in_blocks(const struct svga3d_surface_desc *desc,
 				 const surf_size_struct *pixel_size,
 				 surf_size_struct *block_size)
 {
-	block_size->width = DIV_ROUND_UP(pixel_size->width,
-					 desc->block_size.width);
-	block_size->height = DIV_ROUND_UP(pixel_size->height,
-					  desc->block_size.height);
-	block_size->depth = DIV_ROUND_UP(pixel_size->depth,
-					 desc->block_size.depth);
+	block_size->width = __KERNEL_DIV_ROUND_UP(pixel_size->width,
+						  desc->block_size.width);
+	block_size->height = __KERNEL_DIV_ROUND_UP(pixel_size->height,
+						   desc->block_size.height);
+	block_size->depth = __KERNEL_DIV_ROUND_UP(pixel_size->depth,
+						  desc->block_size.depth);
 }
 
 static inline bool
@@ -1100,8 +1101,9 @@ svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format,
 	const struct svga3d_surface_desc *desc = svga3dsurface_get_desc(format);
 	const u32 bw = desc->block_size.width, bh = desc->block_size.height;
 	const u32 bd = desc->block_size.depth;
-	const u32 rowstride = DIV_ROUND_UP(width, bw) * desc->bytes_per_block;
-	const u32 imgstride = DIV_ROUND_UP(height, bh) * rowstride;
+	const u32 rowstride = __KERNEL_DIV_ROUND_UP(width, bw) *
+			      desc->bytes_per_block;
+	const u32 imgstride = __KERNEL_DIV_ROUND_UP(height, bh) * rowstride;
 	const u32 offset = (z / bd * imgstride +
 			    y / bh * rowstride +
 			    x / bw * desc->bytes_per_block);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 0ee76e523a90..6cbb7d4bdd11 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -195,7 +195,7 @@ static const struct drm_ioctl_desc vmw_ioctls[] = {
 		      DRM_MASTER | DRM_AUTH),
 	VMW_IOCTL_DEF(VMW_UPDATE_LAYOUT,
 		      vmw_kms_update_layout_ioctl,
-		      DRM_MASTER),
+		      DRM_MASTER | DRM_CONTROL_ALLOW),
 	VMW_IOCTL_DEF(VMW_CREATE_SHADER,
 		      vmw_shader_define_ioctl,
 		      DRM_AUTH | DRM_RENDER_ALLOW),
@@ -1204,6 +1204,7 @@ static int vmw_master_set(struct drm_device *dev,
 	}
 
 	dev_priv->active_master = vmaster;
+	drm_sysfs_hotplug_event(dev);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 5cb1b1687cd4..019a6ca3e8e9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -40,9 +40,9 @@
 #include <drm/ttm/ttm_module.h>
 #include "vmwgfx_fence.h"
 
-#define VMWGFX_DRIVER_DATE "20150810"
+#define VMWGFX_DRIVER_DATE "20160210"
 #define VMWGFX_DRIVER_MAJOR 2
-#define VMWGFX_DRIVER_MINOR 9
+#define VMWGFX_DRIVER_MINOR 10
 #define VMWGFX_DRIVER_PATCHLEVEL 0
 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000
 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
@@ -407,8 +407,11 @@ struct vmw_private {
 	void *fb_info;
 	enum vmw_display_unit_type active_display_unit;
 	struct vmw_legacy_display *ldu_priv;
-	struct vmw_screen_object_display *sou_priv;
 	struct vmw_overlay *overlay_priv;
+	struct drm_property *hotplug_mode_update_property;
+	struct drm_property *implicit_placement_property;
+	unsigned num_implicit;
+	struct vmw_framebuffer *implicit_fb;
 
 	/*
 	 * Context and surface management.
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 5da5de0cb522..723ba16c6084 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -3009,6 +3009,26 @@ out_unref:
 	return ret;
 }
 
+/**
+ * vmw_cmd_dx_genmips - Validate an SVGA_3D_CMD_DX_GENMIPS command
+ *
+ * @dev_priv: Pointer to a device private struct. Not used by this handler.
+ * @sw_context: The software context being used for this batch.
+ * @header: Pointer to the command header; the SVGA3dCmdDXGenMips body follows.
+ */
+static int vmw_cmd_dx_genmips(struct vmw_private *dev_priv,
+			      struct vmw_sw_context *sw_context,
+			      SVGA3dCmdHeader *header)
+{
+	struct {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdDXGenMips body;
+	} *cmd = container_of(header, typeof(*cmd), header);
+
+	return vmw_view_id_val_add(sw_context, vmw_view_sr,
+				   cmd->body.shaderResourceViewId);
+}
+
 static int vmw_cmd_check_not_3d(struct vmw_private *dev_priv,
 				struct vmw_sw_context *sw_context,
 				void *buf, uint32_t *size)
@@ -3297,7 +3317,7 @@ static const struct vmw_cmd_entry vmw_cmd_entries[SVGA_3D_CMD_MAX] = {
 		    &vmw_cmd_dx_clear_depthstencil_view, true, false, true),
 	VMW_CMD_DEF(SVGA_3D_CMD_DX_PRED_COPY, &vmw_cmd_invalid,
 		    true, false, true),
-	VMW_CMD_DEF(SVGA_3D_CMD_DX_GENMIPS, &vmw_cmd_invalid,
+	VMW_CMD_DEF(SVGA_3D_CMD_DX_GENMIPS, &vmw_cmd_dx_genmips,
 		    true, false, true),
 	VMW_CMD_DEF(SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE,
 		    &vmw_cmd_dx_check_subresource, true, false, true),
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index b221a8c40282..4742ec4ead27 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -236,8 +236,8 @@ int vmw_du_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
 	struct vmw_display_unit *du = vmw_crtc_to_du(crtc);
 	bool shown = du->cursor_surface || du->cursor_dmabuf ? true : false;
 
-	du->cursor_x = x + crtc->x;
-	du->cursor_y = y + crtc->y;
+	du->cursor_x = x + du->set_gui_x;
+	du->cursor_y = y + du->set_gui_y;
 
 	/*
 	 * FIXME: Unclear whether there's any global state touched by the
@@ -663,9 +663,8 @@ static int vmw_framebuffer_dmabuf_dirty(struct drm_framebuffer *framebuffer,
 		break;
 	case vmw_du_screen_object:
 		ret = vmw_kms_sou_do_dmabuf_dirty(dev_priv, &vfbd->base,
-						  clips, num_clips, increment,
-						  true,
-						  NULL);
+						  clips, NULL, num_clips,
+						  increment, true, NULL);
 		break;
 	case vmw_du_legacy:
 		ret = vmw_kms_ldu_do_dmabuf_dirty(dev_priv, &vfbd->base, 0, 0,
@@ -1109,6 +1108,22 @@ int vmw_kms_present(struct vmw_private *dev_priv,
 	return 0;
 }
 
+static void
+vmw_kms_create_hotplug_mode_update_property(struct vmw_private *dev_priv)
+{
+	if (dev_priv->hotplug_mode_update_property)
+		return; /* property already set up; create it only once */
+
+	dev_priv->hotplug_mode_update_property =
+		drm_property_create_range(dev_priv->dev,
+					  DRM_MODE_PROP_IMMUTABLE,
+					  "hotplug_mode_update", 0, 1);
+
+	if (!dev_priv->hotplug_mode_update_property)
+		return; /* NULL result is left in place; no error is reported */
+
+}
+
 int vmw_kms_init(struct vmw_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
@@ -1121,6 +1136,9 @@ int vmw_kms_init(struct vmw_private *dev_priv)
 	dev->mode_config.max_width = dev_priv->texture_max_width;
 	dev->mode_config.max_height = dev_priv->texture_max_height;
 
+	drm_mode_create_suggested_offset_properties(dev);
+	vmw_kms_create_hotplug_mode_update_property(dev_priv);
+
 	ret = vmw_kms_stdu_init_display(dev_priv);
 	if (ret) {
 		ret = vmw_kms_sou_init_display(dev_priv);
@@ -1360,15 +1378,28 @@ static int vmw_du_update_layout(struct vmw_private *dev_priv, unsigned num,
 			du->pref_active = true;
 			du->gui_x = rects[du->unit].x;
 			du->gui_y = rects[du->unit].y;
+			drm_object_property_set_value
+			  (&con->base, dev->mode_config.suggested_x_property,
+			   du->gui_x);
+			drm_object_property_set_value
+			  (&con->base, dev->mode_config.suggested_y_property,
+			   du->gui_y);
 		} else {
 			du->pref_width = 800;
 			du->pref_height = 600;
 			du->pref_active = false;
+			drm_object_property_set_value
+			  (&con->base, dev->mode_config.suggested_x_property,
+			   0);
+			drm_object_property_set_value
+			  (&con->base, dev->mode_config.suggested_y_property,
+			   0);
 		}
 		con->status = vmw_du_connector_detect(con, true);
 	}
 
 	mutex_unlock(&dev->mode_config.mutex);
+	drm_sysfs_hotplug_event(dev);
 
 	return 0;
 }
@@ -1591,6 +1622,12 @@ int vmw_du_connector_set_property(struct drm_connector *connector,
 				  struct drm_property *property,
 				  uint64_t val)
 {
+	struct vmw_display_unit *du = vmw_connector_to_du(connector);
+	struct vmw_private *dev_priv = vmw_priv(connector->dev);
+
+	if (property == dev_priv->implicit_placement_property)
+		du->is_implicit = val;
+
 	return 0;
 }
 
@@ -2096,3 +2133,119 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv,
 
 	return 0;
 }
+
+/**
+ * vmw_kms_del_active - unregister a crtc binding to the implicit framebuffer
+ *
+ * @dev_priv: Pointer to a device private struct (mode_config.mutex held).
+ * @du: The display unit of the crtc; ignored unless it is active_implicit.
+ */
+void vmw_kms_del_active(struct vmw_private *dev_priv,
+			struct vmw_display_unit *du)
+{
+	lockdep_assert_held_once(&dev_priv->dev->mode_config.mutex);
+
+	if (du->active_implicit) {
+		if (--(dev_priv->num_implicit) == 0)
+			dev_priv->implicit_fb = NULL;
+		du->active_implicit = false;
+	}
+}
+
+/**
+ * vmw_kms_add_active - register a crtc binding to an implicit framebuffer
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @du: The display unit of the crtc.
+ * @vfb: The implicit framebuffer
+ *
+ * Registers a binding to an implicit framebuffer.
+ */
+void vmw_kms_add_active(struct vmw_private *dev_priv,
+			struct vmw_display_unit *du,
+			struct vmw_framebuffer *vfb)
+{
+	lockdep_assert_held_once(&dev_priv->dev->mode_config.mutex);
+
+	WARN_ON_ONCE(!dev_priv->num_implicit && dev_priv->implicit_fb);
+
+	if (!du->active_implicit && du->is_implicit) {
+		dev_priv->implicit_fb = vfb;
+		du->active_implicit = true;
+		dev_priv->num_implicit++;
+	}
+}
+
+/**
+ * vmw_kms_crtc_flippable - Check whether we can page-flip a crtc.
+ *
+ * @dev_priv: Pointer to device-private struct.
+ * @crtc: The crtc we want to flip.
+ *
+ * Returns true or false depending whether it's OK to flip this crtc
+ * based on the criterion that we must not have more than one implicit
+ * frame-buffer at any one time.
+ */
+bool vmw_kms_crtc_flippable(struct vmw_private *dev_priv,
+			    struct drm_crtc *crtc)
+{
+	struct vmw_display_unit *du = vmw_crtc_to_du(crtc);
+
+	lockdep_assert_held_once(&dev_priv->dev->mode_config.mutex);
+
+	if (!du->is_implicit)
+		return true;
+
+	if (dev_priv->num_implicit != 1)
+		return false;
+
+	return true;
+}
+
+/**
+ * vmw_kms_update_implicit_fb - Set the implicit fb to this crtc's current fb.
+ *
+ * @dev_priv: Pointer to device-private struct.
+ * @crtc: The crtc the new implicit frame-buffer is bound to.
+ */
+void vmw_kms_update_implicit_fb(struct vmw_private *dev_priv,
+				struct drm_crtc *crtc)
+{
+	struct vmw_display_unit *du = vmw_crtc_to_du(crtc);
+	struct vmw_framebuffer *vfb;
+
+	lockdep_assert_held_once(&dev_priv->dev->mode_config.mutex);
+
+	if (!du->is_implicit)
+		return;
+
+	vfb = vmw_framebuffer_to_vfb(crtc->primary->fb);
+	WARN_ON_ONCE(dev_priv->num_implicit != 1 &&
+		     dev_priv->implicit_fb != vfb);
+
+	dev_priv->implicit_fb = vfb;
+}
+
+/**
+ * vmw_kms_create_implicit_placement_property - Set up the implicit placement
+ * property.
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @immutable: Whether the property is immutable.
+ *
+ * Sets up the implicit placement property unless it's already set up.
+ */
+void
+vmw_kms_create_implicit_placement_property(struct vmw_private *dev_priv,
+					   bool immutable)
+{
+	if (dev_priv->implicit_placement_property)
+		return;
+
+	dev_priv->implicit_placement_property =
+		drm_property_create_range(dev_priv->dev,
+					  immutable ?
+					  DRM_MODE_PROP_IMMUTABLE : 0,
+					  "implicit_placement", 0, 1);
+
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index edd81503516d..57203212c501 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -178,6 +178,9 @@ struct vmw_display_unit {
 	int gui_x;
 	int gui_y;
 	bool is_implicit;
+	bool active_implicit;
+	int set_gui_x;
+	int set_gui_y;
 };
 
 #define vmw_crtc_to_du(x) \
@@ -254,6 +257,18 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv,
 			    struct drm_crtc **p_crtc,
 			    struct drm_display_mode **p_mode);
 void vmw_guess_mode_timing(struct drm_display_mode *mode);
+void vmw_kms_del_active(struct vmw_private *dev_priv,
+			struct vmw_display_unit *du);
+void vmw_kms_add_active(struct vmw_private *dev_priv,
+			struct vmw_display_unit *du,
+			struct vmw_framebuffer *vfb);
+bool vmw_kms_crtc_flippable(struct vmw_private *dev_priv,
+			    struct drm_crtc *crtc);
+void vmw_kms_update_implicit_fb(struct vmw_private *dev_priv,
+				struct drm_crtc *crtc);
+void vmw_kms_create_implicit_placement_property(struct vmw_private *dev_priv,
+						bool immutable);
+
 
 /*
  * Legacy display unit functions - vmwgfx_ldu.c
@@ -287,6 +302,7 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv,
 int vmw_kms_sou_do_dmabuf_dirty(struct vmw_private *dev_priv,
 				struct vmw_framebuffer *framebuffer,
 				struct drm_clip_rect *clips,
+				struct drm_vmw_rect *vclips,
 				unsigned num_clips, int increment,
 				bool interruptible,
 				struct vmw_fence_obj **out_fence);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
index b6fa44fe8929..63ccd9871ec9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
@@ -288,6 +288,8 @@ static int vmw_ldu_crtc_set_config(struct drm_mode_set *set)
 	crtc->y = set->y;
 	crtc->mode = *mode;
 	crtc->enabled = true;
+	ldu->base.set_gui_x = set->x;
+	ldu->base.set_gui_y = set->y;
 
 	vmw_ldu_add_active(dev_priv, ldu, vfb);
 
@@ -375,8 +377,19 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit)
 	drm_mode_crtc_set_gamma_size(crtc, 256);
 
 	drm_object_attach_property(&connector->base,
-				      dev->mode_config.dirty_info_property,
-				      1);
+				   dev->mode_config.dirty_info_property,
+				   1);
+	drm_object_attach_property(&connector->base,
+				   dev_priv->hotplug_mode_update_property, 1);
+	drm_object_attach_property(&connector->base,
+				   dev->mode_config.suggested_x_property, 0);
+	drm_object_attach_property(&connector->base,
+				   dev->mode_config.suggested_y_property, 0);
+	if (dev_priv->implicit_placement_property)
+		drm_object_attach_property
+			(&connector->base,
+			 dev_priv->implicit_placement_property,
+			 1);
 
 	return 0;
 }
@@ -412,6 +425,8 @@ int vmw_kms_ldu_init_display(struct vmw_private *dev_priv)
 	if (ret != 0)
 		goto err_vblank_cleanup;
 
+	vmw_kms_create_implicit_placement_property(dev_priv, true);
+
 	if (dev_priv->capabilities & SVGA_CAP_MULTIMON)
 		for (i = 0; i < VMWGFX_NUM_DISPLAY_UNITS; ++i)
 			vmw_ldu_init(dev_priv, i);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
index c5a1a08b0449..0ea22fd112c9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -74,19 +74,6 @@ struct vmw_kms_sou_dirty_cmd {
 	SVGA3dCmdBlitSurfaceToScreen body;
 };
 
-
-/*
- * Other structs.
- */
-
-struct vmw_screen_object_display {
-	unsigned num_implicit;
-
-	struct vmw_framebuffer *implicit_fb;
-	SVGAFifoCmdDefineGMRFB cur;
-	struct vmw_dma_buffer *pinned_gmrfb;
-};
-
 /**
  * Display unit using screen objects.
  */
@@ -97,7 +84,6 @@ struct vmw_screen_object_unit {
 	struct vmw_dma_buffer *buffer; /**< Backing store buffer */
 
 	bool defined;
-	bool active_implicit;
 };
 
 static void vmw_sou_destroy(struct vmw_screen_object_unit *sou)
@@ -116,33 +102,6 @@ static void vmw_sou_crtc_destroy(struct drm_crtc *crtc)
 	vmw_sou_destroy(vmw_crtc_to_sou(crtc));
 }
 
-static void vmw_sou_del_active(struct vmw_private *vmw_priv,
-			       struct vmw_screen_object_unit *sou)
-{
-	struct vmw_screen_object_display *ld = vmw_priv->sou_priv;
-
-	if (sou->active_implicit) {
-		if (--(ld->num_implicit) == 0)
-			ld->implicit_fb = NULL;
-		sou->active_implicit = false;
-	}
-}
-
-static void vmw_sou_add_active(struct vmw_private *vmw_priv,
-			       struct vmw_screen_object_unit *sou,
-			       struct vmw_framebuffer *vfb)
-{
-	struct vmw_screen_object_display *ld = vmw_priv->sou_priv;
-
-	BUG_ON(!ld->num_implicit && ld->implicit_fb);
-
-	if (!sou->active_implicit && sou->base.is_implicit) {
-		ld->implicit_fb = vfb;
-		sou->active_implicit = true;
-		ld->num_implicit++;
-	}
-}
-
 /**
  * Send the fifo command to create a screen.
  */
@@ -185,6 +144,8 @@ static int vmw_sou_fifo_create(struct vmw_private *dev_priv,
 		cmd->obj.root.x = sou->base.gui_x;
 		cmd->obj.root.y = sou->base.gui_y;
 	}
+	sou->base.set_gui_x = cmd->obj.root.x;
+	sou->base.set_gui_y = cmd->obj.root.y;
 
 	/* Ok to assume that buffer is pinned in vram */
 	vmw_bo_get_guest_ptr(&sou->buffer->base, &cmd->obj.backingStore.ptr);
@@ -323,13 +284,13 @@ static int vmw_sou_crtc_set_config(struct drm_mode_set *set)
 		return -EINVAL;
 	}
 
-	/* sou only supports one fb active at the time */
+	/* Only one active implicit frame-buffer at a time. */
 	if (sou->base.is_implicit &&
-	    dev_priv->sou_priv->implicit_fb && vfb &&
-	    !(dev_priv->sou_priv->num_implicit == 1 &&
-	      sou->active_implicit) &&
-	    dev_priv->sou_priv->implicit_fb != vfb) {
-		DRM_ERROR("Multiple framebuffers not supported\n");
+	    dev_priv->implicit_fb && vfb &&
+	    !(dev_priv->num_implicit == 1 &&
+	      sou->base.active_implicit) &&
+	    dev_priv->implicit_fb != vfb) {
+		DRM_ERROR("Multiple implicit framebuffers not supported.\n");
 		return -EINVAL;
 	}
 
@@ -351,7 +312,7 @@ static int vmw_sou_crtc_set_config(struct drm_mode_set *set)
 		crtc->y = 0;
 		crtc->enabled = false;
 
-		vmw_sou_del_active(dev_priv, sou);
+		vmw_kms_del_active(dev_priv, &sou->base);
 
 		vmw_sou_backing_free(dev_priv, sou);
 
@@ -415,7 +376,7 @@ static int vmw_sou_crtc_set_config(struct drm_mode_set *set)
 		return ret;
 	}
 
-	vmw_sou_add_active(dev_priv, sou, vfb);
+	vmw_kms_add_active(dev_priv, &sou->base, vfb);
 
 	connector->encoder = encoder;
 	encoder->crtc = crtc;
@@ -428,39 +389,6 @@ static int vmw_sou_crtc_set_config(struct drm_mode_set *set)
 	return 0;
 }
 
-/**
- * Returns if this unit can be page flipped.
- * Must be called with the mode_config mutex held.
- */
-static bool vmw_sou_screen_object_flippable(struct vmw_private *dev_priv,
-					    struct drm_crtc *crtc)
-{
-	struct vmw_screen_object_unit *sou = vmw_crtc_to_sou(crtc);
-
-	if (!sou->base.is_implicit)
-		return true;
-
-	if (dev_priv->sou_priv->num_implicit != 1)
-		return false;
-
-	return true;
-}
-
-/**
- * Update the implicit fb to the current fb of this crtc.
- * Must be called with the mode_config mutex held.
- */
-static void vmw_sou_update_implicit_fb(struct vmw_private *dev_priv,
-				       struct drm_crtc *crtc)
-{
-	struct vmw_screen_object_unit *sou = vmw_crtc_to_sou(crtc);
-
-	BUG_ON(!sou->base.is_implicit);
-
-	dev_priv->sou_priv->implicit_fb =
-		vmw_framebuffer_to_vfb(sou->base.crtc.primary->fb);
-}
-
 static int vmw_sou_crtc_page_flip(struct drm_crtc *crtc,
 				  struct drm_framebuffer *fb,
 				  struct drm_pending_vblank_event *event,
@@ -470,30 +398,27 @@ static int vmw_sou_crtc_page_flip(struct drm_crtc *crtc,
 	struct drm_framebuffer *old_fb = crtc->primary->fb;
 	struct vmw_framebuffer *vfb = vmw_framebuffer_to_vfb(fb);
 	struct vmw_fence_obj *fence = NULL;
-	struct drm_clip_rect clips;
+	struct drm_vmw_rect vclips;
 	int ret;
 
-	/* require ScreenObject support for page flipping */
-	if (!dev_priv->sou_priv)
-		return -ENOSYS;
-
-	if (!vmw_sou_screen_object_flippable(dev_priv, crtc))
+	if (!vmw_kms_crtc_flippable(dev_priv, crtc))
 		return -EINVAL;
 
 	crtc->primary->fb = fb;
 
 	/* do a full screen dirty update */
-	clips.x1 = clips.y1 = 0;
-	clips.x2 = fb->width;
-	clips.y2 = fb->height;
+	vclips.x = crtc->x;
+	vclips.y = crtc->y;
+	vclips.w = crtc->mode.hdisplay;
+	vclips.h = crtc->mode.vdisplay;
 
 	if (vfb->dmabuf)
 		ret = vmw_kms_sou_do_dmabuf_dirty(dev_priv, vfb,
-						  &clips, 1, 1,
+						  NULL, &vclips, 1, 1,
 						  true, &fence);
 	else
 		ret = vmw_kms_sou_do_surface_dirty(dev_priv, vfb,
-						   &clips, NULL, NULL,
+						   NULL, &vclips, NULL,
 						   0, 0, 1, 1, &fence);
 
 
@@ -521,7 +446,7 @@ static int vmw_sou_crtc_page_flip(struct drm_crtc *crtc,
 	vmw_fence_obj_unreference(&fence);
 
 	if (vmw_crtc_to_du(crtc)->is_implicit)
-		vmw_sou_update_implicit_fb(dev_priv, crtc);
+		vmw_kms_update_implicit_fb(dev_priv, crtc);
 
 	return ret;
 
@@ -586,13 +511,12 @@ static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit)
 	encoder = &sou->base.encoder;
 	connector = &sou->base.connector;
 
-	sou->active_implicit = false;
-
+	sou->base.active_implicit = false;
 	sou->base.pref_active = (unit == 0);
 	sou->base.pref_width = dev_priv->initial_width;
 	sou->base.pref_height = dev_priv->initial_height;
 	sou->base.pref_mode = NULL;
-	sou->base.is_implicit = true;
+	sou->base.is_implicit = false;
 
 	drm_connector_init(dev, connector, &vmw_sou_connector_funcs,
 			   DRM_MODE_CONNECTOR_VIRTUAL);
@@ -611,8 +535,19 @@ static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit)
 	drm_mode_crtc_set_gamma_size(crtc, 256);
 
 	drm_object_attach_property(&connector->base,
-				      dev->mode_config.dirty_info_property,
-				      1);
+				   dev->mode_config.dirty_info_property,
+				   1);
+	drm_object_attach_property(&connector->base,
+				   dev_priv->hotplug_mode_update_property, 1);
+	drm_object_attach_property(&connector->base,
+				   dev->mode_config.suggested_x_property, 0);
+	drm_object_attach_property(&connector->base,
+				   dev->mode_config.suggested_y_property, 0);
+	if (dev_priv->implicit_placement_property)
+		drm_object_attach_property
+			(&connector->base,
+			 dev_priv->implicit_placement_property,
+			 sou->base.is_implicit);
 
 	return 0;
 }
@@ -622,11 +557,6 @@ int vmw_kms_sou_init_display(struct vmw_private *dev_priv)
 	struct drm_device *dev = dev_priv->dev;
 	int i, ret;
 
-	if (dev_priv->sou_priv) {
-		DRM_INFO("sou system already on\n");
-		return -EINVAL;
-	}
-
 	if (!(dev_priv->capabilities & SVGA_CAP_SCREEN_OBJECT_2)) {
 		DRM_INFO("Not using screen objects,"
 			 " missing cap SCREEN_OBJECT_2\n");
@@ -634,21 +564,19 @@ int vmw_kms_sou_init_display(struct vmw_private *dev_priv)
 	}
 
 	ret = -ENOMEM;
-	dev_priv->sou_priv = kmalloc(sizeof(*dev_priv->sou_priv), GFP_KERNEL);
-	if (unlikely(!dev_priv->sou_priv))
-		goto err_no_mem;
-
-	dev_priv->sou_priv->num_implicit = 0;
-	dev_priv->sou_priv->implicit_fb = NULL;
+	dev_priv->num_implicit = 0;
+	dev_priv->implicit_fb = NULL;
 
 	ret = drm_vblank_init(dev, VMWGFX_NUM_DISPLAY_UNITS);
 	if (unlikely(ret != 0))
-		goto err_free;
+		return ret;
 
 	ret = drm_mode_create_dirty_info_property(dev);
 	if (unlikely(ret != 0))
 		goto err_vblank_cleanup;
 
+	vmw_kms_create_implicit_placement_property(dev_priv, false);
+
 	for (i = 0; i < VMWGFX_NUM_DISPLAY_UNITS; ++i)
 		vmw_sou_init(dev_priv, i);
 
@@ -660,10 +588,6 @@ int vmw_kms_sou_init_display(struct vmw_private *dev_priv)
 
 err_vblank_cleanup:
 	drm_vblank_cleanup(dev);
-err_free:
-	kfree(dev_priv->sou_priv);
-	dev_priv->sou_priv = NULL;
-err_no_mem:
 	return ret;
 }
 
@@ -671,13 +595,8 @@ int vmw_kms_sou_close_display(struct vmw_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
 
-	if (!dev_priv->sou_priv)
-		return -ENOSYS;
-
 	drm_vblank_cleanup(dev);
 
-	kfree(dev_priv->sou_priv);
-
 	return 0;
 }
 
@@ -738,6 +657,11 @@ static void vmw_sou_surface_fifo_commit(struct vmw_kms_dirty *dirty)
 	SVGASignedRect *blit = (SVGASignedRect *) &cmd[1];
 	int i;
 
+	if (!dirty->num_hits) {
+		vmw_fifo_commit(dirty->dev_priv, 0);
+		return;
+	}
+
 	cmd->header.id = SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN;
 	cmd->header.size = sizeof(cmd->body) + region_size;
 
@@ -875,6 +799,11 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv,
  */
 static void vmw_sou_dmabuf_fifo_commit(struct vmw_kms_dirty *dirty)
 {
+	if (!dirty->num_hits) {
+		vmw_fifo_commit(dirty->dev_priv, 0);
+		return;
+	}
+
 	vmw_fifo_commit(dirty->dev_priv,
 			sizeof(struct vmw_kms_sou_dmabuf_blit) *
 			dirty->num_hits);
@@ -909,6 +838,8 @@ static void vmw_sou_dmabuf_clip(struct vmw_kms_dirty *dirty)
  * @dev_priv: Pointer to the device private structure.
  * @framebuffer: Pointer to the dma-buffer backed framebuffer.
  * @clips: Array of clip rects.
+ * @vclips: Alternate array of clip rects. Either @clips or @vclips must
+ * be NULL.
  * @num_clips: Number of clip rects in @clips.
  * @increment: Increment to use when looping over @clips.
  * @interruptible: Whether to perform waits interruptible if possible.
@@ -922,6 +853,7 @@ static void vmw_sou_dmabuf_clip(struct vmw_kms_dirty *dirty)
 int vmw_kms_sou_do_dmabuf_dirty(struct vmw_private *dev_priv,
 				struct vmw_framebuffer *framebuffer,
 				struct drm_clip_rect *clips,
+				struct drm_vmw_rect *vclips,
 				unsigned num_clips, int increment,
 				bool interruptible,
 				struct vmw_fence_obj **out_fence)
@@ -945,7 +877,7 @@ int vmw_kms_sou_do_dmabuf_dirty(struct vmw_private *dev_priv,
 	dirty.clip = vmw_sou_dmabuf_clip;
 	dirty.fifo_reserve_size = sizeof(struct vmw_kms_sou_dmabuf_blit) *
 		num_clips;
-	ret = vmw_kms_helper_dirty(dev_priv, framebuffer, clips, NULL,
+	ret = vmw_kms_helper_dirty(dev_priv, framebuffer, clips, vclips,
 				   0, 0, num_clips, increment, &dirty);
 	vmw_kms_helper_buffer_finish(dev_priv, NULL, buf, out_fence, NULL);
 
@@ -967,6 +899,11 @@ out_revert:
  */
 static void vmw_sou_readback_fifo_commit(struct vmw_kms_dirty *dirty)
 {
+	if (!dirty->num_hits) {
+		vmw_fifo_commit(dirty->dev_priv, 0);
+		return;
+	}
+
 	vmw_fifo_commit(dirty->dev_priv,
 			sizeof(struct vmw_kms_sou_readback_blit) *
 			dirty->num_hits);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
index 4ef5ffd7189d..b949102ad864 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -96,7 +96,6 @@ struct vmw_stdu_surface_copy {
  *               content_vfbs dimensions, then this is a pointer into the
  *               corresponding field in content_vfbs.  If not, then this
  *               is a separate buffer to which content_vfbs will blit to.
- * @content_fb: holds the rendered content, can be a surface or DMA buffer
  * @content_type:  content_fb type
  * @defined:  true if the current display unit has been initialized
  */
@@ -104,8 +103,6 @@ struct vmw_screen_target_display_unit {
 	struct vmw_display_unit base;
 
 	struct vmw_surface     *display_srf;
-	struct drm_framebuffer *content_fb;
-
 	enum stdu_content_type content_fb_type;
 
 	bool defined;
@@ -122,22 +119,6 @@ static void vmw_stdu_destroy(struct vmw_screen_target_display_unit *stdu);
  *****************************************************************************/
 
 /**
- * vmw_stdu_pin_display - pins the resource associated with the display surface
- *
- * @stdu: contains the display surface
- *
- * Since the display surface can either be a private surface allocated by us,
- * or it can point to the content surface, we use this function to not pin the
- * same resource twice.
- */
-static int vmw_stdu_pin_display(struct vmw_screen_target_display_unit *stdu)
-{
-	return vmw_resource_pin(&stdu->display_srf->res, false);
-}
-
-
-
-/**
  * vmw_stdu_unpin_display - unpins the resource associated with display surface
  *
  * @stdu: contains the display surface
@@ -153,13 +134,7 @@ static void vmw_stdu_unpin_display(struct vmw_screen_target_display_unit *stdu)
 		struct vmw_resource *res = &stdu->display_srf->res;
 
 		vmw_resource_unpin(res);
-
-		if (stdu->content_fb_type != SAME_AS_DISPLAY) {
-			vmw_resource_unreference(&res);
-			stdu->content_fb_type = SAME_AS_DISPLAY;
-		}
-
-		stdu->display_srf = NULL;
+		vmw_surface_unreference(&stdu->display_srf);
 	}
 }
 
@@ -185,6 +160,9 @@ static void vmw_stdu_crtc_destroy(struct drm_crtc *crtc)
  *
  * @dev_priv:  VMW DRM device
  * @stdu: display unit to create a Screen Target for
+ * @mode: The mode to set.
+ * @crtc_x: X coordinate of screen target relative to framebuffer origin.
+ * @crtc_y: Y coordinate of screen target relative to framebuffer origin.
  *
  * Creates a STDU that we can used later.  This function is called whenever the
  * framebuffer size changes.
@@ -193,7 +171,9 @@ static void vmw_stdu_crtc_destroy(struct drm_crtc *crtc)
  * 0 on success, error code on failure
  */
 static int vmw_stdu_define_st(struct vmw_private *dev_priv,
-			      struct vmw_screen_target_display_unit *stdu)
+			      struct vmw_screen_target_display_unit *stdu,
+			      struct drm_display_mode *mode,
+			      int crtc_x, int crtc_y)
 {
 	struct {
 		SVGA3dCmdHeader header;
@@ -211,17 +191,19 @@ static int vmw_stdu_define_st(struct vmw_private *dev_priv,
 	cmd->header.size = sizeof(cmd->body);
 
 	cmd->body.stid   = stdu->base.unit;
-	cmd->body.width  = stdu->display_srf->base_size.width;
-	cmd->body.height = stdu->display_srf->base_size.height;
+	cmd->body.width  = mode->hdisplay;
+	cmd->body.height = mode->vdisplay;
 	cmd->body.flags  = (0 == cmd->body.stid) ? SVGA_STFLAG_PRIMARY : 0;
 	cmd->body.dpi    = 0;
-	cmd->body.xRoot  = stdu->base.crtc.x;
-	cmd->body.yRoot  = stdu->base.crtc.y;
-
-	if (!stdu->base.is_implicit) {
+	if (stdu->base.is_implicit) {
+		cmd->body.xRoot  = crtc_x;
+		cmd->body.yRoot  = crtc_y;
+	} else {
 		cmd->body.xRoot  = stdu->base.gui_x;
 		cmd->body.yRoot  = stdu->base.gui_y;
 	}
+	stdu->base.set_gui_x = cmd->body.xRoot;
+	stdu->base.set_gui_y = cmd->body.yRoot;
 
 	vmw_fifo_commit(dev_priv, sizeof(*cmd));
 
@@ -392,126 +374,43 @@ static int vmw_stdu_destroy_st(struct vmw_private *dev_priv,
 	return ret;
 }
 
-
-
 /**
- * vmw_stdu_crtc_set_config - Sets a mode
+ * vmw_stdu_bind_fb - Bind an fb to a defined screen target
  *
- * @set:  mode parameters
- *
- * This function is the device-specific portion of the DRM CRTC mode set.
- * For the SVGA device, we do this by defining a Screen Target, binding a
- * GB Surface to that target, and finally update the screen target.
+ * @dev_priv: Pointer to a device private struct.
+ * @crtc: The crtc holding the screen target.
+ * @mode: The mode currently used by the screen target. Must be non-NULL.
+ * @new_fb: The new framebuffer to bind. Must be non-NULL.
  *
  * RETURNS:
- * 0 on success, error code otherwise
+ * 0 on success, error code on failure.
  */
-static int vmw_stdu_crtc_set_config(struct drm_mode_set *set)
+static int vmw_stdu_bind_fb(struct vmw_private *dev_priv,
+			    struct drm_crtc *crtc,
+			    struct drm_display_mode *mode,
+			    struct drm_framebuffer *new_fb)
 {
-	struct vmw_private *dev_priv;
-	struct vmw_screen_target_display_unit *stdu;
-	struct vmw_framebuffer *vfb;
+	struct vmw_screen_target_display_unit *stdu = vmw_crtc_to_stdu(crtc);
+	struct vmw_framebuffer *vfb = vmw_framebuffer_to_vfb(new_fb);
+	struct vmw_surface *new_display_srf = NULL;
+	enum stdu_content_type new_content_type;
 	struct vmw_framebuffer_surface *new_vfbs;
-	struct drm_display_mode *mode;
-	struct drm_framebuffer  *new_fb;
-	struct drm_crtc      *crtc;
-	struct drm_encoder   *encoder;
-	struct drm_connector *connector;
-	int    ret;
-
-
-	if (!set || !set->crtc)
-		return -EINVAL;
-
-	crtc     = set->crtc;
-	crtc->x  = set->x;
-	crtc->y  = set->y;
-	stdu     = vmw_crtc_to_stdu(crtc);
-	mode     = set->mode;
-	new_fb   = set->fb;
-	dev_priv = vmw_priv(crtc->dev);
-
-
-	if (set->num_connectors > 1) {
-		DRM_ERROR("Too many connectors\n");
-		return -EINVAL;
-	}
-
-	if (set->num_connectors == 1 &&
-	    set->connectors[0] != &stdu->base.connector) {
-		DRM_ERROR("Connectors don't match %p %p\n",
-			set->connectors[0], &stdu->base.connector);
-		return -EINVAL;
-	}
-
-
-	/* Since they always map one to one these are safe */
-	connector = &stdu->base.connector;
-	encoder   = &stdu->base.encoder;
-
-
-	/*
-	 * After this point the CRTC will be considered off unless a new fb
-	 * is bound
-	 */
-	if (stdu->defined) {
-		/* Unbind current surface by binding an invalid one */
-		ret = vmw_stdu_bind_st(dev_priv, stdu, NULL);
-		if (unlikely(ret != 0))
-			return ret;
-
-		/* Update Screen Target, display will now be blank */
-		if (crtc->primary->fb) {
-			vmw_stdu_update_st(dev_priv, stdu);
-			if (unlikely(ret != 0))
-				return ret;
-		}
-
-		crtc->primary->fb  = NULL;
-		crtc->enabled      = false;
-		encoder->crtc      = NULL;
-		connector->encoder = NULL;
-
-		vmw_stdu_unpin_display(stdu);
-		stdu->content_fb      = NULL;
-		stdu->content_fb_type = SAME_AS_DISPLAY;
-
-		ret = vmw_stdu_destroy_st(dev_priv, stdu);
-		/* The hardware is hung, give up */
-		if (unlikely(ret != 0))
-			return ret;
-	}
-
-
-	/* Any of these conditions means the caller wants CRTC off */
-	if (set->num_connectors == 0 || !mode || !new_fb)
-		return 0;
-
-
-	if (set->x + mode->hdisplay > new_fb->width ||
-	    set->y + mode->vdisplay > new_fb->height) {
-		DRM_ERROR("Set outside of framebuffer\n");
-		return -EINVAL;
-	}
+	int ret;
 
-	stdu->content_fb = new_fb;
-	vfb = vmw_framebuffer_to_vfb(stdu->content_fb);
+	WARN_ON_ONCE(!stdu->defined);
 
-	if (vfb->dmabuf)
-		stdu->content_fb_type = SEPARATE_DMA;
+	if (!vfb->dmabuf && new_fb->width == mode->hdisplay &&
+	    new_fb->height == mode->vdisplay)
+		new_content_type = SAME_AS_DISPLAY;
+	else if (vfb->dmabuf)
+		new_content_type = SEPARATE_DMA;
+	else
+		new_content_type = SEPARATE_SURFACE;
 
-	/*
-	 * If the requested mode is different than the width and height
-	 * of the FB or if the content buffer is a DMA buf, then allocate
-	 * a display FB that matches the dimension of the mode
-	 */
-	if (mode->hdisplay != new_fb->width  ||
-	    mode->vdisplay != new_fb->height ||
-	    stdu->content_fb_type != SAME_AS_DISPLAY) {
+	if (new_content_type != SAME_AS_DISPLAY &&
+	    !stdu->display_srf) {
 		struct vmw_surface content_srf;
 		struct drm_vmw_size display_base_size = {0};
-		struct vmw_surface *display_srf;
-
 
 		display_base_size.width  = mode->hdisplay;
 		display_base_size.height = mode->vdisplay;
@@ -521,7 +420,7 @@ static int vmw_stdu_crtc_set_config(struct drm_mode_set *set)
 		 * If content buffer is a DMA buf, then we have to construct
 		 * surface info
 		 */
-		if (stdu->content_fb_type == SEPARATE_DMA) {
+		if (new_content_type == SEPARATE_DMA) {
 
 			switch (new_fb->bits_per_pixel) {
 			case 32:
@@ -538,17 +437,13 @@ static int vmw_stdu_crtc_set_config(struct drm_mode_set *set)
 
 			default:
 				DRM_ERROR("Invalid format\n");
-				ret = -EINVAL;
-				goto err_unref_content;
+				return -EINVAL;
 			}
 
 			content_srf.flags             = 0;
 			content_srf.mip_levels[0]     = 1;
 			content_srf.multisample_count = 0;
 		} else {
-
-			stdu->content_fb_type = SEPARATE_SURFACE;
-
 			new_vfbs = vmw_framebuffer_to_vfbs(new_fb);
 			content_srf = *new_vfbs->surface;
 		}
@@ -563,26 +458,136 @@ static int vmw_stdu_crtc_set_config(struct drm_mode_set *set)
 				content_srf.multisample_count,
 				0,
 				display_base_size,
-				&display_srf);
+				&new_display_srf);
 		if (unlikely(ret != 0)) {
-			DRM_ERROR("Cannot allocate a display FB.\n");
-			goto err_unref_content;
+			DRM_ERROR("Could not allocate screen target surface.\n");
+			return ret;
 		}
-
-		stdu->display_srf = display_srf;
-	} else {
+	} else if (new_content_type == SAME_AS_DISPLAY) {
 		new_vfbs = vmw_framebuffer_to_vfbs(new_fb);
-		stdu->display_srf = new_vfbs->surface;
+		new_display_srf = vmw_surface_reference(new_vfbs->surface);
 	}
 
+	if (new_display_srf) {
+		/* Pin new surface before flipping */
+		ret = vmw_resource_pin(&new_display_srf->res, false);
+		if (ret)
+			goto out_srf_unref;
+
+		ret = vmw_stdu_bind_st(dev_priv, stdu, &new_display_srf->res);
+		if (ret)
+			goto out_srf_unpin;
+
+		/* Unpin and unreference old surface */
+		vmw_stdu_unpin_display(stdu);
 
-	ret = vmw_stdu_pin_display(stdu);
-	if (unlikely(ret != 0)) {
-		stdu->display_srf = NULL;
-		goto err_unref_content;
+		/* Transfer the reference */
+		stdu->display_srf = new_display_srf;
+		new_display_srf = NULL;
 	}
 
-	vmw_svga_enable(dev_priv);
+	crtc->primary->fb = new_fb;
+	stdu->content_fb_type = new_content_type;
+	return 0;
+
+out_srf_unpin:
+	vmw_resource_unpin(&new_display_srf->res);
+out_srf_unref:
+	vmw_surface_unreference(&new_display_srf);
+	return ret;
+}
+
+/**
+ * vmw_stdu_crtc_set_config - Sets a mode
+ *
+ * @set:  mode parameters
+ *
+ * This function is the device-specific portion of the DRM CRTC mode set.
+ * For the SVGA device, we do this by defining a Screen Target, binding a
+ * GB Surface to that target, and finally update the screen target.
+ *
+ * RETURNS:
+ * 0 on success, error code otherwise
+ */
+static int vmw_stdu_crtc_set_config(struct drm_mode_set *set)
+{
+	struct vmw_private *dev_priv;
+	struct vmw_framebuffer *vfb;
+	struct vmw_screen_target_display_unit *stdu;
+	struct drm_display_mode *mode;
+	struct drm_framebuffer  *new_fb;
+	struct drm_crtc      *crtc;
+	struct drm_encoder   *encoder;
+	struct drm_connector *connector;
+	bool turning_off;
+	int    ret;
+
+
+	if (!set || !set->crtc)
+		return -EINVAL;
+
+	crtc     = set->crtc;
+	stdu     = vmw_crtc_to_stdu(crtc);
+	mode     = set->mode;
+	new_fb   = set->fb;
+	dev_priv = vmw_priv(crtc->dev);
+	turning_off = set->num_connectors == 0 || !mode || !new_fb;
+	vfb = (new_fb) ? vmw_framebuffer_to_vfb(new_fb) : NULL;
+
+	if (set->num_connectors > 1) {
+		DRM_ERROR("Too many connectors\n");
+		return -EINVAL;
+	}
+
+	if (set->num_connectors == 1 &&
+	    set->connectors[0] != &stdu->base.connector) {
+		DRM_ERROR("Connectors don't match %p %p\n",
+			set->connectors[0], &stdu->base.connector);
+		return -EINVAL;
+	}
+
+	if (!turning_off && (set->x + mode->hdisplay > new_fb->width ||
+			     set->y + mode->vdisplay > new_fb->height)) {
+		DRM_ERROR("Set outside of framebuffer\n");
+		return -EINVAL;
+	}
+
+	/* Only one active implicit frame-buffer at a time. */
+	if (!turning_off && stdu->base.is_implicit && dev_priv->implicit_fb &&
+	    !(dev_priv->num_implicit == 1 && stdu->base.active_implicit)
+	    && dev_priv->implicit_fb != vfb) {
+		DRM_ERROR("Multiple implicit framebuffers not supported.\n");
+		return -EINVAL;
+	}
+
+	/* Since they always map one to one these are safe */
+	connector = &stdu->base.connector;
+	encoder   = &stdu->base.encoder;
+
+	if (stdu->defined) {
+		ret = vmw_stdu_bind_st(dev_priv, stdu, NULL);
+		if (ret)
+			return ret;
+
+		vmw_stdu_unpin_display(stdu);
+		(void) vmw_stdu_update_st(dev_priv, stdu);
+		vmw_kms_del_active(dev_priv, &stdu->base);
+
+		ret = vmw_stdu_destroy_st(dev_priv, stdu);
+		if (ret)
+			return ret;
+
+		crtc->primary->fb = NULL;
+		crtc->enabled = false;
+		encoder->crtc = NULL;
+		connector->encoder = NULL;
+		stdu->content_fb_type = SAME_AS_DISPLAY;
+		crtc->x = set->x;
+		crtc->y = set->y;
+	}
+
+	if (turning_off)
+		return 0;
 
 	/*
 	 * Steps to displaying a surface, assume surface is already
@@ -592,35 +597,33 @@ static int vmw_stdu_crtc_set_config(struct drm_mode_set *set)
 	 *   3.  update that screen target (this is done later by
 	 *       vmw_kms_stdu_do_surface_dirty_or_present)
 	 */
-	ret = vmw_stdu_define_st(dev_priv, stdu);
-	if (unlikely(ret != 0))
-		goto err_unpin_display_and_content;
+	/*
+	 * Note on error handling: We can't really restore the crtc to
+	 * its original state on error, but we at least update the
+	 * current state to what's submitted to hardware to enable
+	 * future recovery.
+	 */
+	vmw_svga_enable(dev_priv);
+	ret = vmw_stdu_define_st(dev_priv, stdu, mode, set->x, set->y);
+	if (ret)
+		return ret;
 
-	ret = vmw_stdu_bind_st(dev_priv, stdu, &stdu->display_srf->res);
-	if (unlikely(ret != 0))
-		goto err_unpin_destroy_st;
+	crtc->x = set->x;
+	crtc->y = set->y;
+	crtc->mode = *mode;
 
+	ret = vmw_stdu_bind_fb(dev_priv, crtc, mode, new_fb);
+	if (ret)
+		return ret;
 
+	vmw_kms_add_active(dev_priv, &stdu->base, vfb);
+	crtc->enabled = true;
 	connector->encoder = encoder;
 	encoder->crtc      = crtc;
 
-	crtc->mode    = *mode;
-	crtc->primary->fb = new_fb;
-	crtc->enabled = true;
-
-	return ret;
-
-err_unpin_destroy_st:
-	vmw_stdu_destroy_st(dev_priv, stdu);
-err_unpin_display_and_content:
-	vmw_stdu_unpin_display(stdu);
-err_unref_content:
-	stdu->content_fb = NULL;
-	return ret;
+	return 0;
 }
 
-
-
 /**
  * vmw_stdu_crtc_page_flip - Binds a buffer to a screen target
  *
@@ -648,59 +651,34 @@ static int vmw_stdu_crtc_page_flip(struct drm_crtc *crtc,
 {
 	struct vmw_private *dev_priv = vmw_priv(crtc->dev);
 	struct vmw_screen_target_display_unit *stdu;
+	struct drm_vmw_rect vclips;
+	struct vmw_framebuffer *vfb = vmw_framebuffer_to_vfb(new_fb);
 	int ret;
 
-	if (crtc == NULL)
-		return -EINVAL;
-
 	dev_priv          = vmw_priv(crtc->dev);
 	stdu              = vmw_crtc_to_stdu(crtc);
-	crtc->primary->fb = new_fb;
-	stdu->content_fb  = new_fb;
-
-	if (stdu->display_srf) {
-		/*
-		 * If the display surface is the same as the content surface
-		 * then remove the reference
-		 */
-		if (stdu->content_fb_type == SAME_AS_DISPLAY) {
-			if (stdu->defined) {
-				/* Unbind the current surface */
-				ret = vmw_stdu_bind_st(dev_priv, stdu, NULL);
-				if (unlikely(ret != 0))
-					goto err_out;
-			}
-			vmw_stdu_unpin_display(stdu);
-			stdu->display_srf = NULL;
-		}
-	}
-
-
-	if (!new_fb) {
-		/* Blanks the display */
-		(void) vmw_stdu_update_st(dev_priv, stdu);
-
-		return 0;
-	}
 
+	if (!stdu->defined || !vmw_kms_crtc_flippable(dev_priv, crtc))
+		return -EINVAL;
 
-	if (stdu->content_fb_type == SAME_AS_DISPLAY) {
-		stdu->display_srf = vmw_framebuffer_to_vfbs(new_fb)->surface;
-		ret = vmw_stdu_pin_display(stdu);
-		if (ret) {
-			stdu->display_srf = NULL;
-			goto err_out;
-		}
+	ret = vmw_stdu_bind_fb(dev_priv, crtc, &crtc->mode, new_fb);
+	if (ret)
+		return ret;
 
-		/* Bind display surface */
-		ret = vmw_stdu_bind_st(dev_priv, stdu, &stdu->display_srf->res);
-		if (unlikely(ret != 0))
-			goto err_unpin_display_and_content;
-	}
+	if (stdu->base.is_implicit)
+		vmw_kms_update_implicit_fb(dev_priv, crtc);
 
-	/* Update display surface: after this point everything is bound */
-	ret = vmw_stdu_update_st(dev_priv, stdu);
-	if (unlikely(ret != 0))
+	vclips.x = crtc->x;
+	vclips.y = crtc->y;
+	vclips.w = crtc->mode.hdisplay;
+	vclips.h = crtc->mode.vdisplay;
+	if (vfb->dmabuf)
+		ret = vmw_kms_stdu_dma(dev_priv, NULL, vfb, NULL, NULL, &vclips,
+				       1, 1, true, false);
+	else
+		ret = vmw_kms_stdu_surface_dirty(dev_priv, vfb, NULL, &vclips,
+						 NULL, 0, 0, 1, 1, NULL);
+	if (ret)
 		return ret;
 
 	if (event) {
@@ -721,14 +699,7 @@ static int vmw_stdu_crtc_page_flip(struct drm_crtc *crtc,
 		vmw_fifo_flush(dev_priv, false);
 	}
 
-	return ret;
-
-err_unpin_display_and_content:
-	vmw_stdu_unpin_display(stdu);
-err_out:
-	crtc->primary->fb = NULL;
-	stdu->content_fb = NULL;
-	return ret;
+	return 0;
 }
 
 
@@ -1138,7 +1109,7 @@ static int vmw_stdu_init(struct vmw_private *dev_priv, unsigned unit)
 	stdu->base.pref_active = (unit == 0);
 	stdu->base.pref_width  = dev_priv->initial_width;
 	stdu->base.pref_height = dev_priv->initial_height;
-	stdu->base.is_implicit = true;
+	stdu->base.is_implicit = false;
 
 	drm_connector_init(dev, connector, &vmw_stdu_connector_funcs,
 			   DRM_MODE_CONNECTOR_VIRTUAL);
@@ -1159,7 +1130,17 @@ static int vmw_stdu_init(struct vmw_private *dev_priv, unsigned unit)
 	drm_object_attach_property(&connector->base,
 				   dev->mode_config.dirty_info_property,
 				   1);
-
+	drm_object_attach_property(&connector->base,
+				   dev_priv->hotplug_mode_update_property, 1);
+	drm_object_attach_property(&connector->base,
+				   dev->mode_config.suggested_x_property, 0);
+	drm_object_attach_property(&connector->base,
+				   dev->mode_config.suggested_y_property, 0);
+	if (dev_priv->implicit_placement_property)
+		drm_object_attach_property
+			(&connector->base,
+			 dev_priv->implicit_placement_property,
+			 stdu->base.is_implicit);
 	return 0;
 }
 
@@ -1224,6 +1205,8 @@ int vmw_kms_stdu_init_display(struct vmw_private *dev_priv)
 
 	dev_priv->active_display_unit = vmw_du_screen_target;
 
+	vmw_kms_create_implicit_placement_property(dev_priv, false);
+
 	for (i = 0; i < VMWGFX_NUM_DISPLAY_UNITS; ++i) {
 		ret = vmw_stdu_init(dev_priv, i);
author	Daniel Vetter <daniel.vetter@ffwll.ch>	2016-04-11 19:21:06 +0200
committer	Daniel Vetter <daniel.vetter@ffwll.ch>	2016-04-11 19:25:13 +0200
commit	39702853197b191bda32315260255053aa3e57f7 (patch)
tree	00185427bd7c5e6a335c9ea99ed7ee65b9ceaa9c /drivers/gpu/drm
parent	fb8621d3bee88badeb25dccce0fb59ad145dba9e (diff)
parent	bf16200689118d19de1b8d2a3c314fc21f5dc7bb (diff)
download	linux-39702853197b191bda32315260255053aa3e57f7.tar.gz linux-39702853197b191bda32315260255053aa3e57f7.tar.bz2 linux-39702853197b191bda32315260255053aa3e57f7.zip