97 files changed, 3741 insertions, 2055 deletions
diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index 54060cd6c419..4e59db1cfb00 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -246,6 +246,18 @@ Display State Buffer
 .. kernel-doc:: drivers/gpu/drm/i915/display/intel_dsb.c
    :internal:
 
+GT Programming
+==============
+
+Multicast/Replicated (MCR) Registers
+------------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+   :doc: GT Multicast/Replicated (MCR) Register Support
+
+.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+   :internal:
+
 Memory Management and Command Submission
 ========================================
 
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 79a1b65527c2..fe7e2105e766 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -744,7 +744,7 @@ static void i830_write_entry(dma_addr_t addr, unsigned int entry,
 	writel_relaxed(addr | pte_flags, intel_private.gtt + entry);
 }
 
-bool intel_enable_gtt(void)
+bool intel_gmch_enable_gtt(void)
 {
 	u8 __iomem *reg;
 
@@ -787,7 +787,7 @@ bool intel_enable_gtt(void)
 
 	return true;
 }
-EXPORT_SYMBOL(intel_enable_gtt);
+EXPORT_SYMBOL(intel_gmch_enable_gtt);
 
 static int i830_setup(void)
 {
@@ -821,8 +821,8 @@ static int intel_fake_agp_free_gatt_table(struct agp_bridge_data *bridge)
 
 static int intel_fake_agp_configure(void)
 {
-	if (!intel_enable_gtt())
-	    return -EIO;
+	if (!intel_gmch_enable_gtt())
+		return -EIO;
 
 	intel_private.clear_fake_agp = true;
 	agp_bridge->gart_bus_addr = intel_private.gma_bus_addr;
@@ -844,20 +844,20 @@ static bool i830_check_flags(unsigned int flags)
 	return false;
 }
 
-void intel_gtt_insert_page(dma_addr_t addr,
-			   unsigned int pg,
-			   unsigned int flags)
+void intel_gmch_gtt_insert_page(dma_addr_t addr,
+				unsigned int pg,
+				unsigned int flags)
 {
 	intel_private.driver->write_entry(addr, pg, flags);
 	readl(intel_private.gtt + pg);
 	if (intel_private.driver->chipset_flush)
 		intel_private.driver->chipset_flush();
 }
-EXPORT_SYMBOL(intel_gtt_insert_page);
+EXPORT_SYMBOL(intel_gmch_gtt_insert_page);
 
-void intel_gtt_insert_sg_entries(struct sg_table *st,
-				 unsigned int pg_start,
-				 unsigned int flags)
+void intel_gmch_gtt_insert_sg_entries(struct sg_table *st,
+				      unsigned int pg_start,
+				      unsigned int flags)
 {
 	struct scatterlist *sg;
 	unsigned int len, m;
@@ -879,13 +879,13 @@ void intel_gtt_insert_sg_entries(struct sg_table *st,
 	if (intel_private.driver->chipset_flush)
 		intel_private.driver->chipset_flush();
 }
-EXPORT_SYMBOL(intel_gtt_insert_sg_entries);
+EXPORT_SYMBOL(intel_gmch_gtt_insert_sg_entries);
 
 #if IS_ENABLED(CONFIG_AGP_INTEL)
-static void intel_gtt_insert_pages(unsigned int first_entry,
-				   unsigned int num_entries,
-				   struct page **pages,
-				   unsigned int flags)
+static void intel_gmch_gtt_insert_pages(unsigned int first_entry,
+					unsigned int num_entries,
+					struct page **pages,
+					unsigned int flags)
 {
 	int i, j;
 
@@ -905,7 +905,7 @@ static int intel_fake_agp_insert_entries(struct agp_memory *mem,
 	if (intel_private.clear_fake_agp) {
 		int start = intel_private.stolen_size / PAGE_SIZE;
 		int end = intel_private.gtt_mappable_entries;
-		intel_gtt_clear_range(start, end - start);
+		intel_gmch_gtt_clear_range(start, end - start);
 		intel_private.clear_fake_agp = false;
 	}
 
@@ -934,12 +934,12 @@ static int intel_fake_agp_insert_entries(struct agp_memory *mem,
 		if (ret != 0)
 			return ret;
 
-		intel_gtt_insert_sg_entries(&st, pg_start, type);
+		intel_gmch_gtt_insert_sg_entries(&st, pg_start, type);
 		mem->sg_list = st.sgl;
 		mem->num_sg = st.nents;
 	} else
-		intel_gtt_insert_pages(pg_start, mem->page_count, mem->pages,
-				       type);
+		intel_gmch_gtt_insert_pages(pg_start, mem->page_count, mem->pages,
+					    type);
 
 out:
 	ret = 0;
@@ -949,7 +949,7 @@ out_err:
 }
 #endif
 
-void intel_gtt_clear_range(unsigned int first_entry, unsigned int num_entries)
+void intel_gmch_gtt_clear_range(unsigned int first_entry, unsigned int num_entries)
 {
 	unsigned int i;
 
@@ -959,7 +959,7 @@ void intel_gtt_clear_range(unsigned int first_entry, unsigned int num_entries)
 	}
 	wmb();
 }
-EXPORT_SYMBOL(intel_gtt_clear_range);
+EXPORT_SYMBOL(intel_gmch_gtt_clear_range);
 
 #if IS_ENABLED(CONFIG_AGP_INTEL)
 static int intel_fake_agp_remove_entries(struct agp_memory *mem,
@@ -968,7 +968,7 @@ static int intel_fake_agp_remove_entries(struct agp_memory *mem,
 	if (mem->page_count == 0)
 		return 0;
 
-	intel_gtt_clear_range(pg_start, mem->page_count);
+	intel_gmch_gtt_clear_range(pg_start, mem->page_count);
 
 	if (intel_private.needs_dmar) {
 		intel_gtt_unmap_memory(mem->sg_list, mem->num_sg);
@@ -1431,22 +1431,22 @@ int intel_gmch_probe(struct pci_dev *bridge_pdev, struct pci_dev *gpu_pdev,
 }
 EXPORT_SYMBOL(intel_gmch_probe);
 
-void intel_gtt_get(u64 *gtt_total,
-		   phys_addr_t *mappable_base,
-		   resource_size_t *mappable_end)
+void intel_gmch_gtt_get(u64 *gtt_total,
+			phys_addr_t *mappable_base,
+			resource_size_t *mappable_end)
 {
 	*gtt_total = intel_private.gtt_total_entries << PAGE_SHIFT;
 	*mappable_base = intel_private.gma_bus_addr;
 	*mappable_end = intel_private.gtt_mappable_entries << PAGE_SHIFT;
 }
-EXPORT_SYMBOL(intel_gtt_get);
+EXPORT_SYMBOL(intel_gmch_gtt_get);
 
-void intel_gtt_chipset_flush(void)
+void intel_gmch_gtt_flush(void)
 {
 	if (intel_private.driver->chipset_flush)
 		intel_private.driver->chipset_flush();
 }
-EXPORT_SYMBOL(intel_gtt_chipset_flush);
+EXPORT_SYMBOL(intel_gmch_gtt_flush);
 
 void intel_gmch_remove(void)
 {
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index c84a9cd8440d..522ef9b4aff3 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -103,6 +103,7 @@ gt-y += \
 	gt/intel_gt_debugfs.o \
 	gt/intel_gt_engines_debugfs.o \
 	gt/intel_gt_irq.o \
+	gt/intel_gt_mcr.o \
 	gt/intel_gt_pm.o \
 	gt/intel_gt_pm_debugfs.o \
 	gt/intel_gt_pm_irq.o \
@@ -129,7 +130,7 @@ gt-y += \
 	gt/shmem_utils.o \
 	gt/sysfs_engines.o
 # x86 intel-gtt module support
-gt-$(CONFIG_X86) += gt/intel_gt_gmch.o
+gt-$(CONFIG_X86) += gt/intel_ggtt_gmch.o
 # autogenerated null render state
 gt-y += \
 	gt/gen6_renderstate.o \
diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c
index fb0e7e79e0cd..ac587647e1f5 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -4,6 +4,7 @@
  */
 
 #include "gem/i915_gem_domain.h"
+#include "gem/i915_gem_internal.h"
 #include "gt/gen8_ppgtt.h"
 
 #include "i915_drv.h"
@@ -127,8 +128,12 @@ struct i915_vma *intel_dpt_pin(struct i915_address_space *vm)
 	struct i915_vma *vma;
 	void __iomem *iomem;
 	struct i915_gem_ww_ctx ww;
+	u64 pin_flags = 0;
 	int err;
 
+	if (i915_gem_object_is_stolen(dpt->obj))
+		pin_flags |= PIN_MAPPABLE;
+
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 	atomic_inc(&i915->gpu_error.pending_fb_pin);
 
@@ -138,7 +143,7 @@ struct i915_vma *intel_dpt_pin(struct i915_address_space *vm)
 			continue;
 
 		vma = i915_gem_object_ggtt_pin_ww(dpt->obj, &ww, NULL, 0, 4096,
-						  HAS_LMEM(i915) ? 0 : PIN_MAPPABLE);
+						  pin_flags);
 		if (IS_ERR(vma)) {
 			err = PTR_ERR(vma);
 			continue;
@@ -248,10 +253,13 @@ intel_dpt_create(struct intel_framebuffer *fb)
 
 	size = round_up(size * sizeof(gen8_pte_t), I915_GTT_PAGE_SIZE);
 
-	if (HAS_LMEM(i915))
-		dpt_obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_CONTIGUOUS);
-	else
+	dpt_obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_CONTIGUOUS);
+	if (IS_ERR(dpt_obj) && i915_ggtt_has_aperture(to_gt(i915)->ggtt))
 		dpt_obj = i915_gem_object_create_stolen(i915, size);
+	if (IS_ERR(dpt_obj) && !HAS_LMEM(i915)) {
+		drm_dbg_kms(&i915->drm, "Allocating dpt from smem\n");
+		dpt_obj = i915_gem_object_create_internal(i915, size);
+	}
 	if (IS_ERR(dpt_obj))
 		return ERR_CAST(dpt_obj);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index ab4c5ab28e4d..dabdfe09f5e5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -933,8 +933,9 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv,
 	case I915_CONTEXT_PARAM_PERSISTENCE:
 		if (args->size)
 			ret = -EINVAL;
-		ret = proto_context_set_persistence(fpriv->dev_priv, pc,
-						    args->value);
+		else
+			ret = proto_context_set_persistence(fpriv->dev_priv, pc,
+							    args->value);
 		break;
 
 	case I915_CONTEXT_PARAM_PROTECTED_CONTENT:
@@ -1367,7 +1368,8 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
 	return engine;
 }
 
-static void kill_engines(struct i915_gem_engines *engines, bool ban)
+static void
+kill_engines(struct i915_gem_engines *engines, bool exit, bool persistent)
 {
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
@@ -1381,9 +1383,15 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 	 */
 	for_each_gem_engine(ce, engines, it) {
 		struct intel_engine_cs *engine;
+		bool skip = false;
 
-		if (ban && intel_context_ban(ce, NULL))
-			continue;
+		if (exit)
+			skip = intel_context_set_exiting(ce);
+		else if (!persistent)
+			skip = intel_context_exit_nonpersistent(ce, NULL);
+
+		if (skip)
+			continue; /* Already marked. */
 
 		/*
 		 * Check the current active state of this context; if we
@@ -1395,7 +1403,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 		engine = active_engine(ce);
 
 		/* First attempt to gracefully cancel the context */
-		if (engine && !__cancel_engine(engine) && ban)
+		if (engine && !__cancel_engine(engine) && (exit || !persistent))
 			/*
 			 * If we are unable to send a preemptive pulse to bump
 			 * the context from the GPU, we have to resort to a full
@@ -1407,8 +1415,6 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 
 static void kill_context(struct i915_gem_context *ctx)
 {
-	bool ban = (!i915_gem_context_is_persistent(ctx) ||
-		    !ctx->i915->params.enable_hangcheck);
 	struct i915_gem_engines *pos, *next;
 
 	spin_lock_irq(&ctx->stale.lock);
@@ -1421,7 +1427,8 @@ static void kill_context(struct i915_gem_context *ctx)
 
 		spin_unlock_irq(&ctx->stale.lock);
 
-		kill_engines(pos, ban);
+		kill_engines(pos, !ctx->i915->params.enable_hangcheck,
+			     i915_gem_context_is_persistent(ctx));
 
 		spin_lock_irq(&ctx->stale.lock);
 		GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
@@ -1467,7 +1474,8 @@ static void engines_idle_release(struct i915_gem_context *ctx,
 
 kill:
 	if (list_empty(&engines->link)) /* raced, already closed */
-		kill_engines(engines, true);
+		kill_engines(engines, true,
+			     i915_gem_context_is_persistent(ctx));
 
 	i915_sw_fence_commit(&engines->fence);
 }
@@ -1875,6 +1883,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt,
 {
 	const struct sseu_dev_info *device = &gt->info.sseu;
 	struct drm_i915_private *i915 = gt->i915;
+	unsigned int dev_subslice_mask = intel_sseu_get_hsw_subslices(device, 0);
 
 	/* No zeros in any field. */
 	if (!user->slice_mask || !user->subslice_mask ||
@@ -1901,7 +1910,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt,
 	if (user->slice_mask & ~device->slice_mask)
 		return -EINVAL;
 
-	if (user->subslice_mask & ~device->subslice_mask[0])
+	if (user->subslice_mask & ~dev_subslice_mask)
 		return -EINVAL;
 
 	if (user->max_eus_per_subslice > device->max_eus_per_subslice)
@@ -1915,7 +1924,7 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt,
 	/* Part specific restrictions. */
 	if (GRAPHICS_VER(i915) == 11) {
 		unsigned int hw_s = hweight8(device->slice_mask);
-		unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]);
+		unsigned int hw_ss_per_s = hweight8(dev_subslice_mask);
 		unsigned int req_s = hweight8(context->slice_mask);
 		unsigned int req_ss = hweight8(context->subslice_mask);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 3e5d6057b3ef..1674b0c5802b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -35,12 +35,12 @@ bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 	if (obj->cache_dirty)
 		return false;
 
-	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
-		return true;
-
 	if (IS_DGFX(i915))
 		return false;
 
+	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+		return true;
+
 	/* Currently in use by HW (display engine)? Keep flushed. */
 	return i915_gem_object_is_framebuffer(obj);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index c326bd2b444f..30fe847c6664 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -999,7 +999,8 @@ static int eb_validate_vmas(struct i915_execbuffer *eb)
 			}
 		}
 
-		err = dma_resv_reserve_fences(vma->obj->base.resv, 1);
+		/* Reserve enough slots to accommodate composite fences */
+		err = dma_resv_reserve_fences(vma->obj->base.resv, eb->num_batches);
 		if (err)
 			return err;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 2e16e91a5a56..4eed3dd90ba8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -670,17 +670,10 @@ fail:
 
 static int init_shmem(struct intel_memory_region *mem)
 {
-	int err;
-
-	err = i915_gemfs_init(mem->i915);
-	if (err) {
-		DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n",
-			 err);
-	}
-
+	i915_gemfs_init(mem->i915);
 	intel_memory_region_set_name(mem, "system");
 
-	return 0; /* Don't error, we can simply fallback to the kernel mnt */
+	return 0; /* We have fallback to the kernel mnt if gemfs init failed. */
 }
 
 static int release_shmem(struct intel_memory_region *mem)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 6a6ff98a8746..1030053571a2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -36,7 +36,7 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 	return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
 }
 
-static int drop_pages(struct drm_i915_gem_object *obj,
+static bool drop_pages(struct drm_i915_gem_object *obj,
 		       unsigned long shrink, bool trylock_vm)
 {
 	unsigned long flags;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 47b5e0e342ab..166d0a4b9e8c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -13,6 +13,8 @@
 #include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_region.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_mcr.h"
+#include "gt/intel_gt_regs.h"
 #include "gt/intel_region_lmem.h"
 #include "i915_drv.h"
 #include "i915_gem_stolen.h"
@@ -834,8 +836,8 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
 	} else {
 		resource_size_t lmem_range;
 
-		lmem_range = intel_gt_read_register(&i915->gt0, XEHPSDV_TILE0_ADDR_RANGE) & 0xFFFF;
-		lmem_size = lmem_range >> XEHPSDV_TILE_LMEM_RANGE_SHIFT;
+		lmem_range = intel_gt_mcr_read_any(&i915->gt0, XEHP_TILE0_ADDR_RANGE) & 0xFFFF;
+		lmem_size = lmem_range >> XEHP_TILE_LMEM_RANGE_SHIFT;
 		lmem_size *= SZ_1G;
 	}
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
index 80ac0db1ae8c..85518b28cd72 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -114,7 +114,7 @@ u32 i915_gem_fence_alignment(struct drm_i915_private *i915, u32 size,
 	return i915_gem_fence_size(i915, size, tiling, stride);
 }
 
-/* Check pitch constriants for all chips & tiling formats */
+/* Check pitch constraints for all chips & tiling formats */
 static bool
 i915_tiling_ok(struct drm_i915_gem_object *obj,
 	       unsigned int tiling, unsigned int stride)
diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c
index ee87874e59dc..46b9a17d6abc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gemfs.c
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c
@@ -11,16 +11,11 @@
 #include "i915_gemfs.h"
 #include "i915_utils.h"
 
-int i915_gemfs_init(struct drm_i915_private *i915)
+void i915_gemfs_init(struct drm_i915_private *i915)
 {
 	char huge_opt[] = "huge=within_size"; /* r/w */
 	struct file_system_type *type;
 	struct vfsmount *gemfs;
-	char *opts;
-
-	type = get_fs_type("tmpfs");
-	if (!type)
-		return -ENODEV;
 
 	/*
 	 * By creating our own shmemfs mountpoint, we can pass in
@@ -28,30 +23,35 @@ int i915_gemfs_init(struct drm_i915_private *i915)
 	 *
 	 * One example, although it is probably better with a per-file
 	 * control, is selecting huge page allocations ("huge=within_size").
-	 * However, we only do so to offset the overhead of iommu lookups
-	 * due to bandwidth issues (slow reads) on Broadwell+.
+	 * However, we only do so on platforms which benefit from it, or to
+	 * offset the overhead of iommu lookups, where with latter it is a net
+	 * win even on platforms which would otherwise see some performance
+	 * regressions such a slow reads issue on Broadwell and Skylake.
 	 */
 
-	opts = NULL;
-	if (i915_vtd_active(i915)) {
-		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
-			opts = huge_opt;
-			drm_info(&i915->drm,
-				 "Transparent Hugepage mode '%s'\n",
-				 opts);
-		} else {
-			drm_notice(&i915->drm,
-				   "Transparent Hugepage support is recommended for optimal performance when IOMMU is enabled!\n");
-		}
-	}
-
-	gemfs = vfs_kern_mount(type, SB_KERNMOUNT, type->name, opts);
+	if (GRAPHICS_VER(i915) < 11 && !i915_vtd_active(i915))
+		return;
+
+	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+		goto err;
+
+	type = get_fs_type("tmpfs");
+	if (!type)
+		goto err;
+
+	gemfs = vfs_kern_mount(type, SB_KERNMOUNT, type->name, huge_opt);
 	if (IS_ERR(gemfs))
-		return PTR_ERR(gemfs);
+		goto err;
 
 	i915->mm.gemfs = gemfs;
-
-	return 0;
+	drm_info(&i915->drm, "Using Transparent Hugepages\n");
+	return;
+
+err:
+	drm_notice(&i915->drm,
+		   "Transparent Hugepage support is recommended for optimal performance%s\n",
+		   GRAPHICS_VER(i915) >= 11 ? " on this platform!" :
+					      " when IOMMU is enabled!");
 }
 
 void i915_gemfs_fini(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.h b/drivers/gpu/drm/i915/gem/i915_gemfs.h
index 2a1e59af3e4a..5d835e44c4f6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gemfs.h
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.h
@@ -9,8 +9,7 @@
 
 struct drm_i915_private;
 
-int i915_gemfs_init(struct drm_i915_private *i915);
-
+void i915_gemfs_init(struct drm_i915_private *i915);
 void i915_gemfs_fini(struct drm_i915_private *i915);
 
 #endif
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index ddd0772fd828..3cfc621ef363 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -6,6 +6,7 @@
 #include "i915_selftest.h"
 
 #include "gt/intel_context.h"
+#include "gt/intel_engine_regs.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt.h"
@@ -18,10 +19,71 @@
 #include "huge_gem_object.h"
 #include "mock_context.h"
 
+#define OW_SIZE 16                      /* in bytes */
+#define F_SUBTILE_SIZE 64               /* in bytes */
+#define F_TILE_WIDTH 128                /* in bytes */
+#define F_TILE_HEIGHT 32                /* in pixels */
+#define F_SUBTILE_WIDTH  OW_SIZE        /* in bytes */
+#define F_SUBTILE_HEIGHT 4              /* in pixels */
+
+static int linear_x_y_to_ftiled_pos(int x, int y, u32 stride, int bpp)
+{
+	int tile_base;
+	int tile_x, tile_y;
+	int swizzle, subtile;
+	int pixel_size = bpp / 8;
+	int pos;
+
+	/*
+	 * Subtile remapping for F tile. Note that map[a]==b implies map[b]==a
+	 * so we can use the same table to tile and until.
+	 */
+	static const u8 f_subtile_map[] = {
+		 0,  1,  2,  3,  8,  9, 10, 11,
+		 4,  5,  6,  7, 12, 13, 14, 15,
+		16, 17, 18, 19, 24, 25, 26, 27,
+		20, 21, 22, 23, 28, 29, 30, 31,
+		32, 33, 34, 35, 40, 41, 42, 43,
+		36, 37, 38, 39, 44, 45, 46, 47,
+		48, 49, 50, 51, 56, 57, 58, 59,
+		52, 53, 54, 55, 60, 61, 62, 63
+	};
+
+	x *= pixel_size;
+	/*
+	 * Where does the 4k tile start (in bytes)?  This is the same for Y and
+	 * F so we can use the Y-tile algorithm to get to that point.
+	 */
+	tile_base =
+		y / F_TILE_HEIGHT * stride * F_TILE_HEIGHT +
+		x / F_TILE_WIDTH * 4096;
+
+	/* Find pixel within tile */
+	tile_x = x % F_TILE_WIDTH;
+	tile_y = y % F_TILE_HEIGHT;
+
+	/* And figure out the subtile within the 4k tile */
+	subtile = tile_y / F_SUBTILE_HEIGHT * 8 + tile_x / F_SUBTILE_WIDTH;
+
+	/* Swizzle the subtile number according to the bspec diagram */
+	swizzle = f_subtile_map[subtile];
+
+	/* Calculate new position */
+	pos = tile_base +
+		swizzle * F_SUBTILE_SIZE +
+		tile_y % F_SUBTILE_HEIGHT * OW_SIZE +
+		tile_x % F_SUBTILE_WIDTH;
+
+	GEM_BUG_ON(!IS_ALIGNED(pos, pixel_size));
+
+	return pos / pixel_size * 4;
+}
+
 enum client_tiling {
 	CLIENT_TILING_LINEAR,
 	CLIENT_TILING_X,
 	CLIENT_TILING_Y,
+	CLIENT_TILING_4,
 	CLIENT_NUM_TILING_TYPES
 };
 
@@ -45,6 +107,36 @@ struct tiled_blits {
 	u32 height;
 };
 
+static bool supports_x_tiling(const struct drm_i915_private *i915)
+{
+	int gen = GRAPHICS_VER(i915);
+
+	if (gen < 12)
+		return true;
+
+	if (!HAS_LMEM(i915) || IS_DG1(i915))
+		return false;
+
+	return true;
+}
+
+static bool fast_blit_ok(const struct blit_buffer *buf)
+{
+	int gen = GRAPHICS_VER(buf->vma->vm->i915);
+
+	if (gen < 9)
+		return false;
+
+	if (gen < 12)
+		return true;
+
+	/* filter out platforms with unsupported X-tile support in fastblit */
+	if (buf->tiling == CLIENT_TILING_X && !supports_x_tiling(buf->vma->vm->i915))
+		return false;
+
+	return true;
+}
+
 static int prepare_blit(const struct tiled_blits *t,
 			struct blit_buffer *dst,
 			struct blit_buffer *src,
@@ -59,51 +151,103 @@ static int prepare_blit(const struct tiled_blits *t,
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
-	*cs++ = i915_mmio_reg_offset(BCS_SWCTRL);
-	cmd = (BCS_SRC_Y | BCS_DST_Y) << 16;
-	if (src->tiling == CLIENT_TILING_Y)
-		cmd |= BCS_SRC_Y;
-	if (dst->tiling == CLIENT_TILING_Y)
-		cmd |= BCS_DST_Y;
-	*cs++ = cmd;
-
-	cmd = MI_FLUSH_DW;
-	if (ver >= 8)
-		cmd++;
-	*cs++ = cmd;
-	*cs++ = 0;
-	*cs++ = 0;
-	*cs++ = 0;
-
-	cmd = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8 - 2);
-	if (ver >= 8)
-		cmd += 2;
-
-	src_pitch = t->width * 4;
-	if (src->tiling) {
-		cmd |= XY_SRC_COPY_BLT_SRC_TILED;
-		src_pitch /= 4;
-	}
+	if (fast_blit_ok(dst) && fast_blit_ok(src)) {
+		struct intel_gt *gt = t->ce->engine->gt;
+		u32 src_tiles = 0, dst_tiles = 0;
+		u32 src_4t = 0, dst_4t = 0;
+
+		/* Need to program BLIT_CCTL if it is not done previously
+		 * before using XY_FAST_COPY_BLT
+		 */
+		*cs++ = MI_LOAD_REGISTER_IMM(1);
+		*cs++ = i915_mmio_reg_offset(BLIT_CCTL(t->ce->engine->mmio_base));
+		*cs++ = (BLIT_CCTL_SRC_MOCS(gt->mocs.uc_index) |
+			 BLIT_CCTL_DST_MOCS(gt->mocs.uc_index));
+
+		src_pitch = t->width; /* in dwords */
+		if (src->tiling == CLIENT_TILING_4) {
+			src_tiles = XY_FAST_COPY_BLT_D0_SRC_TILE_MODE(YMAJOR);
+			src_4t = XY_FAST_COPY_BLT_D1_SRC_TILE4;
+		} else if (src->tiling == CLIENT_TILING_Y) {
+			src_tiles = XY_FAST_COPY_BLT_D0_SRC_TILE_MODE(YMAJOR);
+		} else if (src->tiling == CLIENT_TILING_X) {
+			src_tiles = XY_FAST_COPY_BLT_D0_SRC_TILE_MODE(TILE_X);
+		} else {
+			src_pitch *= 4; /* in bytes */
+		}
 
-	dst_pitch = t->width * 4;
-	if (dst->tiling) {
-		cmd |= XY_SRC_COPY_BLT_DST_TILED;
-		dst_pitch /= 4;
-	}
+		dst_pitch = t->width; /* in dwords */
+		if (dst->tiling == CLIENT_TILING_4) {
+			dst_tiles = XY_FAST_COPY_BLT_D0_DST_TILE_MODE(YMAJOR);
+			dst_4t = XY_FAST_COPY_BLT_D1_DST_TILE4;
+		} else if (dst->tiling == CLIENT_TILING_Y) {
+			dst_tiles = XY_FAST_COPY_BLT_D0_DST_TILE_MODE(YMAJOR);
+		} else if (dst->tiling == CLIENT_TILING_X) {
+			dst_tiles = XY_FAST_COPY_BLT_D0_DST_TILE_MODE(TILE_X);
+		} else {
+			dst_pitch *= 4; /* in bytes */
+		}
 
-	*cs++ = cmd;
-	*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | dst_pitch;
-	*cs++ = 0;
-	*cs++ = t->height << 16 | t->width;
-	*cs++ = lower_32_bits(dst->vma->node.start);
-	if (use_64b_reloc)
+		*cs++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2) |
+			src_tiles | dst_tiles;
+		*cs++ = src_4t | dst_4t | BLT_DEPTH_32 | dst_pitch;
+		*cs++ = 0;
+		*cs++ = t->height << 16 | t->width;
+		*cs++ = lower_32_bits(dst->vma->node.start);
 		*cs++ = upper_32_bits(dst->vma->node.start);
-	*cs++ = 0;
-	*cs++ = src_pitch;
-	*cs++ = lower_32_bits(src->vma->node.start);
-	if (use_64b_reloc)
+		*cs++ = 0;
+		*cs++ = src_pitch;
+		*cs++ = lower_32_bits(src->vma->node.start);
 		*cs++ = upper_32_bits(src->vma->node.start);
+	} else {
+		if (ver >= 6) {
+			*cs++ = MI_LOAD_REGISTER_IMM(1);
+			*cs++ = i915_mmio_reg_offset(BCS_SWCTRL);
+			cmd = (BCS_SRC_Y | BCS_DST_Y) << 16;
+			if (src->tiling == CLIENT_TILING_Y)
+				cmd |= BCS_SRC_Y;
+			if (dst->tiling == CLIENT_TILING_Y)
+				cmd |= BCS_DST_Y;
+			*cs++ = cmd;
+
+			cmd = MI_FLUSH_DW;
+			if (ver >= 8)
+				cmd++;
+			*cs++ = cmd;
+			*cs++ = 0;
+			*cs++ = 0;
+			*cs++ = 0;
+		}
+
+		cmd = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8 - 2);
+		if (ver >= 8)
+			cmd += 2;
+
+		src_pitch = t->width * 4;
+		if (src->tiling) {
+			cmd |= XY_SRC_COPY_BLT_SRC_TILED;
+			src_pitch /= 4;
+		}
+
+		dst_pitch = t->width * 4;
+		if (dst->tiling) {
+			cmd |= XY_SRC_COPY_BLT_DST_TILED;
+			dst_pitch /= 4;
+		}
+
+		*cs++ = cmd;
+		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | dst_pitch;
+		*cs++ = 0;
+		*cs++ = t->height << 16 | t->width;
+		*cs++ = lower_32_bits(dst->vma->node.start);
+		if (use_64b_reloc)
+			*cs++ = upper_32_bits(dst->vma->node.start);
+		*cs++ = 0;
+		*cs++ = src_pitch;
+		*cs++ = lower_32_bits(src->vma->node.start);
+		if (use_64b_reloc)
+			*cs++ = upper_32_bits(src->vma->node.start);
+	}
 
 	*cs++ = MI_BATCH_BUFFER_END;
 
@@ -181,7 +325,13 @@ static int tiled_blits_create_buffers(struct tiled_blits *t,
 
 		t->buffers[i].vma = vma;
 		t->buffers[i].tiling =
-			i915_prandom_u32_max_state(CLIENT_TILING_Y + 1, prng);
+			i915_prandom_u32_max_state(CLIENT_NUM_TILING_TYPES, prng);
+
+		/* Platforms support either TileY or Tile4, not both */
+		if (HAS_4TILE(i915) && t->buffers[i].tiling == CLIENT_TILING_Y)
+			t->buffers[i].tiling = CLIENT_TILING_4;
+		else if (!HAS_4TILE(i915) && t->buffers[i].tiling == CLIENT_TILING_4)
+			t->buffers[i].tiling = CLIENT_TILING_Y;
 	}
 
 	return 0;
@@ -206,7 +356,8 @@ static u64 swizzle_bit(unsigned int bit, u64 offset)
 static u64 tiled_offset(const struct intel_gt *gt,
 			u64 v,
 			unsigned int stride,
-			enum client_tiling tiling)
+			enum client_tiling tiling,
+			int x_pos, int y_pos)
 {
 	unsigned int swizzle;
 	u64 x, y;
@@ -216,7 +367,12 @@ static u64 tiled_offset(const struct intel_gt *gt,
 
 	y = div64_u64_rem(v, stride, &x);
 
-	if (tiling == CLIENT_TILING_X) {
+	if (tiling == CLIENT_TILING_4) {
+		v = linear_x_y_to_ftiled_pos(x_pos, y_pos, stride, 32);
+
+		/* no swizzling for f-tiling */
+		swizzle = I915_BIT_6_SWIZZLE_NONE;
+	} else if (tiling == CLIENT_TILING_X) {
 		v = div64_u64_rem(y, 8, &y) * stride * 8;
 		v += y * 512;
 		v += div64_u64_rem(x, 512, &x) << 12;
@@ -259,6 +415,7 @@ static const char *repr_tiling(enum client_tiling tiling)
 	case CLIENT_TILING_LINEAR: return "linear";
 	case CLIENT_TILING_X: return "X";
 	case CLIENT_TILING_Y: return "Y";
+	case CLIENT_TILING_4: return "F";
 	default: return "unknown";
 	}
 }
@@ -284,7 +441,7 @@ static int verify_buffer(const struct tiled_blits *t,
 	} else {
 		u64 v = tiled_offset(buf->vma->vm->gt,
 				     p * 4, t->width * 4,
-				     buf->tiling);
+				     buf->tiling, x, y);
 
 		if (vaddr[v / sizeof(*vaddr)] != buf->start_val + p)
 			ret = -EINVAL;
@@ -504,6 +661,9 @@ static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)
 	if (err)
 		return err;
 
+	/* Simulating GTT eviction of the same buffer / layout */
+	t->buffers[2].tiling = t->buffers[0].tiling;
+
 	/* Reposition so that we overlap the old addresses, and slightly off */
 	err = tiled_blit(t,
 			 &t->buffers[2], t->hole + t->align,
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 93a67422ca3b..c6ad67b90e8a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -212,7 +212,7 @@ static int __live_parallel_switch1(void *data)
 
 			i915_request_add(rq);
 		}
-		if (i915_request_wait(rq, 0, HZ / 5) < 0)
+		if (i915_request_wait(rq, 0, HZ) < 0)
 			err = -ETIME;
 		i915_request_put(rq);
 		if (err)
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 3e13960615bd..98645797962f 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -197,8 +197,10 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
 
 		flags |= PIPE_CONTROL_CS_STALL;
 
-		if (engine->class == COMPUTE_CLASS)
-			flags &= ~PIPE_CONTROL_3D_FLAGS;
+		if (!HAS_3D_PIPELINE(engine->i915))
+			flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
+		else if (engine->class == COMPUTE_CLASS)
+			flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
 
 		cs = intel_ring_begin(rq, 6);
 		if (IS_ERR(cs))
@@ -227,8 +229,10 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
 
 		flags |= PIPE_CONTROL_CS_STALL;
 
-		if (engine->class == COMPUTE_CLASS)
-			flags &= ~PIPE_CONTROL_3D_FLAGS;
+		if (!HAS_3D_PIPELINE(engine->i915))
+			flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
+		else if (engine->class == COMPUTE_CLASS)
+			flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
 
 		if (!HAS_FLAT_CCS(rq->engine->i915))
 			count = 8 + 4;
@@ -272,7 +276,8 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
 		if (!HAS_FLAT_CCS(rq->engine->i915) &&
 		    (rq->engine->class == VIDEO_DECODE_CLASS ||
 		     rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
-			aux_inv = rq->engine->mask & ~BIT(BCS0);
+			aux_inv = rq->engine->mask &
+				~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
 			if (aux_inv)
 				cmd += 4;
 		}
@@ -716,8 +721,10 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 		/* Wa_1409600907 */
 		flags |= PIPE_CONTROL_DEPTH_STALL;
 
-	if (rq->engine->class == COMPUTE_CLASS)
-		flags &= ~PIPE_CONTROL_3D_FLAGS;
+	if (!HAS_3D_PIPELINE(rq->engine->i915))
+		flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
+	else if (rq->engine->class == COMPUTE_CLASS)
+		flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
 
 	cs = gen12_emit_ggtt_write_rcs(cs,
 				       rq->fence.seqno,
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 4070cb5711d8..654a092ed3d6 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -601,6 +601,30 @@ u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
 	return avg;
 }
 
+bool intel_context_ban(struct intel_context *ce, struct i915_request *rq)
+{
+	bool ret = intel_context_set_banned(ce);
+
+	trace_intel_context_ban(ce);
+
+	if (ce->ops->revoke)
+		ce->ops->revoke(ce, rq,
+				INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS);
+
+	return ret;
+}
+
+bool intel_context_exit_nonpersistent(struct intel_context *ce,
+				      struct i915_request *rq)
+{
+	bool ret = intel_context_set_exiting(ce);
+
+	if (ce->ops->revoke)
+		ce->ops->revoke(ce, rq, ce->engine->props.preempt_timeout_ms);
+
+	return ret;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftest_context.c"
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index b7d3214d2cdd..8e2d70630c49 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -25,6 +25,8 @@
 		     ##__VA_ARGS__);					\
 } while (0)
 
+#define INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS (1)
+
 struct i915_gem_ww_ctx;
 
 void intel_context_init(struct intel_context *ce,
@@ -309,18 +311,27 @@ static inline bool intel_context_set_banned(struct intel_context *ce)
 	return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
 }
 
-static inline bool intel_context_ban(struct intel_context *ce,
-				     struct i915_request *rq)
+bool intel_context_ban(struct intel_context *ce, struct i915_request *rq);
+
+static inline bool intel_context_is_schedulable(const struct intel_context *ce)
 {
-	bool ret = intel_context_set_banned(ce);
+	return !test_bit(CONTEXT_EXITING, &ce->flags) &&
+	       !test_bit(CONTEXT_BANNED, &ce->flags);
+}
 
-	trace_intel_context_ban(ce);
-	if (ce->ops->ban)
-		ce->ops->ban(ce, rq);
+static inline bool intel_context_is_exiting(const struct intel_context *ce)
+{
+	return test_bit(CONTEXT_EXITING, &ce->flags);
+}
 
-	return ret;
+static inline bool intel_context_set_exiting(struct intel_context *ce)
+{
+	return test_and_set_bit(CONTEXT_EXITING, &ce->flags);
 }
 
+bool intel_context_exit_nonpersistent(struct intel_context *ce,
+				      struct i915_request *rq);
+
 static inline bool
 intel_context_force_single_submission(const struct intel_context *ce)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 09f82545789f..d2d75d9c0c8d 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -40,7 +40,8 @@ struct intel_context_ops {
 
 	int (*alloc)(struct intel_context *ce);
 
-	void (*ban)(struct intel_context *ce, struct i915_request *rq);
+	void (*revoke)(struct intel_context *ce, struct i915_request *rq,
+		       unsigned int preempt_timeout_ms);
 
 	int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr);
 	int (*pin)(struct intel_context *ce, void *vaddr);
@@ -122,6 +123,7 @@ struct intel_context {
 #define CONTEXT_GUC_INIT		10
 #define CONTEXT_PERMA_PIN		11
 #define CONTEXT_IS_PARKING		12
+#define CONTEXT_EXITING			13
 
 	struct {
 		u64 timeout_us;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 1431f1e9dbee..04e435bce79b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -201,6 +201,8 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine);
 int intel_engine_stop_cs(struct intel_engine_cs *engine);
 void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);
 
+void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine);
+
 void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);
 
 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 14c6ddbbfde8..283870c65991 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -21,8 +21,9 @@
 #include "intel_engine_user.h"
 #include "intel_execlists_submission.h"
 #include "intel_gt.h"
-#include "intel_gt_requests.h"
+#include "intel_gt_mcr.h"
 #include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
 #include "intel_lrc.h"
 #include "intel_lrc_reg.h"
 #include "intel_reset.h"
@@ -71,6 +72,62 @@ static const struct engine_info intel_engines[] = {
 			{ .graphics_ver = 6, .base = BLT_RING_BASE }
 		},
 	},
+	[BCS1] = {
+		.class = COPY_ENGINE_CLASS,
+		.instance = 1,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS1_RING_BASE }
+		},
+	},
+	[BCS2] = {
+		.class = COPY_ENGINE_CLASS,
+		.instance = 2,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS2_RING_BASE }
+		},
+	},
+	[BCS3] = {
+		.class = COPY_ENGINE_CLASS,
+		.instance = 3,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS3_RING_BASE }
+		},
+	},
+	[BCS4] = {
+		.class = COPY_ENGINE_CLASS,
+		.instance = 4,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS4_RING_BASE }
+		},
+	},
+	[BCS5] = {
+		.class = COPY_ENGINE_CLASS,
+		.instance = 5,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS5_RING_BASE }
+		},
+	},
+	[BCS6] = {
+		.class = COPY_ENGINE_CLASS,
+		.instance = 6,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS6_RING_BASE }
+		},
+	},
+	[BCS7] = {
+		.class = COPY_ENGINE_CLASS,
+		.instance = 7,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS7_RING_BASE }
+		},
+	},
+	[BCS8] = {
+		.class = COPY_ENGINE_CLASS,
+		.instance = 8,
+		.mmio_bases = {
+			{ .graphics_ver = 12, .base = XEHPC_BCS8_RING_BASE }
+		},
+	},
 	[VCS0] = {
 		.class = VIDEO_DECODE_CLASS,
 		.instance = 0,
@@ -334,6 +391,14 @@ static u32 get_reset_domain(u8 ver, enum intel_engine_id id)
 		static const u32 engine_reset_domains[] = {
 			[RCS0]  = GEN11_GRDOM_RENDER,
 			[BCS0]  = GEN11_GRDOM_BLT,
+			[BCS1]  = XEHPC_GRDOM_BLT1,
+			[BCS2]  = XEHPC_GRDOM_BLT2,
+			[BCS3]  = XEHPC_GRDOM_BLT3,
+			[BCS4]  = XEHPC_GRDOM_BLT4,
+			[BCS5]  = XEHPC_GRDOM_BLT5,
+			[BCS6]  = XEHPC_GRDOM_BLT6,
+			[BCS7]  = XEHPC_GRDOM_BLT7,
+			[BCS8]  = XEHPC_GRDOM_BLT8,
 			[VCS0]  = GEN11_GRDOM_MEDIA,
 			[VCS1]  = GEN11_GRDOM_MEDIA2,
 			[VCS2]  = GEN11_GRDOM_MEDIA3,
@@ -610,8 +675,8 @@ static void engine_mask_apply_compute_fuses(struct intel_gt *gt)
 	if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
 		return;
 
-	ccs_mask = intel_slicemask_from_dssmask(intel_sseu_get_compute_subslices(&info->sseu),
-						ss_per_ccs);
+	ccs_mask = intel_slicemask_from_xehp_dssmask(info->sseu.compute_subslice_mask,
+						     ss_per_ccs);
 	/*
 	 * If all DSS in a quadrant are fused off, the corresponding CCS
 	 * engine is not available for use.
@@ -622,6 +687,34 @@ static void engine_mask_apply_compute_fuses(struct intel_gt *gt)
 	}
 }
 
+static void engine_mask_apply_copy_fuses(struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+	struct intel_gt_info *info = &gt->info;
+	unsigned long meml3_mask;
+	unsigned long quad;
+
+	meml3_mask = intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3);
+	meml3_mask = REG_FIELD_GET(GEN12_MEML3_EN_MASK, meml3_mask);
+
+	/*
+	 * Link Copy engines may be fused off according to meml3_mask. Each
+	 * bit is a quad that houses 2 Link Copy and two Sub Copy engines.
+	 */
+	for_each_clear_bit(quad, &meml3_mask, GEN12_MAX_MSLICES) {
+		unsigned int instance = quad * 2 + 1;
+		intel_engine_mask_t mask = GENMASK(_BCS(instance + 1),
+						   _BCS(instance));
+
+		if (mask & info->engine_mask) {
+			drm_dbg(&i915->drm, "bcs%u fused off\n", instance);
+			drm_dbg(&i915->drm, "bcs%u fused off\n", instance + 1);
+
+			info->engine_mask &= ~mask;
+		}
+	}
+}
+
 /*
  * Determine which engines are fused off in our particular hardware.
  * Note that we have a catch-22 situation where we need to be able to access
@@ -704,6 +797,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
 	GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt));
 
 	engine_mask_apply_compute_fuses(gt);
+	engine_mask_apply_copy_fuses(gt);
 
 	return info->engine_mask;
 }
@@ -1282,10 +1376,10 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine,
 	intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
 
 	/*
-	 * Wa_22011802037 : gen12, Prior to doing a reset, ensure CS is
+	 * Wa_22011802037 : gen11, gen12, Prior to doing a reset, ensure CS is
 	 * stopped, set ring stop bit and prefetch disable bit to halt CS
 	 */
-	if (GRAPHICS_VER(engine->i915) == 12)
+	if (IS_GRAPHICS_VER(engine->i915, 11, 12))
 		intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base),
 				      _MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE));
 
@@ -1308,6 +1402,18 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine)
 		return -ENODEV;
 
 	ENGINE_TRACE(engine, "\n");
+	/*
+	 * TODO: Find out why occasionally stopping the CS times out. Seen
+	 * especially with gem_eio tests.
+	 *
+	 * Occasionally trying to stop the cs times out, but does not adversely
+	 * affect functionality. The timeout is set as a config parameter that
+	 * defaults to 100ms. In most cases the follow up operation is to wait
+	 * for pending MI_FORCE_WAKES. The assumption is that this timeout is
+	 * sufficient for any pending MI_FORCEWAKEs to complete. Once root
+	 * caused, the caller must check and handle the return from this
+	 * function.
+	 */
 	if (__intel_engine_stop_cs(engine, 1000, stop_timeout(engine))) {
 		ENGINE_TRACE(engine,
 			     "timed out on STOP_RING -> IDLE; HEAD:%04x, TAIL:%04x\n",
@@ -1334,12 +1440,76 @@ void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
 	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
 }
 
-static u32
-read_subslice_reg(const struct intel_engine_cs *engine,
-		  int slice, int subslice, i915_reg_t reg)
+static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine)
+{
+	static const i915_reg_t _reg[I915_NUM_ENGINES] = {
+		[RCS0] = MSG_IDLE_CS,
+		[BCS0] = MSG_IDLE_BCS,
+		[VCS0] = MSG_IDLE_VCS0,
+		[VCS1] = MSG_IDLE_VCS1,
+		[VCS2] = MSG_IDLE_VCS2,
+		[VCS3] = MSG_IDLE_VCS3,
+		[VCS4] = MSG_IDLE_VCS4,
+		[VCS5] = MSG_IDLE_VCS5,
+		[VCS6] = MSG_IDLE_VCS6,
+		[VCS7] = MSG_IDLE_VCS7,
+		[VECS0] = MSG_IDLE_VECS0,
+		[VECS1] = MSG_IDLE_VECS1,
+		[VECS2] = MSG_IDLE_VECS2,
+		[VECS3] = MSG_IDLE_VECS3,
+		[CCS0] = MSG_IDLE_CS,
+		[CCS1] = MSG_IDLE_CS,
+		[CCS2] = MSG_IDLE_CS,
+		[CCS3] = MSG_IDLE_CS,
+	};
+	u32 val;
+
+	if (!_reg[engine->id].reg) {
+		drm_err(&engine->i915->drm,
+			"MSG IDLE undefined for engine id %u\n", engine->id);
+		return 0;
+	}
+
+	val = intel_uncore_read(engine->uncore, _reg[engine->id]);
+
+	/* bits[29:25] & bits[13:9] >> shift */
+	return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT;
+}
+
+static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask)
 {
-	return intel_uncore_read_with_mcr_steering(engine->uncore, reg,
-						   slice, subslice);
+	int ret;
+
+	/* Ensure GPM receives fw up/down after CS is stopped */
+	udelay(1);
+
+	/* Wait for forcewake request to complete in GPM */
+	ret =  __intel_wait_for_register_fw(gt->uncore,
+					    GEN9_PWRGT_DOMAIN_STATUS,
+					    fw_mask, fw_mask, 5000, 0, NULL);
+
+	/* Ensure CS receives fw ack from GPM */
+	udelay(1);
+
+	if (ret)
+		GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret);
+}
+
+/*
+ * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any
+ * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The
+ * pending status is indicated by bits[13:9] (masked by bits[29:25]) in the
+ * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we
+ * are concerned only with the gt reset here, we use a logical OR of pending
+ * forcewakeups from all reset domains and then wait for them to complete by
+ * querying PWRGT_DOMAIN_STATUS.
+ */
+void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine)
+{
+	u32 fw_pending = __cs_pending_mi_force_wakes(engine);
+
+	if (fw_pending)
+		__gpm_wait_for_fw_complete(engine->gt, fw_pending);
 }
 
 /* NB: please notice the memset */
@@ -1375,28 +1545,33 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
 		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
 			for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) {
 				instdone->sampler[slice][subslice] =
-					read_subslice_reg(engine, slice, subslice,
-							  GEN7_SAMPLER_INSTDONE);
+					intel_gt_mcr_read(engine->gt,
+							  GEN7_SAMPLER_INSTDONE,
+							  slice, subslice);
 				instdone->row[slice][subslice] =
-					read_subslice_reg(engine, slice, subslice,
-							  GEN7_ROW_INSTDONE);
+					intel_gt_mcr_read(engine->gt,
+							  GEN7_ROW_INSTDONE,
+							  slice, subslice);
 			}
 		} else {
 			for_each_instdone_slice_subslice(i915, sseu, slice, subslice) {
 				instdone->sampler[slice][subslice] =
-					read_subslice_reg(engine, slice, subslice,
-							  GEN7_SAMPLER_INSTDONE);
+					intel_gt_mcr_read(engine->gt,
+							  GEN7_SAMPLER_INSTDONE,
+							  slice, subslice);
 				instdone->row[slice][subslice] =
-					read_subslice_reg(engine, slice, subslice,
-							  GEN7_ROW_INSTDONE);
+					intel_gt_mcr_read(engine->gt,
+							  GEN7_ROW_INSTDONE,
+							  slice, subslice);
 			}
 		}
 
 		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) {
 			for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice)
 				instdone->geom_svg[slice][subslice] =
-					read_subslice_reg(engine, slice, subslice,
-							  XEHPG_INSTDONE_GEOM_SVG);
+					intel_gt_mcr_read(engine->gt,
+							  XEHPG_INSTDONE_GEOM_SVG,
+							  slice, subslice);
 		}
 	} else if (GRAPHICS_VER(i915) >= 7) {
 		instdone->instdone =
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
index 75a0c55c5aa5..889f0df3940b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
@@ -8,6 +8,7 @@
 
 #include "i915_reg_defs.h"
 
+#define RING_EXCC(base)				_MMIO((base) + 0x28)
 #define RING_TAIL(base)				_MMIO((base) + 0x30)
 #define   TAIL_ADDR				0x001FFFF8
 #define RING_HEAD(base)				_MMIO((base) + 0x34)
@@ -133,6 +134,8 @@
 		(REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, (dst) << 1) | \
 		 REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, (src) << 1))
 
+#define RING_CSCMDOP(base)			_MMIO((base) + 0x20c)
+
 /*
  * CMD_CCTL read/write fields take a MOCS value and _not_ a table index.
  * The lsb of each can be considered a separate enabling bit for encryption.
@@ -149,6 +152,7 @@
 		 REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1))
 
 #define RING_PREDICATE_RESULT(base)		_MMIO((base) + 0x3b8) /* gen12+ */
+
 #define MI_PREDICATE_RESULT_2(base)		_MMIO((base) + 0x3bc)
 #define   LOWER_SLICE_ENABLED			(1 << 0)
 #define   LOWER_SLICE_DISABLED			(0 << 0)
@@ -172,6 +176,7 @@
 #define	  CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT	REG_BIT(2)
 #define	  CTX_CTRL_INHIBIT_SYN_CTX_SWITCH	REG_BIT(3)
 #define	  GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE	REG_BIT(8)
+#define RING_CTX_SR_CTL(base)			_MMIO((base) + 0x244)
 #define RING_SEMA_WAIT_POLL(base)		_MMIO((base) + 0x24c)
 #define GEN8_RING_PDP_UDW(base, n)		_MMIO((base) + 0x270 + (n) * 8 + 4)
 #define GEN8_RING_PDP_LDW(base, n)		_MMIO((base) + 0x270 + (n) * 8)
@@ -196,6 +201,7 @@
 #define RING_CTX_TIMESTAMP(base)		_MMIO((base) + 0x3a8) /* gen8+ */
 #define RING_PREDICATE_RESULT(base)		_MMIO((base) + 0x3b8)
 #define RING_FORCE_TO_NONPRIV(base, i)		_MMIO(((base) + 0x4D0) + (i) * 4)
+#define   RING_FORCE_TO_NONPRIV_DENY		REG_BIT(30)
 #define   RING_FORCE_TO_NONPRIV_ADDRESS_MASK	REG_GENMASK(25, 2)
 #define   RING_FORCE_TO_NONPRIV_ACCESS_RW	(0 << 28)    /* CFL+ & Gen11+ */
 #define   RING_FORCE_TO_NONPRIV_ACCESS_RD	(1 << 28)
@@ -208,7 +214,9 @@
 #define   RING_FORCE_TO_NONPRIV_RANGE_64	(3 << 0)
 #define   RING_FORCE_TO_NONPRIV_RANGE_MASK	(3 << 0)
 #define   RING_FORCE_TO_NONPRIV_MASK_VALID	\
-	(RING_FORCE_TO_NONPRIV_RANGE_MASK | RING_FORCE_TO_NONPRIV_ACCESS_MASK)
+	(RING_FORCE_TO_NONPRIV_RANGE_MASK | \
+	 RING_FORCE_TO_NONPRIV_ACCESS_MASK | \
+	 RING_FORCE_TO_NONPRIV_DENY)
 #define   RING_MAX_NONPRIV_SLOTS  12
 
 #define RING_EXECLIST_SQ_CONTENTS(base)		_MMIO((base) + 0x510)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 298f2cc7a879..2286f96f5f87 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -35,7 +35,7 @@
 #define OTHER_CLASS		4
 #define COMPUTE_CLASS		5
 #define MAX_ENGINE_CLASS	5
-#define MAX_ENGINE_INSTANCE	7
+#define MAX_ENGINE_INSTANCE	8
 
 #define I915_MAX_SLICES	3
 #define I915_MAX_SUBSLICES 8
@@ -99,6 +99,7 @@ struct i915_ctx_workarounds {
 #define I915_MAX_SFC	(I915_MAX_VCS / 2)
 #define I915_MAX_CCS	4
 #define I915_MAX_RCS	1
+#define I915_MAX_BCS	9
 
 /*
  * Engine IDs definitions.
@@ -107,6 +108,15 @@ struct i915_ctx_workarounds {
 enum intel_engine_id {
 	RCS0 = 0,
 	BCS0,
+	BCS1,
+	BCS2,
+	BCS3,
+	BCS4,
+	BCS5,
+	BCS6,
+	BCS7,
+	BCS8,
+#define _BCS(n) (BCS0 + (n))
 	VCS0,
 	VCS1,
 	VCS2,
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index aa0d2bbbbcc4..4b909cb88cdf 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -480,9 +480,9 @@ __execlists_schedule_in(struct i915_request *rq)
 
 	if (unlikely(intel_context_is_closed(ce) &&
 		     !intel_engine_has_heartbeat(engine)))
-		intel_context_set_banned(ce);
+		intel_context_set_exiting(ce);
 
-	if (unlikely(intel_context_is_banned(ce) || bad_request(rq)))
+	if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq)))
 		reset_active(rq, engine);
 
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
@@ -661,6 +661,16 @@ static inline void execlists_schedule_out(struct i915_request *rq)
 	i915_request_put(rq);
 }
 
+static u32 map_i915_prio_to_lrc_desc_prio(int prio)
+{
+	if (prio > I915_PRIORITY_NORMAL)
+		return GEN12_CTX_PRIORITY_HIGH;
+	else if (prio < I915_PRIORITY_NORMAL)
+		return GEN12_CTX_PRIORITY_LOW;
+	else
+		return GEN12_CTX_PRIORITY_NORMAL;
+}
+
 static u64 execlists_update_context(struct i915_request *rq)
 {
 	struct intel_context *ce = rq->context;
@@ -669,7 +679,7 @@ static u64 execlists_update_context(struct i915_request *rq)
 
 	desc = ce->lrc.desc;
 	if (rq->engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
-		desc |= lrc_desc_priority(rq_prio(rq));
+		desc |= map_i915_prio_to_lrc_desc_prio(rq_prio(rq));
 
 	/*
 	 * WaIdleLiteRestore:bdw,skl
@@ -1233,7 +1243,7 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
 
 	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
 	if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
-		return 1;
+		return INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS;
 
 	return READ_ONCE(engine->props.preempt_timeout_ms);
 }
@@ -2958,6 +2968,13 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
 	ring_set_paused(engine, 1);
 	intel_engine_stop_cs(engine);
 
+	/*
+	 * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need
+	 * to wait for any pending mi force wakeups
+	 */
+	if (IS_GRAPHICS_VER(engine->i915, 11, 12))
+		intel_engine_wait_for_pending_mi_fw(engine);
+
 	engine->execlists.reset_ccid = active_ccid(engine);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index e6b2eb122ad7..15a915bb4088 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -3,16 +3,18 @@
  * Copyright © 2020 Intel Corporation
  */
 
-#include <linux/types.h>
 #include <asm/set_memory.h>
 #include <asm/smp.h>
+#include <linux/types.h>
+#include <linux/stop_machine.h>
 
 #include <drm/i915_drm.h>
+#include <drm/intel-gtt.h>
 
 #include "gem/i915_gem_lmem.h"
 
+#include "intel_ggtt_gmch.h"
 #include "intel_gt.h"
-#include "intel_gt_gmch.h"
 #include "intel_gt_regs.h"
 #include "i915_drv.h"
 #include "i915_scatterlist.h"
@@ -22,6 +24,13 @@
 #include "intel_gtt.h"
 #include "gen8_ppgtt.h"
 
+static inline bool suspend_retains_ptes(struct i915_address_space *vm)
+{
+	return GRAPHICS_VER(vm->i915) >= 8 &&
+		!HAS_LMEM(vm->i915) &&
+		vm->is_ggtt;
+}
+
 static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
 				   unsigned long color,
 				   u64 *start,
@@ -93,6 +102,23 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915)
 	return 0;
 }
 
+/*
+ * Return the value of the last GGTT pte cast to an u64, if
+ * the system is supposed to retain ptes across resume. 0 otherwise.
+ */
+static u64 read_last_pte(struct i915_address_space *vm)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	gen8_pte_t __iomem *ptep;
+
+	if (!suspend_retains_ptes(vm))
+		return 0;
+
+	GEM_BUG_ON(GRAPHICS_VER(vm->i915) < 8);
+	ptep = (typeof(ptep))ggtt->gsm + (ggtt_total_entries(ggtt) - 1);
+	return readq(ptep);
+}
+
 /**
  * i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM
  * @vm: The VM to suspend the mappings for
@@ -156,7 +182,10 @@ retry:
 		i915_gem_object_unlock(obj);
 	}
 
-	vm->clear_range(vm, 0, vm->total);
+	if (!suspend_retains_ptes(vm))
+		vm->clear_range(vm, 0, vm->total);
+	else
+		i915_vm_to_ggtt(vm)->probed_pte = read_last_pte(vm);
 
 	vm->skip_pte_rewrite = save_skip_rewrite;
 
@@ -181,7 +210,7 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
 	spin_unlock_irq(&uncore->lock);
 }
 
-void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
+static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
 {
 	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
 
@@ -218,11 +247,232 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
 	return pte;
 }
 
+static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
+{
+	writeq(pte, addr);
+}
+
+static void gen8_ggtt_insert_page(struct i915_address_space *vm,
+				  dma_addr_t addr,
+				  u64 offset,
+				  enum i915_cache_level level,
+				  u32 flags)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	gen8_pte_t __iomem *pte =
+		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
+
+	gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
+
+	ggtt->invalidate(ggtt);
+}
+
+static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
+				     struct i915_vma_resource *vma_res,
+				     enum i915_cache_level level,
+				     u32 flags)
+{
+	const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	gen8_pte_t __iomem *gte;
+	gen8_pte_t __iomem *end;
+	struct sgt_iter iter;
+	dma_addr_t addr;
+
+	/*
+	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
+	 * not to allow the user to override access to a read only page.
+	 */
+
+	gte = (gen8_pte_t __iomem *)ggtt->gsm;
+	gte += vma_res->start / I915_GTT_PAGE_SIZE;
+	end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
+
+	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
+		gen8_set_pte(gte++, pte_encode | addr);
+	GEM_BUG_ON(gte > end);
+
+	/* Fill the allocated but "unused" space beyond the end of the buffer */
+	while (gte < end)
+		gen8_set_pte(gte++, vm->scratch[0]->encode);
+
+	/*
+	 * We want to flush the TLBs only after we're certain all the PTE
+	 * updates have finished.
+	 */
+	ggtt->invalidate(ggtt);
+}
+
+static void gen6_ggtt_insert_page(struct i915_address_space *vm,
+				  dma_addr_t addr,
+				  u64 offset,
+				  enum i915_cache_level level,
+				  u32 flags)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	gen6_pte_t __iomem *pte =
+		(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
+
+	iowrite32(vm->pte_encode(addr, level, flags), pte);
+
+	ggtt->invalidate(ggtt);
+}
+
+/*
+ * Binds an object into the global gtt with the specified cache level.
+ * The object will be accessible to the GPU via commands whose operands
+ * reference offsets within the global GTT as well as accessible by the GPU
+ * through the GMADR mapped BAR (i915->mm.gtt->gtt).
+ */
+static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
+				     struct i915_vma_resource *vma_res,
+				     enum i915_cache_level level,
+				     u32 flags)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	gen6_pte_t __iomem *gte;
+	gen6_pte_t __iomem *end;
+	struct sgt_iter iter;
+	dma_addr_t addr;
+
+	gte = (gen6_pte_t __iomem *)ggtt->gsm;
+	gte += vma_res->start / I915_GTT_PAGE_SIZE;
+	end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
+
+	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
+		iowrite32(vm->pte_encode(addr, level, flags), gte++);
+	GEM_BUG_ON(gte > end);
+
+	/* Fill the allocated but "unused" space beyond the end of the buffer */
+	while (gte < end)
+		iowrite32(vm->scratch[0]->encode, gte++);
+
+	/*
+	 * We want to flush the TLBs only after we're certain all the PTE
+	 * updates have finished.
+	 */
+	ggtt->invalidate(ggtt);
+}
+
+static void nop_clear_range(struct i915_address_space *vm,
+			    u64 start, u64 length)
+{
+}
+
+static void gen8_ggtt_clear_range(struct i915_address_space *vm,
+				  u64 start, u64 length)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
+	gen8_pte_t __iomem *gtt_base =
+		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
+	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+	int i;
+
+	if (WARN(num_entries > max_entries,
+		 "First entry = %d; Num entries = %d (max=%d)\n",
+		 first_entry, num_entries, max_entries))
+		num_entries = max_entries;
+
+	for (i = 0; i < num_entries; i++)
+		gen8_set_pte(&gtt_base[i], scratch_pte);
+}
+
+static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
+{
+	/*
+	 * Make sure the internal GAM fifo has been cleared of all GTT
+	 * writes before exiting stop_machine(). This guarantees that
+	 * any aperture accesses waiting to start in another process
+	 * cannot back up behind the GTT writes causing a hang.
+	 * The register can be any arbitrary GAM register.
+	 */
+	intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
+}
+
+struct insert_page {
+	struct i915_address_space *vm;
+	dma_addr_t addr;
+	u64 offset;
+	enum i915_cache_level level;
+};
+
+static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
+{
+	struct insert_page *arg = _arg;
+
+	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
+	bxt_vtd_ggtt_wa(arg->vm);
+
+	return 0;
+}
+
+static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
+					  dma_addr_t addr,
+					  u64 offset,
+					  enum i915_cache_level level,
+					  u32 unused)
+{
+	struct insert_page arg = { vm, addr, offset, level };
+
+	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
+}
+
+struct insert_entries {
+	struct i915_address_space *vm;
+	struct i915_vma_resource *vma_res;
+	enum i915_cache_level level;
+	u32 flags;
+};
+
+static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
+{
+	struct insert_entries *arg = _arg;
+
+	gen8_ggtt_insert_entries(arg->vm, arg->vma_res, arg->level, arg->flags);
+	bxt_vtd_ggtt_wa(arg->vm);
+
+	return 0;
+}
+
+static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
+					     struct i915_vma_resource *vma_res,
+					     enum i915_cache_level level,
+					     u32 flags)
+{
+	struct insert_entries arg = { vm, vma_res, level, flags };
+
+	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
+}
+
+static void gen6_ggtt_clear_range(struct i915_address_space *vm,
+				  u64 start, u64 length)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+	gen6_pte_t scratch_pte, __iomem *gtt_base =
+		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
+	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+	int i;
+
+	if (WARN(num_entries > max_entries,
+		 "First entry = %d; Num entries = %d (max=%d)\n",
+		 first_entry, num_entries, max_entries))
+		num_entries = max_entries;
+
+	scratch_pte = vm->scratch[0]->encode;
+	for (i = 0; i < num_entries; i++)
+		iowrite32(scratch_pte, &gtt_base[i]);
+}
+
 void intel_ggtt_bind_vma(struct i915_address_space *vm,
-			  struct i915_vm_pt_stash *stash,
-			  struct i915_vma_resource *vma_res,
-			  enum i915_cache_level cache_level,
-			  u32 flags)
+			 struct i915_vm_pt_stash *stash,
+			 struct i915_vma_resource *vma_res,
+			 enum i915_cache_level cache_level,
+			 u32 flags)
 {
 	u32 pte_flags;
 
@@ -243,7 +493,7 @@ void intel_ggtt_bind_vma(struct i915_address_space *vm,
 }
 
 void intel_ggtt_unbind_vma(struct i915_address_space *vm,
-			    struct i915_vma_resource *vma_res)
+			   struct i915_vma_resource *vma_res)
 {
 	vm->clear_range(vm, vma_res->start, vma_res->vma_size);
 }
@@ -299,6 +549,8 @@ static int init_ggtt(struct i915_ggtt *ggtt)
 	struct drm_mm_node *entry;
 	int ret;
 
+	ggtt->pte_lost = true;
+
 	/*
 	 * GuC requires all resources that we're sharing with it to be placed in
 	 * non-WOPCM memory. If GuC is not present or not in use we still need a
@@ -560,12 +812,326 @@ void i915_ggtt_driver_late_release(struct drm_i915_private *i915)
 	dma_resv_fini(&ggtt->vm._resv);
 }
 
-struct resource intel_pci_resource(struct pci_dev *pdev, int bar)
+static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
+{
+	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
+	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
+	return snb_gmch_ctl << 20;
+}
+
+static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
+{
+	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
+	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
+	if (bdw_gmch_ctl)
+		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
+
+#ifdef CONFIG_X86_32
+	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
+	if (bdw_gmch_ctl > 4)
+		bdw_gmch_ctl = 4;
+#endif
+
+	return bdw_gmch_ctl << 20;
+}
+
+static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
+{
+	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
+	gmch_ctrl &= SNB_GMCH_GGMS_MASK;
+
+	if (gmch_ctrl)
+		return 1 << (20 + gmch_ctrl);
+
+	return 0;
+}
+
+static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915)
+{
+	/*
+	 * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset
+	 * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset
+	 */
+	GEM_BUG_ON(GRAPHICS_VER(i915) < 6);
+	return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M;
+}
+
+static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
+{
+	return gen6_gttmmadr_size(i915) / 2;
+}
+
+static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
+{
+	struct drm_i915_private *i915 = ggtt->vm.i915;
+	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+	phys_addr_t phys_addr;
+	u32 pte_flags;
+	int ret;
+
+	GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915));
+	phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915);
+
+	/*
+	 * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
+	 * will be dropped. For WC mappings in general we have 64 byte burst
+	 * writes when the WC buffer is flushed, so we can't use it, but have to
+	 * resort to an uncached mapping. The WC issue is easily caught by the
+	 * readback check when writing GTT PTE entries.
+	 */
+	if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11)
+		ggtt->gsm = ioremap(phys_addr, size);
+	else
+		ggtt->gsm = ioremap_wc(phys_addr, size);
+	if (!ggtt->gsm) {
+		drm_err(&i915->drm, "Failed to map the ggtt page table\n");
+		return -ENOMEM;
+	}
+
+	kref_init(&ggtt->vm.resv_ref);
+	ret = setup_scratch_page(&ggtt->vm);
+	if (ret) {
+		drm_err(&i915->drm, "Scratch setup failed\n");
+		/* iounmap will also get called at remove, but meh */
+		iounmap(ggtt->gsm);
+		return ret;
+	}
+
+	pte_flags = 0;
+	if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
+		pte_flags |= PTE_LM;
+
+	ggtt->vm.scratch[0]->encode =
+		ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
+				    I915_CACHE_NONE, pte_flags);
+
+	return 0;
+}
+
+static void gen6_gmch_remove(struct i915_address_space *vm)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+
+	iounmap(ggtt->gsm);
+	free_scratch(vm);
+}
+
+static struct resource pci_resource(struct pci_dev *pdev, int bar)
 {
 	return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar),
 					       pci_resource_len(pdev, bar));
 }
 
+static int gen8_gmch_probe(struct i915_ggtt *ggtt)
+{
+	struct drm_i915_private *i915 = ggtt->vm.i915;
+	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+	unsigned int size;
+	u16 snb_gmch_ctl;
+
+	if (!HAS_LMEM(i915)) {
+		ggtt->gmadr = pci_resource(pdev, 2);
+		ggtt->mappable_end = resource_size(&ggtt->gmadr);
+	}
+
+	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
+	if (IS_CHERRYVIEW(i915))
+		size = chv_get_total_gtt_size(snb_gmch_ctl);
+	else
+		size = gen8_get_total_gtt_size(snb_gmch_ctl);
+
+	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
+	ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;
+
+	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
+	ggtt->vm.cleanup = gen6_gmch_remove;
+	ggtt->vm.insert_page = gen8_ggtt_insert_page;
+	ggtt->vm.clear_range = nop_clear_range;
+	if (intel_scanout_needs_vtd_wa(i915))
+		ggtt->vm.clear_range = gen8_ggtt_clear_range;
+
+	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
+
+	/*
+	 * Serialize GTT updates with aperture access on BXT if VT-d is on,
+	 * and always on CHV.
+	 */
+	if (intel_vm_no_concurrent_access_wa(i915)) {
+		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
+		ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
+
+		/*
+		 * Calling stop_machine() version of GGTT update function
+		 * at error capture/reset path will raise lockdep warning.
+		 * Allow calling gen8_ggtt_insert_* directly at reset path
+		 * which is safe from parallel GGTT updates.
+		 */
+		ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
+		ggtt->vm.raw_insert_entries = gen8_ggtt_insert_entries;
+
+		ggtt->vm.bind_async_flags =
+			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
+	}
+
+	ggtt->invalidate = gen8_ggtt_invalidate;
+
+	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
+	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
+
+	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
+
+	setup_private_pat(ggtt->vm.gt->uncore);
+
+	return ggtt_probe_common(ggtt, size);
+}
+
+static u64 snb_pte_encode(dma_addr_t addr,
+			  enum i915_cache_level level,
+			  u32 flags)
+{
+	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+	switch (level) {
+	case I915_CACHE_L3_LLC:
+	case I915_CACHE_LLC:
+		pte |= GEN6_PTE_CACHE_LLC;
+		break;
+	case I915_CACHE_NONE:
+		pte |= GEN6_PTE_UNCACHED;
+		break;
+	default:
+		MISSING_CASE(level);
+	}
+
+	return pte;
+}
+
+static u64 ivb_pte_encode(dma_addr_t addr,
+			  enum i915_cache_level level,
+			  u32 flags)
+{
+	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+	switch (level) {
+	case I915_CACHE_L3_LLC:
+		pte |= GEN7_PTE_CACHE_L3_LLC;
+		break;
+	case I915_CACHE_LLC:
+		pte |= GEN6_PTE_CACHE_LLC;
+		break;
+	case I915_CACHE_NONE:
+		pte |= GEN6_PTE_UNCACHED;
+		break;
+	default:
+		MISSING_CASE(level);
+	}
+
+	return pte;
+}
+
+static u64 byt_pte_encode(dma_addr_t addr,
+			  enum i915_cache_level level,
+			  u32 flags)
+{
+	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+	if (!(flags & PTE_READ_ONLY))
+		pte |= BYT_PTE_WRITEABLE;
+
+	if (level != I915_CACHE_NONE)
+		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
+
+	return pte;
+}
+
+static u64 hsw_pte_encode(dma_addr_t addr,
+			  enum i915_cache_level level,
+			  u32 flags)
+{
+	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+	if (level != I915_CACHE_NONE)
+		pte |= HSW_WB_LLC_AGE3;
+
+	return pte;
+}
+
+static u64 iris_pte_encode(dma_addr_t addr,
+			   enum i915_cache_level level,
+			   u32 flags)
+{
+	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+	switch (level) {
+	case I915_CACHE_NONE:
+		break;
+	case I915_CACHE_WT:
+		pte |= HSW_WT_ELLC_LLC_AGE3;
+		break;
+	default:
+		pte |= HSW_WB_ELLC_LLC_AGE3;
+		break;
+	}
+
+	return pte;
+}
+
+static int gen6_gmch_probe(struct i915_ggtt *ggtt)
+{
+	struct drm_i915_private *i915 = ggtt->vm.i915;
+	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+	unsigned int size;
+	u16 snb_gmch_ctl;
+
+	ggtt->gmadr = pci_resource(pdev, 2);
+	ggtt->mappable_end = resource_size(&ggtt->gmadr);
+
+	/*
+	 * 64/512MB is the current min/max we actually know of, but this is
+	 * just a coarse sanity check.
+	 */
+	if (ggtt->mappable_end < (64 << 20) ||
+	    ggtt->mappable_end > (512 << 20)) {
+		drm_err(&i915->drm, "Unknown GMADR size (%pa)\n",
+			&ggtt->mappable_end);
+		return -ENXIO;
+	}
+
+	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
+
+	size = gen6_get_total_gtt_size(snb_gmch_ctl);
+	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
+
+	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
+
+	ggtt->vm.clear_range = nop_clear_range;
+	if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
+		ggtt->vm.clear_range = gen6_ggtt_clear_range;
+	ggtt->vm.insert_page = gen6_ggtt_insert_page;
+	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
+	ggtt->vm.cleanup = gen6_gmch_remove;
+
+	ggtt->invalidate = gen6_ggtt_invalidate;
+
+	if (HAS_EDRAM(i915))
+		ggtt->vm.pte_encode = iris_pte_encode;
+	else if (IS_HASWELL(i915))
+		ggtt->vm.pte_encode = hsw_pte_encode;
+	else if (IS_VALLEYVIEW(i915))
+		ggtt->vm.pte_encode = byt_pte_encode;
+	else if (GRAPHICS_VER(i915) >= 7)
+		ggtt->vm.pte_encode = ivb_pte_encode;
+	else
+		ggtt->vm.pte_encode = snb_pte_encode;
+
+	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
+	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
+
+	return ggtt_probe_common(ggtt, size);
+}
+
 static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
 {
 	struct drm_i915_private *i915 = gt->i915;
@@ -576,12 +1142,13 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
 	ggtt->vm.dma = i915->drm.dev;
 	dma_resv_init(&ggtt->vm._resv);
 
-	if (GRAPHICS_VER(i915) <= 5)
-		ret = intel_gt_gmch_gen5_probe(ggtt);
-	else if (GRAPHICS_VER(i915) < 8)
-		ret = intel_gt_gmch_gen6_probe(ggtt);
+	if (GRAPHICS_VER(i915) >= 8)
+		ret = gen8_gmch_probe(ggtt);
+	else if (GRAPHICS_VER(i915) >= 6)
+		ret = gen6_gmch_probe(ggtt);
 	else
-		ret = intel_gt_gmch_gen8_probe(ggtt);
+		ret = intel_ggtt_gmch_probe(ggtt);
+
 	if (ret) {
 		dma_resv_fini(&ggtt->vm._resv);
 		return ret;
@@ -635,7 +1202,10 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
 
 int i915_ggtt_enable_hw(struct drm_i915_private *i915)
 {
-	return intel_gt_gmch_gen5_enable_hw(i915);
+	if (GRAPHICS_VER(i915) < 6)
+		return intel_ggtt_gmch_enable_hw(i915);
+
+	return 0;
 }
 
 void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
@@ -675,11 +1245,20 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm)
 {
 	struct i915_vma *vma;
 	bool write_domain_objs = false;
+	bool retained_ptes;
 
 	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
 
-	/* First fill our portion of the GTT with scratch pages */
-	vm->clear_range(vm, 0, vm->total);
+	/*
+	 * First fill our portion of the GTT with scratch pages if
+	 * they were not retained across suspend.
+	 */
+	retained_ptes = suspend_retains_ptes(vm) &&
+		!i915_vm_to_ggtt(vm)->pte_lost &&
+		!GEM_WARN_ON(i915_vm_to_ggtt(vm)->probed_pte != read_last_pte(vm));
+
+	if (!retained_ptes)
+		vm->clear_range(vm, 0, vm->total);
 
 	/* clflush objects bound into the GGTT and rebind them. */
 	list_for_each_entry(vma, &vm->bound_list, vm_link) {
@@ -688,9 +1267,10 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm)
 			atomic_read(&vma->flags) & I915_VMA_BIND_MASK;
 
 		GEM_BUG_ON(!was_bound);
-		vma->ops->bind_vma(vm, NULL, vma->resource,
-				   obj ? obj->cache_level : 0,
-				   was_bound);
+		if (!retained_ptes)
+			vma->ops->bind_vma(vm, NULL, vma->resource,
+					   obj ? obj->cache_level : 0,
+					   was_bound);
 		if (obj) { /* only used during resume => exclusive access */
 			write_domain_objs |= fetch_and_zero(&obj->write_domain);
 			obj->read_domains |= I915_GEM_DOMAIN_GTT;
@@ -718,3 +1298,8 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
 
 	intel_ggtt_restore_fences(ggtt);
 }
+
+void i915_ggtt_mark_pte_lost(struct drm_i915_private *i915, bool val)
+{
+	to_gt(i915)->ggtt->pte_lost = val;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
new file mode 100644
index 000000000000..4e2163a1aa46
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "intel_ggtt_gmch.h"
+
+#include <drm/intel-gtt.h>
+#include <drm/i915_drm.h>
+
+#include <linux/agp_backend.h>
+
+#include "i915_drv.h"
+#include "i915_utils.h"
+#include "intel_gtt.h"
+#include "intel_gt_regs.h"
+#include "intel_gt.h"
+
+static void gmch_ggtt_insert_page(struct i915_address_space *vm,
+				  dma_addr_t addr,
+				  u64 offset,
+				  enum i915_cache_level cache_level,
+				  u32 unused)
+{
+	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+
+	intel_gmch_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
+}
+
+static void gmch_ggtt_insert_entries(struct i915_address_space *vm,
+				     struct i915_vma_resource *vma_res,
+				     enum i915_cache_level cache_level,
+				     u32 unused)
+{
+	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+
+	intel_gmch_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT,
+					 flags);
+}
+
+static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
+{
+	intel_gmch_gtt_flush();
+}
+
+static void gmch_ggtt_clear_range(struct i915_address_space *vm,
+				  u64 start, u64 length)
+{
+	intel_gmch_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
+}
+
+static void gmch_ggtt_remove(struct i915_address_space *vm)
+{
+	intel_gmch_remove();
+}
+
+/*
+ * Certain Gen5 chipsets require idling the GPU before unmapping anything from
+ * the GTT when VT-d is enabled.
+ */
+static bool needs_idle_maps(struct drm_i915_private *i915)
+{
+	/*
+	 * Query intel_iommu to see if we need the workaround. Presumably that
+	 * was loaded first.
+	 */
+	if (!i915_vtd_active(i915))
+		return false;
+
+	if (GRAPHICS_VER(i915) == 5 && IS_MOBILE(i915))
+		return true;
+
+	return false;
+}
+
+int intel_ggtt_gmch_probe(struct i915_ggtt *ggtt)
+{
+	struct drm_i915_private *i915 = ggtt->vm.i915;
+	phys_addr_t gmadr_base;
+	int ret;
+
+	ret = intel_gmch_probe(i915->bridge_dev, to_pci_dev(i915->drm.dev), NULL);
+	if (!ret) {
+		drm_err(&i915->drm, "failed to set up gmch\n");
+		return -EIO;
+	}
+
+	intel_gmch_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);
+
+	ggtt->gmadr =
+		(struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
+
+	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
+
+	if (needs_idle_maps(i915)) {
+		drm_notice(&i915->drm,
+			   "Flushing DMA requests before IOMMU unmaps; performance may be degraded\n");
+		ggtt->do_idle_maps = true;
+	}
+
+	ggtt->vm.insert_page = gmch_ggtt_insert_page;
+	ggtt->vm.insert_entries = gmch_ggtt_insert_entries;
+	ggtt->vm.clear_range = gmch_ggtt_clear_range;
+	ggtt->vm.cleanup = gmch_ggtt_remove;
+
+	ggtt->invalidate = gmch_ggtt_invalidate;
+
+	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
+	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
+
+	if (unlikely(ggtt->do_idle_maps))
+		drm_notice(&i915->drm,
+			   "Applying Ironlake quirks for intel_iommu\n");
+
+	return 0;
+}
+
+int intel_ggtt_gmch_enable_hw(struct drm_i915_private *i915)
+{
+	if (!intel_gmch_enable_gtt())
+		return -EIO;
+
+	return 0;
+}
+
+void intel_ggtt_gmch_flush(void)
+{
+	intel_gmch_gtt_flush();
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.h b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.h
new file mode 100644
index 000000000000..370bf321b4e2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_gmch.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __INTEL_GGTT_GMCH_H__
+#define __INTEL_GGTT_GMCH_H__
+
+#include "intel_gtt.h"
+
+/* For x86 platforms */
+#if IS_ENABLED(CONFIG_X86)
+
+void intel_ggtt_gmch_flush(void);
+int intel_ggtt_gmch_enable_hw(struct drm_i915_private *i915);
+int intel_ggtt_gmch_probe(struct i915_ggtt *ggtt);
+
+/* Stubs for non-x86 platforms */
+#else
+
+static inline void intel_ggtt_gmch_flush(void) { }
+static inline int intel_ggtt_gmch_enable_hw(struct drm_i915_private *i915) { return -ENODEV; }
+static inline int intel_ggtt_gmch_probe(struct i915_ggtt *ggtt) { return -ENODEV; }
+
+#endif
+
+#endif /* __INTEL_GGTT_GMCH_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 556bca3be804..d4e9702d3c8e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -236,6 +236,28 @@
 #define   XY_FAST_COLOR_BLT_DW		16
 #define   XY_FAST_COLOR_BLT_MOCS_MASK	GENMASK(27, 21)
 #define   XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
+
+#define   XY_FAST_COPY_BLT_D0_SRC_TILING_MASK     REG_GENMASK(21, 20)
+#define   XY_FAST_COPY_BLT_D0_DST_TILING_MASK     REG_GENMASK(14, 13)
+#define   XY_FAST_COPY_BLT_D0_SRC_TILE_MODE(mode)  \
+	REG_FIELD_PREP(XY_FAST_COPY_BLT_D0_SRC_TILING_MASK, mode)
+#define   XY_FAST_COPY_BLT_D0_DST_TILE_MODE(mode)  \
+	REG_FIELD_PREP(XY_FAST_COPY_BLT_D0_DST_TILING_MASK, mode)
+#define     LINEAR				0
+#define     TILE_X				0x1
+#define     XMAJOR				0x1
+#define     YMAJOR				0x2
+#define     TILE_64			0x3
+#define   XY_FAST_COPY_BLT_D1_SRC_TILE4	REG_BIT(31)
+#define   XY_FAST_COPY_BLT_D1_DST_TILE4	REG_BIT(30)
+#define BLIT_CCTL_SRC_MOCS_MASK  REG_GENMASK(6, 0)
+#define BLIT_CCTL_DST_MOCS_MASK  REG_GENMASK(14, 8)
+/* Note:  MOCS value = (index << 1) */
+#define BLIT_CCTL_SRC_MOCS(idx) \
+	REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, (idx) << 1)
+#define BLIT_CCTL_DST_MOCS(idx) \
+	REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, (idx) << 1)
+
 #define SRC_COPY_BLT_CMD		(2 << 29 | 0x43 << 22)
 #define GEN9_XY_FAST_COPY_BLT_CMD	(2 << 29 | 0x42 << 22)
 #define XY_SRC_COPY_BLT_CMD		(2 << 29 | 0x53 << 22)
@@ -288,8 +310,11 @@
 #define   PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1<<0)
 #define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
 
-/* 3D-related flags can't be set on compute engine */
-#define PIPE_CONTROL_3D_FLAGS (\
+/*
+ * 3D-related flags that can't be set on _engines_ that lack access to the 3D
+ * pipeline (i.e., CCS engines).
+ */
+#define PIPE_CONTROL_3D_ENGINE_FLAGS (\
 		PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \
 		PIPE_CONTROL_DEPTH_CACHE_FLUSH | \
 		PIPE_CONTROL_TILE_CACHE_FLUSH | \
@@ -300,6 +325,14 @@
 		PIPE_CONTROL_VF_CACHE_INVALIDATE | \
 		PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET)
 
+/* 3D-related flags that can't be set on _platforms_ that lack a 3D pipeline */
+#define PIPE_CONTROL_3D_ARCH_FLAGS ( \
+		PIPE_CONTROL_3D_ENGINE_FLAGS | \
+		PIPE_CONTROL_INDIRECT_STATE_DISABLE | \
+		PIPE_CONTROL_FLUSH_ENABLE | \
+		PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
+		PIPE_CONTROL_DC_FLUSH_ENABLE)
+
 #define MI_MATH(x)			MI_INSTR(0x1a, (x) - 1)
 #define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
 /* Opcodes for MI_MATH_INSTR */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 53307ca0eed0..8da3314bb6bf 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -4,6 +4,7 @@
  */
 
 #include <drm/drm_managed.h>
+#include <drm/intel-gtt.h>
 
 #include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_lmem.h"
@@ -12,11 +13,12 @@
 #include "i915_drv.h"
 #include "intel_context.h"
 #include "intel_engine_regs.h"
+#include "intel_ggtt_gmch.h"
 #include "intel_gt.h"
 #include "intel_gt_buffer_pool.h"
 #include "intel_gt_clock_utils.h"
 #include "intel_gt_debugfs.h"
-#include "intel_gt_gmch.h"
+#include "intel_gt_mcr.h"
 #include "intel_gt_pm.h"
 #include "intel_gt_regs.h"
 #include "intel_gt_requests.h"
@@ -102,78 +104,13 @@ int intel_gt_assign_ggtt(struct intel_gt *gt)
 	return gt->ggtt ? 0 : -ENOMEM;
 }
 
-static const char * const intel_steering_types[] = {
-	"L3BANK",
-	"MSLICE",
-	"LNCF",
-};
-
-static const struct intel_mmio_range icl_l3bank_steering_table[] = {
-	{ 0x00B100, 0x00B3FF },
-	{},
-};
-
-static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = {
-	{ 0x004000, 0x004AFF },
-	{ 0x00C800, 0x00CFFF },
-	{ 0x00DD00, 0x00DDFF },
-	{ 0x00E900, 0x00FFFF }, /* 0xEA00 - OxEFFF is unused */
-	{},
-};
-
-static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = {
-	{ 0x00B000, 0x00B0FF },
-	{ 0x00D800, 0x00D8FF },
-	{},
-};
-
-static const struct intel_mmio_range dg2_lncf_steering_table[] = {
-	{ 0x00B000, 0x00B0FF },
-	{ 0x00D880, 0x00D8FF },
-	{},
-};
-
-static u16 slicemask(struct intel_gt *gt, int count)
-{
-	u64 dss_mask = intel_sseu_get_subslices(&gt->info.sseu, 0);
-
-	return intel_slicemask_from_dssmask(dss_mask, count);
-}
-
 int intel_gt_init_mmio(struct intel_gt *gt)
 {
-	struct drm_i915_private *i915 = gt->i915;
-
 	intel_gt_init_clock_frequency(gt);
 
 	intel_uc_init_mmio(&gt->uc);
 	intel_sseu_info_init(gt);
-
-	/*
-	 * An mslice is unavailable only if both the meml3 for the slice is
-	 * disabled *and* all of the DSS in the slice (quadrant) are disabled.
-	 */
-	if (HAS_MSLICES(i915))
-		gt->info.mslice_mask =
-			slicemask(gt, GEN_DSS_PER_MSLICE) |
-			(intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
-			 GEN12_MEML3_EN_MASK);
-
-	if (IS_DG2(i915)) {
-		gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
-		gt->steering_table[LNCF] = dg2_lncf_steering_table;
-	} else if (IS_XEHPSDV(i915)) {
-		gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
-		gt->steering_table[LNCF] = xehpsdv_lncf_steering_table;
-	} else if (GRAPHICS_VER(i915) >= 11 &&
-		   GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) {
-		gt->steering_table[L3BANK] = icl_l3bank_steering_table;
-		gt->info.l3bank_mask =
-			~intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
-			GEN10_L3BANK_MASK;
-	} else if (HAS_MSLICES(i915)) {
-		MISSING_CASE(INTEL_INFO(i915)->platform);
-	}
+	intel_gt_mcr_init(gt);
 
 	return intel_engines_init_mmio(gt);
 }
@@ -451,7 +388,7 @@ void intel_gt_chipset_flush(struct intel_gt *gt)
 {
 	wmb();
 	if (GRAPHICS_VER(gt->i915) < 6)
-		intel_gt_gmch_gen5_chipset_flush(gt);
+		intel_ggtt_gmch_flush();
 }
 
 void intel_gt_driver_register(struct intel_gt *gt)
@@ -785,6 +722,7 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
 {
 	intel_wakeref_t wakeref;
 
+	intel_gt_sysfs_unregister(gt);
 	intel_rps_driver_unregister(&gt->rps);
 	intel_gsc_fini(&gt->gsc);
 
@@ -834,200 +772,6 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915)
 	}
 }
 
-/**
- * intel_gt_reg_needs_read_steering - determine whether a register read
- *     requires explicit steering
- * @gt: GT structure
- * @reg: the register to check steering requirements for
- * @type: type of multicast steering to check
- *
- * Determines whether @reg needs explicit steering of a specific type for
- * reads.
- *
- * Returns false if @reg does not belong to a register range of the given
- * steering type, or if the default (subslice-based) steering IDs are suitable
- * for @type steering too.
- */
-static bool intel_gt_reg_needs_read_steering(struct intel_gt *gt,
-					     i915_reg_t reg,
-					     enum intel_steering_type type)
-{
-	const u32 offset = i915_mmio_reg_offset(reg);
-	const struct intel_mmio_range *entry;
-
-	if (likely(!intel_gt_needs_read_steering(gt, type)))
-		return false;
-
-	for (entry = gt->steering_table[type]; entry->end; entry++) {
-		if (offset >= entry->start && offset <= entry->end)
-			return true;
-	}
-
-	return false;
-}
-
-/**
- * intel_gt_get_valid_steering - determines valid IDs for a class of MCR steering
- * @gt: GT structure
- * @type: multicast register type
- * @sliceid: Slice ID returned
- * @subsliceid: Subslice ID returned
- *
- * Determines sliceid and subsliceid values that will steer reads
- * of a specific multicast register class to a valid value.
- */
-static void intel_gt_get_valid_steering(struct intel_gt *gt,
-					enum intel_steering_type type,
-					u8 *sliceid, u8 *subsliceid)
-{
-	switch (type) {
-	case L3BANK:
-		GEM_DEBUG_WARN_ON(!gt->info.l3bank_mask); /* should be impossible! */
-
-		*sliceid = 0;		/* unused */
-		*subsliceid = __ffs(gt->info.l3bank_mask);
-		break;
-	case MSLICE:
-		GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */
-
-		*sliceid = __ffs(gt->info.mslice_mask);
-		*subsliceid = 0;	/* unused */
-		break;
-	case LNCF:
-		GEM_DEBUG_WARN_ON(!gt->info.mslice_mask); /* should be impossible! */
-
-		/*
-		 * An LNCF is always present if its mslice is present, so we
-		 * can safely just steer to LNCF 0 in all cases.
-		 */
-		*sliceid = __ffs(gt->info.mslice_mask) << 1;
-		*subsliceid = 0;	/* unused */
-		break;
-	default:
-		MISSING_CASE(type);
-		*sliceid = 0;
-		*subsliceid = 0;
-	}
-}
-
-/**
- * intel_gt_read_register_fw - reads a GT register with support for multicast
- * @gt: GT structure
- * @reg: register to read
- *
- * This function will read a GT register.  If the register is a multicast
- * register, the read will be steered to a valid instance (i.e., one that
- * isn't fused off or powered down by power gating).
- *
- * Returns the value from a valid instance of @reg.
- */
-u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg)
-{
-	int type;
-	u8 sliceid, subsliceid;
-
-	for (type = 0; type < NUM_STEERING_TYPES; type++) {
-		if (intel_gt_reg_needs_read_steering(gt, reg, type)) {
-			intel_gt_get_valid_steering(gt, type, &sliceid,
-						    &subsliceid);
-			return intel_uncore_read_with_mcr_steering_fw(gt->uncore,
-								      reg,
-								      sliceid,
-								      subsliceid);
-		}
-	}
-
-	return intel_uncore_read_fw(gt->uncore, reg);
-}
-
-/**
- * intel_gt_get_valid_steering_for_reg - get a valid steering for a register
- * @gt: GT structure
- * @reg: register for which the steering is required
- * @sliceid: return variable for slice steering
- * @subsliceid: return variable for subslice steering
- *
- * This function returns a slice/subslice pair that is guaranteed to work for
- * read steering of the given register. Note that a value will be returned even
- * if the register is not replicated and therefore does not actually require
- * steering.
- */
-void intel_gt_get_valid_steering_for_reg(struct intel_gt *gt, i915_reg_t reg,
-					 u8 *sliceid, u8 *subsliceid)
-{
-	int type;
-
-	for (type = 0; type < NUM_STEERING_TYPES; type++) {
-		if (intel_gt_reg_needs_read_steering(gt, reg, type)) {
-			intel_gt_get_valid_steering(gt, type, sliceid,
-						    subsliceid);
-			return;
-		}
-	}
-
-	*sliceid = gt->default_steering.groupid;
-	*subsliceid = gt->default_steering.instanceid;
-}
-
-u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg)
-{
-	int type;
-	u8 sliceid, subsliceid;
-
-	for (type = 0; type < NUM_STEERING_TYPES; type++) {
-		if (intel_gt_reg_needs_read_steering(gt, reg, type)) {
-			intel_gt_get_valid_steering(gt, type, &sliceid,
-						    &subsliceid);
-			return intel_uncore_read_with_mcr_steering(gt->uncore,
-								   reg,
-								   sliceid,
-								   subsliceid);
-		}
-	}
-
-	return intel_uncore_read(gt->uncore, reg);
-}
-
-static void report_steering_type(struct drm_printer *p,
-				 struct intel_gt *gt,
-				 enum intel_steering_type type,
-				 bool dump_table)
-{
-	const struct intel_mmio_range *entry;
-	u8 slice, subslice;
-
-	BUILD_BUG_ON(ARRAY_SIZE(intel_steering_types) != NUM_STEERING_TYPES);
-
-	if (!gt->steering_table[type]) {
-		drm_printf(p, "%s steering: uses default steering\n",
-			   intel_steering_types[type]);
-		return;
-	}
-
-	intel_gt_get_valid_steering(gt, type, &slice, &subslice);
-	drm_printf(p, "%s steering: sliceid=0x%x, subsliceid=0x%x\n",
-		   intel_steering_types[type], slice, subslice);
-
-	if (!dump_table)
-		return;
-
-	for (entry = gt->steering_table[type]; entry->end; entry++)
-		drm_printf(p, "\t0x%06x - 0x%06x\n", entry->start, entry->end);
-}
-
-void intel_gt_report_steering(struct drm_printer *p, struct intel_gt *gt,
-			      bool dump_table)
-{
-	drm_printf(p, "Default steering: sliceid=0x%x, subsliceid=0x%x\n",
-		   gt->default_steering.groupid,
-		   gt->default_steering.instanceid);
-
-	if (HAS_MSLICES(gt->i915)) {
-		report_steering_type(p, gt, MSLICE, dump_table);
-		report_steering_type(p, gt, LNCF, dump_table);
-	}
-}
-
 static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr)
 {
 	int ret;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 44c6cb63ccbc..82d6f248d876 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -13,13 +13,6 @@
 struct drm_i915_private;
 struct drm_printer;
 
-struct insert_entries {
-	struct i915_address_space *vm;
-	struct i915_vma_resource *vma_res;
-	enum i915_cache_level level;
-	u32 flags;
-};
-
 #define GT_TRACE(gt, fmt, ...) do {					\
 	const struct intel_gt *gt__ __maybe_unused = (gt);		\
 	GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev),		\
@@ -93,21 +86,6 @@ static inline bool intel_gt_is_wedged(const struct intel_gt *gt)
 	return unlikely(test_bit(I915_WEDGED, &gt->reset.flags));
 }
 
-static inline bool intel_gt_needs_read_steering(struct intel_gt *gt,
-						enum intel_steering_type type)
-{
-	return gt->steering_table[type];
-}
-
-void intel_gt_get_valid_steering_for_reg(struct intel_gt *gt, i915_reg_t reg,
-					 u8 *sliceid, u8 *subsliceid);
-
-u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg);
-u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg);
-
-void intel_gt_report_steering(struct drm_printer *p, struct intel_gt *gt,
-			      bool dump_table);
-
 int intel_gt_probe_all(struct drm_i915_private *i915);
 int intel_gt_tiles_init(struct drm_i915_private *i915);
 void intel_gt_release_all(struct drm_i915_private *i915);
@@ -125,6 +103,4 @@ void intel_gt_watchdog_work(struct work_struct *work);
 
 void intel_gt_invalidate_tlbs(struct intel_gt *gt);
 
-struct resource intel_pci_resource(struct pci_dev *pdev, int bar);
-
 #endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
index d886fdc2c694..dd53641f3637 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
@@ -9,6 +9,7 @@
 #include "intel_gt.h"
 #include "intel_gt_debugfs.h"
 #include "intel_gt_engines_debugfs.h"
+#include "intel_gt_mcr.h"
 #include "intel_gt_pm_debugfs.h"
 #include "intel_sseu_debugfs.h"
 #include "pxp/intel_pxp_debugfs.h"
@@ -64,7 +65,7 @@ static int steering_show(struct seq_file *m, void *data)
 	struct drm_printer p = drm_seq_file_printer(m);
 	struct intel_gt *gt = m->private;
 
-	intel_gt_report_steering(&p, gt, true);
+	intel_gt_mcr_report_steering(&p, gt, true);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.c b/drivers/gpu/drm/i915/gt/intel_gt_gmch.c
deleted file mode 100644
index 18e488672d1b..000000000000
--- a/drivers/gpu/drm/i915/gt/intel_gt_gmch.c
+++ /dev/null
@@ -1,654 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2022 Intel Corporation
- */
-
-#include <drm/intel-gtt.h>
-#include <drm/i915_drm.h>
-
-#include <linux/agp_backend.h>
-#include <linux/stop_machine.h>
-
-#include "i915_drv.h"
-#include "intel_gt_gmch.h"
-#include "intel_gt_regs.h"
-#include "intel_gt.h"
-#include "i915_utils.h"
-
-#include "gen8_ppgtt.h"
-
-struct insert_page {
-	struct i915_address_space *vm;
-	dma_addr_t addr;
-	u64 offset;
-	enum i915_cache_level level;
-};
-
-static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
-{
-	writeq(pte, addr);
-}
-
-static void nop_clear_range(struct i915_address_space *vm,
-			    u64 start, u64 length)
-{
-}
-
-static u64 snb_pte_encode(dma_addr_t addr,
-			  enum i915_cache_level level,
-			  u32 flags)
-{
-	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
-
-	switch (level) {
-	case I915_CACHE_L3_LLC:
-	case I915_CACHE_LLC:
-		pte |= GEN6_PTE_CACHE_LLC;
-		break;
-	case I915_CACHE_NONE:
-		pte |= GEN6_PTE_UNCACHED;
-		break;
-	default:
-		MISSING_CASE(level);
-	}
-
-	return pte;
-}
-
-static u64 ivb_pte_encode(dma_addr_t addr,
-			  enum i915_cache_level level,
-			  u32 flags)
-{
-	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
-
-	switch (level) {
-	case I915_CACHE_L3_LLC:
-		pte |= GEN7_PTE_CACHE_L3_LLC;
-		break;
-	case I915_CACHE_LLC:
-		pte |= GEN6_PTE_CACHE_LLC;
-		break;
-	case I915_CACHE_NONE:
-		pte |= GEN6_PTE_UNCACHED;
-		break;
-	default:
-		MISSING_CASE(level);
-	}
-
-	return pte;
-}
-
-static u64 byt_pte_encode(dma_addr_t addr,
-			  enum i915_cache_level level,
-			  u32 flags)
-{
-	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
-
-	if (!(flags & PTE_READ_ONLY))
-		pte |= BYT_PTE_WRITEABLE;
-
-	if (level != I915_CACHE_NONE)
-		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
-
-	return pte;
-}
-
-static u64 hsw_pte_encode(dma_addr_t addr,
-			  enum i915_cache_level level,
-			  u32 flags)
-{
-	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
-
-	if (level != I915_CACHE_NONE)
-		pte |= HSW_WB_LLC_AGE3;
-
-	return pte;
-}
-
-static u64 iris_pte_encode(dma_addr_t addr,
-			   enum i915_cache_level level,
-			   u32 flags)
-{
-	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
-
-	switch (level) {
-	case I915_CACHE_NONE:
-		break;
-	case I915_CACHE_WT:
-		pte |= HSW_WT_ELLC_LLC_AGE3;
-		break;
-	default:
-		pte |= HSW_WB_ELLC_LLC_AGE3;
-		break;
-	}
-
-	return pte;
-}
-
-static void gen5_ggtt_insert_page(struct i915_address_space *vm,
-				  dma_addr_t addr,
-				  u64 offset,
-				  enum i915_cache_level cache_level,
-				  u32 unused)
-{
-	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
-		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
-
-	intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
-}
-
-static void gen6_ggtt_insert_page(struct i915_address_space *vm,
-				  dma_addr_t addr,
-				  u64 offset,
-				  enum i915_cache_level level,
-				  u32 flags)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	gen6_pte_t __iomem *pte =
-		(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
-
-	iowrite32(vm->pte_encode(addr, level, flags), pte);
-
-	ggtt->invalidate(ggtt);
-}
-
-static void gen8_ggtt_insert_page(struct i915_address_space *vm,
-				  dma_addr_t addr,
-				  u64 offset,
-				  enum i915_cache_level level,
-				  u32 flags)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	gen8_pte_t __iomem *pte =
-		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
-
-	gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));
-
-	ggtt->invalidate(ggtt);
-}
-
-static void gen5_ggtt_insert_entries(struct i915_address_space *vm,
-				     struct i915_vma_resource *vma_res,
-				     enum i915_cache_level cache_level,
-				     u32 unused)
-{
-	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
-		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
-
-	intel_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT,
-				    flags);
-}
-
-/*
- * Binds an object into the global gtt with the specified cache level.
- * The object will be accessible to the GPU via commands whose operands
- * reference offsets within the global GTT as well as accessible by the GPU
- * through the GMADR mapped BAR (i915->mm.gtt->gtt).
- */
-static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
-				     struct i915_vma_resource *vma_res,
-				     enum i915_cache_level level,
-				     u32 flags)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	gen6_pte_t __iomem *gte;
-	gen6_pte_t __iomem *end;
-	struct sgt_iter iter;
-	dma_addr_t addr;
-
-	gte = (gen6_pte_t __iomem *)ggtt->gsm;
-	gte += vma_res->start / I915_GTT_PAGE_SIZE;
-	end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
-
-	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
-		iowrite32(vm->pte_encode(addr, level, flags), gte++);
-	GEM_BUG_ON(gte > end);
-
-	/* Fill the allocated but "unused" space beyond the end of the buffer */
-	while (gte < end)
-		iowrite32(vm->scratch[0]->encode, gte++);
-
-	/*
-	 * We want to flush the TLBs only after we're certain all the PTE
-	 * updates have finished.
-	 */
-	ggtt->invalidate(ggtt);
-}
-
-static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
-				     struct i915_vma_resource *vma_res,
-				     enum i915_cache_level level,
-				     u32 flags)
-{
-	const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	gen8_pte_t __iomem *gte;
-	gen8_pte_t __iomem *end;
-	struct sgt_iter iter;
-	dma_addr_t addr;
-
-	/*
-	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
-	 * not to allow the user to override access to a read only page.
-	 */
-
-	gte = (gen8_pte_t __iomem *)ggtt->gsm;
-	gte += vma_res->start / I915_GTT_PAGE_SIZE;
-	end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
-
-	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
-		gen8_set_pte(gte++, pte_encode | addr);
-	GEM_BUG_ON(gte > end);
-
-	/* Fill the allocated but "unused" space beyond the end of the buffer */
-	while (gte < end)
-		gen8_set_pte(gte++, vm->scratch[0]->encode);
-
-	/*
-	 * We want to flush the TLBs only after we're certain all the PTE
-	 * updates have finished.
-	 */
-	ggtt->invalidate(ggtt);
-}
-
-static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
-{
-	/*
-	 * Make sure the internal GAM fifo has been cleared of all GTT
-	 * writes before exiting stop_machine(). This guarantees that
-	 * any aperture accesses waiting to start in another process
-	 * cannot back up behind the GTT writes causing a hang.
-	 * The register can be any arbitrary GAM register.
-	 */
-	intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
-}
-
-static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
-{
-	struct insert_page *arg = _arg;
-
-	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
-	bxt_vtd_ggtt_wa(arg->vm);
-
-	return 0;
-}
-
-static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
-					  dma_addr_t addr,
-					  u64 offset,
-					  enum i915_cache_level level,
-					  u32 unused)
-{
-	struct insert_page arg = { vm, addr, offset, level };
-
-	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
-}
-
-static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
-{
-	struct insert_entries *arg = _arg;
-
-	gen8_ggtt_insert_entries(arg->vm, arg->vma_res, arg->level, arg->flags);
-	bxt_vtd_ggtt_wa(arg->vm);
-
-	return 0;
-}
-
-static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
-					     struct i915_vma_resource *vma_res,
-					     enum i915_cache_level level,
-					     u32 flags)
-{
-	struct insert_entries arg = { vm, vma_res, level, flags };
-
-	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
-}
-
-void intel_gt_gmch_gen5_chipset_flush(struct intel_gt *gt)
-{
-	intel_gtt_chipset_flush();
-}
-
-static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
-{
-	intel_gtt_chipset_flush();
-}
-
-static void gen5_ggtt_clear_range(struct i915_address_space *vm,
-					 u64 start, u64 length)
-{
-	intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
-}
-
-static void gen6_ggtt_clear_range(struct i915_address_space *vm,
-				  u64 start, u64 length)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
-	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
-	gen6_pte_t scratch_pte, __iomem *gtt_base =
-		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
-	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
-	int i;
-
-	if (WARN(num_entries > max_entries,
-		 "First entry = %d; Num entries = %d (max=%d)\n",
-		 first_entry, num_entries, max_entries))
-		num_entries = max_entries;
-
-	scratch_pte = vm->scratch[0]->encode;
-	for (i = 0; i < num_entries; i++)
-		iowrite32(scratch_pte, &gtt_base[i]);
-}
-
-static void gen8_ggtt_clear_range(struct i915_address_space *vm,
-				  u64 start, u64 length)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
-	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
-	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
-	gen8_pte_t __iomem *gtt_base =
-		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
-	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
-	int i;
-
-	if (WARN(num_entries > max_entries,
-		 "First entry = %d; Num entries = %d (max=%d)\n",
-		 first_entry, num_entries, max_entries))
-		num_entries = max_entries;
-
-	for (i = 0; i < num_entries; i++)
-		gen8_set_pte(&gtt_base[i], scratch_pte);
-}
-
-static void gen5_gmch_remove(struct i915_address_space *vm)
-{
-	intel_gmch_remove();
-}
-
-static void gen6_gmch_remove(struct i915_address_space *vm)
-{
-	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-
-	iounmap(ggtt->gsm);
-	free_scratch(vm);
-}
-
-/*
- * Certain Gen5 chipsets require idling the GPU before
- * unmapping anything from the GTT when VT-d is enabled.
- */
-static bool needs_idle_maps(struct drm_i915_private *i915)
-{
-	/*
-	 * Query intel_iommu to see if we need the workaround. Presumably that
-	 * was loaded first.
-	 */
-	if (!i915_vtd_active(i915))
-		return false;
-
-	if (GRAPHICS_VER(i915) == 5 && IS_MOBILE(i915))
-		return true;
-
-	if (GRAPHICS_VER(i915) == 12)
-		return true; /* XXX DMAR fault reason 7 */
-
-	return false;
-}
-
-static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915)
-{
-	/*
-	 * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset
-	 * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset
-	 */
-	GEM_BUG_ON(GRAPHICS_VER(i915) < 6);
-	return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M;
-}
-
-static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
-{
-	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
-	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
-	return snb_gmch_ctl << 20;
-}
-
-static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
-{
-	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
-	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
-	if (bdw_gmch_ctl)
-		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
-
-#ifdef CONFIG_X86_32
-	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
-	if (bdw_gmch_ctl > 4)
-		bdw_gmch_ctl = 4;
-#endif
-
-	return bdw_gmch_ctl << 20;
-}
-
-static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
-{
-	return gen6_gttmmadr_size(i915) / 2;
-}
-
-static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
-{
-	struct drm_i915_private *i915 = ggtt->vm.i915;
-	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
-	phys_addr_t phys_addr;
-	u32 pte_flags;
-	int ret;
-
-	GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915));
-	phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915);
-
-	/*
-	 * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
-	 * will be dropped. For WC mappings in general we have 64 byte burst
-	 * writes when the WC buffer is flushed, so we can't use it, but have to
-	 * resort to an uncached mapping. The WC issue is easily caught by the
-	 * readback check when writing GTT PTE entries.
-	 */
-	if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11)
-		ggtt->gsm = ioremap(phys_addr, size);
-	else
-		ggtt->gsm = ioremap_wc(phys_addr, size);
-	if (!ggtt->gsm) {
-		drm_err(&i915->drm, "Failed to map the ggtt page table\n");
-		return -ENOMEM;
-	}
-
-	kref_init(&ggtt->vm.resv_ref);
-	ret = setup_scratch_page(&ggtt->vm);
-	if (ret) {
-		drm_err(&i915->drm, "Scratch setup failed\n");
-		/* iounmap will also get called at remove, but meh */
-		iounmap(ggtt->gsm);
-		return ret;
-	}
-
-	pte_flags = 0;
-	if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
-		pte_flags |= PTE_LM;
-
-	ggtt->vm.scratch[0]->encode =
-		ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
-				    I915_CACHE_NONE, pte_flags);
-
-	return 0;
-}
-
-int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt)
-{
-	struct drm_i915_private *i915 = ggtt->vm.i915;
-	phys_addr_t gmadr_base;
-	int ret;
-
-	ret = intel_gmch_probe(i915->bridge_dev, to_pci_dev(i915->drm.dev), NULL);
-	if (!ret) {
-		drm_err(&i915->drm, "failed to set up gmch\n");
-		return -EIO;
-	}
-
-	intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);
-
-	ggtt->gmadr =
-		(struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
-
-	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
-	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
-
-	if (needs_idle_maps(i915)) {
-		drm_notice(&i915->drm,
-			   "Flushing DMA requests before IOMMU unmaps; performance may be degraded\n");
-		ggtt->do_idle_maps = true;
-	}
-
-	ggtt->vm.insert_page = gen5_ggtt_insert_page;
-	ggtt->vm.insert_entries = gen5_ggtt_insert_entries;
-	ggtt->vm.clear_range = gen5_ggtt_clear_range;
-	ggtt->vm.cleanup = gen5_gmch_remove;
-
-	ggtt->invalidate = gmch_ggtt_invalidate;
-
-	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
-	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
-
-	if (unlikely(ggtt->do_idle_maps))
-		drm_notice(&i915->drm,
-			   "Applying Ironlake quirks for intel_iommu\n");
-
-	return 0;
-}
-
-int intel_gt_gmch_gen6_probe(struct i915_ggtt *ggtt)
-{
-	struct drm_i915_private *i915 = ggtt->vm.i915;
-	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
-	unsigned int size;
-	u16 snb_gmch_ctl;
-
-	ggtt->gmadr = intel_pci_resource(pdev, 2);
-	ggtt->mappable_end = resource_size(&ggtt->gmadr);
-
-	/*
-	 * 64/512MB is the current min/max we actually know of, but this is
-	 * just a coarse sanity check.
-	 */
-	if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
-		drm_err(&i915->drm, "Unknown GMADR size (%pa)\n",
-			&ggtt->mappable_end);
-		return -ENXIO;
-	}
-
-	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
-
-	size = gen6_get_total_gtt_size(snb_gmch_ctl);
-	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
-
-	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
-	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
-
-	ggtt->vm.clear_range = nop_clear_range;
-	if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
-		ggtt->vm.clear_range = gen6_ggtt_clear_range;
-	ggtt->vm.insert_page = gen6_ggtt_insert_page;
-	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
-	ggtt->vm.cleanup = gen6_gmch_remove;
-
-	ggtt->invalidate = gen6_ggtt_invalidate;
-
-	if (HAS_EDRAM(i915))
-		ggtt->vm.pte_encode = iris_pte_encode;
-	else if (IS_HASWELL(i915))
-		ggtt->vm.pte_encode = hsw_pte_encode;
-	else if (IS_VALLEYVIEW(i915))
-		ggtt->vm.pte_encode = byt_pte_encode;
-	else if (GRAPHICS_VER(i915) >= 7)
-		ggtt->vm.pte_encode = ivb_pte_encode;
-	else
-		ggtt->vm.pte_encode = snb_pte_encode;
-
-	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
-	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
-
-	return ggtt_probe_common(ggtt, size);
-}
-
-static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
-{
-	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
-	gmch_ctrl &= SNB_GMCH_GGMS_MASK;
-
-	if (gmch_ctrl)
-		return 1 << (20 + gmch_ctrl);
-
-	return 0;
-}
-
-int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt)
-{
-	struct drm_i915_private *i915 = ggtt->vm.i915;
-	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
-	unsigned int size;
-	u16 snb_gmch_ctl;
-
-	/* TODO: We're not aware of mappable constraints on gen8 yet */
-	if (!HAS_LMEM(i915)) {
-		ggtt->gmadr = intel_pci_resource(pdev, 2);
-		ggtt->mappable_end = resource_size(&ggtt->gmadr);
-	}
-
-	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
-	if (IS_CHERRYVIEW(i915))
-		size = chv_get_total_gtt_size(snb_gmch_ctl);
-	else
-		size = gen8_get_total_gtt_size(snb_gmch_ctl);
-
-	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
-	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
-	ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;
-
-	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
-	ggtt->vm.cleanup = gen6_gmch_remove;
-	ggtt->vm.insert_page = gen8_ggtt_insert_page;
-	ggtt->vm.clear_range = nop_clear_range;
-	if (intel_scanout_needs_vtd_wa(i915))
-		ggtt->vm.clear_range = gen8_ggtt_clear_range;
-
-	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
-
-	/*
-	 * Serialize GTT updates with aperture access on BXT if VT-d is on,
-	 * and always on CHV.
-	 */
-	if (intel_vm_no_concurrent_access_wa(i915)) {
-		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
-		ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
-		ggtt->vm.bind_async_flags =
-			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
-	}
-
-	ggtt->invalidate = gen8_ggtt_invalidate;
-
-	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
-	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
-
-	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
-
-	setup_private_pat(ggtt->vm.gt->uncore);
-
-	return ggtt_probe_common(ggtt, size);
-}
-
-int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915)
-{
-	if (GRAPHICS_VER(i915) < 6 && !intel_enable_gtt())
-		return -EIO;
-
-	return 0;
-}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_gmch.h b/drivers/gpu/drm/i915/gt/intel_gt_gmch.h
deleted file mode 100644
index 75ed55c1f30a..000000000000
--- a/drivers/gpu/drm/i915/gt/intel_gt_gmch.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2022 Intel Corporation
- */
-
-#ifndef __INTEL_GT_GMCH_H__
-#define __INTEL_GT_GMCH_H__
-
-#include "intel_gtt.h"
-
-/* For x86 platforms */
-#if IS_ENABLED(CONFIG_X86)
-void intel_gt_gmch_gen5_chipset_flush(struct intel_gt *gt);
-int intel_gt_gmch_gen6_probe(struct i915_ggtt *ggtt);
-int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt);
-int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt);
-int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915);
-
-/* Stubs for non-x86 platforms */
-#else
-static inline void intel_gt_gmch_gen5_chipset_flush(struct intel_gt *gt)
-{
-}
-static inline int intel_gt_gmch_gen5_probe(struct i915_ggtt *ggtt)
-{
-	/* No HW should be probed for this case yet, return fail */
-	return -ENODEV;
-}
-static inline int intel_gt_gmch_gen6_probe(struct i915_ggtt *ggtt)
-{
-	/* No HW should be probed for this case yet, return fail */
-	return -ENODEV;
-}
-static inline int intel_gt_gmch_gen8_probe(struct i915_ggtt *ggtt)
-{
-	/* No HW should be probed for this case yet, return fail */
-	return -ENODEV;
-}
-static inline int intel_gt_gmch_gen5_enable_hw(struct drm_i915_private *i915)
-{
-	/* No HW should be enabled for this case yet, return fail */
-	return -ENODEV;
-}
-#endif
-
-#endif /* __INTEL_GT_GMCH_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index 88b4becfcb17..3a72d4fd0214 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -193,6 +193,14 @@ void gen11_gt_irq_reset(struct intel_gt *gt)
 	/* Restore masks irqs on RCS, BCS, VCS and VECS engines. */
 	intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK,	~0);
 	intel_uncore_write(uncore, GEN11_BCS_RSVD_INTR_MASK,	~0);
+	if (HAS_ENGINE(gt, BCS1) || HAS_ENGINE(gt, BCS2))
+		intel_uncore_write(uncore, XEHPC_BCS1_BCS2_INTR_MASK, ~0);
+	if (HAS_ENGINE(gt, BCS3) || HAS_ENGINE(gt, BCS4))
+		intel_uncore_write(uncore, XEHPC_BCS3_BCS4_INTR_MASK, ~0);
+	if (HAS_ENGINE(gt, BCS5) || HAS_ENGINE(gt, BCS6))
+		intel_uncore_write(uncore, XEHPC_BCS5_BCS6_INTR_MASK, ~0);
+	if (HAS_ENGINE(gt, BCS7) || HAS_ENGINE(gt, BCS8))
+		intel_uncore_write(uncore, XEHPC_BCS7_BCS8_INTR_MASK, ~0);
 	intel_uncore_write(uncore, GEN11_VCS0_VCS1_INTR_MASK,	~0);
 	intel_uncore_write(uncore, GEN11_VCS2_VCS3_INTR_MASK,	~0);
 	if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5))
@@ -248,6 +256,14 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
 	/* Unmask irqs on RCS, BCS, VCS and VECS engines. */
 	intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~smask);
 	intel_uncore_write(uncore, GEN11_BCS_RSVD_INTR_MASK, ~smask);
+	if (HAS_ENGINE(gt, BCS1) || HAS_ENGINE(gt, BCS2))
+		intel_uncore_write(uncore, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask);
+	if (HAS_ENGINE(gt, BCS3) || HAS_ENGINE(gt, BCS4))
+		intel_uncore_write(uncore, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask);
+	if (HAS_ENGINE(gt, BCS5) || HAS_ENGINE(gt, BCS6))
+		intel_uncore_write(uncore, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask);
+	if (HAS_ENGINE(gt, BCS7) || HAS_ENGINE(gt, BCS8))
+		intel_uncore_write(uncore, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask);
 	intel_uncore_write(uncore, GEN11_VCS0_VCS1_INTR_MASK, ~dmask);
 	intel_uncore_write(uncore, GEN11_VCS2_VCS3_INTR_MASK, ~dmask);
 	if (HAS_ENGINE(gt, VCS4) || HAS_ENGINE(gt, VCS5))
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
new file mode 100644
index 000000000000..777025d5bd66
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -0,0 +1,497 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "i915_drv.h"
+
+#include "intel_gt_mcr.h"
+#include "intel_gt_regs.h"
+
+/**
+ * DOC: GT Multicast/Replicated (MCR) Register Support
+ *
+ * Some GT registers are designed as "multicast" or "replicated" registers:
+ * multiple instances of the same register share a single MMIO offset.  MCR
+ * registers are generally used when the hardware needs to potentially track
+ * independent values of a register per hardware unit (e.g., per-subslice,
+ * per-L3bank, etc.).  The specific types of replication that exist vary
+ * per-platform.
+ *
+ * MMIO accesses to MCR registers are controlled according to the settings
+ * programmed in the platform's MCR_SELECTOR register(s).  MMIO writes to MCR
+ * registers can be done in either a (i.e., a single write updates all
+ * instances of the register to the same value) or unicast (a write updates only
+ * one specific instance).  Reads of MCR registers always operate in a unicast
+ * manner regardless of how the multicast/unicast bit is set in MCR_SELECTOR.
+ * Selection of a specific MCR instance for unicast operations is referred to
+ * as "steering."
+ *
+ * If MCR register operations are steered toward a hardware unit that is
+ * fused off or currently powered down due to power gating, the MMIO operation
+ * is "terminated" by the hardware.  Terminated read operations will return a
+ * value of zero and terminated unicast write operations will be silently
+ * ignored.
+ */
+
+#define HAS_MSLICE_STEERING(dev_priv)	(INTEL_INFO(dev_priv)->has_mslice_steering)
+
+static const char * const intel_steering_types[] = {
+	"L3BANK",
+	"MSLICE",
+	"LNCF",
+	"INSTANCE 0",
+};
+
+static const struct intel_mmio_range icl_l3bank_steering_table[] = {
+	{ 0x00B100, 0x00B3FF },
+	{},
+};
+
+static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = {
+	{ 0x004000, 0x004AFF },
+	{ 0x00C800, 0x00CFFF },
+	{ 0x00DD00, 0x00DDFF },
+	{ 0x00E900, 0x00FFFF }, /* 0xEA00 - OxEFFF is unused */
+	{},
+};
+
+static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = {
+	{ 0x00B000, 0x00B0FF },
+	{ 0x00D800, 0x00D8FF },
+	{},
+};
+
+static const struct intel_mmio_range dg2_lncf_steering_table[] = {
+	{ 0x00B000, 0x00B0FF },
+	{ 0x00D880, 0x00D8FF },
+	{},
+};
+
+/*
+ * We have several types of MCR registers on PVC where steering to (0,0)
+ * will always provide us with a non-terminated value.  We'll stick them
+ * all in the same table for simplicity.
+ */
+static const struct intel_mmio_range pvc_instance0_steering_table[] = {
+	{ 0x004000, 0x004AFF },		/* HALF-BSLICE */
+	{ 0x008800, 0x00887F },		/* CC */
+	{ 0x008A80, 0x008AFF },		/* TILEPSMI */
+	{ 0x00B000, 0x00B0FF },		/* HALF-BSLICE */
+	{ 0x00B100, 0x00B3FF },		/* L3BANK */
+	{ 0x00C800, 0x00CFFF },		/* HALF-BSLICE */
+	{ 0x00D800, 0x00D8FF },		/* HALF-BSLICE */
+	{ 0x00DD00, 0x00DDFF },		/* BSLICE */
+	{ 0x00E900, 0x00E9FF },		/* HALF-BSLICE */
+	{ 0x00EC00, 0x00EEFF },		/* HALF-BSLICE */
+	{ 0x00F000, 0x00FFFF },		/* HALF-BSLICE */
+	{ 0x024180, 0x0241FF },		/* HALF-BSLICE */
+	{},
+};
+
+void intel_gt_mcr_init(struct intel_gt *gt)
+{
+	struct drm_i915_private *i915 = gt->i915;
+
+	/*
+	 * An mslice is unavailable only if both the meml3 for the slice is
+	 * disabled *and* all of the DSS in the slice (quadrant) are disabled.
+	 */
+	if (HAS_MSLICE_STEERING(i915)) {
+		gt->info.mslice_mask =
+			intel_slicemask_from_xehp_dssmask(gt->info.sseu.subslice_mask,
+							  GEN_DSS_PER_MSLICE);
+		gt->info.mslice_mask |=
+			(intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
+			 GEN12_MEML3_EN_MASK);
+
+		if (!gt->info.mslice_mask) /* should be impossible! */
+			drm_warn(&i915->drm, "mslice mask all zero!\n");
+	}
+
+	if (IS_PONTEVECCHIO(i915)) {
+		gt->steering_table[INSTANCE0] = pvc_instance0_steering_table;
+	} else if (IS_DG2(i915)) {
+		gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
+		gt->steering_table[LNCF] = dg2_lncf_steering_table;
+	} else if (IS_XEHPSDV(i915)) {
+		gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
+		gt->steering_table[LNCF] = xehpsdv_lncf_steering_table;
+	} else if (GRAPHICS_VER(i915) >= 11 &&
+		   GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) {
+		gt->steering_table[L3BANK] = icl_l3bank_steering_table;
+		gt->info.l3bank_mask =
+			~intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
+			GEN10_L3BANK_MASK;
+		if (!gt->info.l3bank_mask) /* should be impossible! */
+			drm_warn(&i915->drm, "L3 bank mask is all zero!\n");
+	} else if (GRAPHICS_VER(i915) >= 11) {
+		/*
+		 * We expect all modern platforms to have at least some
+		 * type of steering that needs to be initialized.
+		 */
+		MISSING_CASE(INTEL_INFO(i915)->platform);
+	}
+}
+
+/*
+ * rw_with_mcr_steering_fw - Access a register with specific MCR steering
+ * @uncore: pointer to struct intel_uncore
+ * @reg: register being accessed
+ * @rw_flag: FW_REG_READ for read access or FW_REG_WRITE for write access
+ * @group: group number (documented as "sliceid" on older platforms)
+ * @instance: instance number (documented as "subsliceid" on older platforms)
+ * @value: register value to be written (ignored for read)
+ *
+ * Return: 0 for write access. register value for read access.
+ *
+ * Caller needs to make sure the relevant forcewake wells are up.
+ */
+static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore,
+				   i915_reg_t reg, u8 rw_flag,
+				   int group, int instance, u32 value)
+{
+	u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0;
+
+	lockdep_assert_held(&uncore->lock);
+
+	if (GRAPHICS_VER(uncore->i915) >= 11) {
+		mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
+		mcr_ss = GEN11_MCR_SLICE(group) | GEN11_MCR_SUBSLICE(instance);
+
+		/*
+		 * Wa_22013088509
+		 *
+		 * The setting of the multicast/unicast bit usually wouldn't
+		 * matter for read operations (which always return the value
+		 * from a single register instance regardless of how that bit
+		 * is set), but some platforms have a workaround requiring us
+		 * to remain in multicast mode for reads.  There's no real
+		 * downside to this, so we'll just go ahead and do so on all
+		 * platforms; we'll only clear the multicast bit from the mask
+		 * when exlicitly doing a write operation.
+		 */
+		if (rw_flag == FW_REG_WRITE)
+			mcr_mask |= GEN11_MCR_MULTICAST;
+	} else {
+		mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
+		mcr_ss = GEN8_MCR_SLICE(group) | GEN8_MCR_SUBSLICE(instance);
+	}
+
+	old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
+
+	mcr &= ~mcr_mask;
+	mcr |= mcr_ss;
+	intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
+
+	if (rw_flag == FW_REG_READ)
+		val = intel_uncore_read_fw(uncore, reg);
+	else
+		intel_uncore_write_fw(uncore, reg, value);
+
+	mcr &= ~mcr_mask;
+	mcr |= old_mcr & mcr_mask;
+
+	intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
+
+	return val;
+}
+
+static u32 rw_with_mcr_steering(struct intel_uncore *uncore,
+				i915_reg_t reg, u8 rw_flag,
+				int group, int instance,
+				u32 value)
+{
+	enum forcewake_domains fw_domains;
+	u32 val;
+
+	fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
+						    rw_flag);
+	fw_domains |= intel_uncore_forcewake_for_reg(uncore,
+						     GEN8_MCR_SELECTOR,
+						     FW_REG_READ | FW_REG_WRITE);
+
+	spin_lock_irq(&uncore->lock);
+	intel_uncore_forcewake_get__locked(uncore, fw_domains);
+
+	val = rw_with_mcr_steering_fw(uncore, reg, rw_flag, group, instance, value);
+
+	intel_uncore_forcewake_put__locked(uncore, fw_domains);
+	spin_unlock_irq(&uncore->lock);
+
+	return val;
+}
+
+/**
+ * intel_gt_mcr_read - read a specific instance of an MCR register
+ * @gt: GT structure
+ * @reg: the MCR register to read
+ * @group: the MCR group
+ * @instance: the MCR instance
+ *
+ * Returns the value read from an MCR register after steering toward a specific
+ * group/instance.
+ */
+u32 intel_gt_mcr_read(struct intel_gt *gt,
+		      i915_reg_t reg,
+		      int group, int instance)
+{
+	return rw_with_mcr_steering(gt->uncore, reg, FW_REG_READ, group, instance, 0);
+}
+
+/**
+ * intel_gt_mcr_unicast_write - write a specific instance of an MCR register
+ * @gt: GT structure
+ * @reg: the MCR register to write
+ * @value: value to write
+ * @group: the MCR group
+ * @instance: the MCR instance
+ *
+ * Write an MCR register in unicast mode after steering toward a specific
+ * group/instance.
+ */
+void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_reg_t reg, u32 value,
+				int group, int instance)
+{
+	rw_with_mcr_steering(gt->uncore, reg, FW_REG_WRITE, group, instance, value);
+}
+
+/**
+ * intel_gt_mcr_multicast_write - write a value to all instances of an MCR register
+ * @gt: GT structure
+ * @reg: the MCR register to write
+ * @value: value to write
+ *
+ * Write an MCR register in multicast mode to update all instances.
+ */
+void intel_gt_mcr_multicast_write(struct intel_gt *gt,
+				i915_reg_t reg, u32 value)
+{
+	intel_uncore_write(gt->uncore, reg, value);
+}
+
+/**
+ * intel_gt_mcr_multicast_write_fw - write a value to all instances of an MCR register
+ * @gt: GT structure
+ * @reg: the MCR register to write
+ * @value: value to write
+ *
+ * Write an MCR register in multicast mode to update all instances.  This
+ * function assumes the caller is already holding any necessary forcewake
+ * domains; use intel_gt_mcr_multicast_write() in cases where forcewake should
+ * be obtained automatically.
+ */
+void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_reg_t reg, u32 value)
+{
+	intel_uncore_write_fw(gt->uncore, reg, value);
+}
+
+/*
+ * reg_needs_read_steering - determine whether a register read requires
+ *     explicit steering
+ * @gt: GT structure
+ * @reg: the register to check steering requirements for
+ * @type: type of multicast steering to check
+ *
+ * Determines whether @reg needs explicit steering of a specific type for
+ * reads.
+ *
+ * Returns false if @reg does not belong to a register range of the given
+ * steering type, or if the default (subslice-based) steering IDs are suitable
+ * for @type steering too.
+ */
+static bool reg_needs_read_steering(struct intel_gt *gt,
+				    i915_reg_t reg,
+				    enum intel_steering_type type)
+{
+	const u32 offset = i915_mmio_reg_offset(reg);
+	const struct intel_mmio_range *entry;
+
+	if (likely(!gt->steering_table[type]))
+		return false;
+
+	for (entry = gt->steering_table[type]; entry->end; entry++) {
+		if (offset >= entry->start && offset <= entry->end)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * get_nonterminated_steering - determines valid IDs for a class of MCR steering
+ * @gt: GT structure
+ * @type: multicast register type
+ * @group: Group ID returned
+ * @instance: Instance ID returned
+ *
+ * Determines group and instance values that will steer reads of the specified
+ * MCR class to a non-terminated instance.
+ */
+static void get_nonterminated_steering(struct intel_gt *gt,
+				       enum intel_steering_type type,
+				       u8 *group, u8 *instance)
+{
+	switch (type) {
+	case L3BANK:
+		*group = 0;		/* unused */
+		*instance = __ffs(gt->info.l3bank_mask);
+		break;
+	case MSLICE:
+		GEM_WARN_ON(!HAS_MSLICE_STEERING(gt->i915));
+		*group = __ffs(gt->info.mslice_mask);
+		*instance = 0;	/* unused */
+		break;
+	case LNCF:
+		/*
+		 * An LNCF is always present if its mslice is present, so we
+		 * can safely just steer to LNCF 0 in all cases.
+		 */
+		GEM_WARN_ON(!HAS_MSLICE_STEERING(gt->i915));
+		*group = __ffs(gt->info.mslice_mask) << 1;
+		*instance = 0;	/* unused */
+		break;
+	case INSTANCE0:
+		/*
+		 * There are a lot of MCR types for which instance (0, 0)
+		 * will always provide a non-terminated value.
+		 */
+		*group = 0;
+		*instance = 0;
+		break;
+	default:
+		MISSING_CASE(type);
+		*group = 0;
+		*instance = 0;
+	}
+}
+
+/**
+ * intel_gt_mcr_get_nonterminated_steering - find group/instance values that
+ *    will steer a register to a non-terminated instance
+ * @gt: GT structure
+ * @reg: register for which the steering is required
+ * @group: return variable for group steering
+ * @instance: return variable for instance steering
+ *
+ * This function returns a group/instance pair that is guaranteed to work for
+ * read steering of the given register. Note that a value will be returned even
+ * if the register is not replicated and therefore does not actually require
+ * steering.
+ */
+void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt,
+					     i915_reg_t reg,
+					     u8 *group, u8 *instance)
+{
+	int type;
+
+	for (type = 0; type < NUM_STEERING_TYPES; type++) {
+		if (reg_needs_read_steering(gt, reg, type)) {
+			get_nonterminated_steering(gt, type, group, instance);
+			return;
+		}
+	}
+
+	*group = gt->default_steering.groupid;
+	*instance = gt->default_steering.instanceid;
+}
+
+/**
+ * intel_gt_mcr_read_any_fw - reads one instance of an MCR register
+ * @gt: GT structure
+ * @reg: register to read
+ *
+ * Reads a GT MCR register.  The read will be steered to a non-terminated
+ * instance (i.e., one that isn't fused off or powered down by power gating).
+ * This function assumes the caller is already holding any necessary forcewake
+ * domains; use intel_gt_mcr_read_any() in cases where forcewake should be
+ * obtained automatically.
+ *
+ * Returns the value from a non-terminated instance of @reg.
+ */
+u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg)
+{
+	int type;
+	u8 group, instance;
+
+	for (type = 0; type < NUM_STEERING_TYPES; type++) {
+		if (reg_needs_read_steering(gt, reg, type)) {
+			get_nonterminated_steering(gt, type, &group, &instance);
+			return rw_with_mcr_steering_fw(gt->uncore, reg,
+						       FW_REG_READ,
+						       group, instance, 0);
+		}
+	}
+
+	return intel_uncore_read_fw(gt->uncore, reg);
+}
+
+/**
+ * intel_gt_mcr_read_any - reads one instance of an MCR register
+ * @gt: GT structure
+ * @reg: register to read
+ *
+ * Reads a GT MCR register.  The read will be steered to a non-terminated
+ * instance (i.e., one that isn't fused off or powered down by power gating).
+ *
+ * Returns the value from a non-terminated instance of @reg.
+ */
+u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg)
+{
+	int type;
+	u8 group, instance;
+
+	for (type = 0; type < NUM_STEERING_TYPES; type++) {
+		if (reg_needs_read_steering(gt, reg, type)) {
+			get_nonterminated_steering(gt, type, &group, &instance);
+			return rw_with_mcr_steering(gt->uncore, reg,
+						    FW_REG_READ,
+						    group, instance, 0);
+		}
+	}
+
+	return intel_uncore_read(gt->uncore, reg);
+}
+
+static void report_steering_type(struct drm_printer *p,
+				 struct intel_gt *gt,
+				 enum intel_steering_type type,
+				 bool dump_table)
+{
+	const struct intel_mmio_range *entry;
+	u8 group, instance;
+
+	BUILD_BUG_ON(ARRAY_SIZE(intel_steering_types) != NUM_STEERING_TYPES);
+
+	if (!gt->steering_table[type]) {
+		drm_printf(p, "%s steering: uses default steering\n",
+			   intel_steering_types[type]);
+		return;
+	}
+
+	get_nonterminated_steering(gt, type, &group, &instance);
+	drm_printf(p, "%s steering: group=0x%x, instance=0x%x\n",
+		   intel_steering_types[type], group, instance);
+
+	if (!dump_table)
+		return;
+
+	for (entry = gt->steering_table[type]; entry->end; entry++)
+		drm_printf(p, "\t0x%06x - 0x%06x\n", entry->start, entry->end);
+}
+
+void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt,
+				  bool dump_table)
+{
+	drm_printf(p, "Default steering: group=0x%x, instance=0x%x\n",
+		   gt->default_steering.groupid,
+		   gt->default_steering.instanceid);
+
+	if (IS_PONTEVECCHIO(gt->i915)) {
+		report_steering_type(p, gt, INSTANCE0, dump_table);
+	} else if (HAS_MSLICE_STEERING(gt->i915)) {
+		report_steering_type(p, gt, MSLICE, dump_table);
+		report_steering_type(p, gt, LNCF, dump_table);
+	}
+}
+
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
new file mode 100644
index 000000000000..506b0cbc8db3
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_MCR__
+#define __INTEL_GT_MCR__
+
+#include "intel_gt_types.h"
+
+void intel_gt_mcr_init(struct intel_gt *gt);
+
+u32 intel_gt_mcr_read(struct intel_gt *gt,
+		      i915_reg_t reg,
+		      int group, int instance);
+u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg);
+u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg);
+
+void intel_gt_mcr_unicast_write(struct intel_gt *gt,
+				i915_reg_t reg, u32 value,
+				int group, int instance);
+void intel_gt_mcr_multicast_write(struct intel_gt *gt,
+				  i915_reg_t reg, u32 value);
+void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt,
+				     i915_reg_t reg, u32 value);
+
+void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt,
+					     i915_reg_t reg,
+					     u8 *group, u8 *instance);
+
+void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt,
+				  bool dump_table);
+
+#endif /* __INTEL_GT_MCR__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 90a440865037..40bdd4cb629f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -100,14 +100,16 @@ static int vlv_drpc(struct seq_file *m)
 {
 	struct intel_gt *gt = m->private;
 	struct intel_uncore *uncore = gt->uncore;
-	u32 rcctl1, pw_status;
+	u32 rcctl1, pw_status, mt_fwake_req;
 
+	mt_fwake_req = intel_uncore_read_fw(uncore, FORCEWAKE_MT);
 	pw_status = intel_uncore_read(uncore, VLV_GTLC_PW_STATUS);
 	rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL);
 
 	seq_printf(m, "RC6 Enabled: %s\n",
 		   str_yes_no(rcctl1 & (GEN7_RC_CTL_TO_MODE |
 					GEN6_RC_CTL_EI_MODE(1))));
+	seq_printf(m, "Multi-threaded Forcewake Request: 0x%x\n", mt_fwake_req);
 	seq_printf(m, "Render Power Well: %s\n",
 		   (pw_status & VLV_GTLC_PW_RENDER_STATUS_MASK) ? "Up" : "Down");
 	seq_printf(m, "Media Power Well: %s\n",
@@ -124,9 +126,10 @@ static int gen6_drpc(struct seq_file *m)
 	struct intel_gt *gt = m->private;
 	struct drm_i915_private *i915 = gt->i915;
 	struct intel_uncore *uncore = gt->uncore;
-	u32 gt_core_status, rcctl1, rc6vids = 0;
+	u32 gt_core_status, mt_fwake_req, rcctl1, rc6vids = 0;
 	u32 gen9_powergate_enable = 0, gen9_powergate_status = 0;
 
+	mt_fwake_req = intel_uncore_read_fw(uncore, FORCEWAKE_MT);
 	gt_core_status = intel_uncore_read_fw(uncore, GEN6_GT_CORE_STATUS);
 
 	rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL);
@@ -178,6 +181,7 @@ static int gen6_drpc(struct seq_file *m)
 
 	seq_printf(m, "Core Power Down: %s\n",
 		   str_yes_no(gt_core_status & GEN6_CORE_CPD_STATE_MASK));
+	seq_printf(m, "Multi-threaded Forcewake Request: 0x%x\n", mt_fwake_req);
 	if (GRAPHICS_VER(i915) >= 9) {
 		seq_printf(m, "Render Power Well: %s\n",
 			   (gen9_powergate_status &
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index a0a49c16babd..37c1095d8603 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -140,6 +140,7 @@
 #define FF_SLICE_CS_CHICKEN2			_MMIO(0x20e4)
 #define   GEN9_TSG_BARRIER_ACK_DISABLE		(1 << 8)
 #define   GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE	(1 << 10)
+#define   GEN12_PERF_FIX_BALANCING_CFE_DISABLE	REG_BIT(15)
 
 #define GEN9_CS_DEBUG_MODE1			_MMIO(0x20ec)
 #define   FF_DOP_CLOCK_GATE_DISABLE		REG_BIT(1)
@@ -323,8 +324,11 @@
 
 #define GEN12_PAT_INDEX(index)			_MMIO(0x4800 + (index) * 4)
 
-#define XEHPSDV_FLAT_CCS_BASE_ADDR		_MMIO(0x4910)
-#define   XEHPSDV_CCS_BASE_SHIFT		8
+#define XEHP_TILE0_ADDR_RANGE			_MMIO(0x4900)
+#define   XEHP_TILE_LMEM_RANGE_SHIFT		8
+
+#define XEHP_FLAT_CCS_BASE_ADDR			_MMIO(0x4910)
+#define   XEHP_CCS_BASE_SHIFT			8
 
 #define GAMTARBMODE				_MMIO(0x4a08)
 #define   ARB_MODE_BWGTLB_DISABLE		(1 << 9)
@@ -561,6 +565,7 @@
 #define   GEN11_GT_VEBOX_DISABLE_MASK		(0x0f << GEN11_GT_VEBOX_DISABLE_SHIFT)
 
 #define GEN12_GT_COMPUTE_DSS_ENABLE		_MMIO(0x9144)
+#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT		_MMIO(0x9148)
 
 #define GEN6_UCGCTL1				_MMIO(0x9400)
 #define   GEN6_GAMUNIT_CLOCK_GATE_DISABLE	(1 << 22)
@@ -597,24 +602,32 @@
 /* GEN11 changed all bit defs except for FULL & RENDER */
 #define   GEN11_GRDOM_FULL			GEN6_GRDOM_FULL
 #define   GEN11_GRDOM_RENDER			GEN6_GRDOM_RENDER
-#define   GEN11_GRDOM_BLT			(1 << 2)
-#define   GEN11_GRDOM_GUC			(1 << 3)
-#define   GEN11_GRDOM_MEDIA			(1 << 5)
-#define   GEN11_GRDOM_MEDIA2			(1 << 6)
-#define   GEN11_GRDOM_MEDIA3			(1 << 7)
-#define   GEN11_GRDOM_MEDIA4			(1 << 8)
-#define   GEN11_GRDOM_MEDIA5			(1 << 9)
-#define   GEN11_GRDOM_MEDIA6			(1 << 10)
-#define   GEN11_GRDOM_MEDIA7			(1 << 11)
-#define   GEN11_GRDOM_MEDIA8			(1 << 12)
-#define   GEN11_GRDOM_VECS			(1 << 13)
-#define   GEN11_GRDOM_VECS2			(1 << 14)
-#define   GEN11_GRDOM_VECS3			(1 << 15)
-#define   GEN11_GRDOM_VECS4			(1 << 16)
-#define   GEN11_GRDOM_SFC0			(1 << 17)
-#define   GEN11_GRDOM_SFC1			(1 << 18)
-#define   GEN11_GRDOM_SFC2			(1 << 19)
-#define   GEN11_GRDOM_SFC3			(1 << 20)
+#define   XEHPC_GRDOM_BLT8			REG_BIT(31)
+#define   XEHPC_GRDOM_BLT7			REG_BIT(30)
+#define   XEHPC_GRDOM_BLT6			REG_BIT(29)
+#define   XEHPC_GRDOM_BLT5			REG_BIT(28)
+#define   XEHPC_GRDOM_BLT4			REG_BIT(27)
+#define   XEHPC_GRDOM_BLT3			REG_BIT(26)
+#define   XEHPC_GRDOM_BLT2			REG_BIT(25)
+#define   XEHPC_GRDOM_BLT1			REG_BIT(24)
+#define   GEN11_GRDOM_SFC3			REG_BIT(20)
+#define   GEN11_GRDOM_SFC2			REG_BIT(19)
+#define   GEN11_GRDOM_SFC1			REG_BIT(18)
+#define   GEN11_GRDOM_SFC0			REG_BIT(17)
+#define   GEN11_GRDOM_VECS4			REG_BIT(16)
+#define   GEN11_GRDOM_VECS3			REG_BIT(15)
+#define   GEN11_GRDOM_VECS2			REG_BIT(14)
+#define   GEN11_GRDOM_VECS			REG_BIT(13)
+#define   GEN11_GRDOM_MEDIA8			REG_BIT(12)
+#define   GEN11_GRDOM_MEDIA7			REG_BIT(11)
+#define   GEN11_GRDOM_MEDIA6			REG_BIT(10)
+#define   GEN11_GRDOM_MEDIA5			REG_BIT(9)
+#define   GEN11_GRDOM_MEDIA4			REG_BIT(8)
+#define   GEN11_GRDOM_MEDIA3			REG_BIT(7)
+#define   GEN11_GRDOM_MEDIA2			REG_BIT(6)
+#define   GEN11_GRDOM_MEDIA			REG_BIT(5)
+#define   GEN11_GRDOM_GUC			REG_BIT(3)
+#define   GEN11_GRDOM_BLT			REG_BIT(2)
 #define   GEN11_VCS_SFC_RESET_BIT(instance)	(GEN11_GRDOM_SFC0 << ((instance) >> 1))
 #define   GEN11_VECS_SFC_RESET_BIT(instance)	(GEN11_GRDOM_SFC0 << (instance))
 
@@ -622,6 +635,7 @@
 
 #define GEN7_MISCCPCTL				_MMIO(0x9424)
 #define   GEN7_DOP_CLOCK_GATE_ENABLE		(1 << 0)
+#define   GEN12_DOP_CLOCK_GATE_RENDER_ENABLE	REG_BIT(1)
 #define   GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE	(1 << 2)
 #define   GEN8_DOP_CLOCK_GATE_GUC_ENABLE	(1 << 4)
 #define   GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE	(1 << 6)
@@ -732,6 +746,7 @@
 #define   GEN6_AGGRESSIVE_TURBO			(0 << 15)
 #define   GEN9_SW_REQ_UNSLICE_RATIO_SHIFT	23
 #define   GEN9_IGNORE_SLICE_RATIO		(0 << 0)
+#define   GEN12_MEDIA_FREQ_RATIO		REG_BIT(13)
 
 #define GEN6_RC_VIDEO_FREQ			_MMIO(0xa00c)
 #define   GEN6_RC_CTL_RC6pp_ENABLE		(1 << 16)
@@ -969,6 +984,11 @@
 #define XEHP_L3SCQREG7				_MMIO(0xb188)
 #define   BLEND_FILL_CACHING_OPT_DIS		REG_BIT(3)
 
+#define XEHPC_L3SCRUB				_MMIO(0xb18c)
+#define   SCRUB_CL_DWNGRADE_SHARED		REG_BIT(12)
+#define   SCRUB_RATE_PER_BANK_MASK		REG_GENMASK(2, 0)
+#define   SCRUB_RATE_4B_PER_CLK			REG_FIELD_PREP(SCRUB_RATE_PER_BANK_MASK, 0x6)
+
 #define L3SQCREG1_CCS0				_MMIO(0xb200)
 #define   FLUSHALLNONCOH			REG_BIT(5)
 
@@ -1060,8 +1080,10 @@
 #define   GEN9_ENABLE_GPGPU_PREEMPTION		REG_BIT(2)
 
 #define GEN10_CACHE_MODE_SS			_MMIO(0xe420)
-#define   ENABLE_PREFETCH_INTO_IC		REG_BIT(3)
+#define   ENABLE_EU_COUNT_FOR_TDL_FLUSH		REG_BIT(10)
+#define   DISABLE_ECC				REG_BIT(5)
 #define   FLOAT_BLEND_OPTIMIZATION_ENABLE	REG_BIT(4)
+#define   ENABLE_PREFETCH_INTO_IC		REG_BIT(3)
 
 #define EU_PERF_CNTL0				_MMIO(0xe458)
 #define EU_PERF_CNTL4				_MMIO(0xe45c)
@@ -1476,6 +1498,14 @@
 #define   GEN11_KCR				(19)
 #define   GEN11_GTPM				(16)
 #define   GEN11_BCS				(15)
+#define   XEHPC_BCS1				(14)
+#define   XEHPC_BCS2				(13)
+#define   XEHPC_BCS3				(12)
+#define   XEHPC_BCS4				(11)
+#define   XEHPC_BCS5				(10)
+#define   XEHPC_BCS6				(9)
+#define   XEHPC_BCS7				(8)
+#define   XEHPC_BCS8				(23)
 #define   GEN12_CCS3				(7)
 #define   GEN12_CCS2				(6)
 #define   GEN12_CCS1				(5)
@@ -1521,6 +1551,10 @@
 #define GEN11_GUNIT_CSME_INTR_MASK		_MMIO(0x1900f4)
 #define GEN12_CCS0_CCS1_INTR_MASK		_MMIO(0x190100)
 #define GEN12_CCS2_CCS3_INTR_MASK		_MMIO(0x190104)
+#define XEHPC_BCS1_BCS2_INTR_MASK		_MMIO(0x190110)
+#define XEHPC_BCS3_BCS4_INTR_MASK		_MMIO(0x190114)
+#define XEHPC_BCS5_BCS6_INTR_MASK		_MMIO(0x190118)
+#define XEHPC_BCS7_BCS8_INTR_MASK		_MMIO(0x19011c)
 
 #define GEN12_SFC_DONE(n)			_MMIO(0x1cc000 + (n) * 0x1000)
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
index 8ec8bc660c8c..9e4ebf53379b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
@@ -24,7 +24,7 @@ bool is_object_gt(struct kobject *kobj)
 
 static struct intel_gt *kobj_to_gt(struct kobject *kobj)
 {
-	return container_of(kobj, struct kobj_gt, base)->gt;
+	return container_of(kobj, struct intel_gt, sysfs_gt);
 }
 
 struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
@@ -72,9 +72,9 @@ static struct attribute *id_attrs[] = {
 };
 ATTRIBUTE_GROUPS(id);
 
+/* A kobject needs a release() method even if it does nothing */
 static void kobj_gt_release(struct kobject *kobj)
 {
-	kfree(kobj);
 }
 
 static struct kobj_type kobj_gt_type = {
@@ -85,8 +85,6 @@ static struct kobj_type kobj_gt_type = {
 
 void intel_gt_sysfs_register(struct intel_gt *gt)
 {
-	struct kobj_gt *kg;
-
 	/*
 	 * We need to make things right with the
 	 * ABI compatibility. The files were originally
@@ -98,25 +96,22 @@ void intel_gt_sysfs_register(struct intel_gt *gt)
 	if (gt_is_root(gt))
 		intel_gt_sysfs_pm_init(gt, gt_get_parent_obj(gt));
 
-	kg = kzalloc(sizeof(*kg), GFP_KERNEL);
-	if (!kg)
+	/* init and xfer ownership to sysfs tree */
+	if (kobject_init_and_add(&gt->sysfs_gt, &kobj_gt_type,
+				 gt->i915->sysfs_gt, "gt%d", gt->info.id))
 		goto exit_fail;
 
-	kobject_init(&kg->base, &kobj_gt_type);
-	kg->gt = gt;
-
-	/* xfer ownership to sysfs tree */
-	if (kobject_add(&kg->base, gt->i915->sysfs_gt, "gt%d", gt->info.id))
-		goto exit_kobj_put;
-
-	intel_gt_sysfs_pm_init(gt, &kg->base);
+	intel_gt_sysfs_pm_init(gt, &gt->sysfs_gt);
 
 	return;
 
-exit_kobj_put:
-	kobject_put(&kg->base);
-
 exit_fail:
+	kobject_put(&gt->sysfs_gt);
 	drm_warn(&gt->i915->drm,
 		 "failed to initialize gt%d sysfs root\n", gt->info.id);
 }
+
+void intel_gt_sysfs_unregister(struct intel_gt *gt)
+{
+	kobject_put(&gt->sysfs_gt);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
index 9471b26752cf..a99aa7e8b01a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
@@ -13,11 +13,6 @@
 
 struct intel_gt;
 
-struct kobj_gt {
-	struct kobject base;
-	struct intel_gt *gt;
-};
-
 bool is_object_gt(struct kobject *kobj);
 
 struct drm_i915_private *kobj_to_i915(struct kobject *kobj);
@@ -28,6 +23,7 @@ intel_gt_create_kobj(struct intel_gt *gt,
 		     const char *name);
 
 void intel_gt_sysfs_register(struct intel_gt *gt);
+void intel_gt_sysfs_unregister(struct intel_gt *gt);
 struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
 					    const char *name);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
index f76b6cf8040e..73a8b46e0234 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
@@ -14,6 +14,7 @@
 #include "intel_gt_regs.h"
 #include "intel_gt_sysfs.h"
 #include "intel_gt_sysfs_pm.h"
+#include "intel_pcode.h"
 #include "intel_rc6.h"
 #include "intel_rps.h"
 
@@ -558,6 +559,174 @@ static const struct attribute *freq_attrs[] = {
 	NULL
 };
 
+/*
+ * Scaling for multipliers (aka frequency factors).
+ * The format of the value in the register is u8.8.
+ *
+ * The presentation to userspace is inspired by the perf event framework.
+ * See:
+ *   Documentation/ABI/testing/sysfs-bus-event_source-devices-events
+ * for description of:
+ *   /sys/bus/event_source/devices/<pmu>/events/<event>.scale
+ *
+ * Summary: Expose two sysfs files for each multiplier.
+ *
+ * 1. File <attr> contains a raw hardware value.
+ * 2. File <attr>.scale contains the multiplicative scale factor to be
+ *    used by userspace to compute the actual value.
+ *
+ * So userspace knows that to get the frequency_factor it multiplies the
+ * provided value by the specified scale factor and vice-versa.
+ *
+ * That way there is no precision loss in the kernel interface and API
+ * is future proof should one day the hardware register change to u16.u16,
+ * on some platform. (Or any other fixed point representation.)
+ *
+ * Example:
+ * File <attr> contains the value 2.5, represented as u8.8 0x0280, which
+ * is comprised of:
+ * - an integer part of 2
+ * - a fractional part of 0x80 (representing 0x80 / 2^8 == 0x80 / 256).
+ * File <attr>.scale contains a string representation of floating point
+ * value 0.00390625 (which is (1 / 256)).
+ * Userspace computes the actual value:
+ *   0x0280 * 0.00390625 -> 2.5
+ * or converts an actual value to the value to be written into <attr>:
+ *   2.5 / 0.00390625 -> 0x0280
+ */
+
+#define U8_8_VAL_MASK           0xffff
+#define U8_8_SCALE_TO_VALUE     "0.00390625"
+
+static ssize_t freq_factor_scale_show(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buff)
+{
+	return sysfs_emit(buff, "%s\n", U8_8_SCALE_TO_VALUE);
+}
+
+static u32 media_ratio_mode_to_factor(u32 mode)
+{
+	/* 0 -> 0, 1 -> 256, 2 -> 128 */
+	return !mode ? mode : 256 / mode;
+}
+
+static ssize_t media_freq_factor_show(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buff)
+{
+	struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+	struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+	intel_wakeref_t wakeref;
+	u32 mode;
+
+	/*
+	 * Retrieve media_ratio_mode from GEN6_RPNSWREQ bit 13 set by
+	 * GuC. GEN6_RPNSWREQ:13 value 0 represents 1:2 and 1 represents 1:1
+	 */
+	if (IS_XEHPSDV(gt->i915) &&
+	    slpc->media_ratio_mode == SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL) {
+		/*
+		 * For XEHPSDV dynamic mode GEN6_RPNSWREQ:13 does not contain
+		 * the media_ratio_mode, just return the cached media ratio
+		 */
+		mode = slpc->media_ratio_mode;
+	} else {
+		with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+			mode = intel_uncore_read(gt->uncore, GEN6_RPNSWREQ);
+		mode = REG_FIELD_GET(GEN12_MEDIA_FREQ_RATIO, mode) ?
+			SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_ONE :
+			SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO;
+	}
+
+	return sysfs_emit(buff, "%u\n", media_ratio_mode_to_factor(mode));
+}
+
+static ssize_t media_freq_factor_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buff, size_t count)
+{
+	struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+	struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+	u32 factor, mode;
+	int err;
+
+	err = kstrtou32(buff, 0, &factor);
+	if (err)
+		return err;
+
+	for (mode = SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL;
+	     mode <= SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO; mode++)
+		if (factor == media_ratio_mode_to_factor(mode))
+			break;
+
+	if (mode > SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO)
+		return -EINVAL;
+
+	err = intel_guc_slpc_set_media_ratio_mode(slpc, mode);
+	if (!err) {
+		slpc->media_ratio_mode = mode;
+		DRM_DEBUG("Set slpc->media_ratio_mode to %d", mode);
+	}
+	return err ?: count;
+}
+
+static ssize_t media_RP0_freq_mhz_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buff)
+{
+	struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+	u32 val;
+	int err;
+
+	err = snb_pcode_read_p(gt->uncore, XEHP_PCODE_FREQUENCY_CONFIG,
+			       PCODE_MBOX_FC_SC_READ_FUSED_P0,
+			       PCODE_MBOX_DOMAIN_MEDIAFF, &val);
+
+	if (err)
+		return err;
+
+	/* Fused media RP0 read from pcode is in units of 50 MHz */
+	val *= GT_FREQUENCY_MULTIPLIER;
+
+	return sysfs_emit(buff, "%u\n", val);
+}
+
+static ssize_t media_RPn_freq_mhz_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buff)
+{
+	struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+	u32 val;
+	int err;
+
+	err = snb_pcode_read_p(gt->uncore, XEHP_PCODE_FREQUENCY_CONFIG,
+			       PCODE_MBOX_FC_SC_READ_FUSED_PN,
+			       PCODE_MBOX_DOMAIN_MEDIAFF, &val);
+
+	if (err)
+		return err;
+
+	/* Fused media RPn read from pcode is in units of 50 MHz */
+	val *= GT_FREQUENCY_MULTIPLIER;
+
+	return sysfs_emit(buff, "%u\n", val);
+}
+
+static DEVICE_ATTR_RW(media_freq_factor);
+static struct device_attribute dev_attr_media_freq_factor_scale =
+	__ATTR(media_freq_factor.scale, 0444, freq_factor_scale_show, NULL);
+static DEVICE_ATTR_RO(media_RP0_freq_mhz);
+static DEVICE_ATTR_RO(media_RPn_freq_mhz);
+
+static const struct attribute *media_perf_power_attrs[] = {
+	&dev_attr_media_freq_factor.attr,
+	&dev_attr_media_freq_factor_scale.attr,
+	&dev_attr_media_RP0_freq_mhz.attr,
+	&dev_attr_media_RPn_freq_mhz.attr,
+	NULL
+};
+
 static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj,
 				const struct attribute * const *attrs)
 {
@@ -599,4 +768,12 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj)
 		drm_warn(&gt->i915->drm,
 			 "failed to create gt%u throttle sysfs files (%pe)",
 			 gt->info.id, ERR_PTR(ret));
+
+	if (HAS_MEDIA_RATIO_MODE(gt->i915) && intel_uc_uses_guc_slpc(&gt->uc)) {
+		ret = sysfs_create_files(kobj, media_perf_power_attrs);
+		if (ret)
+			drm_warn(&gt->i915->drm,
+				 "failed to create gt%u media_perf_power_attrs sysfs (%pe)\n",
+				 gt->info.id, ERR_PTR(ret));
+	}
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index b06611c1d4ad..df708802889d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -59,6 +59,13 @@ enum intel_steering_type {
 	MSLICE,
 	LNCF,
 
+	/*
+	 * On some platforms there are multiple types of MCR registers that
+	 * will always return a non-terminated value at instance (0, 0).  We'll
+	 * lump those all into a single category to keep things simple.
+	 */
+	INSTANCE0,
+
 	NUM_STEERING_TYPES
 };
 
@@ -221,9 +228,13 @@ struct intel_gt {
 
 	struct {
 		u8 uc_index;
+		u8 wb_index; /* Only used on HAS_L3_CCS_READ() platforms */
 	} mocs;
 
 	struct intel_pxp pxp;
+
+	/* gt/gtN sysfs */
+	struct kobject sysfs_gt;
 };
 
 enum intel_gt_scratch_field {
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index a40d928b3888..e639434e97fd 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -306,6 +306,15 @@ struct i915_address_space {
 			       struct i915_vma_resource *vma_res,
 			       enum i915_cache_level cache_level,
 			       u32 flags);
+	void (*raw_insert_page)(struct i915_address_space *vm,
+				dma_addr_t addr,
+				u64 offset,
+				enum i915_cache_level cache_level,
+				u32 flags);
+	void (*raw_insert_entries)(struct i915_address_space *vm,
+				   struct i915_vma_resource *vma_res,
+				   enum i915_cache_level cache_level,
+				   u32 flags);
 	void (*cleanup)(struct i915_address_space *vm);
 
 	void (*foreach)(struct i915_address_space *vm,
@@ -345,6 +354,19 @@ struct i915_ggtt {
 
 	bool do_idle_maps;
 
+	/**
+	 * @pte_lost: Are ptes lost on resume?
+	 *
+	 * Whether the system was recently restored from hibernate and
+	 * thus may have lost pte content.
+	 */
+	bool pte_lost;
+
+	/**
+	 * @probed_pte: Probed pte value on suspend. Re-checked on resume.
+	 */
+	u64 probed_pte;
+
 	int mtrr;
 
 	/** Bit 6 swizzling required for X tiling */
@@ -548,14 +570,13 @@ i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
 
 void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt,
 		unsigned long lmem_pt_obj_flags);
-
 void intel_ggtt_bind_vma(struct i915_address_space *vm,
-			  struct i915_vm_pt_stash *stash,
-			  struct i915_vma_resource *vma_res,
-			  enum i915_cache_level cache_level,
-			  u32 flags);
+			 struct i915_vm_pt_stash *stash,
+			 struct i915_vma_resource *vma_res,
+			 enum i915_cache_level cache_level,
+			 u32 flags);
 void intel_ggtt_unbind_vma(struct i915_address_space *vm,
-			    struct i915_vma_resource *vma_res);
+			   struct i915_vma_resource *vma_res);
 
 int i915_ggtt_probe_hw(struct drm_i915_private *i915);
 int i915_ggtt_init_hw(struct drm_i915_private *i915);
@@ -581,6 +602,17 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm);
 void i915_ggtt_suspend(struct i915_ggtt *gtt);
 void i915_ggtt_resume(struct i915_ggtt *ggtt);
 
+/**
+ * i915_ggtt_mark_pte_lost - Mark ggtt ptes as lost or clear such a marking
+ * @i915 The device private.
+ * @val whether the ptes should be marked as lost.
+ *
+ * In some cases pte content is retained across suspend, but typically lost
+ * across hibernate. Typically they should be marked as lost on
+ * hibernation restore and such marking cleared on suspend.
+ */
+void i915_ggtt_mark_pte_lost(struct drm_i915_private *i915, bool val);
+
 void
 fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count);
 
@@ -627,7 +659,6 @@ release_pd_entry(struct i915_page_directory * const pd,
 		 struct i915_page_table * const pt,
 		 const struct drm_i915_gem_object * const scratch);
 void gen6_ggtt_invalidate(struct i915_ggtt *ggtt);
-void gen8_ggtt_invalidate(struct i915_ggtt *ggtt);
 
 void ppgtt_bind_vma(struct i915_address_space *vm,
 		    struct i915_vm_pt_stash *stash,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 31be734010db..a390f0813c8b 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -111,16 +111,6 @@ enum {
 #define XEHP_SW_COUNTER_SHIFT			58
 #define XEHP_SW_COUNTER_WIDTH			6
 
-static inline u32 lrc_desc_priority(int prio)
-{
-	if (prio > I915_PRIORITY_NORMAL)
-		return GEN12_CTX_PRIORITY_HIGH;
-	else if (prio < I915_PRIORITY_NORMAL)
-		return GEN12_CTX_PRIORITY_LOW;
-	else
-		return GEN12_CTX_PRIORITY_NORMAL;
-}
-
 static inline void lrc_runtime_start(struct intel_context *ce)
 {
 	struct intel_context_stats *stats = &ce->stats;
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index c4c37585ae8c..c6ebe2781076 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -23,6 +23,7 @@ struct drm_i915_mocs_table {
 	unsigned int n_entries;
 	const struct drm_i915_mocs_entry *table;
 	u8 uc_index;
+	u8 wb_index; /* Only used on HAS_L3_CCS_READ() platforms */
 	u8 unused_entries_index;
 };
 
@@ -47,6 +48,7 @@ struct drm_i915_mocs_table {
 
 /* Helper defines */
 #define GEN9_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
+#define PVC_NUM_MOCS_ENTRIES	3
 
 /* (e)LLC caching options */
 /*
@@ -394,6 +396,17 @@ static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = {
 	MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
 };
 
+static const struct drm_i915_mocs_entry pvc_mocs_table[] = {
+	/* Error */
+	MOCS_ENTRY(0, 0, L3_3_WB),
+
+	/* UC */
+	MOCS_ENTRY(1, 0, L3_1_UC),
+
+	/* WB */
+	MOCS_ENTRY(2, 0, L3_3_WB),
+};
+
 enum {
 	HAS_GLOBAL_MOCS = BIT(0),
 	HAS_ENGINE_MOCS = BIT(1),
@@ -423,7 +436,14 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
 	memset(table, 0, sizeof(struct drm_i915_mocs_table));
 
 	table->unused_entries_index = I915_MOCS_PTE;
-	if (IS_DG2(i915)) {
+	if (IS_PONTEVECCHIO(i915)) {
+		table->size = ARRAY_SIZE(pvc_mocs_table);
+		table->table = pvc_mocs_table;
+		table->n_entries = PVC_NUM_MOCS_ENTRIES;
+		table->uc_index = 1;
+		table->wb_index = 2;
+		table->unused_entries_index = 2;
+	} else if (IS_DG2(i915)) {
 		if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
 			table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax);
 			table->table = dg2_mocs_table_g10_ax;
@@ -622,6 +642,8 @@ void intel_set_mocs_index(struct intel_gt *gt)
 
 	get_mocs_settings(gt->i915, &table);
 	gt->mocs.uc_index = table.uc_index;
+	if (HAS_L3_CCS_READ(gt->i915))
+		gt->mocs.wb_index = table.wb_index;
 }
 
 void intel_mocs_init(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index f5111c0a0060..d09b996a9759 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -12,6 +12,7 @@
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_ttm.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_mcr.h"
 #include "gt/intel_gt_regs.h"
 
 static int
@@ -101,14 +102,24 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
 		return ERR_PTR(-ENODEV);
 
 	if (HAS_FLAT_CCS(i915)) {
+		resource_size_t lmem_range;
 		u64 tile_stolen, flat_ccs_base;
 
-		lmem_size = pci_resource_len(pdev, 2);
-		flat_ccs_base = intel_gt_read_register(gt, XEHPSDV_FLAT_CCS_BASE_ADDR);
-		flat_ccs_base = (flat_ccs_base >> XEHPSDV_CCS_BASE_SHIFT) * SZ_64K;
+		lmem_range = intel_gt_mcr_read_any(&i915->gt0, XEHP_TILE0_ADDR_RANGE) & 0xFFFF;
+		lmem_size = lmem_range >> XEHP_TILE_LMEM_RANGE_SHIFT;
+		lmem_size *= SZ_1G;
+
+		flat_ccs_base = intel_gt_mcr_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
+		flat_ccs_base = (flat_ccs_base >> XEHP_CCS_BASE_SHIFT) * SZ_64K;
+
+		/* FIXME: Remove this when we have small-bar enabled */
+		if (pci_resource_len(pdev, 2) < lmem_size) {
+			drm_err(&i915->drm, "System requires small-BAR support, which is currently unsupported on this kernel\n");
+			return ERR_PTR(-EINVAL);
+		}
 
 		if (GEM_WARN_ON(lmem_size < flat_ccs_base))
-			return ERR_PTR(-ENODEV);
+			return ERR_PTR(-EIO);
 
 		tile_stolen = lmem_size - flat_ccs_base;
 
@@ -131,7 +142,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
 	io_start = pci_resource_start(pdev, 2);
 	io_size = min(pci_resource_len(pdev, 2), lmem_size);
 	if (!io_size)
-		return ERR_PTR(-ENODEV);
+		return ERR_PTR(-EIO);
 
 	min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
 						I915_GTT_PAGE_SIZE_4K;
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 5423bfd301ad..d5d6f1fadcae 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -117,7 +117,9 @@ static void flush_cs_tlb(struct intel_engine_cs *engine)
 		return;
 
 	/* ring should be idle before issuing a sync flush*/
-	GEM_DEBUG_WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
+	if ((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0)
+		drm_warn(&engine->i915->drm, "%s not idle before sync flush!\n",
+			 engine->name);
 
 	ENGINE_WRITE_FW(engine, RING_INSTPM,
 			_MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
@@ -596,8 +598,9 @@ static void ring_context_reset(struct intel_context *ce)
 	clear_bit(CONTEXT_VALID_BIT, &ce->flags);
 }
 
-static void ring_context_ban(struct intel_context *ce,
-			     struct i915_request *rq)
+static void ring_context_revoke(struct intel_context *ce,
+				struct i915_request *rq,
+				unsigned int preempt_timeout_ms)
 {
 	struct intel_engine_cs *engine;
 
@@ -632,7 +635,7 @@ static const struct intel_context_ops ring_context_ops = {
 
 	.cancel_request = ring_context_cancel_request,
 
-	.ban = ring_context_ban,
+	.revoke = ring_context_revoke,
 
 	.pre_pin = ring_context_pre_pin,
 	.pin = ring_context_pin,
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 9b991df2cfbb..fb3f57ee450b 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1075,7 +1075,9 @@ static u32 intel_rps_read_state_cap(struct intel_rps *rps)
 	struct drm_i915_private *i915 = rps_to_i915(rps);
 	struct intel_uncore *uncore = rps_to_uncore(rps);
 
-	if (IS_XEHPSDV(i915))
+	if (IS_PONTEVECCHIO(i915))
+		return intel_uncore_read(uncore, PVC_RP_STATE_CAP);
+	else if (IS_XEHPSDV(i915))
 		return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP);
 	else if (IS_GEN9_LP(i915))
 		return intel_uncore_read(uncore, BXT_RP_STATE_CAP);
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index fdd25691beda..c6d3050604c8 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -16,11 +16,6 @@ void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
 	sseu->max_slices = max_slices;
 	sseu->max_subslices = max_subslices;
 	sseu->max_eus_per_subslice = max_eus_per_subslice;
-
-	sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
-	GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
-	sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
-	GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
 }
 
 unsigned int
@@ -28,152 +23,240 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
 {
 	unsigned int i, total = 0;
 
-	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
-		total += hweight8(sseu->subslice_mask[i]);
+	if (sseu->has_xehp_dss)
+		return bitmap_weight(sseu->subslice_mask.xehp,
+				     XEHP_BITMAP_BITS(sseu->subslice_mask));
+
+	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask.hsw); i++)
+		total += hweight8(sseu->subslice_mask.hsw[i]);
 
 	return total;
 }
 
-static u32
-sseu_get_subslices(const struct sseu_dev_info *sseu,
-		   const u8 *subslice_mask, u8 slice)
+unsigned int
+intel_sseu_get_hsw_subslices(const struct sseu_dev_info *sseu, u8 slice)
 {
-	int i, offset = slice * sseu->ss_stride;
-	u32 mask = 0;
-
-	GEM_BUG_ON(slice >= sseu->max_slices);
-
-	for (i = 0; i < sseu->ss_stride; i++)
-		mask |= (u32)subslice_mask[offset + i] << i * BITS_PER_BYTE;
+	WARN_ON(sseu->has_xehp_dss);
+	if (WARN_ON(slice >= sseu->max_slices))
+		return 0;
 
-	return mask;
+	return sseu->subslice_mask.hsw[slice];
 }
 
-u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
+static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
+			int subslice)
 {
-	return sseu_get_subslices(sseu, sseu->subslice_mask, slice);
+	if (sseu->has_xehp_dss) {
+		WARN_ON(slice > 0);
+		return sseu->eu_mask.xehp[subslice];
+	} else {
+		return sseu->eu_mask.hsw[slice][subslice];
+	}
 }
 
-static u32 sseu_get_geometry_subslices(const struct sseu_dev_info *sseu)
+static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
+			 u16 eu_mask)
 {
-	return sseu_get_subslices(sseu, sseu->geometry_subslice_mask, 0);
+	GEM_WARN_ON(eu_mask && __fls(eu_mask) >= sseu->max_eus_per_subslice);
+	if (sseu->has_xehp_dss) {
+		GEM_WARN_ON(slice > 0);
+		sseu->eu_mask.xehp[subslice] = eu_mask;
+	} else {
+		sseu->eu_mask.hsw[slice][subslice] = eu_mask;
+	}
 }
 
-u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu)
+static u16 compute_eu_total(const struct sseu_dev_info *sseu)
 {
-	return sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0);
-}
+	int s, ss, total = 0;
 
-void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
-			      u8 *subslice_mask, u32 ss_mask)
-{
-	int offset = slice * sseu->ss_stride;
+	for (s = 0; s < sseu->max_slices; s++)
+		for (ss = 0; ss < sseu->max_subslices; ss++)
+			if (sseu->has_xehp_dss)
+				total += hweight16(sseu->eu_mask.xehp[ss]);
+			else
+				total += hweight16(sseu->eu_mask.hsw[s][ss]);
 
-	memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride);
+	return total;
 }
 
-unsigned int
-intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
+/**
+ * intel_sseu_copy_eumask_to_user - Copy EU mask into a userspace buffer
+ * @to: Pointer to userspace buffer to copy to
+ * @sseu: SSEU structure containing EU mask to copy
+ *
+ * Copies the EU mask to a userspace buffer in the format expected by
+ * the query ioctl's topology queries.
+ *
+ * Returns the result of the copy_to_user() operation.
+ */
+int intel_sseu_copy_eumask_to_user(void __user *to,
+				   const struct sseu_dev_info *sseu)
 {
-	return hweight32(intel_sseu_get_subslices(sseu, slice));
-}
+	u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE] = {};
+	int eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
+	int len = sseu->max_slices * sseu->max_subslices * eu_stride;
+	int s, ss, i;
 
-static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
-		       int subslice)
-{
-	int slice_stride = sseu->max_subslices * sseu->eu_stride;
+	for (s = 0; s < sseu->max_slices; s++) {
+		for (ss = 0; ss < sseu->max_subslices; ss++) {
+			int uapi_offset =
+				s * sseu->max_subslices * eu_stride +
+				ss * eu_stride;
+			u16 mask = sseu_get_eus(sseu, s, ss);
+
+			for (i = 0; i < eu_stride; i++)
+				eu_mask[uapi_offset + i] =
+					(mask >> (BITS_PER_BYTE * i)) & 0xff;
+		}
+	}
 
-	return slice * slice_stride + subslice * sseu->eu_stride;
+	return copy_to_user(to, eu_mask, len);
 }
 
-static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
-			int subslice)
+/**
+ * intel_sseu_copy_ssmask_to_user - Copy subslice mask into a userspace buffer
+ * @to: Pointer to userspace buffer to copy to
+ * @sseu: SSEU structure containing subslice mask to copy
+ *
+ * Copies the subslice mask to a userspace buffer in the format expected by
+ * the query ioctl's topology queries.
+ *
+ * Returns the result of the copy_to_user() operation.
+ */
+int intel_sseu_copy_ssmask_to_user(void __user *to,
+				   const struct sseu_dev_info *sseu)
 {
-	int i, offset = sseu_eu_idx(sseu, slice, subslice);
-	u16 eu_mask = 0;
+	u8 ss_mask[GEN_SS_MASK_SIZE] = {};
+	int ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
+	int len = sseu->max_slices * ss_stride;
+	int s, ss, i;
 
-	for (i = 0; i < sseu->eu_stride; i++)
-		eu_mask |=
-			((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);
+	for (s = 0; s < sseu->max_slices; s++) {
+		for (ss = 0; ss < sseu->max_subslices; ss++) {
+			i = s * ss_stride * BITS_PER_BYTE + ss;
 
-	return eu_mask;
-}
+			if (!intel_sseu_has_subslice(sseu, s, ss))
+				continue;
 
-static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
-			 u16 eu_mask)
-{
-	int i, offset = sseu_eu_idx(sseu, slice, subslice);
+			ss_mask[i / BITS_PER_BYTE] |= BIT(i % BITS_PER_BYTE);
+		}
+	}
 
-	for (i = 0; i < sseu->eu_stride; i++)
-		sseu->eu_mask[offset + i] =
-			(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
+	return copy_to_user(to, ss_mask, len);
 }
 
-static u16 compute_eu_total(const struct sseu_dev_info *sseu)
+static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
+				    u32 ss_en, u16 eu_en)
 {
-	u16 i, total = 0;
+	u32 valid_ss_mask = GENMASK(sseu->max_subslices - 1, 0);
+	int ss;
 
-	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
-		total += hweight8(sseu->eu_mask[i]);
+	sseu->slice_mask |= BIT(0);
+	sseu->subslice_mask.hsw[0] = ss_en & valid_ss_mask;
 
-	return total;
+	for (ss = 0; ss < sseu->max_subslices; ss++)
+		if (intel_sseu_has_subslice(sseu, 0, ss))
+			sseu_set_eus(sseu, 0, ss, eu_en);
+
+	sseu->eu_per_subslice = hweight16(eu_en);
+	sseu->eu_total = compute_eu_total(sseu);
 }
 
-static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
+static void xehp_compute_sseu_info(struct sseu_dev_info *sseu,
+				   u16 eu_en)
 {
-	u32 ss_mask;
+	int ss;
 
-	ss_mask = ss_en >> (s * sseu->max_subslices);
-	ss_mask &= GENMASK(sseu->max_subslices - 1, 0);
+	sseu->slice_mask |= BIT(0);
 
-	return ss_mask;
+	bitmap_or(sseu->subslice_mask.xehp,
+		  sseu->compute_subslice_mask.xehp,
+		  sseu->geometry_subslice_mask.xehp,
+		  XEHP_BITMAP_BITS(sseu->subslice_mask));
+
+	for (ss = 0; ss < sseu->max_subslices; ss++)
+		if (intel_sseu_has_subslice(sseu, 0, ss))
+			sseu_set_eus(sseu, 0, ss, eu_en);
+
+	sseu->eu_per_subslice = hweight16(eu_en);
+	sseu->eu_total = compute_eu_total(sseu);
 }
 
-static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
-				    u32 g_ss_en, u32 c_ss_en, u16 eu_en)
+static void
+xehp_load_dss_mask(struct intel_uncore *uncore,
+		   intel_sseu_ss_mask_t *ssmask,
+		   int numregs,
+		   ...)
 {
-	int s, ss;
+	va_list argp;
+	u32 fuse_val[I915_MAX_SS_FUSE_REGS] = {};
+	int i;
 
-	/* g_ss_en/c_ss_en represent entire subslice mask across all slices */
-	GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
-		   sizeof(g_ss_en) * BITS_PER_BYTE);
+	if (WARN_ON(numregs > I915_MAX_SS_FUSE_REGS))
+		numregs = I915_MAX_SS_FUSE_REGS;
 
-	for (s = 0; s < sseu->max_slices; s++) {
-		if ((s_en & BIT(s)) == 0)
-			continue;
+	va_start(argp, numregs);
+	for (i = 0; i < numregs; i++)
+		fuse_val[i] = intel_uncore_read(uncore, va_arg(argp, i915_reg_t));
+	va_end(argp);
 
-		sseu->slice_mask |= BIT(s);
-
-		/*
-		 * XeHP introduces the concept of compute vs geometry DSS. To
-		 * reduce variation between GENs around subslice usage, store a
-		 * mask for both the geometry and compute enabled masks since
-		 * userspace will need to be able to query these masks
-		 * independently.  Also compute a total enabled subslice count
-		 * for the purposes of selecting subslices to use in a
-		 * particular GEM context.
-		 */
-		intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask,
-					 get_ss_stride_mask(sseu, s, c_ss_en));
-		intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask,
-					 get_ss_stride_mask(sseu, s, g_ss_en));
-		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
-					 get_ss_stride_mask(sseu, s,
-							    g_ss_en | c_ss_en));
+	bitmap_from_arr32(ssmask->xehp, fuse_val, numregs * 32);
+}
 
-		for (ss = 0; ss < sseu->max_subslices; ss++)
-			if (intel_sseu_has_subslice(sseu, s, ss))
-				sseu_set_eus(sseu, s, ss, eu_en);
+static void xehp_sseu_info_init(struct intel_gt *gt)
+{
+	struct sseu_dev_info *sseu = &gt->info.sseu;
+	struct intel_uncore *uncore = gt->uncore;
+	u16 eu_en = 0;
+	u8 eu_en_fuse;
+	int num_compute_regs, num_geometry_regs;
+	int eu;
+
+	if (IS_PONTEVECCHIO(gt->i915)) {
+		num_geometry_regs = 0;
+		num_compute_regs = 2;
+	} else {
+		num_geometry_regs = 1;
+		num_compute_regs = 1;
 	}
-	sseu->eu_per_subslice = hweight16(eu_en);
-	sseu->eu_total = compute_eu_total(sseu);
+
+	/*
+	 * The concept of slice has been removed in Xe_HP.  To be compatible
+	 * with prior generations, assume a single slice across the entire
+	 * device. Then calculate out the DSS for each workload type within
+	 * that software slice.
+	 */
+	intel_sseu_set_info(sseu, 1,
+			    32 * max(num_geometry_regs, num_compute_regs),
+			    HAS_ONE_EU_PER_FUSE_BIT(gt->i915) ? 8 : 16);
+	sseu->has_xehp_dss = 1;
+
+	xehp_load_dss_mask(uncore, &sseu->geometry_subslice_mask,
+			   num_geometry_regs,
+			   GEN12_GT_GEOMETRY_DSS_ENABLE);
+	xehp_load_dss_mask(uncore, &sseu->compute_subslice_mask,
+			   num_compute_regs,
+			   GEN12_GT_COMPUTE_DSS_ENABLE,
+			   XEHPC_GT_COMPUTE_DSS_ENABLE_EXT);
+
+	eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK;
+
+	if (HAS_ONE_EU_PER_FUSE_BIT(gt->i915))
+		eu_en = eu_en_fuse;
+	else
+		for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
+			if (eu_en_fuse & BIT(eu))
+				eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
+
+	xehp_compute_sseu_info(sseu, eu_en);
 }
 
 static void gen12_sseu_info_init(struct intel_gt *gt)
 {
 	struct sseu_dev_info *sseu = &gt->info.sseu;
 	struct intel_uncore *uncore = gt->uncore;
-	u32 g_dss_en, c_dss_en = 0;
+	u32 g_dss_en;
 	u16 eu_en = 0;
 	u8 eu_en_fuse;
 	u8 s_en;
@@ -183,43 +266,28 @@ static void gen12_sseu_info_init(struct intel_gt *gt)
 	 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
 	 * Instead of splitting these, provide userspace with an array
 	 * of DSS to more closely represent the hardware resource.
-	 *
-	 * In addition, the concept of slice has been removed in Xe_HP.
-	 * To be compatible with prior generations, assume a single slice
-	 * across the entire device. Then calculate out the DSS for each
-	 * workload type within that software slice.
 	 */
-	if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915))
-		intel_sseu_set_info(sseu, 1, 32, 16);
-	else
-		intel_sseu_set_info(sseu, 1, 6, 16);
+	intel_sseu_set_info(sseu, 1, 6, 16);
 
 	/*
-	 * As mentioned above, Xe_HP does not have the concept of a slice.
-	 * Enable one for software backwards compatibility.
+	 * Although gen12 architecture supported multiple slices, TGL, RKL,
+	 * DG1, and ADL only had a single slice.
 	 */
-	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
-		s_en = 0x1;
-	else
-		s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
-		       GEN11_GT_S_ENA_MASK;
+	s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
+		GEN11_GT_S_ENA_MASK;
+	drm_WARN_ON(&gt->i915->drm, s_en != 0x1);
 
 	g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
-	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
-		c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE);
 
 	/* one bit per pair of EUs */
-	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
-		eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK;
-	else
-		eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
-			       GEN11_EU_DIS_MASK);
+	eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
+		       GEN11_EU_DIS_MASK);
 
 	for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
 		if (eu_en_fuse & BIT(eu))
 			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
 
-	gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en);
+	gen11_compute_sseu_info(sseu, g_dss_en, eu_en);
 
 	/* TGL only supports slice-level power gating */
 	sseu->has_slice_pg = 1;
@@ -238,14 +306,20 @@ static void gen11_sseu_info_init(struct intel_gt *gt)
 	else
 		intel_sseu_set_info(sseu, 1, 8, 8);
 
+	/*
+	 * Although gen11 architecture supported multiple slices, ICL and
+	 * EHL/JSL only had a single slice in practice.
+	 */
 	s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
 		GEN11_GT_S_ENA_MASK;
+	drm_WARN_ON(&gt->i915->drm, s_en != 0x1);
+
 	ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE);
 
 	eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
 		  GEN11_EU_DIS_MASK);
 
-	gen11_compute_sseu_info(sseu, s_en, ss_en, 0, eu_en);
+	gen11_compute_sseu_info(sseu, ss_en, eu_en);
 
 	/* ICL has no power gating restrictions. */
 	sseu->has_slice_pg = 1;
@@ -257,7 +331,6 @@ static void cherryview_sseu_info_init(struct intel_gt *gt)
 {
 	struct sseu_dev_info *sseu = &gt->info.sseu;
 	u32 fuse;
-	u8 subslice_mask = 0;
 
 	fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT);
 
@@ -271,8 +344,8 @@ static void cherryview_sseu_info_init(struct intel_gt *gt)
 			(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
 			  CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
 
-		subslice_mask |= BIT(0);
-		sseu_set_eus(sseu, 0, 0, ~disabled_mask);
+		sseu->subslice_mask.hsw[0] |= BIT(0);
+		sseu_set_eus(sseu, 0, 0, ~disabled_mask & 0xFF);
 	}
 
 	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
@@ -282,12 +355,10 @@ static void cherryview_sseu_info_init(struct intel_gt *gt)
 			(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
 			  CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);
 
-		subslice_mask |= BIT(1);
-		sseu_set_eus(sseu, 0, 1, ~disabled_mask);
+		sseu->subslice_mask.hsw[0] |= BIT(1);
+		sseu_set_eus(sseu, 0, 1, ~disabled_mask & 0xFF);
 	}
 
-	intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask);
-
 	sseu->eu_total = compute_eu_total(sseu);
 
 	/*
@@ -342,8 +413,7 @@ static void gen9_sseu_info_init(struct intel_gt *gt)
 			/* skip disabled slice */
 			continue;
 
-		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
-					 subslice_mask);
+		sseu->subslice_mask.hsw[s] = subslice_mask;
 
 		eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
 		for (ss = 0; ss < sseu->max_subslices; ss++) {
@@ -356,7 +426,7 @@ static void gen9_sseu_info_init(struct intel_gt *gt)
 
 			eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask;
 
-			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
+			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask & eu_mask);
 
 			eu_per_ss = sseu->max_eus_per_subslice -
 				hweight8(eu_disabled_mask);
@@ -400,8 +470,8 @@ static void gen9_sseu_info_init(struct intel_gt *gt)
 	sseu->has_eu_pg = sseu->eu_per_subslice > 2;
 
 	if (IS_GEN9_LP(i915)) {
-#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask[0] & BIT(ss)))
-		info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;
+#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask.hsw[0] & BIT(ss)))
+		info->has_pooled_eu = hweight8(sseu->subslice_mask.hsw[0]) == 3;
 
 		sseu->min_eu_in_pool = 0;
 		if (info->has_pooled_eu) {
@@ -455,8 +525,7 @@ static void bdw_sseu_info_init(struct intel_gt *gt)
 			/* skip disabled slice */
 			continue;
 
-		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
-					 subslice_mask);
+		sseu->subslice_mask.hsw[s] = subslice_mask;
 
 		for (ss = 0; ss < sseu->max_subslices; ss++) {
 			u8 eu_disabled_mask;
@@ -469,7 +538,7 @@ static void bdw_sseu_info_init(struct intel_gt *gt)
 			eu_disabled_mask =
 				eu_disable[s] >> (ss * sseu->max_eus_per_subslice);
 
-			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
+			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask & 0xFF);
 
 			n_disabled = hweight8(eu_disabled_mask);
 
@@ -553,8 +622,7 @@ static void hsw_sseu_info_init(struct intel_gt *gt)
 			    sseu->eu_per_subslice);
 
 	for (s = 0; s < sseu->max_slices; s++) {
-		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
-					 subslice_mask);
+		sseu->subslice_mask.hsw[s] = subslice_mask;
 
 		for (ss = 0; ss < sseu->max_subslices; ss++) {
 			sseu_set_eus(sseu, s, ss,
@@ -574,18 +642,20 @@ void intel_sseu_info_init(struct intel_gt *gt)
 {
 	struct drm_i915_private *i915 = gt->i915;
 
-	if (IS_HASWELL(i915))
-		hsw_sseu_info_init(gt);
-	else if (IS_CHERRYVIEW(i915))
-		cherryview_sseu_info_init(gt);
-	else if (IS_BROADWELL(i915))
-		bdw_sseu_info_init(gt);
-	else if (GRAPHICS_VER(i915) == 9)
-		gen9_sseu_info_init(gt);
-	else if (GRAPHICS_VER(i915) == 11)
-		gen11_sseu_info_init(gt);
+	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+		xehp_sseu_info_init(gt);
 	else if (GRAPHICS_VER(i915) >= 12)
 		gen12_sseu_info_init(gt);
+	else if (GRAPHICS_VER(i915) >= 11)
+		gen11_sseu_info_init(gt);
+	else if (GRAPHICS_VER(i915) >= 9)
+		gen9_sseu_info_init(gt);
+	else if (IS_BROADWELL(i915))
+		bdw_sseu_info_init(gt);
+	else if (IS_CHERRYVIEW(i915))
+		cherryview_sseu_info_init(gt);
+	else if (IS_HASWELL(i915))
+		hsw_sseu_info_init(gt);
 }
 
 u32 intel_sseu_make_rpcs(struct intel_gt *gt,
@@ -641,7 +711,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
 	 */
 	if (GRAPHICS_VER(i915) == 11 &&
 	    slices == 1 &&
-	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
+	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask.hsw[0]) / 2)) {
 		GEM_BUG_ON(subslices & 1);
 
 		subslice_pg = false;
@@ -707,14 +777,29 @@ void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
 {
 	int s;
 
-	drm_printf(p, "slice total: %u, mask=%04x\n",
-		   hweight8(sseu->slice_mask), sseu->slice_mask);
-	drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
-	for (s = 0; s < sseu->max_slices; s++) {
-		drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
-			   s, intel_sseu_subslices_per_slice(sseu, s),
-			   intel_sseu_get_subslices(sseu, s));
+	if (sseu->has_xehp_dss) {
+		drm_printf(p, "subslice total: %u\n",
+			   intel_sseu_subslice_total(sseu));
+		drm_printf(p, "geometry dss mask=%*pb\n",
+			   XEHP_BITMAP_BITS(sseu->geometry_subslice_mask),
+			   sseu->geometry_subslice_mask.xehp);
+		drm_printf(p, "compute dss mask=%*pb\n",
+			   XEHP_BITMAP_BITS(sseu->compute_subslice_mask),
+			   sseu->compute_subslice_mask.xehp);
+	} else {
+		drm_printf(p, "slice total: %u, mask=%04x\n",
+			   hweight8(sseu->slice_mask), sseu->slice_mask);
+		drm_printf(p, "subslice total: %u\n",
+			   intel_sseu_subslice_total(sseu));
+
+		for (s = 0; s < sseu->max_slices; s++) {
+			u8 ss_mask = sseu->subslice_mask.hsw[s];
+
+			drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
+				   s, hweight8(ss_mask), ss_mask);
+		}
 	}
+
 	drm_printf(p, "EU total: %u\n", sseu->eu_total);
 	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
 	drm_printf(p, "has slice power gating: %s\n",
@@ -731,9 +816,10 @@ static void sseu_print_hsw_topology(const struct sseu_dev_info *sseu,
 	int s, ss;
 
 	for (s = 0; s < sseu->max_slices; s++) {
+		u8 ss_mask = sseu->subslice_mask.hsw[s];
+
 		drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
-			   s, intel_sseu_subslices_per_slice(sseu, s),
-			   intel_sseu_get_subslices(sseu, s));
+			   s, hweight8(ss_mask), ss_mask);
 
 		for (ss = 0; ss < sseu->max_subslices; ss++) {
 			u16 enabled_eus = sseu_get_eus(sseu, s, ss);
@@ -747,16 +833,14 @@ static void sseu_print_hsw_topology(const struct sseu_dev_info *sseu,
 static void sseu_print_xehp_topology(const struct sseu_dev_info *sseu,
 				     struct drm_printer *p)
 {
-	u32 g_dss_mask = sseu_get_geometry_subslices(sseu);
-	u32 c_dss_mask = intel_sseu_get_compute_subslices(sseu);
 	int dss;
 
 	for (dss = 0; dss < sseu->max_subslices; dss++) {
 		u16 enabled_eus = sseu_get_eus(sseu, 0, dss);
 
 		drm_printf(p, "DSS_%02d: G:%3s C:%3s, %2u EUs (0x%04hx)\n", dss,
-			   str_yes_no(g_dss_mask & BIT(dss)),
-			   str_yes_no(c_dss_mask & BIT(dss)),
+			   str_yes_no(test_bit(dss, sseu->geometry_subslice_mask.xehp)),
+			   str_yes_no(test_bit(dss, sseu->compute_subslice_mask.xehp)),
 			   hweight16(enabled_eus), enabled_eus);
 	}
 }
@@ -774,20 +858,44 @@ void intel_sseu_print_topology(struct drm_i915_private *i915,
 	}
 }
 
-u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice)
+void intel_sseu_print_ss_info(const char *type,
+			      const struct sseu_dev_info *sseu,
+			      struct seq_file *m)
 {
-	u16 slice_mask = 0;
+	int s;
+
+	if (sseu->has_xehp_dss) {
+		seq_printf(m, "  %s Geometry DSS: %u\n", type,
+			   bitmap_weight(sseu->geometry_subslice_mask.xehp,
+					 XEHP_BITMAP_BITS(sseu->geometry_subslice_mask)));
+		seq_printf(m, "  %s Compute DSS: %u\n", type,
+			   bitmap_weight(sseu->compute_subslice_mask.xehp,
+					 XEHP_BITMAP_BITS(sseu->compute_subslice_mask)));
+	} else {
+		for (s = 0; s < fls(sseu->slice_mask); s++)
+			seq_printf(m, "  %s Slice%i subslices: %u\n", type,
+				   s, hweight8(sseu->subslice_mask.hsw[s]));
+	}
+}
+
+u16 intel_slicemask_from_xehp_dssmask(intel_sseu_ss_mask_t dss_mask,
+				      int dss_per_slice)
+{
+	intel_sseu_ss_mask_t per_slice_mask = {};
+	unsigned long slice_mask = 0;
 	int i;
 
-	WARN_ON(sizeof(dss_mask) * 8 / dss_per_slice > 8 * sizeof(slice_mask));
+	WARN_ON(DIV_ROUND_UP(XEHP_BITMAP_BITS(dss_mask), dss_per_slice) >
+		8 * sizeof(slice_mask));
 
-	for (i = 0; dss_mask; i++) {
-		if (dss_mask & GENMASK(dss_per_slice - 1, 0))
+	bitmap_fill(per_slice_mask.xehp, dss_per_slice);
+	for (i = 0; !bitmap_empty(dss_mask.xehp, XEHP_BITMAP_BITS(dss_mask)); i++) {
+		if (bitmap_intersects(dss_mask.xehp, per_slice_mask.xehp, dss_per_slice))
 			slice_mask |= BIT(i);
 
-		dss_mask >>= dss_per_slice;
+		bitmap_shift_right(dss_mask.xehp, dss_mask.xehp, dss_per_slice,
+				   XEHP_BITMAP_BITS(dss_mask));
 	}
 
 	return slice_mask;
 }
-
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index 5c078df4729c..aa87d3832d60 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -25,12 +25,16 @@ struct drm_printer;
 /*
  * Maximum number of subslices that can exist within a HSW-style slice.  This
  * is only relevant to pre-Xe_HP platforms (Xe_HP and beyond use the
- * GEN_MAX_DSS value below).
+ * I915_MAX_SS_FUSE_BITS value below).
  */
 #define GEN_MAX_SS_PER_HSW_SLICE	6
 
-/* Maximum number of DSS on newer platforms (Xe_HP and beyond). */
-#define GEN_MAX_DSS			32
+/*
+ * Maximum number of 32-bit registers used by hardware to express the
+ * enabled/disabled subslices.
+ */
+#define I915_MAX_SS_FUSE_REGS	2
+#define I915_MAX_SS_FUSE_BITS	(I915_MAX_SS_FUSE_REGS * 32)
 
 /* Maximum number of EUs that can exist within a subslice or DSS. */
 #define GEN_MAX_EUS_PER_SS		16
@@ -38,7 +42,7 @@ struct drm_printer;
 #define SSEU_MAX(a, b)			((a) > (b) ? (a) : (b))
 
 /* The maximum number of bits needed to express each subslice/DSS independently */
-#define GEN_SS_MASK_SIZE		SSEU_MAX(GEN_MAX_DSS, \
+#define GEN_SS_MASK_SIZE		SSEU_MAX(I915_MAX_SS_FUSE_BITS, \
 						 GEN_MAX_HSW_SLICES * GEN_MAX_SS_PER_HSW_SLICE)
 
 #define GEN_SSEU_STRIDE(max_entries)	DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
@@ -49,15 +53,28 @@ struct drm_printer;
 #define GEN_DSS_PER_CSLICE	8
 #define GEN_DSS_PER_MSLICE	8
 
-#define GEN_MAX_GSLICES		(GEN_MAX_DSS / GEN_DSS_PER_GSLICE)
-#define GEN_MAX_CSLICES		(GEN_MAX_DSS / GEN_DSS_PER_CSLICE)
+#define GEN_MAX_GSLICES		(I915_MAX_SS_FUSE_BITS / GEN_DSS_PER_GSLICE)
+#define GEN_MAX_CSLICES		(I915_MAX_SS_FUSE_BITS / GEN_DSS_PER_CSLICE)
+
+typedef union {
+	u8 hsw[GEN_MAX_HSW_SLICES];
+
+	/* Bitmap compatible with linux/bitmap.h; may exceed size of u64 */
+	unsigned long xehp[BITS_TO_LONGS(I915_MAX_SS_FUSE_BITS)];
+} intel_sseu_ss_mask_t;
+
+#define XEHP_BITMAP_BITS(mask)	((int)BITS_PER_TYPE(typeof(mask.xehp)))
 
 struct sseu_dev_info {
 	u8 slice_mask;
-	u8 subslice_mask[GEN_SS_MASK_SIZE];
-	u8 geometry_subslice_mask[GEN_SS_MASK_SIZE];
-	u8 compute_subslice_mask[GEN_SS_MASK_SIZE];
-	u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE];
+	intel_sseu_ss_mask_t subslice_mask;
+	intel_sseu_ss_mask_t geometry_subslice_mask;
+	intel_sseu_ss_mask_t compute_subslice_mask;
+	union {
+		u16 hsw[GEN_MAX_HSW_SLICES][GEN_MAX_SS_PER_HSW_SLICE];
+		u16 xehp[I915_MAX_SS_FUSE_BITS];
+	} eu_mask;
+
 	u16 eu_total;
 	u8 eu_per_subslice;
 	u8 min_eu_in_pool;
@@ -66,14 +83,16 @@ struct sseu_dev_info {
 	u8 has_slice_pg:1;
 	u8 has_subslice_pg:1;
 	u8 has_eu_pg:1;
+	/*
+	 * For Xe_HP and beyond, the hardware no longer has traditional slices
+	 * so we just report the entire DSS pool under a fake "slice 0."
+	 */
+	u8 has_xehp_dss:1;
 
 	/* Topology fields */
 	u8 max_slices;
 	u8 max_subslices;
 	u8 max_eus_per_subslice;
-
-	u8 ss_stride;
-	u8 eu_stride;
 };
 
 /*
@@ -91,7 +110,7 @@ intel_sseu_from_device_info(const struct sseu_dev_info *sseu)
 {
 	struct intel_sseu value = {
 		.slice_mask = sseu->slice_mask,
-		.subslice_mask = sseu->subslice_mask[0],
+		.subslice_mask = sseu->subslice_mask.hsw[0],
 		.min_eus_per_subslice = sseu->max_eus_per_subslice,
 		.max_eus_per_subslice = sseu->max_eus_per_subslice,
 	};
@@ -103,18 +122,28 @@ static inline bool
 intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice,
 			int subslice)
 {
-	u8 mask;
-	int ss_idx = subslice / BITS_PER_BYTE;
-
 	if (slice >= sseu->max_slices ||
 	    subslice >= sseu->max_subslices)
 		return false;
 
-	GEM_BUG_ON(ss_idx >= sseu->ss_stride);
-
-	mask = sseu->subslice_mask[slice * sseu->ss_stride + ss_idx];
+	if (sseu->has_xehp_dss)
+		return test_bit(subslice, sseu->subslice_mask.xehp);
+	else
+		return sseu->subslice_mask.hsw[slice] & BIT(subslice);
+}
 
-	return mask & BIT(subslice % BITS_PER_BYTE);
+/*
+ * Used to obtain the index of the first DSS.  Can start searching from the
+ * beginning of a specific dss group (e.g., gslice, cslice, etc.) if
+ * groupsize and groupnum are non-zero.
+ */
+static inline unsigned int
+intel_sseu_find_first_xehp_dss(const struct sseu_dev_info *sseu, int groupsize,
+			       int groupnum)
+{
+	return find_next_bit(sseu->subslice_mask.xehp,
+			     XEHP_BITMAP_BITS(sseu->subslice_mask),
+			     groupnum * groupsize);
 }
 
 void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
@@ -124,14 +153,10 @@ unsigned int
 intel_sseu_subslice_total(const struct sseu_dev_info *sseu);
 
 unsigned int
-intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
+intel_sseu_get_hsw_subslices(const struct sseu_dev_info *sseu, u8 slice);
 
-u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice);
-
-u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu);
-
-void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
-			      u8 *subslice_mask, u32 ss_mask);
+intel_sseu_ss_mask_t
+intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu);
 
 void intel_sseu_info_init(struct intel_gt *gt);
 
@@ -143,6 +168,15 @@ void intel_sseu_print_topology(struct drm_i915_private *i915,
 			       const struct sseu_dev_info *sseu,
 			       struct drm_printer *p);
 
-u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice);
+u16 intel_slicemask_from_xehp_dssmask(intel_sseu_ss_mask_t dss_mask, int dss_per_slice);
+
+int intel_sseu_copy_eumask_to_user(void __user *to,
+				   const struct sseu_dev_info *sseu);
+int intel_sseu_copy_ssmask_to_user(void __user *to,
+				   const struct sseu_dev_info *sseu);
+
+void intel_sseu_print_ss_info(const char *type,
+			      const struct sseu_dev_info *sseu,
+			      struct seq_file *m);
 
 #endif /* __INTEL_SSEU_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
index 2d5d011e01db..c2ee5e1826b5 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
@@ -4,6 +4,7 @@
  * Copyright © 2020 Intel Corporation
  */
 
+#include <linux/bitmap.h>
 #include <linux/string_helpers.h>
 
 #include "i915_drv.h"
@@ -11,14 +12,6 @@
 #include "intel_gt_regs.h"
 #include "intel_sseu_debugfs.h"
 
-static void sseu_copy_subslices(const struct sseu_dev_info *sseu,
-				int slice, u8 *to_mask)
-{
-	int offset = slice * sseu->ss_stride;
-
-	memcpy(&to_mask[offset], &sseu->subslice_mask[offset], sseu->ss_stride);
-}
-
 static void cherryview_sseu_device_status(struct intel_gt *gt,
 					  struct sseu_dev_info *sseu)
 {
@@ -41,7 +34,7 @@ static void cherryview_sseu_device_status(struct intel_gt *gt,
 			continue;
 
 		sseu->slice_mask = BIT(0);
-		sseu->subslice_mask[0] |= BIT(ss);
+		sseu->subslice_mask.hsw[0] |= BIT(ss);
 		eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
 			 ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
 			 ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
@@ -92,7 +85,7 @@ static void gen11_sseu_device_status(struct intel_gt *gt,
 			continue;
 
 		sseu->slice_mask |= BIT(s);
-		sseu_copy_subslices(&info->sseu, s, sseu->subslice_mask);
+		sseu->subslice_mask.hsw[s] = info->sseu.subslice_mask.hsw[s];
 
 		for (ss = 0; ss < info->sseu.max_subslices; ss++) {
 			unsigned int eu_cnt;
@@ -147,21 +140,17 @@ static void gen9_sseu_device_status(struct intel_gt *gt,
 		sseu->slice_mask |= BIT(s);
 
 		if (IS_GEN9_BC(gt->i915))
-			sseu_copy_subslices(&info->sseu, s,
-					    sseu->subslice_mask);
+			sseu->subslice_mask.hsw[s] = info->sseu.subslice_mask.hsw[s];
 
 		for (ss = 0; ss < info->sseu.max_subslices; ss++) {
 			unsigned int eu_cnt;
-			u8 ss_idx = s * info->sseu.ss_stride +
-				    ss / BITS_PER_BYTE;
 
 			if (IS_GEN9_LP(gt->i915)) {
 				if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
 					/* skip disabled subslice */
 					continue;
 
-				sseu->subslice_mask[ss_idx] |=
-					BIT(ss % BITS_PER_BYTE);
+				sseu->subslice_mask.hsw[s] |= BIT(ss);
 			}
 
 			eu_cnt = eu_reg[2 * s + ss / 2] & eu_mask[ss % 2];
@@ -188,8 +177,7 @@ static void bdw_sseu_device_status(struct intel_gt *gt,
 	if (sseu->slice_mask) {
 		sseu->eu_per_subslice = info->sseu.eu_per_subslice;
 		for (s = 0; s < fls(sseu->slice_mask); s++)
-			sseu_copy_subslices(&info->sseu, s,
-					    sseu->subslice_mask);
+			sseu->subslice_mask.hsw[s] = info->sseu.subslice_mask.hsw[s];
 		sseu->eu_total = sseu->eu_per_subslice *
 				 intel_sseu_subslice_total(sseu);
 
@@ -208,7 +196,6 @@ static void i915_print_sseu_info(struct seq_file *m,
 				 const struct sseu_dev_info *sseu)
 {
 	const char *type = is_available_info ? "Available" : "Enabled";
-	int s;
 
 	seq_printf(m, "  %s Slice Mask: %04x\n", type,
 		   sseu->slice_mask);
@@ -216,10 +203,7 @@ static void i915_print_sseu_info(struct seq_file *m,
 		   hweight8(sseu->slice_mask));
 	seq_printf(m, "  %s Subslice Total: %u\n", type,
 		   intel_sseu_subslice_total(sseu));
-	for (s = 0; s < fls(sseu->slice_mask); s++) {
-		seq_printf(m, "  %s Slice%i subslices: %u\n", type,
-			   s, intel_sseu_subslices_per_slice(sseu, s));
-	}
+	intel_sseu_print_ss_info(type, sseu, m);
 	seq_printf(m, "  %s EU Total: %u\n", type,
 		   sseu->eu_total);
 	seq_printf(m, "  %s EU Per Subslice: %u\n", type,
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index a05c4b99b3fb..3213c593a55f 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -9,6 +9,7 @@
 #include "intel_engine_regs.h"
 #include "intel_gpu_commands.h"
 #include "intel_gt.h"
+#include "intel_gt_mcr.h"
 #include "intel_gt_regs.h"
 #include "intel_ring.h"
 #include "intel_workarounds.h"
@@ -776,7 +777,9 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
 	if (engine->class != RENDER_CLASS)
 		goto done;
 
-	if (IS_DG2(i915))
+	if (IS_PONTEVECCHIO(i915))
+		; /* noop; none at this time */
+	else if (IS_DG2(i915))
 		dg2_ctx_workarounds_init(engine, wal);
 	else if (IS_XEHPSDV(i915))
 		; /* noop; none at this time */
@@ -948,8 +951,8 @@ gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
 	 * on s/ss combo, the read should be done with read_subslice_reg.
 	 */
 	slice = ffs(sseu->slice_mask) - 1;
-	GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
-	subslice = ffs(intel_sseu_get_subslices(sseu, slice));
+	GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask.hsw));
+	subslice = ffs(intel_sseu_get_hsw_subslices(sseu, slice));
 	GEM_BUG_ON(!subslice);
 	subslice--;
 
@@ -1080,18 +1083,17 @@ static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal,
 	gt->default_steering.instanceid = subslice;
 
 	if (drm_debug_enabled(DRM_UT_DRIVER))
-		intel_gt_report_steering(&p, gt, false);
+		intel_gt_mcr_report_steering(&p, gt, false);
 }
 
 static void
 icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
 {
 	const struct sseu_dev_info *sseu = &gt->info.sseu;
-	unsigned int slice, subslice;
+	unsigned int subslice;
 
 	GEM_BUG_ON(GRAPHICS_VER(gt->i915) < 11);
 	GEM_BUG_ON(hweight8(sseu->slice_mask) > 1);
-	slice = 0;
 
 	/*
 	 * Although a platform may have subslices, we need to always steer
@@ -1102,7 +1104,7 @@ icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
 	 * one of the higher subslices, we run the risk of reading back 0's or
 	 * random garbage.
 	 */
-	subslice = __ffs(intel_sseu_get_subslices(sseu, slice));
+	subslice = __ffs(intel_sseu_get_hsw_subslices(sseu, 0));
 
 	/*
 	 * If the subslice we picked above also steers us to a valid L3 bank,
@@ -1112,7 +1114,7 @@ icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
 	if (gt->info.l3bank_mask & BIT(subslice))
 		gt->steering_table[L3BANK] = NULL;
 
-	__add_mcr_wa(gt, wal, slice, subslice);
+	__add_mcr_wa(gt, wal, 0, subslice);
 }
 
 static void
@@ -1120,7 +1122,6 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
 {
 	const struct sseu_dev_info *sseu = &gt->info.sseu;
 	unsigned long slice, subslice = 0, slice_mask = 0;
-	u64 dss_mask = 0;
 	u32 lncf_mask = 0;
 	int i;
 
@@ -1151,8 +1152,8 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
 	 */
 
 	/* Find the potential gslice candidates */
-	dss_mask = intel_sseu_get_subslices(sseu, 0);
-	slice_mask = intel_slicemask_from_dssmask(dss_mask, GEN_DSS_PER_GSLICE);
+	slice_mask = intel_slicemask_from_xehp_dssmask(sseu->subslice_mask,
+						       GEN_DSS_PER_GSLICE);
 
 	/*
 	 * Find the potential LNCF candidates.  Either LNCF within a valid
@@ -1177,9 +1178,8 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
 	}
 
 	slice = __ffs(slice_mask);
-	subslice = __ffs(dss_mask >> (slice * GEN_DSS_PER_GSLICE));
-	WARN_ON(subslice > GEN_DSS_PER_GSLICE);
-	WARN_ON(dss_mask >> (slice * GEN_DSS_PER_GSLICE) == 0);
+	subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice) %
+		GEN_DSS_PER_GSLICE;
 
 	__add_mcr_wa(gt, wal, slice, subslice);
 
@@ -1197,6 +1197,20 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
 }
 
 static void
+pvc_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+	unsigned int dss;
+
+	/*
+	 * Setup implicit steering for COMPUTE and DSS ranges to the first
+	 * non-fused-off DSS.  All other types of MCR registers will be
+	 * explicitly steered.
+	 */
+	dss = intel_sseu_find_first_xehp_dss(&gt->info.sseu, 0, 0);
+	__add_mcr_wa(gt, wal, dss / GEN_DSS_PER_CSLICE, dss % GEN_DSS_PER_CSLICE);
+}
+
+static void
 icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 {
 	struct drm_i915_private *i915 = gt->i915;
@@ -1487,6 +1501,18 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 	 * performance guide section.
 	 */
 	wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS);
+
+	/* Wa_14015795083 */
+	wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
+}
+
+static void
+pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+	pvc_init_mcr(gt, wal);
+
+	/* Wa_14015795083 */
+	wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
 }
 
 static void
@@ -1494,7 +1520,9 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
 {
 	struct drm_i915_private *i915 = gt->i915;
 
-	if (IS_DG2(i915))
+	if (IS_PONTEVECCHIO(i915))
+		pvc_gt_workarounds_init(gt, wal);
+	else if (IS_DG2(i915))
 		dg2_gt_workarounds_init(gt, wal);
 	else if (IS_XEHPSDV(i915))
 		xehpsdv_gt_workarounds_init(gt, wal);
@@ -1596,13 +1624,13 @@ wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal)
 		u32 val, old = 0;
 
 		/* open-coded rmw due to steering */
-		old = wa->clr ? intel_gt_read_register_fw(gt, wa->reg) : 0;
+		old = wa->clr ? intel_gt_mcr_read_any_fw(gt, wa->reg) : 0;
 		val = (old & ~wa->clr) | wa->set;
 		if (val != old || !wa->clr)
 			intel_uncore_write_fw(uncore, wa->reg, val);
 
 		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
-			wa_verify(wa, intel_gt_read_register_fw(gt, wa->reg),
+			wa_verify(wa, intel_gt_mcr_read_any_fw(gt, wa->reg),
 				  wal->name, "application");
 	}
 
@@ -1633,7 +1661,7 @@ static bool wa_list_verify(struct intel_gt *gt,
 
 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
 		ok &= wa_verify(wa,
-				intel_gt_read_register_fw(gt, wa->reg),
+				intel_gt_mcr_read_any_fw(gt, wa->reg),
 				wal->name, from);
 
 	intel_uncore_forcewake_put__locked(uncore, fw);
@@ -1924,6 +1952,32 @@ static void dg2_whitelist_build(struct intel_engine_cs *engine)
 	}
 }
 
+static void blacklist_trtt(struct intel_engine_cs *engine)
+{
+	struct i915_wa_list *w = &engine->whitelist;
+
+	/*
+	 * Prevent read/write access to [0x4400, 0x4600) which covers
+	 * the TRTT range across all engines. Note that normally userspace
+	 * cannot access the other engines' trtt control, but for simplicity
+	 * we cover the entire range on each engine.
+	 */
+	whitelist_reg_ext(w, _MMIO(0x4400),
+			  RING_FORCE_TO_NONPRIV_DENY |
+			  RING_FORCE_TO_NONPRIV_RANGE_64);
+	whitelist_reg_ext(w, _MMIO(0x4500),
+			  RING_FORCE_TO_NONPRIV_DENY |
+			  RING_FORCE_TO_NONPRIV_RANGE_64);
+}
+
+static void pvc_whitelist_build(struct intel_engine_cs *engine)
+{
+	allow_read_ctx_timestamp(engine);
+
+	/* Wa_16014440446:pvc */
+	blacklist_trtt(engine);
+}
+
 void intel_engine_init_whitelist(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = engine->i915;
@@ -1931,7 +1985,9 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)
 
 	wa_init_start(w, "whitelist", engine->name);
 
-	if (IS_DG2(i915))
+	if (IS_PONTEVECCHIO(i915))
+		pvc_whitelist_build(engine);
+	else if (IS_DG2(i915))
 		dg2_whitelist_build(engine);
 	else if (IS_XEHPSDV(i915))
 		xehpsdv_whitelist_build(engine);
@@ -1994,27 +2050,44 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
 static void
 engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 {
-	u8 mocs;
+	u8 mocs_w, mocs_r;
 
 	/*
-	 * RING_CMD_CCTL are need to be programed to un-cached
-	 * for memory writes and reads outputted by Command
-	 * Streamers on Gen12 onward platforms.
+	 * RING_CMD_CCTL specifies the default MOCS entry that will be used
+	 * by the command streamer when executing commands that don't have
+	 * a way to explicitly specify a MOCS setting.  The default should
+	 * usually reference whichever MOCS entry corresponds to uncached
+	 * behavior, although use of a WB cached entry is recommended by the
+	 * spec in certain circumstances on specific platforms.
 	 */
 	if (GRAPHICS_VER(engine->i915) >= 12) {
-		mocs = engine->gt->mocs.uc_index;
+		mocs_r = engine->gt->mocs.uc_index;
+		mocs_w = engine->gt->mocs.uc_index;
+
+		if (HAS_L3_CCS_READ(engine->i915) &&
+		    engine->class == COMPUTE_CLASS) {
+			mocs_r = engine->gt->mocs.wb_index;
+
+			/*
+			 * Even on the few platforms where MOCS 0 is a
+			 * legitimate table entry, it's never the correct
+			 * setting to use here; we can assume the MOCS init
+			 * just forgot to initialize wb_index.
+			 */
+			drm_WARN_ON(&engine->i915->drm, mocs_r == 0);
+		}
+
 		wa_masked_field_set(wal,
 				    RING_CMD_CCTL(engine->mmio_base),
 				    CMD_CCTL_MOCS_MASK,
-				    CMD_CCTL_MOCS_OVERRIDE(mocs, mocs));
+				    CMD_CCTL_MOCS_OVERRIDE(mocs_w, mocs_r));
 	}
 }
 
 static bool needs_wa_1308578152(struct intel_engine_cs *engine)
 {
-	u64 dss_mask = intel_sseu_get_subslices(&engine->gt->info.sseu, 0);
-
-	return (dss_mask & GENMASK(GEN_DSS_PER_GSLICE - 1, 0)) == 0;
+	return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >=
+		GEN_DSS_PER_GSLICE;
 }
 
 static void
@@ -2023,9 +2096,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 	struct drm_i915_private *i915 = engine->i915;
 
 	if (IS_DG2(i915)) {
-		/* Wa_14015227452:dg2 */
-		wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
-
 		/* Wa_1509235366:dg2 */
 		wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
 			    GLOBAL_INVALIDATION_MODE);
@@ -2036,12 +2106,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 		 * performance guide section.
 		 */
 		wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
-
-		/* Wa_18018781329:dg2 */
-		wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
-		wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
-		wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
-		wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
 	}
 
 	if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
@@ -2160,6 +2224,16 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 		wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
 	}
 
+	if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
+	    IS_DG2_G10(i915)) {
+		/* Wa_22014600077:dg2 */
+		wa_add(wal, GEN10_CACHE_MODE_SS, 0,
+		       _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
+		       0 /* Wa_14012342262 :write-only reg, so skip
+			    verification */,
+		       true);
+	}
+
 	if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
 	    IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) {
 		/*
@@ -2583,6 +2657,15 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 	}
 }
 
+static void
+ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+{
+	if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) {
+		/* Wa_14014999345:pvc */
+		wa_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC);
+	}
+}
+
 /*
  * The workarounds in this function apply to shared registers in
  * the general render reset domain that aren't tied to a
@@ -2597,6 +2680,15 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
 {
 	struct drm_i915_private *i915 = engine->i915;
 
+	if (IS_PONTEVECCHIO(i915)) {
+		/*
+		 * The following is not actually a "workaround" but rather
+		 * a recommended tuning setting documented in the bspec's
+		 * performance guide section.
+		 */
+		wa_write(wal, XEHPC_L3SCRUB, SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
+	}
+
 	if (IS_XEHPSDV(i915)) {
 		/* Wa_1409954639 */
 		wa_masked_en(wal,
@@ -2629,9 +2721,21 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
 				GLOBAL_INVALIDATION_MODE);
 	}
 
-	if (IS_DG2(i915)) {
-		/* Wa_22014226127:dg2 */
+	if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) {
+		/* Wa_14015227452:dg2,pvc */
+		wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
+
+		/* Wa_22014226127:dg2,pvc */
 		wa_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
+
+		/* Wa_16015675438:dg2,pvc */
+		wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
+
+		/* Wa_18018781329:dg2,pvc */
+		wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
+		wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
+		wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
+		wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
 	}
 }
 
@@ -2651,7 +2755,9 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal
 	if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
 		general_render_compute_wa_init(engine, wal);
 
-	if (engine->class == RENDER_CLASS)
+	if (engine->class == COMPUTE_CLASS)
+		ccs_engine_wa_init(engine, wal);
+	else if (engine->class == RENDER_CLASS)
 		rcs_engine_wa_init(engine, wal);
 	else
 		xcs_engine_wa_init(engine, wal);
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 83ff4c2e57c5..6493265d5f64 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -976,6 +976,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
 {
 	struct i915_gpu_error *global = &gt->i915->gpu_error;
 	struct intel_engine_cs *engine, *other;
+	struct active_engine *threads;
 	enum intel_engine_id id, tmp;
 	struct hang h;
 	int err = 0;
@@ -996,8 +997,11 @@ static int __igt_reset_engines(struct intel_gt *gt,
 			h.ctx->sched.priority = 1024;
 	}
 
+	threads = kmalloc_array(I915_NUM_ENGINES, sizeof(*threads), GFP_KERNEL);
+	if (!threads)
+		return -ENOMEM;
+
 	for_each_engine(engine, gt, id) {
-		struct active_engine threads[I915_NUM_ENGINES] = {};
 		unsigned long device = i915_reset_count(global);
 		unsigned long count = 0, reported;
 		bool using_guc = intel_engine_uses_guc(engine);
@@ -1016,7 +1020,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
 			break;
 		}
 
-		memset(threads, 0, sizeof(threads));
+		memset(threads, 0, sizeof(*threads) * I915_NUM_ENGINES);
 		for_each_engine(other, gt, tmp) {
 			struct task_struct *tsk;
 
@@ -1236,6 +1240,7 @@ unwind:
 			break;
 		}
 	}
+	kfree(threads);
 
 	if (intel_gt_is_wedged(gt))
 		err = -EIO;
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
index 62cb4254a77a..4c840a2639dc 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
@@ -122,6 +122,12 @@ enum slpc_param_id {
 	SLPC_MAX_PARAM = 32,
 };
 
+enum slpc_media_ratio_mode {
+	SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL = 0,
+	SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_ONE = 1,
+	SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO = 2,
+};
+
 enum slpc_event_id {
 	SLPC_EVENT_RESET = 0,
 	SLPC_EVENT_SHUTDOWN = 1,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 2c4ad4a65089..2706a8c65090 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -310,8 +310,8 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
 	if (IS_DG2(gt->i915))
 		flags |= GUC_WA_DUAL_QUEUE;
 
-	/* Wa_22011802037: graphics version 12 */
-	if (GRAPHICS_VER(gt->i915) == 12)
+	/* Wa_22011802037: graphics version 11/12 */
+	if (IS_GRAPHICS_VER(gt->i915, 11, 12))
 		flags |= GUC_WA_PRE_PARSER;
 
 	/* Wa_16011777198:dg2 */
@@ -327,6 +327,10 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
 	    IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_FOREVER))
 		flags |= GUC_WA_CONTEXT_ISOLATION;
 
+	/* Wa_16015675438 */
+	if (!RCS_MASK(gt))
+		flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST;
+
 	return flags;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 966e69a8b1c1..d0d99f178f2d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -230,6 +230,14 @@ struct intel_guc {
 		 * @shift: Right shift value for the gpm timestamp
 		 */
 		u32 shift;
+
+		/**
+		 * @last_stat_jiffies: jiffies at last actual stats collection time
+		 * We use this timestamp to ensure we don't oversample the
+		 * stats because runtime power management events can trigger
+		 * stats collection at much higher rates than required.
+		 */
+		unsigned long last_stat_jiffies;
 	} timestamp;
 
 #ifdef CONFIG_DRM_I915_SELFTEST
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 3eabf4cf8eec..ba7541f3ca61 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -7,6 +7,7 @@
 
 #include "gt/intel_engine_regs.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_mcr.h"
 #include "gt/intel_gt_regs.h"
 #include "gt/intel_lrc.h"
 #include "gt/shmem_utils.h"
@@ -313,7 +314,7 @@ static long __must_check guc_mmio_reg_add(struct intel_gt *gt,
 	 * tracking, it is easier to just program the default steering for all
 	 * regs that don't need a non-default one.
 	 */
-	intel_gt_get_valid_steering_for_reg(gt, reg, &group, &inst);
+	intel_gt_mcr_get_nonterminated_steering(gt, reg, &group, &inst);
 	entry.flags |= GUC_REGSET_STEERING(group, inst);
 
 	slot = __mmio_reg_add(regset, &entry);
@@ -457,7 +458,7 @@ static void fill_engine_enable_masks(struct intel_gt *gt,
 {
 	info_map_write(info_map, engine_enabled_masks[GUC_RENDER_CLASS], RCS_MASK(gt));
 	info_map_write(info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], CCS_MASK(gt));
-	info_map_write(info_map, engine_enabled_masks[GUC_BLITTER_CLASS], 1);
+	info_map_write(info_map, engine_enabled_masks[GUC_BLITTER_CLASS], BCS_MASK(gt));
 	info_map_write(info_map, engine_enabled_masks[GUC_VIDEO_CLASS], VDBOX_MASK(gt));
 	info_map_write(info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS], VEBOX_MASK(gt));
 }
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
index c4e25966d3e9..97a32e610c30 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
@@ -420,72 +420,6 @@ guc_capture_get_device_reglist(struct intel_guc *guc)
 	return default_lists;
 }
 
-static const char *
-__stringify_owner(u32 owner)
-{
-	switch (owner) {
-	case GUC_CAPTURE_LIST_INDEX_PF:
-		return "PF";
-	case GUC_CAPTURE_LIST_INDEX_VF:
-		return "VF";
-	default:
-		return "unknown";
-	}
-
-	return "";
-}
-
-static const char *
-__stringify_type(u32 type)
-{
-	switch (type) {
-	case GUC_CAPTURE_LIST_TYPE_GLOBAL:
-		return "Global";
-	case GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS:
-		return "Class";
-	case GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE:
-		return "Instance";
-	default:
-		return "unknown";
-	}
-
-	return "";
-}
-
-static const char *
-__stringify_engclass(u32 class)
-{
-	switch (class) {
-	case GUC_RENDER_CLASS:
-		return "Render";
-	case GUC_VIDEO_CLASS:
-		return "Video";
-	case GUC_VIDEOENHANCE_CLASS:
-		return "VideoEnhance";
-	case GUC_BLITTER_CLASS:
-		return "Blitter";
-	case GUC_COMPUTE_CLASS:
-		return "Compute";
-	default:
-		return "unknown";
-	}
-
-	return "";
-}
-
-static void
-guc_capture_warn_with_list_info(struct drm_i915_private *i915, char *msg,
-				u32 owner, u32 type, u32 classid)
-{
-	if (type == GUC_CAPTURE_LIST_TYPE_GLOBAL)
-		drm_dbg(&i915->drm, "GuC-capture: %s for %s %s-Registers.\n", msg,
-			__stringify_owner(owner), __stringify_type(type));
-	else
-		drm_dbg(&i915->drm, "GuC-capture: %s for %s %s-Registers on %s-Engine\n", msg,
-			__stringify_owner(owner), __stringify_type(type),
-			__stringify_engclass(classid));
-}
-
 static int
 guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
 		      struct guc_mmio_reg *ptr, u16 num_entries)
@@ -501,11 +435,8 @@ guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
 		return -ENODEV;
 
 	match = guc_capture_get_one_list(reglists, owner, type, classid);
-	if (!match) {
-		guc_capture_warn_with_list_info(i915, "Missing register list init", owner, type,
-						classid);
+	if (!match)
 		return -ENODATA;
-	}
 
 	for (i = 0; i < num_entries && i < match->num_regs; ++i) {
 		ptr[i].offset = match->list[i].reg.reg;
@@ -556,7 +487,6 @@ int
 intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
 			      size_t *size)
 {
-	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
 	struct intel_guc_state_capture *gc = guc->capture;
 	struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][classid];
 	int num_regs;
@@ -570,11 +500,8 @@ intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 cl
 	}
 
 	num_regs = guc_cap_list_num_regs(gc, owner, type, classid);
-	if (!num_regs) {
-		guc_capture_warn_with_list_info(i915, "Missing register list size",
-						owner, type, classid);
+	if (!num_regs)
 		return -ENODATA;
-	}
 
 	*size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) +
 			   (num_regs * sizeof(struct guc_mmio_reg)));
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 42cb7a9a6199..b3c9a9327f76 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -105,6 +105,7 @@
 #define   GUC_WA_PRE_PARSER		BIT(14)
 #define   GUC_WA_HOLD_CCS_SWITCHOUT	BIT(17)
 #define   GUC_WA_POLLCS			BIT(18)
+#define   GUC_WA_RCS_REGS_IN_CCS_REGS_LIST	BIT(21)
 
 #define GUC_CTL_FEATURE			2
 #define   GUC_CTL_ENABLE_SLPC		BIT(2)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
index 79c66b6b51a3..4781fccc2687 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_hwconfig.c
@@ -94,9 +94,9 @@ static int guc_hwconfig_fill_buffer(struct intel_guc *guc, struct intel_hwconfig
 
 static bool has_table(struct drm_i915_private *i915)
 {
-	if (IS_ALDERLAKE_P(i915))
+	if (IS_ALDERLAKE_P(i915) && !IS_ADLP_N(i915))
 		return true;
-	if (IS_DG2(i915))
+	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
 		return true;
 
 	return false;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
index e00661fb0853..8f8dd05835c5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
@@ -49,7 +49,6 @@ static int guc_action_control_gucrc(struct intel_guc *guc, bool enable)
 static int __guc_rc_control(struct intel_guc *guc, bool enable)
 {
 	struct intel_gt *gt = guc_to_gt(guc);
-	struct drm_device *drm = &guc_to_gt(guc)->i915->drm;
 	int ret;
 
 	if (!intel_uc_uses_guc_rc(&gt->uc))
@@ -60,8 +59,8 @@ static int __guc_rc_control(struct intel_guc *guc, bool enable)
 
 	ret = guc_action_control_gucrc(guc, enable);
 	if (ret) {
-		drm_err(drm, "Failed to %s GuC RC (%pe)\n",
-			str_enable_disable(enable), ERR_PTR(ret));
+		i915_probe_error(guc_to_gt(guc)->i915, "Failed to %s GuC RC (%pe)\n",
+				 str_enable_disable(enable), ERR_PTR(ret));
 		return ret;
 	}
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
index ad570fa002a6..8dc063f087eb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
@@ -96,6 +96,7 @@
 
 #define GUC_SHIM_CONTROL2		_MMIO(0xc068)
 #define   GUC_IS_PRIVILEGED		(1<<29)
+#define   GSC_LOADS_HUC			(1<<30)
 
 #define GUC_SEND_INTERRUPT		_MMIO(0xc4c8)
 #define   GUC_SEND_TRIGGER		  (1<<0)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 1db833da42df..ec9c4ca0f615 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc)
 	return data->header.global_state;
 }
 
+static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value)
+{
+	u32 request[] = {
+		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+		SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2),
+		id,
+		value,
+	};
+	int ret;
+
+	ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0);
+
+	return ret > 0 ? -EPROTO : ret;
+}
+
+static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value)
+{
+	struct intel_guc *guc = slpc_to_guc(slpc);
+
+	GEM_BUG_ON(id >= SLPC_MAX_PARAM);
+
+	return guc_action_slpc_set_param_nb(guc, id, value);
+}
+
 static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)
 {
 	u32 request[] = {
@@ -208,12 +232,14 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
 	 */
 
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-		ret = slpc_set_param(slpc,
-				     SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
-				     freq);
+		/* Non-blocking request will avoid stalls */
+		ret = slpc_set_param_nb(slpc,
+					SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+					freq);
 		if (ret)
-			i915_probe_error(i915, "Unable to force min freq to %u: %d",
-					 freq, ret);
+			drm_notice(&i915->drm,
+				   "Failed to send set_param for min freq(%d): (%d)\n",
+				   freq, ret);
 	}
 
 	return ret;
@@ -222,6 +248,7 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
 static void slpc_boost_work(struct work_struct *work)
 {
 	struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work);
+	int err;
 
 	/*
 	 * Raise min freq to boost. It's possible that
@@ -231,8 +258,9 @@ static void slpc_boost_work(struct work_struct *work)
 	 */
 	mutex_lock(&slpc->lock);
 	if (atomic_read(&slpc->num_waiters)) {
-		slpc_force_min_freq(slpc, slpc->boost_freq);
-		slpc->num_boosts++;
+		err = slpc_force_min_freq(slpc, slpc->boost_freq);
+		if (!err)
+			slpc->num_boosts++;
 	}
 	mutex_unlock(&slpc->lock);
 }
@@ -260,6 +288,7 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
 	slpc->boost_freq = 0;
 	atomic_set(&slpc->num_waiters, 0);
 	slpc->num_boosts = 0;
+	slpc->media_ratio_mode = SLPC_MEDIA_RATIO_MODE_DYNAMIC_CONTROL;
 
 	mutex_init(&slpc->lock);
 	INIT_WORK(&slpc->boost_work, slpc_boost_work);
@@ -506,6 +535,22 @@ int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val)
 	return ret;
 }
 
+int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val)
+{
+	struct drm_i915_private *i915 = slpc_to_i915(slpc);
+	intel_wakeref_t wakeref;
+	int ret = 0;
+
+	if (!HAS_MEDIA_RATIO_MODE(i915))
+		return -ENODEV;
+
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+		ret = slpc_set_param(slpc,
+				     SLPC_PARAM_MEDIA_FF_RATIO_MODE,
+				     val);
+	return ret;
+}
+
 void intel_guc_pm_intrmsk_enable(struct intel_gt *gt)
 {
 	u32 pm_intrmsk_mbz = 0;
@@ -654,6 +699,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
 		return ret;
 	}
 
+	/* Set cached media freq ratio mode */
+	intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
index 0caa8fee3c04..82a98f78f96c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
@@ -38,6 +38,7 @@ int intel_guc_slpc_set_boost_freq(struct intel_guc_slpc *slpc, u32 val);
 int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val);
 int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val);
 int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p);
+int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val);
 void intel_guc_pm_intrmsk_enable(struct intel_gt *gt);
 void intel_guc_slpc_boost(struct intel_guc_slpc *slpc);
 void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
index bf5b9a563c09..73d208123528 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
@@ -29,6 +29,9 @@ struct intel_guc_slpc {
 	u32 min_freq_softlimit;
 	u32 max_freq_softlimit;
 
+	/* cached media ratio mode */
+	u32 media_ratio_mode;
+
 	/* Protects set/reset of boost freq
 	 * and value of num_waiters
 	 */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 1726f0f19901..40f726c61e95 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1314,6 +1314,8 @@ static void __update_guc_busyness_stats(struct intel_guc *guc)
 	unsigned long flags;
 	ktime_t unused;
 
+	guc->timestamp.last_stat_jiffies = jiffies;
+
 	spin_lock_irqsave(&guc->timestamp.lock, flags);
 
 	guc_update_pm_timestamp(guc, &unused);
@@ -1386,6 +1388,17 @@ void intel_guc_busyness_park(struct intel_gt *gt)
 		return;
 
 	cancel_delayed_work(&guc->timestamp.work);
+
+	/*
+	 * Before parking, we should sample engine busyness stats if we need to.
+	 * We can skip it if we are less than half a ping from the last time we
+	 * sampled the busyness stats.
+	 */
+	if (guc->timestamp.last_stat_jiffies &&
+	    !time_after(jiffies, guc->timestamp.last_stat_jiffies +
+			(guc->timestamp.ping_delay / 2)))
+		return;
+
 	__update_guc_busyness_stats(guc);
 }
 
@@ -1527,87 +1540,18 @@ static void guc_reset_state(struct intel_context *ce, u32 head, bool scrub)
 	lrc_update_regs(ce, engine, head);
 }
 
-static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine)
-{
-	static const i915_reg_t _reg[I915_NUM_ENGINES] = {
-		[RCS0] = MSG_IDLE_CS,
-		[BCS0] = MSG_IDLE_BCS,
-		[VCS0] = MSG_IDLE_VCS0,
-		[VCS1] = MSG_IDLE_VCS1,
-		[VCS2] = MSG_IDLE_VCS2,
-		[VCS3] = MSG_IDLE_VCS3,
-		[VCS4] = MSG_IDLE_VCS4,
-		[VCS5] = MSG_IDLE_VCS5,
-		[VCS6] = MSG_IDLE_VCS6,
-		[VCS7] = MSG_IDLE_VCS7,
-		[VECS0] = MSG_IDLE_VECS0,
-		[VECS1] = MSG_IDLE_VECS1,
-		[VECS2] = MSG_IDLE_VECS2,
-		[VECS3] = MSG_IDLE_VECS3,
-		[CCS0] = MSG_IDLE_CS,
-		[CCS1] = MSG_IDLE_CS,
-		[CCS2] = MSG_IDLE_CS,
-		[CCS3] = MSG_IDLE_CS,
-	};
-	u32 val;
-
-	if (!_reg[engine->id].reg)
-		return 0;
-
-	val = intel_uncore_read(engine->uncore, _reg[engine->id]);
-
-	/* bits[29:25] & bits[13:9] >> shift */
-	return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT;
-}
-
-static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask)
-{
-	int ret;
-
-	/* Ensure GPM receives fw up/down after CS is stopped */
-	udelay(1);
-
-	/* Wait for forcewake request to complete in GPM */
-	ret =  __intel_wait_for_register_fw(gt->uncore,
-					    GEN9_PWRGT_DOMAIN_STATUS,
-					    fw_mask, fw_mask, 5000, 0, NULL);
-
-	/* Ensure CS receives fw ack from GPM */
-	udelay(1);
-
-	if (ret)
-		GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret);
-}
-
-/*
- * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any
- * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The
- * pending status is indicated by bits[13:9] (masked by bits[ 29:25]) in the
- * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we
- * are concerned only with the gt reset here, we use a logical OR of pending
- * forcewakeups from all reset domains and then wait for them to complete by
- * querying PWRGT_DOMAIN_STATUS.
- */
 static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
 {
-	u32 fw_pending;
-
-	if (GRAPHICS_VER(engine->i915) != 12)
+	if (!IS_GRAPHICS_VER(engine->i915, 11, 12))
 		return;
 
-	/*
-	 * Wa_22011802037
-	 * TODO: Occasionally trying to stop the cs times out, but does not
-	 * adversely affect functionality. The timeout is set as a config
-	 * parameter that defaults to 100ms. Assuming that this timeout is
-	 * sufficient for any pending MI_FORCEWAKEs to complete, ignore the
-	 * timeout returned here until it is root caused.
-	 */
 	intel_engine_stop_cs(engine);
 
-	fw_pending = __cs_pending_mi_force_wakes(engine);
-	if (fw_pending)
-		__gpm_wait_for_fw_complete(engine->gt, fw_pending);
+	/*
+	 * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need
+	 * to wait for any pending mi force wakeups
+	 */
+	intel_engine_wait_for_pending_mi_fw(engine);
 }
 
 static void guc_reset_nop(struct intel_engine_cs *engine)
@@ -2394,6 +2338,26 @@ static int guc_context_policy_init(struct intel_context *ce, bool loop)
 	return ret;
 }
 
+static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
+{
+	/*
+	 * this matches the mapping we do in map_i915_prio_to_guc_prio()
+	 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL)
+	 */
+	switch (prio) {
+	default:
+		MISSING_CASE(prio);
+		fallthrough;
+	case GUC_CLIENT_PRIORITY_KMD_NORMAL:
+		return GEN12_CTX_PRIORITY_NORMAL;
+	case GUC_CLIENT_PRIORITY_NORMAL:
+		return GEN12_CTX_PRIORITY_LOW;
+	case GUC_CLIENT_PRIORITY_HIGH:
+	case GUC_CLIENT_PRIORITY_KMD_HIGH:
+		return GEN12_CTX_PRIORITY_HIGH;
+	}
+}
+
 static void prepare_context_registration_info(struct intel_context *ce,
 					      struct guc_ctxt_registration_info *info)
 {
@@ -2420,6 +2384,8 @@ static void prepare_context_registration_info(struct intel_context *ce,
 	 */
 	info->hwlrca_lo = lower_32_bits(ce->lrc.lrca);
 	info->hwlrca_hi = upper_32_bits(ce->lrc.lrca);
+	if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
+		info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio);
 	info->flags = CONTEXT_REGISTRATION_FLAG_KMD;
 
 	/*
@@ -2768,7 +2734,9 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
 	__guc_context_set_context_policies(guc, &policy, true);
 }
 
-static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
+static void
+guc_context_revoke(struct intel_context *ce, struct i915_request *rq,
+		   unsigned int preempt_timeout_ms)
 {
 	struct intel_guc *guc = ce_to_guc(ce);
 	struct intel_runtime_pm *runtime_pm =
@@ -2807,7 +2775,8 @@ static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
 		 * gets kicked off the HW ASAP.
 		 */
 		with_intel_runtime_pm(runtime_pm, wakeref) {
-			__guc_context_set_preemption_timeout(guc, guc_id, 1);
+			__guc_context_set_preemption_timeout(guc, guc_id,
+							     preempt_timeout_ms);
 			__guc_context_sched_disable(guc, ce, guc_id);
 		}
 	} else {
@@ -2815,7 +2784,7 @@ static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
 			with_intel_runtime_pm(runtime_pm, wakeref)
 				__guc_context_set_preemption_timeout(guc,
 								     ce->guc_id.id,
-								     1);
+								     preempt_timeout_ms);
 		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
 	}
 }
@@ -3168,7 +3137,7 @@ static const struct intel_context_ops guc_context_ops = {
 	.unpin = guc_context_unpin,
 	.post_unpin = guc_context_post_unpin,
 
-	.ban = guc_context_ban,
+	.revoke = guc_context_revoke,
 
 	.cancel_request = guc_context_cancel_request,
 
@@ -3417,7 +3386,7 @@ static const struct intel_context_ops virtual_guc_context_ops = {
 	.unpin = guc_virtual_context_unpin,
 	.post_unpin = guc_context_post_unpin,
 
-	.ban = guc_context_ban,
+	.revoke = guc_context_revoke,
 
 	.cancel_request = guc_context_cancel_request,
 
@@ -3506,7 +3475,7 @@ static const struct intel_context_ops virtual_parent_context_ops = {
 	.unpin = guc_parent_context_unpin,
 	.post_unpin = guc_context_post_unpin,
 
-	.ban = guc_context_ban,
+	.revoke = guc_context_revoke,
 
 	.cancel_request = guc_context_cancel_request,
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index 556829de9c17..3bb8838e325a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -6,6 +6,7 @@
 #include <linux/types.h>
 
 #include "gt/intel_gt.h"
+#include "intel_guc_reg.h"
 #include "intel_huc.h"
 #include "i915_drv.h"
 
@@ -17,11 +18,15 @@
  * capabilities by adding HuC specific commands to batch buffers.
  *
  * The kernel driver is only responsible for loading the HuC firmware and
- * triggering its security authentication, which is performed by the GuC. For
- * The GuC to correctly perform the authentication, the HuC binary must be
- * loaded before the GuC one. Loading the HuC is optional; however, not using
- * the HuC might negatively impact power usage and/or performance of media
- * workloads, depending on the use-cases.
+ * triggering its security authentication, which is performed by the GuC on
+ * older platforms and by the GSC on newer ones. For the GuC to correctly
+ * perform the authentication, the HuC binary must be loaded before the GuC one.
+ * Loading the HuC is optional; however, not using the HuC might negatively
+ * impact power usage and/or performance of media workloads, depending on the
+ * use-cases.
+ * HuC must be reloaded on events that cause the WOPCM to lose its contents
+ * (S3/S4, FLR); GuC-authenticated HuC must also be reloaded on GuC/GT reset,
+ * while GSC-managed HuC will survive that.
  *
  * See https://github.com/intel/media-driver for the latest details on HuC
  * functionality.
@@ -54,11 +59,51 @@ void intel_huc_init_early(struct intel_huc *huc)
 	}
 }
 
+#define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy")
+static int check_huc_loading_mode(struct intel_huc *huc)
+{
+	struct intel_gt *gt = huc_to_gt(huc);
+	bool fw_needs_gsc = intel_huc_is_loaded_by_gsc(huc);
+	bool hw_uses_gsc = false;
+
+	/*
+	 * The fuse for HuC load via GSC is only valid on platforms that have
+	 * GuC deprivilege.
+	 */
+	if (HAS_GUC_DEPRIVILEGE(gt->i915))
+		hw_uses_gsc = intel_uncore_read(gt->uncore, GUC_SHIM_CONTROL2) &
+			      GSC_LOADS_HUC;
+
+	if (fw_needs_gsc != hw_uses_gsc) {
+		drm_err(&gt->i915->drm,
+			"mismatch between HuC FW (%s) and HW (%s) load modes\n",
+			HUC_LOAD_MODE_STRING(fw_needs_gsc),
+			HUC_LOAD_MODE_STRING(hw_uses_gsc));
+		return -ENOEXEC;
+	}
+
+	/* make sure we can access the GSC via the mei driver if we need it */
+	if (!(IS_ENABLED(CONFIG_INTEL_MEI_PXP) && IS_ENABLED(CONFIG_INTEL_MEI_GSC)) &&
+	    fw_needs_gsc) {
+		drm_info(&gt->i915->drm,
+			 "Can't load HuC due to missing MEI modules\n");
+		return -EIO;
+	}
+
+	drm_dbg(&gt->i915->drm, "GSC loads huc=%s\n", str_yes_no(fw_needs_gsc));
+
+	return 0;
+}
+
 int intel_huc_init(struct intel_huc *huc)
 {
 	struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
 	int err;
 
+	err = check_huc_loading_mode(huc);
+	if (err)
+		goto out;
+
 	err = intel_uc_fw_init(&huc->fw);
 	if (err)
 		goto out;
@@ -68,7 +113,7 @@ int intel_huc_init(struct intel_huc *huc)
 	return 0;
 
 out:
-	i915_probe_error(i915, "failed with %d\n", err);
+	drm_info(&i915->drm, "HuC init failed with %d\n", err);
 	return err;
 }
 
@@ -96,17 +141,20 @@ int intel_huc_auth(struct intel_huc *huc)
 	struct intel_guc *guc = &gt->uc.guc;
 	int ret;
 
-	GEM_BUG_ON(intel_huc_is_authenticated(huc));
-
 	if (!intel_uc_fw_is_loaded(&huc->fw))
 		return -ENOEXEC;
 
+	/* GSC will do the auth */
+	if (intel_huc_is_loaded_by_gsc(huc))
+		return -ENODEV;
+
 	ret = i915_inject_probe_error(gt->i915, -ENXIO);
 	if (ret)
 		goto fail;
 
-	ret = intel_guc_auth_huc(guc,
-				 intel_guc_ggtt_offset(guc, huc->fw.rsa_data));
+	GEM_BUG_ON(intel_uc_fw_is_running(&huc->fw));
+
+	ret = intel_guc_auth_huc(guc, intel_guc_ggtt_offset(guc, huc->fw.rsa_data));
 	if (ret) {
 		DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret);
 		goto fail;
@@ -133,6 +181,18 @@ fail:
 	return ret;
 }
 
+static bool huc_is_authenticated(struct intel_huc *huc)
+{
+	struct intel_gt *gt = huc_to_gt(huc);
+	intel_wakeref_t wakeref;
+	u32 status = 0;
+
+	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+		status = intel_uncore_read(gt->uncore, huc->status.reg);
+
+	return (status & huc->status.mask) == huc->status.value;
+}
+
 /**
  * intel_huc_check_status() - check HuC status
  * @huc: intel_huc structure
@@ -150,10 +210,6 @@ fail:
  */
 int intel_huc_check_status(struct intel_huc *huc)
 {
-	struct intel_gt *gt = huc_to_gt(huc);
-	intel_wakeref_t wakeref;
-	u32 status = 0;
-
 	switch (__intel_uc_fw_status(&huc->fw)) {
 	case INTEL_UC_FIRMWARE_NOT_SUPPORTED:
 		return -ENODEV;
@@ -167,10 +223,17 @@ int intel_huc_check_status(struct intel_huc *huc)
 		break;
 	}
 
-	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
-		status = intel_uncore_read(gt->uncore, huc->status.reg);
+	return huc_is_authenticated(huc);
+}
 
-	return (status & huc->status.mask) == huc->status.value;
+void intel_huc_update_auth_status(struct intel_huc *huc)
+{
+	if (!intel_uc_fw_is_loadable(&huc->fw))
+		return;
+
+	if (huc_is_authenticated(huc))
+		intel_uc_fw_change_status(&huc->fw,
+					  INTEL_UC_FIRMWARE_RUNNING);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
index 73ec670800f2..d7e25b6e879e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
@@ -27,6 +27,7 @@ int intel_huc_init(struct intel_huc *huc);
 void intel_huc_fini(struct intel_huc *huc);
 int intel_huc_auth(struct intel_huc *huc);
 int intel_huc_check_status(struct intel_huc *huc);
+void intel_huc_update_auth_status(struct intel_huc *huc);
 
 static inline int intel_huc_sanitize(struct intel_huc *huc)
 {
@@ -50,9 +51,9 @@ static inline bool intel_huc_is_used(struct intel_huc *huc)
 	return intel_uc_fw_is_available(&huc->fw);
 }
 
-static inline bool intel_huc_is_authenticated(struct intel_huc *huc)
+static inline bool intel_huc_is_loaded_by_gsc(const struct intel_huc *huc)
 {
-	return intel_uc_fw_is_running(&huc->fw);
+	return huc->fw.loaded_via_gsc;
 }
 
 void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
index e5ef509c70e8..9d6ab1e01639 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
@@ -8,7 +8,7 @@
 #include "i915_drv.h"
 
 /**
- * intel_huc_fw_upload() - load HuC uCode to device
+ * intel_huc_fw_upload() - load HuC uCode to device via DMA transfer
  * @huc: intel_huc structure
  *
  * Called from intel_uc_init_hw() during driver load, resume from sleep and
@@ -21,6 +21,9 @@
  */
 int intel_huc_fw_upload(struct intel_huc *huc)
 {
+	if (intel_huc_is_loaded_by_gsc(huc))
+		return -ENODEV;
+
 	/* HW doesn't look at destination address for HuC, so set it to 0 */
 	return intel_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL);
 }
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index e8f099360e01..f2e7c82985ef 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -45,6 +45,10 @@ static void uc_expand_default_options(struct intel_uc *uc)
 
 	/* Default: enable HuC authentication and GuC submission */
 	i915->params.enable_guc = ENABLE_GUC_LOAD_HUC | ENABLE_GUC_SUBMISSION;
+
+	/* XEHPSDV and PVC do not use HuC */
+	if (IS_XEHPSDV(i915) || IS_PONTEVECCHIO(i915))
+		i915->params.enable_guc &= ~ENABLE_GUC_LOAD_HUC;
 }
 
 /* Reset GuC providing us with fresh state for both GuC and HuC.
@@ -323,17 +327,10 @@ static int __uc_init(struct intel_uc *uc)
 	if (ret)
 		return ret;
 
-	if (intel_uc_uses_huc(uc)) {
-		ret = intel_huc_init(huc);
-		if (ret)
-			goto out_guc;
-	}
+	if (intel_uc_uses_huc(uc))
+		intel_huc_init(huc);
 
 	return 0;
-
-out_guc:
-	intel_guc_fini(guc);
-	return ret;
 }
 
 static void __uc_fini(struct intel_uc *uc)
@@ -509,7 +506,16 @@ static int __uc_init_hw(struct intel_uc *uc)
 	if (ret)
 		goto err_log_capture;
 
-	intel_huc_auth(huc);
+	/*
+	 * GSC-loaded HuC is authenticated by the GSC, so we don't need to
+	 * trigger the auth here. However, given that the HuC loaded this way
+	 * survive GT reset, we still need to update our SW bookkeeping to make
+	 * sure it reflects the correct HW status.
+	 */
+	if (intel_huc_is_loaded_by_gsc(huc))
+		intel_huc_update_auth_status(huc);
+	else
+		intel_huc_auth(huc);
 
 	if (intel_uc_uses_guc_submission(uc))
 		intel_guc_submission_enable(guc);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index d078f884b5e3..c06e83872c34 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -156,7 +156,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
 		[INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
 		[INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) },
 	};
-	static const struct uc_fw_platform_requirement *fw_blobs;
+	const struct uc_fw_platform_requirement *fw_blobs;
 	enum intel_platform p = INTEL_INFO(i915)->platform;
 	u32 fw_count;
 	u8 rev = INTEL_REVID(i915);
@@ -301,45 +301,31 @@ static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, int e)
 	}
 }
 
-/**
- * intel_uc_fw_fetch - fetch uC firmware
- * @uc_fw: uC firmware
- *
- * Fetch uC firmware into GEM obj.
- *
- * Return: 0 on success, a negative errno code on failure.
- */
-int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
+static int check_gsc_manifest(const struct firmware *fw,
+			      struct intel_uc_fw *uc_fw)
 {
-	struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915;
-	struct device *dev = i915->drm.dev;
-	struct drm_i915_gem_object *obj;
-	const struct firmware *fw = NULL;
-	struct uc_css_header *css;
-	size_t size;
-	int err;
+	u32 *dw = (u32 *)fw->data;
+	u32 version = dw[HUC_GSC_VERSION_DW];
 
-	GEM_BUG_ON(!i915->wopcm.size);
-	GEM_BUG_ON(!intel_uc_fw_is_enabled(uc_fw));
-
-	err = i915_inject_probe_error(i915, -ENXIO);
-	if (err)
-		goto fail;
+	uc_fw->major_ver_found = FIELD_GET(HUC_GSC_MAJOR_VER_MASK, version);
+	uc_fw->minor_ver_found = FIELD_GET(HUC_GSC_MINOR_VER_MASK, version);
 
-	__force_fw_fetch_failures(uc_fw, -EINVAL);
-	__force_fw_fetch_failures(uc_fw, -ESTALE);
+	return 0;
+}
 
-	err = request_firmware(&fw, uc_fw->path, dev);
-	if (err)
-		goto fail;
+static int check_ccs_header(struct drm_i915_private *i915,
+			    const struct firmware *fw,
+			    struct intel_uc_fw *uc_fw)
+{
+	struct uc_css_header *css;
+	size_t size;
 
 	/* Check the size of the blob before examining buffer contents */
 	if (unlikely(fw->size < sizeof(struct uc_css_header))) {
 		drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu < %zu\n",
 			 intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
 			 fw->size, sizeof(struct uc_css_header));
-		err = -ENODATA;
-		goto fail;
+		return -ENODATA;
 	}
 
 	css = (struct uc_css_header *)fw->data;
@@ -352,8 +338,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
 			 "%s firmware %s: unexpected header size: %zu != %zu\n",
 			 intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
 			 fw->size, sizeof(struct uc_css_header));
-		err = -EPROTO;
-		goto fail;
+		return -EPROTO;
 	}
 
 	/* uCode size must calculated from other sizes */
@@ -368,8 +353,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
 		drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu < %zu\n",
 			 intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
 			 fw->size, size);
-		err = -ENOEXEC;
-		goto fail;
+		return -ENOEXEC;
 	}
 
 	/* Sanity check whether this fw is not larger than whole WOPCM memory */
@@ -378,8 +362,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
 		drm_warn(&i915->drm, "%s firmware %s: invalid size: %zu > %zu\n",
 			 intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
 			 size, (size_t)i915->wopcm.size);
-		err = -E2BIG;
-		goto fail;
+		return -E2BIG;
 	}
 
 	/* Get version numbers from the CSS header */
@@ -388,6 +371,49 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
 	uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
 					   css->sw_version);
 
+	if (uc_fw->type == INTEL_UC_FW_TYPE_GUC)
+		uc_fw->private_data_size = css->private_data_size;
+
+	return 0;
+}
+
+/**
+ * intel_uc_fw_fetch - fetch uC firmware
+ * @uc_fw: uC firmware
+ *
+ * Fetch uC firmware into GEM obj.
+ *
+ * Return: 0 on success, a negative errno code on failure.
+ */
+int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
+{
+	struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915;
+	struct device *dev = i915->drm.dev;
+	struct drm_i915_gem_object *obj;
+	const struct firmware *fw = NULL;
+	int err;
+
+	GEM_BUG_ON(!i915->wopcm.size);
+	GEM_BUG_ON(!intel_uc_fw_is_enabled(uc_fw));
+
+	err = i915_inject_probe_error(i915, -ENXIO);
+	if (err)
+		goto fail;
+
+	__force_fw_fetch_failures(uc_fw, -EINVAL);
+	__force_fw_fetch_failures(uc_fw, -ESTALE);
+
+	err = request_firmware(&fw, uc_fw->path, dev);
+	if (err)
+		goto fail;
+
+	if (uc_fw->loaded_via_gsc)
+		err = check_gsc_manifest(fw, uc_fw);
+	else
+		err = check_ccs_header(i915, fw, uc_fw);
+	if (err)
+		goto fail;
+
 	if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
 	    uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
 		drm_notice(&i915->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
@@ -400,9 +426,6 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
 		}
 	}
 
-	if (uc_fw->type == INTEL_UC_FW_TYPE_GUC)
-		uc_fw->private_data_size = css->private_data_size;
-
 	if (HAS_LMEM(i915)) {
 		obj = i915_gem_object_create_lmem_from_data(i915, fw->data, fw->size);
 		if (!IS_ERR(obj))
@@ -470,7 +493,10 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)
 	if (i915_gem_object_is_lmem(obj))
 		pte_flags |= PTE_LM;
 
-	ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, pte_flags);
+	if (ggtt->vm.raw_insert_entries)
+		ggtt->vm.raw_insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, pte_flags);
+	else
+		ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, pte_flags);
 }
 
 static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 3229018877d3..4f169035f504 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -102,6 +102,8 @@ struct intel_uc_fw {
 	u32 ucode_size;
 
 	u32 private_data_size;
+
+	bool loaded_via_gsc;
 };
 
 #ifdef CONFIG_DRM_I915_DEBUG_GUC
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
index e41ffc7a7fbc..b05e0e35b734 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
@@ -39,6 +39,11 @@
  * 3. Length info of each component can be found in header, in dwords.
  * 4. Modulus and exponent key are not required by driver. They may not appear
  *    in fw. So driver will load a truncated firmware in this case.
+ *
+ * Starting from DG2, the HuC is loaded by the GSC instead of i915. The GSC
+ * firmware performs all the required integrity checks, we just need to check
+ * the version. Note that the header for GSC-managed blobs is different from the
+ * CSS used for dma-loaded firmwares.
  */
 
 struct uc_css_header {
@@ -78,4 +83,8 @@ struct uc_css_header {
 } __packed;
 static_assert(sizeof(struct uc_css_header) == 128);
 
+#define HUC_GSC_VERSION_DW		44
+#define   HUC_GSC_MAJOR_VER_MASK	(0xFF << 0)
+#define   HUC_GSC_MINOR_VER_MASK	(0xFF << 16)
+
 #endif /* _INTEL_UC_FW_ABI_H */
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index b9eb75a2b400..0ba2a3455d99 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -428,7 +428,7 @@ struct cmd_info {
 #define R_VECS	BIT(VECS0)
 #define R_ALL (R_RCS | R_VCS | R_BCS | R_VECS)
 	/* rings that support this cmd: BLT/RCS/VCS/VECS */
-	u16 rings;
+	intel_engine_mask_t rings;
 
 	/* devices that support this cmd: SNB/IVB/HSW/... */
 	u16 devices;
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index b47746152d97..0e224761d0ed 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -100,6 +100,9 @@
 #include "intel_region_ttm.h"
 #include "vlv_suspend.h"
 
+/* Intel Rapid Start Technology ACPI device name */
+static const char irst_name[] = "INT3392";
+
 static const struct drm_driver i915_drm_driver;
 
 static int i915_get_bridge_dev(struct drm_i915_private *dev_priv)
@@ -520,6 +523,22 @@ mask_err:
 	return ret;
 }
 
+static int i915_pcode_init(struct drm_i915_private *i915)
+{
+	struct intel_gt *gt;
+	int id, ret;
+
+	for_each_gt(gt, i915, id) {
+		ret = intel_pcode_init(gt->uncore);
+		if (ret) {
+			drm_err(&gt->i915->drm, "gt%d: intel_pcode_init failed %d\n", id, ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
 /**
  * i915_driver_hw_probe - setup state requiring device access
  * @dev_priv: device private
@@ -629,7 +648,7 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
 
 	intel_opregion_setup(dev_priv);
 
-	ret = intel_pcode_init(&dev_priv->uncore);
+	ret = i915_pcode_init(dev_priv);
 	if (ret)
 		goto err_msi;
 
@@ -1251,7 +1270,7 @@ static int i915_drm_resume(struct drm_device *dev)
 
 	disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
-	ret = intel_pcode_init(&dev_priv->uncore);
+	ret = i915_pcode_init(dev_priv);
 	if (ret)
 		return ret;
 
@@ -1425,6 +1444,8 @@ static int i915_pm_suspend(struct device *kdev)
 		return -ENODEV;
 	}
 
+	i915_ggtt_mark_pte_lost(i915, false);
+
 	if (i915->drm.switch_power_state == DRM_SWITCH_POWER_OFF)
 		return 0;
 
@@ -1477,6 +1498,14 @@ static int i915_pm_resume(struct device *kdev)
 	if (i915->drm.switch_power_state == DRM_SWITCH_POWER_OFF)
 		return 0;
 
+	/*
+	 * If IRST is enabled, or if we can't detect whether it's enabled,
+	 * then we must assume we lost the GGTT page table entries, since
+	 * they are not retained if IRST decided to enter S4.
+	 */
+	if (!IS_ENABLED(CONFIG_ACPI) || acpi_dev_present(irst_name, NULL, -1))
+		i915_ggtt_mark_pte_lost(i915, true);
+
 	return i915_drm_resume(&i915->drm);
 }
 
@@ -1536,6 +1565,9 @@ static int i915_pm_restore_early(struct device *kdev)
 
 static int i915_pm_restore(struct device *kdev)
 {
+	struct drm_i915_private *i915 = kdev_to_i915(kdev);
+
+	i915_ggtt_mark_pte_lost(i915, true);
 	return i915_pm_resume(kdev);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index 18d38cb59923..b09d1d386574 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -116,8 +116,9 @@ show_client_class(struct seq_file *m,
 		total += busy_add(ctx, class);
 	rcu_read_unlock();
 
-	seq_printf(m, "drm-engine-%s:\t%llu ns\n",
-		   uabi_class_names[class], total);
+	if (capacity)
+		seq_printf(m, "drm-engine-%s:\t%llu ns\n",
+			   uabi_class_names[class], total);
 
 	if (capacity > 1)
 		seq_printf(m, "drm-engine-capacity-%s:\t%u\n",
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
index f796c5e8e060..69496af996d9 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.h
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -11,7 +11,7 @@
 #include <linux/spinlock.h>
 #include <linux/xarray.h>
 
-#include "gt/intel_engine_types.h"
+#include <uapi/drm/i915_drm.h>
 
 #define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4d57609d619a..c22f29c3faa0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -879,6 +879,7 @@ static inline struct intel_gt *to_gt(struct drm_i915_private *i915)
 #define INTEL_DISPLAY_STEP(__i915) (RUNTIME_INFO(__i915)->step.display_step)
 #define INTEL_GRAPHICS_STEP(__i915) (RUNTIME_INFO(__i915)->step.graphics_step)
 #define INTEL_MEDIA_STEP(__i915) (RUNTIME_INFO(__i915)->step.media_step)
+#define INTEL_BASEDIE_STEP(__i915) (RUNTIME_INFO(__i915)->step.basedie_step)
 
 #define IS_DISPLAY_STEP(__i915, since, until) \
 	(drm_WARN_ON(&(__i915)->drm, INTEL_DISPLAY_STEP(__i915) == STEP_NONE), \
@@ -892,6 +893,10 @@ static inline struct intel_gt *to_gt(struct drm_i915_private *i915)
 	(drm_WARN_ON(&(__i915)->drm, INTEL_MEDIA_STEP(__i915) == STEP_NONE), \
 	 INTEL_MEDIA_STEP(__i915) >= (since) && INTEL_MEDIA_STEP(__i915) < (until))
 
+#define IS_BASEDIE_STEP(__i915, since, until) \
+	(drm_WARN_ON(&(__i915)->drm, INTEL_BASEDIE_STEP(__i915) == STEP_NONE), \
+	 INTEL_BASEDIE_STEP(__i915) >= (since) && INTEL_BASEDIE_STEP(__i915) < (until))
+
 static __always_inline unsigned int
 __platform_mask_index(const struct intel_runtime_info *info,
 		      enum intel_platform p)
@@ -1144,6 +1149,14 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 	(IS_DG2(__i915) && \
 	 IS_DISPLAY_STEP(__i915, since, until))
 
+#define IS_PVC_BD_STEP(__i915, since, until) \
+	(IS_PONTEVECCHIO(__i915) && \
+	 IS_BASEDIE_STEP(__i915, since, until))
+
+#define IS_PVC_CT_STEP(__i915, since, until) \
+	(IS_PONTEVECCHIO(__i915) && \
+	 IS_GRAPHICS_STEP(__i915, since, until))
+
 #define IS_LP(dev_priv)		(INTEL_INFO(dev_priv)->is_lp)
 #define IS_GEN9_LP(dev_priv)	(GRAPHICS_VER(dev_priv) == 9 && IS_LP(dev_priv))
 #define IS_GEN9_BC(dev_priv)	(GRAPHICS_VER(dev_priv) == 9 && !IS_LP(dev_priv))
@@ -1159,6 +1172,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 })
 #define RCS_MASK(gt) \
 	ENGINE_INSTANCES_MASK(gt, RCS0, I915_MAX_RCS)
+#define BCS_MASK(gt) \
+	ENGINE_INSTANCES_MASK(gt, BCS0, I915_MAX_BCS)
 #define VDBOX_MASK(gt) \
 	ENGINE_INSTANCES_MASK(gt, VCS0, I915_MAX_VCS)
 #define VEBOX_MASK(gt) \
@@ -1267,9 +1282,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm)
 #define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc)
 
-#define HAS_MSLICES(dev_priv) \
-	(INTEL_INFO(dev_priv)->has_mslices)
-
 /*
  * Set this flag, when platform requires 64K GTT page sizes or larger for
  * device local memory access.
@@ -1308,6 +1320,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 
 #define HAS_LSPCON(dev_priv) (IS_DISPLAY_VER(dev_priv, 9, 10))
 
+#define HAS_L3_CCS_READ(i915) (INTEL_INFO(i915)->has_l3_ccs_read)
+
 /* DPF == dynamic parity feature */
 #define HAS_L3_DPF(dev_priv) (INTEL_INFO(dev_priv)->has_l3_dpf)
 #define NUM_L3_SLICES(dev_priv) (IS_HSW_GT3(dev_priv) ? \
@@ -1341,6 +1355,10 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 
 #define HAS_MBUS_JOINING(i915) (IS_ALDERLAKE_P(i915))
 
+#define HAS_3D_PIPELINE(i915)	(INTEL_INFO(i915)->has_3d_pipeline)
+
+#define HAS_ONE_EU_PER_FUSE_BIT(i915)	(INTEL_INFO(i915)->has_one_eu_per_fuse_bit)
+
 /* i915_gem.c */
 void i915_gem_init_early(struct drm_i915_private *dev_priv);
 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index c12a0adefda5..6fd15b39570c 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -148,14 +148,21 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
 		value = intel_engines_has_context_isolation(i915);
 		break;
 	case I915_PARAM_SLICE_MASK:
+		/* Not supported from Xe_HP onward; use topology queries */
+		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+			return -EINVAL;
+
 		value = sseu->slice_mask;
 		if (!value)
 			return -ENODEV;
 		break;
 	case I915_PARAM_SUBSLICE_MASK:
+		/* Not supported from Xe_HP onward; use topology queries */
+		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+			return -EINVAL;
+
 		/* Only copy bits from the first slice */
-		memcpy(&value, sseu->subslice_mask,
-		       min(sseu->ss_stride, (u8)sizeof(value)));
+		value = intel_sseu_get_hsw_subslices(sseu, 0);
 		if (!value)
 			return -ENODEV;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 0512c66fa4f3..f9b1969ed7ed 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -581,6 +581,15 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 		err_printf(m, "  RC PSMI: 0x%08x\n", ee->rc_psmi);
 		err_printf(m, "  FAULT_REG: 0x%08x\n", ee->fault_reg);
 	}
+	if (GRAPHICS_VER(m->i915) >= 11) {
+		err_printf(m, "  NOPID: 0x%08x\n", ee->nopid);
+		err_printf(m, "  EXCC: 0x%08x\n", ee->excc);
+		err_printf(m, "  CMD_CCTL: 0x%08x\n", ee->cmd_cctl);
+		err_printf(m, "  CSCMDOP: 0x%08x\n", ee->cscmdop);
+		err_printf(m, "  CTX_SR_CTL: 0x%08x\n", ee->ctx_sr_ctl);
+		err_printf(m, "  DMA_FADDR_HI: 0x%08x\n", ee->dma_faddr_hi);
+		err_printf(m, "  DMA_FADDR_LO: 0x%08x\n", ee->dma_faddr_lo);
+	}
 	if (HAS_PPGTT(m->i915)) {
 		err_printf(m, "  GFX_MODE: 0x%08x\n", ee->vm_info.gfx_mode);
 
@@ -1095,8 +1104,12 @@ i915_vma_coredump_create(const struct intel_gt *gt,
 
 		for_each_sgt_daddr(dma, iter, vma_res->bi.pages) {
 			mutex_lock(&ggtt->error_mutex);
-			ggtt->vm.insert_page(&ggtt->vm, dma, slot,
-					     I915_CACHE_NONE, 0);
+			if (ggtt->vm.raw_insert_page)
+				ggtt->vm.raw_insert_page(&ggtt->vm, dma, slot,
+							 I915_CACHE_NONE, 0);
+			else
+				ggtt->vm.insert_page(&ggtt->vm, dma, slot,
+						     I915_CACHE_NONE, 0);
 			mb();
 
 			s = io_mapping_map_wc(&ggtt->iomap, slot, PAGE_SIZE);
@@ -1224,6 +1237,16 @@ static void engine_record_registers(struct intel_engine_coredump *ee)
 		ee->ipehr = ENGINE_READ(engine, IPEHR);
 	}
 
+	if (GRAPHICS_VER(i915) >= 11) {
+		ee->cmd_cctl = ENGINE_READ(engine, RING_CMD_CCTL);
+		ee->cscmdop = ENGINE_READ(engine, RING_CSCMDOP);
+		ee->ctx_sr_ctl = ENGINE_READ(engine, RING_CTX_SR_CTL);
+		ee->dma_faddr_hi = ENGINE_READ(engine, RING_DMA_FADD_UDW);
+		ee->dma_faddr_lo = ENGINE_READ(engine, RING_DMA_FADD);
+		ee->nopid = ENGINE_READ(engine, RING_NOPID);
+		ee->excc = ENGINE_READ(engine, RING_EXCC);
+	}
+
 	intel_engine_get_instdone(engine, &ee->instdone);
 
 	ee->instpm = ENGINE_READ(engine, RING_INSTPM);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index a611abacd9c2..55a143b92d10 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -84,6 +84,13 @@ struct intel_engine_coredump {
 	u32 fault_reg;
 	u64 faddr;
 	u32 rc_psmi; /* sleep state */
+	u32 nopid;
+	u32 excc;
+	u32 cmd_cctl;
+	u32 cscmdop;
+	u32 ctx_sr_ctl;
+	u32 dma_faddr_hi;
+	u32 dma_faddr_lo;
 	struct intel_instdone instdone;
 
 	/* GuC matched capture-lists info */
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 47b2a2631c5b..d6d875b2d379 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -171,6 +171,7 @@
 	.display.overlay_needs_physical = 1, \
 	.display.has_gmch = 1, \
 	.gpu_reset_clobbers_display = true, \
+	.has_3d_pipeline = 1, \
 	.hws_needs_physical = 1, \
 	.unfenced_needs_alignment = 1, \
 	.platform_engine_mask = BIT(RCS0), \
@@ -190,6 +191,7 @@
 	.display.has_overlay = 1, \
 	.display.overlay_needs_physical = 1, \
 	.display.has_gmch = 1, \
+	.has_3d_pipeline = 1, \
 	.gpu_reset_clobbers_display = true, \
 	.hws_needs_physical = 1, \
 	.unfenced_needs_alignment = 1, \
@@ -232,6 +234,7 @@ static const struct intel_device_info i865g_info = {
 	.display.has_gmch = 1, \
 	.gpu_reset_clobbers_display = true, \
 	.platform_engine_mask = BIT(RCS0), \
+	.has_3d_pipeline = 1, \
 	.has_snoop = true, \
 	.has_coherent_ggtt = true, \
 	.dma_mask_size = 32, \
@@ -323,6 +326,7 @@ static const struct intel_device_info pnv_m_info = {
 	.display.has_gmch = 1, \
 	.gpu_reset_clobbers_display = true, \
 	.platform_engine_mask = BIT(RCS0), \
+	.has_3d_pipeline = 1, \
 	.has_snoop = true, \
 	.has_coherent_ggtt = true, \
 	.dma_mask_size = 36, \
@@ -374,6 +378,7 @@ static const struct intel_device_info gm45_info = {
 	.display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B), \
 	.display.has_hotplug = 1, \
 	.platform_engine_mask = BIT(RCS0) | BIT(VCS0), \
+	.has_3d_pipeline = 1, \
 	.has_snoop = true, \
 	.has_coherent_ggtt = true, \
 	/* ilk does support rc6, but we do not implement [power] contexts */ \
@@ -405,6 +410,7 @@ static const struct intel_device_info ilk_m_info = {
 	.display.has_hotplug = 1, \
 	.display.fbc_mask = BIT(INTEL_FBC_A), \
 	.platform_engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \
+	.has_3d_pipeline = 1, \
 	.has_coherent_ggtt = true, \
 	.has_llc = 1, \
 	.has_rc6 = 1, \
@@ -456,6 +462,7 @@ static const struct intel_device_info snb_m_gt2_info = {
 	.display.has_hotplug = 1, \
 	.display.fbc_mask = BIT(INTEL_FBC_A), \
 	.platform_engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \
+	.has_3d_pipeline = 1, \
 	.has_coherent_ggtt = true, \
 	.has_llc = 1, \
 	.has_rc6 = 1, \
@@ -692,6 +699,7 @@ static const struct intel_device_info skl_gt4_info = {
 	.display.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) | \
 		BIT(TRANSCODER_C) | BIT(TRANSCODER_EDP) | \
 		BIT(TRANSCODER_DSI_A) | BIT(TRANSCODER_DSI_C), \
+	.has_3d_pipeline = 1, \
 	.has_64bit_reloc = 1, \
 	.display.has_ddi = 1, \
 	.display.has_fpga_dbg = 1, \
@@ -1005,6 +1013,7 @@ static const struct intel_device_info adl_p_info = {
 	.graphics.rel = 50, \
 	XE_HP_PAGE_SIZES, \
 	.dma_mask_size = 46, \
+	.has_3d_pipeline = 1, \
 	.has_64bit_reloc = 1, \
 	.has_flat_ccs = 1, \
 	.has_global_mocs = 1, \
@@ -1012,7 +1021,7 @@ static const struct intel_device_info adl_p_info = {
 	.has_llc = 1, \
 	.has_logical_ring_contexts = 1, \
 	.has_logical_ring_elsq = 1, \
-	.has_mslices = 1, \
+	.has_mslice_steering = 1, \
 	.has_rc6 = 1, \
 	.has_reset_engine = 1, \
 	.has_rps = 1, \
@@ -1079,7 +1088,12 @@ static const struct intel_device_info ats_m_info = {
 
 #define XE_HPC_FEATURES \
 	XE_HP_FEATURES, \
-	.dma_mask_size = 52
+	.dma_mask_size = 52, \
+	.has_3d_pipeline = 0, \
+	.has_guc_deprivilege = 1, \
+	.has_l3_ccs_read = 1, \
+	.has_mslice_steering = 0, \
+	.has_one_eu_per_fuse_bit = 1
 
 __maybe_unused
 static const struct intel_device_info pvc_info = {
diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index 7584cec53d5d..0094f67c63f2 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -31,10 +31,12 @@ static int copy_query_item(void *query_hdr, size_t query_sz,
 
 static int fill_topology_info(const struct sseu_dev_info *sseu,
 			      struct drm_i915_query_item *query_item,
-			      const u8 *subslice_mask)
+			      intel_sseu_ss_mask_t subslice_mask)
 {
 	struct drm_i915_query_topology_info topo;
 	u32 slice_length, subslice_length, eu_length, total_length;
+	int ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
+	int eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
 	int ret;
 
 	BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
@@ -43,8 +45,8 @@ static int fill_topology_info(const struct sseu_dev_info *sseu,
 		return -ENODEV;
 
 	slice_length = sizeof(sseu->slice_mask);
-	subslice_length = sseu->max_slices * sseu->ss_stride;
-	eu_length = sseu->max_slices * sseu->max_subslices * sseu->eu_stride;
+	subslice_length = sseu->max_slices * ss_stride;
+	eu_length = sseu->max_slices * sseu->max_subslices * eu_stride;
 	total_length = sizeof(topo) + slice_length + subslice_length +
 		       eu_length;
 
@@ -59,9 +61,9 @@ static int fill_topology_info(const struct sseu_dev_info *sseu,
 	topo.max_eus_per_subslice = sseu->max_eus_per_subslice;
 
 	topo.subslice_offset = slice_length;
-	topo.subslice_stride = sseu->ss_stride;
+	topo.subslice_stride = ss_stride;
 	topo.eu_offset = slice_length + subslice_length;
-	topo.eu_stride = sseu->eu_stride;
+	topo.eu_stride = eu_stride;
 
 	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr),
 			 &topo, sizeof(topo)))
@@ -71,15 +73,15 @@ static int fill_topology_info(const struct sseu_dev_info *sseu,
 			 &sseu->slice_mask, slice_length))
 		return -EFAULT;
 
-	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
-					 sizeof(topo) + slice_length),
-			 subslice_mask, subslice_length))
+	if (intel_sseu_copy_ssmask_to_user(u64_to_user_ptr(query_item->data_ptr +
+							   sizeof(topo) + slice_length),
+					   sseu))
 		return -EFAULT;
 
-	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
-					 sizeof(topo) +
-					 slice_length + subslice_length),
-			 sseu->eu_mask, eu_length))
+	if (intel_sseu_copy_eumask_to_user(u64_to_user_ptr(query_item->data_ptr +
+							   sizeof(topo) +
+							   slice_length + subslice_length),
+					   sseu))
 		return -EFAULT;
 
 	return total_length;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 616164fa2e32..643d7f020a4a 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -976,6 +976,14 @@
 #define GEN12_COMPUTE2_RING_BASE	0x1e000
 #define GEN12_COMPUTE3_RING_BASE	0x26000
 #define BLT_RING_BASE		0x22000
+#define XEHPC_BCS1_RING_BASE	0x3e0000
+#define XEHPC_BCS2_RING_BASE	0x3e2000
+#define XEHPC_BCS3_RING_BASE	0x3e4000
+#define XEHPC_BCS4_RING_BASE	0x3e6000
+#define XEHPC_BCS5_RING_BASE	0x3e8000
+#define XEHPC_BCS6_RING_BASE	0x3ea000
+#define XEHPC_BCS7_RING_BASE	0x3ec000
+#define XEHPC_BCS8_RING_BASE	0x3ee000
 #define DG1_GSC_HECI1_BASE	0x00258000
 #define DG1_GSC_HECI2_BASE	0x00259000
 #define DG2_GSC_HECI1_BASE	0x00373000
@@ -1846,6 +1854,7 @@
 #define BXT_RP_STATE_CAP        _MMIO(0x138170)
 #define GEN9_RP_STATE_LIMITS	_MMIO(0x138148)
 #define XEHPSDV_RP_STATE_CAP	_MMIO(0x250014)
+#define PVC_RP_STATE_CAP	_MMIO(0x281014)
 
 #define GT0_PERF_LIMIT_REASONS		_MMIO(0x1381a8)
 #define   GT0_PERF_LIMIT_REASONS_MASK	0xde3
@@ -6758,6 +6767,14 @@
 #define     DG1_UNCORE_GET_INIT_STATUS		0x0
 #define     DG1_UNCORE_INIT_STATUS_COMPLETE	0x1
 #define GEN12_PCODE_READ_SAGV_BLOCK_TIME_US	0x23
+#define   XEHP_PCODE_FREQUENCY_CONFIG		0x6e	/* xehpsdv, pvc */
+/* XEHP_PCODE_FREQUENCY_CONFIG sub-commands (param1) */
+#define     PCODE_MBOX_FC_SC_READ_FUSED_P0	0x0
+#define     PCODE_MBOX_FC_SC_READ_FUSED_PN	0x1
+/* PCODE_MBOX_DOMAIN_* - mailbox domain IDs */
+/*   XEHP_PCODE_FREQUENCY_CONFIG param2 */
+#define     PCODE_MBOX_DOMAIN_NONE		0x0
+#define     PCODE_MBOX_DOMAIN_MEDIAFF		0x3
 #define GEN6_PCODE_DATA				_MMIO(0x138128)
 #define   GEN6_PCODE_FREQ_IA_RATIO_SHIFT	8
 #define   GEN6_PCODE_FREQ_RING_RATIO_SHIFT	16
@@ -8328,23 +8345,6 @@ enum skl_power_gate {
 #define   SGGI_DIS			REG_BIT(15)
 #define   SGR_DIS			REG_BIT(13)
 
-#define XEHPSDV_TILE0_ADDR_RANGE	_MMIO(0x4900)
-#define   XEHPSDV_TILE_LMEM_RANGE_SHIFT  8
-
-#define XEHPSDV_FLAT_CCS_BASE_ADDR	_MMIO(0x4910)
-#define   XEHPSDV_CCS_BASE_SHIFT	8
-
-/* gamt regs */
-#define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4)
-#define   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW  0x67F1427F /* max/min for LRA1/2 */
-#define   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV  0x5FF101FF /* max/min for LRA1/2 */
-#define   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL  0x67F1427F /*    "        " */
-#define   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT  0x5FF101FF /*    "        " */
-
-#define MMCD_MISC_CTRL		_MMIO(0x4ddc) /* skl+ */
-#define  MMCD_PCLA		(1 << 31)
-#define  MMCD_HOTSPOT_EN	(1 << 27)
-
 #define _ICL_PHY_MISC_A		0x64C00
 #define _ICL_PHY_MISC_B		0x64C04
 #define _DG2_PHY_MISC_TC1	0x64C14 /* TC1="PHY E" but offset as if "PHY F" */
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 73d5195146b0..62fad16a55e8 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -60,7 +60,7 @@ static struct kmem_cache *slab_execute_cbs;
 
 static const char *i915_fence_get_driver_name(struct dma_fence *fence)
 {
-	return dev_name(to_request(fence)->engine->i915->drm.dev);
+	return dev_name(to_request(fence)->i915->drm.dev);
 }
 
 static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
@@ -134,17 +134,42 @@ static void i915_fence_release(struct dma_fence *fence)
 	i915_sw_fence_fini(&rq->semaphore);
 
 	/*
-	 * Keep one request on each engine for reserved use under mempressure,
+	 * Keep one request on each engine for reserved use under mempressure
 	 * do not use with virtual engines as this really is only needed for
 	 * kernel contexts.
+	 *
+	 * We do not hold a reference to the engine here and so have to be
+	 * very careful in what rq->engine we poke. The virtual engine is
+	 * referenced via the rq->context and we released that ref during
+	 * i915_request_retire(), ergo we must not dereference a virtual
+	 * engine here. Not that we would want to, as the only consumer of
+	 * the reserved engine->request_pool is the power management parking,
+	 * which must-not-fail, and that is only run on the physical engines.
+	 *
+	 * Since the request must have been executed to be have completed,
+	 * we know that it will have been processed by the HW and will
+	 * not be unsubmitted again, so rq->engine and rq->execution_mask
+	 * at this point is stable. rq->execution_mask will be a single
+	 * bit if the last and _only_ engine it could execution on was a
+	 * physical engine, if it's multiple bits then it started on and
+	 * could still be on a virtual engine. Thus if the mask is not a
+	 * power-of-two we assume that rq->engine may still be a virtual
+	 * engine and so a dangling invalid pointer that we cannot dereference
+	 *
+	 * For example, consider the flow of a bonded request through a virtual
+	 * engine. The request is created with a wide engine mask (all engines
+	 * that we might execute on). On processing the bond, the request mask
+	 * is reduced to one or more engines. If the request is subsequently
+	 * bound to a single engine, it will then be constrained to only
+	 * execute on that engine and never returned to the virtual engine
+	 * after timeslicing away, see __unwind_incomplete_requests(). Thus we
+	 * know that if the rq->execution_mask is a single bit, rq->engine
+	 * can be a physical engine with the exact corresponding mask.
 	 */
 	if (!intel_engine_is_virtual(rq->engine) &&
-	    !cmpxchg(&rq->engine->request_pool, NULL, rq)) {
-		intel_context_put(rq->context);
+	    is_power_of_2(rq->execution_mask) &&
+	    !cmpxchg(&rq->engine->request_pool, NULL, rq))
 		return;
-	}
-
-	intel_context_put(rq->context);
 
 	kmem_cache_free(slab_requests, rq);
 }
@@ -611,7 +636,7 @@ bool __i915_request_submit(struct i915_request *request)
 		goto active;
 	}
 
-	if (unlikely(intel_context_is_banned(request->context)))
+	if (unlikely(!intel_context_is_schedulable(request->context)))
 		i915_request_set_error_once(request, -EIO);
 
 	if (unlikely(fatal_error(request->fence.error)))
@@ -921,22 +946,11 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 		}
 	}
 
-	/*
-	 * Hold a reference to the intel_context over life of an i915_request.
-	 * Without this an i915_request can exist after the context has been
-	 * destroyed (e.g. request retired, context closed, but user space holds
-	 * a reference to the request from an out fence). In the case of GuC
-	 * submission + virtual engine, the engine that the request references
-	 * is also destroyed which can trigger bad pointer dref in fence ops
-	 * (e.g. i915_fence_get_driver_name). We could likely change these
-	 * functions to avoid touching the engine but let's just be safe and
-	 * hold the intel_context reference. In execlist mode the request always
-	 * eventually points to a physical engine so this isn't an issue.
-	 */
-	rq->context = intel_context_get(ce);
+	rq->context = ce;
 	rq->engine = ce->engine;
 	rq->ring = ce->ring;
 	rq->execution_mask = ce->engine->mask;
+	rq->i915 = ce->engine->i915;
 
 	ret = intel_timeline_get_seqno(tl, rq, &seqno);
 	if (ret)
@@ -1008,7 +1022,6 @@ err_unwind:
 	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
 
 err_free:
-	intel_context_put(ce);
 	kmem_cache_free(slab_requests, rq);
 err_unreserve:
 	intel_context_unpin(ce);
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 28b1f9db5487..47041ec68df8 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -196,6 +196,8 @@ struct i915_request {
 	struct dma_fence fence;
 	spinlock_t lock;
 
+	struct drm_i915_private *i915;
+
 	/**
 	 * Context and ring buffer related to this request
 	 * Contexts are refcounted, so when this request is associated with a
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 8521daba212a..1e2750210831 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -166,7 +166,14 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
 	struct device *kdev = kobj_to_dev(kobj);
 	struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
 	struct i915_gpu_coredump *gpu;
-	ssize_t ret;
+	ssize_t ret = 0;
+
+	/*
+	 * FIXME: Concurrent clients triggering resets and reading + clearing
+	 * dumps can cause inconsistent sysfs reads when a user calls in with a
+	 * non-zero offset to complete a prior partial read but the
+	 * gpu_coredump has been cleared or replaced.
+	 */
 
 	gpu = i915_first_error_state(i915);
 	if (IS_ERR(gpu)) {
@@ -178,8 +185,10 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
 		const char *str = "No error state collected\n";
 		size_t len = strlen(str);
 
-		ret = min_t(size_t, count, len - off);
-		memcpy(buf, str + off, ret);
+		if (off < len) {
+			ret = min_t(size_t, count, len - off);
+			memcpy(buf, str + off, ret);
+		}
 	}
 
 	return ret;
@@ -259,4 +268,6 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
 
 	device_remove_bin_file(kdev,  &dpf_attrs_1);
 	device_remove_bin_file(kdev,  &dpf_attrs);
+
+	kobject_put(dev_priv->sysfs_gt);
 }
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 4f6db539571a..5d5828b9a242 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/sched/mm.h>
+#include <linux/dma-fence-array.h>
 #include <drm/drm_gem.h>
 
 #include "display/intel_frontbuffer.h"
@@ -550,13 +551,6 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 	if (WARN_ON_ONCE(vma->obj->flags & I915_BO_ALLOC_GPU_ONLY))
 		return IOMEM_ERR_PTR(-EINVAL);
 
-	if (!i915_gem_object_is_lmem(vma->obj)) {
-		if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
-			err = -ENODEV;
-			goto err;
-		}
-	}
-
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 	GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND));
 	GEM_BUG_ON(i915_vma_verify_bind_complete(vma));
@@ -569,20 +563,33 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 		 * of pages, that way we can also drop the
 		 * I915_BO_ALLOC_CONTIGUOUS when allocating the object.
 		 */
-		if (i915_gem_object_is_lmem(vma->obj))
+		if (i915_gem_object_is_lmem(vma->obj)) {
 			ptr = i915_gem_object_lmem_io_map(vma->obj, 0,
 							  vma->obj->base.size);
-		else
+		} else if (i915_vma_is_map_and_fenceable(vma)) {
 			ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap,
 						vma->node.start,
 						vma->node.size);
+		} else {
+			ptr = (void __iomem *)
+				i915_gem_object_pin_map(vma->obj, I915_MAP_WC);
+			if (IS_ERR(ptr)) {
+				err = PTR_ERR(ptr);
+				goto err;
+			}
+			ptr = page_pack_bits(ptr, 1);
+		}
+
 		if (ptr == NULL) {
 			err = -ENOMEM;
 			goto err;
 		}
 
 		if (unlikely(cmpxchg(&vma->iomap, NULL, ptr))) {
-			io_mapping_unmap(ptr);
+			if (page_unmask_bits(ptr))
+				__i915_gem_object_release_map(vma->obj);
+			else
+				io_mapping_unmap(ptr);
 			ptr = vma->iomap;
 		}
 	}
@@ -596,7 +603,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 	i915_vma_set_ggtt_write(vma);
 
 	/* NB Access through the GTT requires the device to be awake. */
-	return ptr;
+	return page_mask_bits(ptr);
 
 err_unpin:
 	__i915_vma_unpin(vma);
@@ -614,6 +621,8 @@ void i915_vma_unpin_iomap(struct i915_vma *vma)
 {
 	GEM_BUG_ON(vma->iomap == NULL);
 
+	/* XXX We keep the mapping until __i915_vma_unbind()/evict() */
+
 	i915_vma_flush_writes(vma);
 
 	i915_vma_unpin_fence(vma);
@@ -1762,7 +1771,10 @@ static void __i915_vma_iounmap(struct i915_vma *vma)
 	if (vma->iomap == NULL)
 		return;
 
-	io_mapping_unmap(vma->iomap);
+	if (page_unmask_bits(vma->iomap))
+		__i915_gem_object_release_map(vma->obj);
+	else
+		io_mapping_unmap(vma->iomap);
 	vma->iomap = NULL;
 }
 
@@ -1823,6 +1835,21 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
 	if (unlikely(err))
 		return err;
 
+	/*
+	 * Reserve fences slot early to prevent an allocation after preparing
+	 * the workload and associating fences with dma_resv.
+	 */
+	if (fence && !(flags & __EXEC_OBJECT_NO_RESERVE)) {
+		struct dma_fence *curr;
+		int idx;
+
+		dma_fence_array_for_each(curr, idx, fence)
+			;
+		err = dma_resv_reserve_fences(vma->obj->base.resv, idx);
+		if (unlikely(err))
+			return err;
+	}
+
 	if (flags & EXEC_OBJECT_WRITE) {
 		struct intel_frontbuffer *front;
 
@@ -1832,31 +1859,23 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
 				i915_active_add_request(&front->write, rq);
 			intel_frontbuffer_put(front);
 		}
+	}
 
-		if (!(flags & __EXEC_OBJECT_NO_RESERVE)) {
-			err = dma_resv_reserve_fences(vma->obj->base.resv, 1);
-			if (unlikely(err))
-				return err;
-		}
+	if (fence) {
+		struct dma_fence *curr;
+		enum dma_resv_usage usage;
+		int idx;
 
-		if (fence) {
-			dma_resv_add_fence(vma->obj->base.resv, fence,
-					   DMA_RESV_USAGE_WRITE);
+		obj->read_domains = 0;
+		if (flags & EXEC_OBJECT_WRITE) {
+			usage = DMA_RESV_USAGE_WRITE;
 			obj->write_domain = I915_GEM_DOMAIN_RENDER;
-			obj->read_domains = 0;
-		}
-	} else {
-		if (!(flags & __EXEC_OBJECT_NO_RESERVE)) {
-			err = dma_resv_reserve_fences(vma->obj->base.resv, 1);
-			if (unlikely(err))
-				return err;
+		} else {
+			usage = DMA_RESV_USAGE_READ;
 		}
 
-		if (fence) {
-			dma_resv_add_fence(vma->obj->base.resv, fence,
-					   DMA_RESV_USAGE_READ);
-			obj->write_domain = 0;
-		}
+		dma_fence_array_for_each(curr, idx, fence)
+			dma_resv_add_fence(vma->obj->base.resv, curr, usage);
 	}
 
 	if (flags & EXEC_OBJECT_NEEDS_FENCE && vma->fence)
@@ -1899,9 +1918,11 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async)
 		/* release the fence reg _after_ flushing */
 		i915_vma_revoke_fence(vma);
 
-		__i915_vma_iounmap(vma);
 		clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma));
 	}
+
+	__i915_vma_iounmap(vma);
+
 	GEM_BUG_ON(vma->fence);
 	GEM_BUG_ON(i915_vma_has_userfault(vma));
 
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index f414144eadf8..08341174ee0a 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -143,6 +143,7 @@ enum intel_ppgtt_type {
 	func(needs_compact_pt); \
 	func(gpu_reset_clobbers_display); \
 	func(has_reset_engine); \
+	func(has_3d_pipeline); \
 	func(has_4tile); \
 	func(has_flat_ccs); \
 	func(has_global_mocs); \
@@ -150,12 +151,14 @@ enum intel_ppgtt_type {
 	func(has_heci_pxp); \
 	func(has_heci_gscfi); \
 	func(has_guc_deprivilege); \
+	func(has_l3_ccs_read); \
 	func(has_l3_dpf); \
 	func(has_llc); \
 	func(has_logical_ring_contexts); \
 	func(has_logical_ring_elsq); \
 	func(has_media_ratio_mode); \
-	func(has_mslices); \
+	func(has_mslice_steering); \
+	func(has_one_eu_per_fuse_bit); \
 	func(has_pooled_eu); \
 	func(has_pxp); \
 	func(has_rc6); \
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 3355486a0b20..9b7e93ca1ff9 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -7634,10 +7634,9 @@ static void xehpsdv_init_clock_gating(struct drm_i915_private *dev_priv)
 
 static void dg2_init_clock_gating(struct drm_i915_private *i915)
 {
-	/* Wa_22010954014:dg2_g10 */
-	if (IS_DG2_G10(i915))
-		intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0,
-				 SGSI_SIDECLK_DIS);
+	/* Wa_22010954014:dg2 */
+	intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0,
+			 SGSI_SIDECLK_DIS);
 
 	/*
 	 * Wa_14010733611:dg2_g10
@@ -7648,6 +7647,17 @@ static void dg2_init_clock_gating(struct drm_i915_private *i915)
 				 SGR_DIS | SGGI_DIS);
 }
 
+static void pvc_init_clock_gating(struct drm_i915_private *dev_priv)
+{
+	/* Wa_14012385139:pvc */
+	if (IS_PVC_BD_STEP(dev_priv, STEP_A0, STEP_B0))
+		intel_uncore_rmw(&dev_priv->uncore, XEHP_CLOCK_GATE_DIS, 0, SGR_DIS);
+
+	/* Wa_22010954014:pvc */
+	if (IS_PVC_BD_STEP(dev_priv, STEP_A0, STEP_B0))
+		intel_uncore_rmw(&dev_priv->uncore, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS);
+}
+
 static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
 {
 	if (!HAS_PCH_CNP(dev_priv))
@@ -8064,6 +8074,7 @@ static const struct drm_i915_clock_gating_funcs platform##_clock_gating_funcs =
 	.init_clock_gating = platform##_init_clock_gating,		\
 }
 
+CG_FUNCS(pvc);
 CG_FUNCS(dg2);
 CG_FUNCS(xehpsdv);
 CG_FUNCS(adlp);
@@ -8102,7 +8113,9 @@ CG_FUNCS(nop);
  */
 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
 {
-	if (IS_DG2(dev_priv))
+	if (IS_PONTEVECCHIO(dev_priv))
+		dev_priv->clock_gating_funcs = &pvc_clock_gating_funcs;
+	else if (IS_DG2(dev_priv))
 		dev_priv->clock_gating_funcs = &dg2_clock_gating_funcs;
 	else if (IS_XEHPSDV(dev_priv))
 		dev_priv->clock_gating_funcs = &xehpsdv_clock_gating_funcs;
diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c
index 74e8e4680028..42b3133d8387 100644
--- a/drivers/gpu/drm/i915/intel_step.c
+++ b/drivers/gpu/drm/i915/intel_step.c
@@ -135,6 +135,8 @@ static const struct intel_step_info adlp_n_revids[] = {
 	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_D0 },
 };
 
+static void pvc_step_init(struct drm_i915_private *i915, int pci_revid);
+
 void intel_step_init(struct drm_i915_private *i915)
 {
 	const struct intel_step_info *revids = NULL;
@@ -142,7 +144,10 @@ void intel_step_init(struct drm_i915_private *i915)
 	int revid = INTEL_REVID(i915);
 	struct intel_step_info step = {};
 
-	if (IS_DG2_G10(i915)) {
+	if (IS_PONTEVECCHIO(i915)) {
+		pvc_step_init(i915, revid);
+		return;
+	} else if (IS_DG2_G10(i915)) {
 		revids = dg2_g10_revid_step_tbl;
 		size = ARRAY_SIZE(dg2_g10_revid_step_tbl);
 	} else if (IS_DG2_G11(i915)) {
@@ -235,6 +240,69 @@ void intel_step_init(struct drm_i915_private *i915)
 	RUNTIME_INFO(i915)->step = step;
 }
 
+#define PVC_BD_REVID	GENMASK(5, 3)
+#define PVC_CT_REVID	GENMASK(2, 0)
+
+static const int pvc_bd_subids[] = {
+	[0x0] = STEP_A0,
+	[0x3] = STEP_B0,
+	[0x4] = STEP_B1,
+	[0x5] = STEP_B3,
+};
+
+static const int pvc_ct_subids[] = {
+	[0x3] = STEP_A0,
+	[0x5] = STEP_B0,
+	[0x6] = STEP_B1,
+	[0x7] = STEP_C0,
+};
+
+static int
+pvc_step_lookup(struct drm_i915_private *i915, const char *type,
+		const int *table, int size, int subid)
+{
+	if (subid < size && table[subid] != STEP_NONE)
+		return table[subid];
+
+	drm_warn(&i915->drm, "Unknown %s id 0x%02x\n", type, subid);
+
+	/*
+	 * As on other platforms, try to use the next higher ID if we land on a
+	 * gap in the table.
+	 */
+	while (subid < size && table[subid] == STEP_NONE)
+		subid++;
+
+	if (subid < size) {
+		drm_dbg(&i915->drm, "Using steppings for %s id 0x%02x\n",
+			type, subid);
+		return table[subid];
+	}
+
+	drm_dbg(&i915->drm, "Using future steppings\n");
+	return STEP_FUTURE;
+}
+
+/*
+ * PVC needs special handling since we don't lookup the
+ * revid in a table, but rather specific bitfields within
+ * the revid for various components.
+ */
+static void pvc_step_init(struct drm_i915_private *i915, int pci_revid)
+{
+	int ct_subid, bd_subid;
+
+	bd_subid = FIELD_GET(PVC_BD_REVID, pci_revid);
+	ct_subid = FIELD_GET(PVC_CT_REVID, pci_revid);
+
+	RUNTIME_INFO(i915)->step.basedie_step =
+		pvc_step_lookup(i915, "Base Die", pvc_bd_subids,
+				ARRAY_SIZE(pvc_bd_subids), bd_subid);
+	RUNTIME_INFO(i915)->step.graphics_step =
+		pvc_step_lookup(i915, "Compute Tile", pvc_ct_subids,
+				ARRAY_SIZE(pvc_ct_subids), ct_subid);
+}
+
 #define STEP_NAME_CASE(name)	\
 	case STEP_##name:	\
 		return #name;
diff --git a/drivers/gpu/drm/i915/intel_step.h b/drivers/gpu/drm/i915/intel_step.h
index d71a99bd5179..a6b12bfa9744 100644
--- a/drivers/gpu/drm/i915/intel_step.h
+++ b/drivers/gpu/drm/i915/intel_step.h
@@ -11,9 +11,10 @@
 struct drm_i915_private;
 
 struct intel_step_info {
-	u8 graphics_step;
+	u8 graphics_step;	/* Represents the compute tile on Xe_HPC */
 	u8 display_step;
 	u8 media_step;
+	u8 basedie_step;
 };
 
 #define STEP_ENUM_VAL(name)  STEP_##name,
@@ -25,6 +26,7 @@ struct intel_step_info {
 	func(B0)			\
 	func(B1)			\
 	func(B2)			\
+	func(B3)			\
 	func(C0)			\
 	func(C1)			\
 	func(D0)			\
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 83517a703eb6..a852c471d1b3 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -938,36 +938,32 @@ find_fw_domain(struct intel_uncore *uncore, u32 offset)
 	return entry->domains;
 }
 
-#define GEN_FW_RANGE(s, e, d) \
-	{ .start = (s), .end = (e), .domains = (d) }
-
-/* *Must* be sorted by offset ranges! See intel_fw_table_check(). */
-static const struct intel_forcewake_range __vlv_fw_ranges[] = {
-	GEN_FW_RANGE(0x2000, 0x3fff, FORCEWAKE_RENDER),
-	GEN_FW_RANGE(0x5000, 0x7fff, FORCEWAKE_RENDER),
-	GEN_FW_RANGE(0xb000, 0x11fff, FORCEWAKE_RENDER),
-	GEN_FW_RANGE(0x12000, 0x13fff, FORCEWAKE_MEDIA),
-	GEN_FW_RANGE(0x22000, 0x23fff, FORCEWAKE_MEDIA),
-	GEN_FW_RANGE(0x2e000, 0x2ffff, FORCEWAKE_RENDER),
-	GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_MEDIA),
-};
-
-#define __fwtable_reg_read_fw_domains(uncore, offset) \
-({ \
-	enum forcewake_domains __fwd = 0; \
-	if (NEEDS_FORCE_WAKE((offset))) \
-		__fwd = find_fw_domain(uncore, offset); \
-	__fwd; \
-})
+/*
+ * Shadowed register tables describe special register ranges that i915 is
+ * allowed to write to without acquiring forcewake.  If these registers' power
+ * wells are down, the hardware will save values written by i915 to a shadow
+ * copy and automatically transfer them into the real register the next time
+ * the power well is woken up.  Shadowing only applies to writes; forcewake
+ * must still be acquired when reading from registers in these ranges.
+ *
+ * The documentation for shadowed registers is somewhat spotty on older
+ * platforms.  However missing registers from these lists is non-fatal; it just
+ * means we'll wake up the hardware for some register accesses where we didn't
+ * really need to.
+ *
+ * The ranges listed in these tables must be sorted by offset.
+ *
+ * When adding new tables here, please also add them to
+ * intel_shadow_table_check() in selftests/intel_uncore.c so that they will be
+ * scanned for obvious mistakes or typos by the selftests.
+ */
 
-/* *Must* be sorted by offset! See intel_shadow_table_check(). */
 static const struct i915_range gen8_shadowed_regs[] = {
 	{ .start =  0x2030, .end =  0x2030 },
 	{ .start =  0xA008, .end =  0xA00C },
 	{ .start = 0x12030, .end = 0x12030 },
 	{ .start = 0x1a030, .end = 0x1a030 },
 	{ .start = 0x22030, .end = 0x22030 },
-	/* TODO: Other registers are not yet used */
 };
 
 static const struct i915_range gen11_shadowed_regs[] = {
@@ -1080,6 +1076,45 @@ static const struct i915_range dg2_shadowed_regs[] = {
 	{ .start = 0x1F8510, .end = 0x1F8550 },
 };
 
+static const struct i915_range pvc_shadowed_regs[] = {
+	{ .start =   0x2030, .end =   0x2030 },
+	{ .start =   0x2510, .end =   0x2550 },
+	{ .start =   0xA008, .end =   0xA00C },
+	{ .start =   0xA188, .end =   0xA188 },
+	{ .start =   0xA278, .end =   0xA278 },
+	{ .start =   0xA540, .end =   0xA56C },
+	{ .start =   0xC4C8, .end =   0xC4C8 },
+	{ .start =   0xC4E0, .end =   0xC4E0 },
+	{ .start =   0xC600, .end =   0xC600 },
+	{ .start =   0xC658, .end =   0xC658 },
+	{ .start =  0x22030, .end =  0x22030 },
+	{ .start =  0x22510, .end =  0x22550 },
+	{ .start = 0x1C0030, .end = 0x1C0030 },
+	{ .start = 0x1C0510, .end = 0x1C0550 },
+	{ .start = 0x1C4030, .end = 0x1C4030 },
+	{ .start = 0x1C4510, .end = 0x1C4550 },
+	{ .start = 0x1C8030, .end = 0x1C8030 },
+	{ .start = 0x1C8510, .end = 0x1C8550 },
+	{ .start = 0x1D0030, .end = 0x1D0030 },
+	{ .start = 0x1D0510, .end = 0x1D0550 },
+	{ .start = 0x1D4030, .end = 0x1D4030 },
+	{ .start = 0x1D4510, .end = 0x1D4550 },
+	{ .start = 0x1D8030, .end = 0x1D8030 },
+	{ .start = 0x1D8510, .end = 0x1D8550 },
+	{ .start = 0x1E0030, .end = 0x1E0030 },
+	{ .start = 0x1E0510, .end = 0x1E0550 },
+	{ .start = 0x1E4030, .end = 0x1E4030 },
+	{ .start = 0x1E4510, .end = 0x1E4550 },
+	{ .start = 0x1E8030, .end = 0x1E8030 },
+	{ .start = 0x1E8510, .end = 0x1E8550 },
+	{ .start = 0x1F0030, .end = 0x1F0030 },
+	{ .start = 0x1F0510, .end = 0x1F0550 },
+	{ .start = 0x1F4030, .end = 0x1F4030 },
+	{ .start = 0x1F4510, .end = 0x1F4550 },
+	{ .start = 0x1F8030, .end = 0x1F8030 },
+	{ .start = 0x1F8510, .end = 0x1F8550 },
+};
+
 static int mmio_range_cmp(u32 key, const struct i915_range *range)
 {
 	if (key < range->start)
@@ -1107,11 +1142,70 @@ gen6_reg_write_fw_domains(struct intel_uncore *uncore, i915_reg_t reg)
 	return FORCEWAKE_RENDER;
 }
 
+#define __fwtable_reg_read_fw_domains(uncore, offset) \
+({ \
+	enum forcewake_domains __fwd = 0; \
+	if (NEEDS_FORCE_WAKE((offset))) \
+		__fwd = find_fw_domain(uncore, offset); \
+	__fwd; \
+})
+
+#define __fwtable_reg_write_fw_domains(uncore, offset) \
+({ \
+	enum forcewake_domains __fwd = 0; \
+	const u32 __offset = (offset); \
+	if (NEEDS_FORCE_WAKE((__offset)) && !is_shadowed(uncore, __offset)) \
+		__fwd = find_fw_domain(uncore, __offset); \
+	__fwd; \
+})
+
+#define GEN_FW_RANGE(s, e, d) \
+	{ .start = (s), .end = (e), .domains = (d) }
+
+/*
+ * All platforms' forcewake tables below must be sorted by offset ranges.
+ * Furthermore, new forcewake tables added should be "watertight" and have
+ * no gaps between ranges.
+ *
+ * When there are multiple consecutive ranges listed in the bspec with
+ * the same forcewake domain, it is customary to combine them into a single
+ * row in the tables below to keep the tables small and lookups fast.
+ * Likewise, reserved/unused ranges may be combined with the preceding and/or
+ * following ranges since the driver will never be making MMIO accesses in
+ * those ranges.
+ *
+ * For example, if the bspec were to list:
+ *
+ *    ...
+ *    0x1000 - 0x1fff:  GT
+ *    0x2000 - 0x2cff:  GT
+ *    0x2d00 - 0x2fff:  unused/reserved
+ *    0x3000 - 0xffff:  GT
+ *    ...
+ *
+ * these could all be represented by a single line in the code:
+ *
+ *   GEN_FW_RANGE(0x1000, 0xffff, FORCEWAKE_GT)
+ *
+ * When adding new forcewake tables here, please also add them to
+ * intel_uncore_mock_selftests in selftests/intel_uncore.c so that they will be
+ * scanned for obvious mistakes or typos by the selftests.
+ */
+
 static const struct intel_forcewake_range __gen6_fw_ranges[] = {
 	GEN_FW_RANGE(0x0, 0x3ffff, FORCEWAKE_RENDER),
 };
 
-/* *Must* be sorted by offset ranges! See intel_fw_table_check(). */
+static const struct intel_forcewake_range __vlv_fw_ranges[] = {
+	GEN_FW_RANGE(0x2000, 0x3fff, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0x5000, 0x7fff, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0xb000, 0x11fff, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0x12000, 0x13fff, FORCEWAKE_MEDIA),
+	GEN_FW_RANGE(0x22000, 0x23fff, FORCEWAKE_MEDIA),
+	GEN_FW_RANGE(0x2e000, 0x2ffff, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_MEDIA),
+};
+
 static const struct intel_forcewake_range __chv_fw_ranges[] = {
 	GEN_FW_RANGE(0x2000, 0x3fff, FORCEWAKE_RENDER),
 	GEN_FW_RANGE(0x4000, 0x4fff, FORCEWAKE_RENDER | FORCEWAKE_MEDIA),
@@ -1131,16 +1225,6 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = {
 	GEN_FW_RANGE(0x30000, 0x37fff, FORCEWAKE_MEDIA),
 };
 
-#define __fwtable_reg_write_fw_domains(uncore, offset) \
-({ \
-	enum forcewake_domains __fwd = 0; \
-	const u32 __offset = (offset); \
-	if (NEEDS_FORCE_WAKE((__offset)) && !is_shadowed(uncore, __offset)) \
-		__fwd = find_fw_domain(uncore, __offset); \
-	__fwd; \
-})
-
-/* *Must* be sorted by offset ranges! See intel_fw_table_check(). */
 static const struct intel_forcewake_range __gen9_fw_ranges[] = {
 	GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_GT),
 	GEN_FW_RANGE(0xb00, 0x1fff, 0), /* uncore range */
@@ -1176,7 +1260,6 @@ static const struct intel_forcewake_range __gen9_fw_ranges[] = {
 	GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_MEDIA),
 };
 
-/* *Must* be sorted by offset ranges! See intel_fw_table_check(). */
 static const struct intel_forcewake_range __gen11_fw_ranges[] = {
 	GEN_FW_RANGE(0x0, 0x1fff, 0), /* uncore range */
 	GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER),
@@ -1215,14 +1298,6 @@ static const struct intel_forcewake_range __gen11_fw_ranges[] = {
 	GEN_FW_RANGE(0x1d4000, 0x1dbfff, 0)
 };
 
-/*
- * *Must* be sorted by offset ranges! See intel_fw_table_check().
- *
- * Note that the spec lists several reserved/unused ranges that don't
- * actually contain any registers.  In the table below we'll combine those
- * reserved ranges with either the preceding or following range to keep the
- * table small and lookups fast.
- */
 static const struct intel_forcewake_range __gen12_fw_ranges[] = {
 	GEN_FW_RANGE(0x0, 0x1fff, 0), /*
 		0x0   -  0xaff: reserved
@@ -1327,8 +1402,6 @@ static const struct intel_forcewake_range __gen12_fw_ranges[] = {
 /*
  * Graphics IP version 12.55 brings a slight change to the 0xd800 range,
  * switching it from the GT domain to the render domain.
- *
- * *Must* be sorted by offset ranges! See intel_fw_table_check().
  */
 #define XEHP_FWRANGES(FW_RANGE_D800)					\
 	GEN_FW_RANGE(0x0, 0x1fff, 0), /*					\
@@ -1490,6 +1563,103 @@ static const struct intel_forcewake_range __dg2_fw_ranges[] = {
 	XEHP_FWRANGES(FORCEWAKE_RENDER)
 };
 
+static const struct intel_forcewake_range __pvc_fw_ranges[] = {
+	GEN_FW_RANGE(0x0, 0xaff, 0),
+	GEN_FW_RANGE(0xb00, 0xbff, FORCEWAKE_GT),
+	GEN_FW_RANGE(0xc00, 0xfff, 0),
+	GEN_FW_RANGE(0x1000, 0x1fff, FORCEWAKE_GT),
+	GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0x2700, 0x2fff, FORCEWAKE_GT),
+	GEN_FW_RANGE(0x3000, 0x3fff, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0x4000, 0x813f, FORCEWAKE_GT), /*
+		0x4000 - 0x4aff: gt
+		0x4b00 - 0x4fff: reserved
+		0x5000 - 0x51ff: gt
+		0x5200 - 0x52ff: reserved
+		0x5300 - 0x53ff: gt
+		0x5400 - 0x7fff: reserved
+		0x8000 - 0x813f: gt */
+	GEN_FW_RANGE(0x8140, 0x817f, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0x8180, 0x81ff, 0),
+	GEN_FW_RANGE(0x8200, 0x94cf, FORCEWAKE_GT), /*
+		0x8200 - 0x82ff: gt
+		0x8300 - 0x84ff: reserved
+		0x8500 - 0x887f: gt
+		0x8880 - 0x8a7f: reserved
+		0x8a80 - 0x8aff: gt
+		0x8b00 - 0x8fff: reserved
+		0x9000 - 0x947f: gt
+		0x9480 - 0x94cf: reserved */
+	GEN_FW_RANGE(0x94d0, 0x955f, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0x9560, 0x967f, 0), /*
+		0x9560 - 0x95ff: always on
+		0x9600 - 0x967f: reserved */
+	GEN_FW_RANGE(0x9680, 0x97ff, FORCEWAKE_RENDER), /*
+		0x9680 - 0x96ff: render
+		0x9700 - 0x97ff: reserved */
+	GEN_FW_RANGE(0x9800, 0xcfff, FORCEWAKE_GT), /*
+		0x9800 - 0xb4ff: gt
+		0xb500 - 0xbfff: reserved
+		0xc000 - 0xcfff: gt */
+	GEN_FW_RANGE(0xd000, 0xd3ff, 0),
+	GEN_FW_RANGE(0xd400, 0xdbff, FORCEWAKE_GT),
+	GEN_FW_RANGE(0xdc00, 0xdcff, FORCEWAKE_RENDER),
+	GEN_FW_RANGE(0xdd00, 0xde7f, FORCEWAKE_GT), /*
+		0xdd00 - 0xddff: gt
+		0xde00 - 0xde7f: reserved */
+	GEN_FW_RANGE(0xde80, 0xe8ff, FORCEWAKE_RENDER), /*
+		0xde80 - 0xdeff: render
+		0xdf00 - 0xe1ff: reserved
+		0xe200 - 0xe7ff: render
+		0xe800 - 0xe8ff: reserved */
+	GEN_FW_RANGE(0xe900, 0x11fff, FORCEWAKE_GT), /*
+		 0xe900 -  0xe9ff: gt
+		 0xea00 -  0xebff: reserved
+		 0xec00 -  0xffff: gt
+		0x10000 - 0x11fff: reserved */
+	GEN_FW_RANGE(0x12000, 0x12fff, 0), /*
+		0x12000 - 0x127ff: always on
+		0x12800 - 0x12fff: reserved */
+	GEN_FW_RANGE(0x13000, 0x23fff, FORCEWAKE_GT), /*
+		0x13000 - 0x135ff: gt
+		0x13600 - 0x147ff: reserved
+		0x14800 - 0x153ff: gt
+		0x15400 - 0x19fff: reserved
+		0x1a000 - 0x1ffff: gt
+		0x20000 - 0x21fff: reserved
+		0x22000 - 0x23fff: gt */
+	GEN_FW_RANGE(0x24000, 0x2417f, 0), /*
+		24000 - 0x2407f: always on
+		24080 - 0x2417f: reserved */
+	GEN_FW_RANGE(0x24180, 0x3ffff, FORCEWAKE_GT), /*
+		0x24180 - 0x241ff: gt
+		0x24200 - 0x251ff: reserved
+		0x25200 - 0x252ff: gt
+		0x25300 - 0x25fff: reserved
+		0x26000 - 0x27fff: gt
+		0x28000 - 0x2ffff: reserved
+		0x30000 - 0x3ffff: gt */
+	GEN_FW_RANGE(0x40000, 0x1bffff, 0),
+	GEN_FW_RANGE(0x1c0000, 0x1c3fff, FORCEWAKE_MEDIA_VDBOX0), /*
+		0x1c0000 - 0x1c2bff: VD0
+		0x1c2c00 - 0x1c2cff: reserved
+		0x1c2d00 - 0x1c2dff: VD0
+		0x1c2e00 - 0x1c3eff: reserved
+		0x1c3f00 - 0x1c3fff: VD0 */
+	GEN_FW_RANGE(0x1c4000, 0x1cffff, FORCEWAKE_MEDIA_VDBOX1), /*
+		0x1c4000 - 0x1c6aff: VD1
+		0x1c6b00 - 0x1c7eff: reserved
+		0x1c7f00 - 0x1c7fff: VD1
+		0x1c8000 - 0x1cffff: reserved */
+	GEN_FW_RANGE(0x1d0000, 0x23ffff, FORCEWAKE_MEDIA_VDBOX2), /*
+		0x1d0000 - 0x1d2aff: VD2
+		0x1d2b00 - 0x1d3eff: reserved
+		0x1d3f00 - 0x1d3fff: VD2
+		0x1d4000 - 0x23ffff: reserved */
+	GEN_FW_RANGE(0x240000, 0x3dffff, 0),
+	GEN_FW_RANGE(0x3e0000, 0x3effff, FORCEWAKE_GT),
+};
+
 static void
 ilk_dummy_write(struct intel_uncore *uncore)
 {
@@ -2125,7 +2295,11 @@ static int uncore_forcewake_init(struct intel_uncore *uncore)
 
 	ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable);
 
-	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) {
+	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 60)) {
+		ASSIGN_FW_DOMAINS_TABLE(uncore, __pvc_fw_ranges);
+		ASSIGN_SHADOW_TABLE(uncore, pvc_shadowed_regs);
+		ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable);
+	} else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) {
 		ASSIGN_FW_DOMAINS_TABLE(uncore, __dg2_fw_ranges);
 		ASSIGN_SHADOW_TABLE(uncore, dg2_shadowed_regs);
 		ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable);
@@ -2470,118 +2644,6 @@ intel_uncore_forcewake_for_reg(struct intel_uncore *uncore,
 	return fw_domains;
 }
 
-/**
- * uncore_rw_with_mcr_steering_fw - Access a register after programming
- *				    the MCR selector register.
- * @uncore: pointer to struct intel_uncore
- * @reg: register being accessed
- * @rw_flag: FW_REG_READ for read access or FW_REG_WRITE for write access
- * @slice: slice number (ignored for multi-cast write)
- * @subslice: sub-slice number (ignored for multi-cast write)
- * @value: register value to be written (ignored for read)
- *
- * Return: 0 for write access. register value for read access.
- *
- * Caller needs to make sure the relevant forcewake wells are up.
- */
-static u32 uncore_rw_with_mcr_steering_fw(struct intel_uncore *uncore,
-					  i915_reg_t reg, u8 rw_flag,
-					  int slice, int subslice, u32 value)
-{
-	u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0;
-
-	lockdep_assert_held(&uncore->lock);
-
-	if (GRAPHICS_VER(uncore->i915) >= 11) {
-		mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
-		mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
-
-		/*
-		 * Wa_22013088509
-		 *
-		 * The setting of the multicast/unicast bit usually wouldn't
-		 * matter for read operations (which always return the value
-		 * from a single register instance regardless of how that bit
-		 * is set), but some platforms have a workaround requiring us
-		 * to remain in multicast mode for reads.  There's no real
-		 * downside to this, so we'll just go ahead and do so on all
-		 * platforms; we'll only clear the multicast bit from the mask
-		 * when exlicitly doing a write operation.
-		 */
-		if (rw_flag == FW_REG_WRITE)
-			mcr_mask |= GEN11_MCR_MULTICAST;
-	} else {
-		mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
-		mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
-	}
-
-	old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
-
-	mcr &= ~mcr_mask;
-	mcr |= mcr_ss;
-	intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
-
-	if (rw_flag == FW_REG_READ)
-		val = intel_uncore_read_fw(uncore, reg);
-	else
-		intel_uncore_write_fw(uncore, reg, value);
-
-	mcr &= ~mcr_mask;
-	mcr |= old_mcr & mcr_mask;
-
-	intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
-
-	return val;
-}
-
-static u32 uncore_rw_with_mcr_steering(struct intel_uncore *uncore,
-				       i915_reg_t reg, u8 rw_flag,
-				       int slice, int subslice,
-				       u32 value)
-{
-	enum forcewake_domains fw_domains;
-	u32 val;
-
-	fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
-						    rw_flag);
-	fw_domains |= intel_uncore_forcewake_for_reg(uncore,
-						     GEN8_MCR_SELECTOR,
-						     FW_REG_READ | FW_REG_WRITE);
-
-	spin_lock_irq(&uncore->lock);
-	intel_uncore_forcewake_get__locked(uncore, fw_domains);
-
-	val = uncore_rw_with_mcr_steering_fw(uncore, reg, rw_flag,
-					     slice, subslice, value);
-
-	intel_uncore_forcewake_put__locked(uncore, fw_domains);
-	spin_unlock_irq(&uncore->lock);
-
-	return val;
-}
-
-u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore,
-					   i915_reg_t reg, int slice, int subslice)
-{
-	return uncore_rw_with_mcr_steering_fw(uncore, reg, FW_REG_READ,
-					      slice, subslice, 0);
-}
-
-u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore,
-					i915_reg_t reg, int slice, int subslice)
-{
-	return uncore_rw_with_mcr_steering(uncore, reg, FW_REG_READ,
-					   slice, subslice, 0);
-}
-
-void intel_uncore_write_with_mcr_steering(struct intel_uncore *uncore,
-					  i915_reg_t reg, u32 value,
-					  int slice, int subslice)
-{
-	uncore_rw_with_mcr_steering(uncore, reg, FW_REG_WRITE,
-				    slice, subslice, value);
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_uncore.c"
 #include "selftests/intel_uncore.c"
diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
index 52fe3d89dd2b..b1fa912a65e7 100644
--- a/drivers/gpu/drm/i915/intel_uncore.h
+++ b/drivers/gpu/drm/i915/intel_uncore.h
@@ -210,14 +210,6 @@ intel_uncore_has_fifo(const struct intel_uncore *uncore)
 	return uncore->flags & UNCORE_HAS_FIFO;
 }
 
-u32 intel_uncore_read_with_mcr_steering_fw(struct intel_uncore *uncore,
-					   i915_reg_t reg,
-					   int slice, int subslice);
-u32 intel_uncore_read_with_mcr_steering(struct intel_uncore *uncore,
-					i915_reg_t reg,	int slice, int subslice);
-void intel_uncore_write_with_mcr_steering(struct intel_uncore *uncore,
-					  i915_reg_t reg, u32 value,
-					  int slice, int subslice);
 void
 intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug);
 void intel_uncore_init_early(struct intel_uncore *uncore,
diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c
index cdd196783535..fda9bb79c049 100644
--- a/drivers/gpu/drm/i915/selftests/intel_uncore.c
+++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c
@@ -69,6 +69,7 @@ static int intel_shadow_table_check(void)
 		{ gen11_shadowed_regs, ARRAY_SIZE(gen11_shadowed_regs) },
 		{ gen12_shadowed_regs, ARRAY_SIZE(gen12_shadowed_regs) },
 		{ dg2_shadowed_regs, ARRAY_SIZE(dg2_shadowed_regs) },
+		{ pvc_shadowed_regs, ARRAY_SIZE(pvc_shadowed_regs) },
 	};
 	const struct i915_range *range;
 	unsigned int i, j;
@@ -115,6 +116,7 @@ int intel_uncore_mock_selftests(void)
 		{ __gen11_fw_ranges, ARRAY_SIZE(__gen11_fw_ranges), true },
 		{ __gen12_fw_ranges, ARRAY_SIZE(__gen12_fw_ranges), true },
 		{ __xehp_fw_ranges, ARRAY_SIZE(__xehp_fw_ranges), true },
+		{ __pvc_fw_ranges, ARRAY_SIZE(__pvc_fw_ranges), true },
 	};
 	int err, i;
 
diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h
index 67530bfef129..cb0d5b7200c7 100644
--- a/include/drm/intel-gtt.h
+++ b/include/drm/intel-gtt.h
@@ -10,24 +10,24 @@ struct agp_bridge_data;
 struct pci_dev;
 struct sg_table;
 
-void intel_gtt_get(u64 *gtt_total,
-		   phys_addr_t *mappable_base,
-		   resource_size_t *mappable_end);
+void intel_gmch_gtt_get(u64 *gtt_total,
+			phys_addr_t *mappable_base,
+			resource_size_t *mappable_end);
 
 int intel_gmch_probe(struct pci_dev *bridge_pdev, struct pci_dev *gpu_pdev,
 		     struct agp_bridge_data *bridge);
 void intel_gmch_remove(void);
 
-bool intel_enable_gtt(void);
+bool intel_gmch_enable_gtt(void);
 
-void intel_gtt_chipset_flush(void);
-void intel_gtt_insert_page(dma_addr_t addr,
-			   unsigned int pg,
-			   unsigned int flags);
-void intel_gtt_insert_sg_entries(struct sg_table *st,
-				 unsigned int pg_start,
-				 unsigned int flags);
-void intel_gtt_clear_range(unsigned int first_entry, unsigned int num_entries);
+void intel_gmch_gtt_flush(void);
+void intel_gmch_gtt_insert_page(dma_addr_t addr,
+				unsigned int pg,
+				unsigned int flags);
+void intel_gmch_gtt_insert_sg_entries(struct sg_table *st,
+				      unsigned int pg_start,
+				      unsigned int flags);
+void intel_gmch_gtt_clear_range(unsigned int first_entry, unsigned int num_entries);
 
 /* Special gtt memory types */
 #define AGP_DCACHE_MEMORY	1
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index a2def7b27009..de49b68b4fc8 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -3443,6 +3443,22 @@ struct drm_i915_gem_create_ext {
  * At which point we get the object handle in &drm_i915_gem_create_ext.handle,
  * along with the final object size in &drm_i915_gem_create_ext.size, which
  * should account for any rounding up, if required.
+ *
+ * Note that userspace has no means of knowing the current backing region
+ * for objects where @num_regions is larger than one. The kernel will only
+ * ensure that the priority order of the @regions array is honoured, either
+ * when initially placing the object, or when moving memory around due to
+ * memory pressure
+ *
+ * On Flat-CCS capable HW, compression is supported for the objects residing
+ * in I915_MEMORY_CLASS_DEVICE. When such objects (compressed) have other
+ * memory class in @regions and migrated (by i915, due to memory
+ * constraints) to the non I915_MEMORY_CLASS_DEVICE region, then i915 needs to
+ * decompress the content. But i915 doesn't have the required information to
+ * decompress the userspace compressed objects.
+ *
+ * So i915 supports Flat-CCS, on the objects which can reside only on
+ * I915_MEMORY_CLASS_DEVICE regions.
  */
 struct drm_i915_gem_create_ext_memory_regions {
 	/** @base: Extension link. See struct i915_user_extension. */