53 files changed, 19908 insertions, 0 deletions
diff --git a/drivers/gpu/drm/i915/gem/Makefile b/drivers/gpu/drm/i915/gem/Makefile
new file mode 100644
index 000000000000..07e7b8b840ea
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/Makefile
@@ -0,0 +1 @@
+include $(src)/Makefile.header-test # Extra header tests
diff --git a/drivers/gpu/drm/i915/gem/Makefile.header-test b/drivers/gpu/drm/i915/gem/Makefile.header-test
new file mode 100644
index 000000000000..61e06cbb4b32
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/Makefile.header-test
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: MIT
+# Copyright © 2019 Intel Corporation
+
+# Test the headers are compilable as standalone units
+header_test := $(notdir $(wildcard $(src)/*.h))
+
+quiet_cmd_header_test = HDRTEST $@
+      cmd_header_test = echo "\#include \"$(<F)\"" > $@
+
+header_test_%.c: %.h
+	$(call cmd,header_test)
+
+extra-$(CONFIG_DRM_I915_WERROR) += \
+	$(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h)))
+
+clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h)))
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
new file mode 100644
index 000000000000..6ad93a09968c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
@@ -0,0 +1,139 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include "gt/intel_engine.h"
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+static __always_inline u32 __busy_read_flag(u8 id)
+{
+	if (id == (u8)I915_ENGINE_CLASS_INVALID)
+		return 0xffff0000u;
+
+	GEM_BUG_ON(id >= 16);
+	return 0x10000u << id;
+}
+
+static __always_inline u32 __busy_write_id(u8 id)
+{
+	/*
+	 * The uABI guarantees an active writer is also amongst the read
+	 * engines. This would be true if we accessed the activity tracking
+	 * under the lock, but as we perform the lookup of the object and
+	 * its activity locklessly we can not guarantee that the last_write
+	 * being active implies that we have set the same engine flag from
+	 * last_read - hence we always set both read and write busy for
+	 * last_write.
+	 */
+	if (id == (u8)I915_ENGINE_CLASS_INVALID)
+		return 0xffffffffu;
+
+	return (id + 1) | __busy_read_flag(id);
+}
+
+static __always_inline unsigned int
+__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u8 id))
+{
+	const struct i915_request *rq;
+
+	/*
+	 * We have to check the current hw status of the fence as the uABI
+	 * guarantees forward progress. We could rely on the idle worker
+	 * to eventually flush us, but to minimise latency just ask the
+	 * hardware.
+	 *
+	 * Note we only report on the status of native fences.
+	 */
+	if (!dma_fence_is_i915(fence))
+		return 0;
+
+	/* opencode to_request() in order to avoid const warnings */
+	rq = container_of(fence, const struct i915_request, fence);
+	if (i915_request_completed(rq))
+		return 0;
+
+	/* Beware type-expansion follies! */
+	BUILD_BUG_ON(!typecheck(u8, rq->engine->uabi_class));
+	return flag(rq->engine->uabi_class);
+}
+
+static __always_inline unsigned int
+busy_check_reader(const struct dma_fence *fence)
+{
+	return __busy_set_if_active(fence, __busy_read_flag);
+}
+
+static __always_inline unsigned int
+busy_check_writer(const struct dma_fence *fence)
+{
+	if (!fence)
+		return 0;
+
+	return __busy_set_if_active(fence, __busy_write_id);
+}
+
+int
+i915_gem_busy_ioctl(struct drm_device *dev, void *data,
+		    struct drm_file *file)
+{
+	struct drm_i915_gem_busy *args = data;
+	struct drm_i915_gem_object *obj;
+	struct reservation_object_list *list;
+	unsigned int seq;
+	int err;
+
+	err = -ENOENT;
+	rcu_read_lock();
+	obj = i915_gem_object_lookup_rcu(file, args->handle);
+	if (!obj)
+		goto out;
+
+	/*
+	 * A discrepancy here is that we do not report the status of
+	 * non-i915 fences, i.e. even though we may report the object as idle,
+	 * a call to set-domain may still stall waiting for foreign rendering.
+	 * This also means that wait-ioctl may report an object as busy,
+	 * where busy-ioctl considers it idle.
+	 *
+	 * We trade the ability to warn of foreign fences to report on which
+	 * i915 engines are active for the object.
+	 *
+	 * Alternatively, we can trade that extra information on read/write
+	 * activity with
+	 *	args->busy =
+	 *		!reservation_object_test_signaled_rcu(obj->resv, true);
+	 * to report the overall busyness. This is what the wait-ioctl does.
+	 *
+	 */
+retry:
+	seq = raw_read_seqcount(&obj->base.resv->seq);
+
+	/* Translate the exclusive fence to the READ *and* WRITE engine */
+	args->busy =
+		busy_check_writer(rcu_dereference(obj->base.resv->fence_excl));
+
+	/* Translate shared fences to READ set of engines */
+	list = rcu_dereference(obj->base.resv->fence);
+	if (list) {
+		unsigned int shared_count = list->shared_count, i;
+
+		for (i = 0; i < shared_count; ++i) {
+			struct dma_fence *fence =
+				rcu_dereference(list->shared[i]);
+
+			args->busy |= busy_check_reader(fence);
+		}
+	}
+
+	if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq))
+		goto retry;
+
+	err = 0;
+out:
+	rcu_read_unlock();
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
new file mode 100644
index 000000000000..5295285d5843
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -0,0 +1,162 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "display/intel_frontbuffer.h"
+
+#include "i915_drv.h"
+#include "i915_gem_clflush.h"
+
+static DEFINE_SPINLOCK(clflush_lock);
+
+struct clflush {
+	struct dma_fence dma; /* Must be first for dma_fence_free() */
+	struct i915_sw_fence wait;
+	struct work_struct work;
+	struct drm_i915_gem_object *obj;
+};
+
+static const char *i915_clflush_get_driver_name(struct dma_fence *fence)
+{
+	return DRIVER_NAME;
+}
+
+static const char *i915_clflush_get_timeline_name(struct dma_fence *fence)
+{
+	return "clflush";
+}
+
+static void i915_clflush_release(struct dma_fence *fence)
+{
+	struct clflush *clflush = container_of(fence, typeof(*clflush), dma);
+
+	i915_sw_fence_fini(&clflush->wait);
+
+	BUILD_BUG_ON(offsetof(typeof(*clflush), dma));
+	dma_fence_free(&clflush->dma);
+}
+
+static const struct dma_fence_ops i915_clflush_ops = {
+	.get_driver_name = i915_clflush_get_driver_name,
+	.get_timeline_name = i915_clflush_get_timeline_name,
+	.release = i915_clflush_release,
+};
+
+static void __i915_do_clflush(struct drm_i915_gem_object *obj)
+{
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+	drm_clflush_sg(obj->mm.pages);
+	intel_fb_obj_flush(obj, ORIGIN_CPU);
+}
+
+static void i915_clflush_work(struct work_struct *work)
+{
+	struct clflush *clflush = container_of(work, typeof(*clflush), work);
+	struct drm_i915_gem_object *obj = clflush->obj;
+
+	if (i915_gem_object_pin_pages(obj)) {
+		DRM_ERROR("Failed to acquire obj->pages for clflushing\n");
+		goto out;
+	}
+
+	__i915_do_clflush(obj);
+
+	i915_gem_object_unpin_pages(obj);
+
+out:
+	i915_gem_object_put(obj);
+
+	dma_fence_signal(&clflush->dma);
+	dma_fence_put(&clflush->dma);
+}
+
+static int __i915_sw_fence_call
+i915_clflush_notify(struct i915_sw_fence *fence,
+		    enum i915_sw_fence_notify state)
+{
+	struct clflush *clflush = container_of(fence, typeof(*clflush), wait);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		schedule_work(&clflush->work);
+		break;
+
+	case FENCE_FREE:
+		dma_fence_put(&clflush->dma);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+			     unsigned int flags)
+{
+	struct clflush *clflush;
+
+	assert_object_held(obj);
+
+	/*
+	 * Stolen memory is always coherent with the GPU as it is explicitly
+	 * marked as wc by the system, or the system is cache-coherent.
+	 * Similarly, we only access struct pages through the CPU cache, so
+	 * anything not backed by physical memory we consider to be always
+	 * coherent and not need clflushing.
+	 */
+	if (!i915_gem_object_has_struct_page(obj)) {
+		obj->cache_dirty = false;
+		return false;
+	}
+
+	/* If the GPU is snooping the contents of the CPU cache,
+	 * we do not need to manually clear the CPU cache lines.  However,
+	 * the caches are only snooped when the render cache is
+	 * flushed/invalidated.  As we always have to emit invalidations
+	 * and flushes when moving into and out of the RENDER domain, correct
+	 * snooping behaviour occurs naturally as the result of our domain
+	 * tracking.
+	 */
+	if (!(flags & I915_CLFLUSH_FORCE) &&
+	    obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
+		return false;
+
+	trace_i915_gem_object_clflush(obj);
+
+	clflush = NULL;
+	if (!(flags & I915_CLFLUSH_SYNC))
+		clflush = kmalloc(sizeof(*clflush), GFP_KERNEL);
+	if (clflush) {
+		GEM_BUG_ON(!obj->cache_dirty);
+
+		dma_fence_init(&clflush->dma,
+			       &i915_clflush_ops,
+			       &clflush_lock,
+			       to_i915(obj->base.dev)->mm.unordered_timeline,
+			       0);
+		i915_sw_fence_init(&clflush->wait, i915_clflush_notify);
+
+		clflush->obj = i915_gem_object_get(obj);
+		INIT_WORK(&clflush->work, i915_clflush_work);
+
+		dma_fence_get(&clflush->dma);
+
+		i915_sw_fence_await_reservation(&clflush->wait,
+						obj->base.resv, NULL,
+						true, I915_FENCE_TIMEOUT,
+						I915_FENCE_GFP);
+
+		reservation_object_add_excl_fence(obj->base.resv,
+						  &clflush->dma);
+
+		i915_sw_fence_commit(&clflush->wait);
+	} else if (obj->mm.pages) {
+		__i915_do_clflush(obj);
+	} else {
+		GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
+	}
+
+	obj->cache_dirty = false;
+	return true;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h
new file mode 100644
index 000000000000..e6c382973129
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h
@@ -0,0 +1,20 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __I915_GEM_CLFLUSH_H__
+#define __I915_GEM_CLFLUSH_H__
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct drm_i915_gem_object;
+
+bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+			     unsigned int flags);
+#define I915_CLFLUSH_FORCE BIT(0)
+#define I915_CLFLUSH_SYNC BIT(1)
+
+#endif /* __I915_GEM_CLFLUSH_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
new file mode 100644
index 000000000000..1fdab0767a47
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+#include "i915_gem_client_blt.h"
+
+#include "i915_gem_object_blt.h"
+#include "intel_drv.h"
+
+struct i915_sleeve {
+	struct i915_vma *vma;
+	struct drm_i915_gem_object *obj;
+	struct sg_table *pages;
+	struct i915_page_sizes page_sizes;
+};
+
+static int vma_set_pages(struct i915_vma *vma)
+{
+	struct i915_sleeve *sleeve = vma->private;
+
+	vma->pages = sleeve->pages;
+	vma->page_sizes = sleeve->page_sizes;
+
+	return 0;
+}
+
+static void vma_clear_pages(struct i915_vma *vma)
+{
+	GEM_BUG_ON(!vma->pages);
+	vma->pages = NULL;
+}
+
+static int vma_bind(struct i915_vma *vma,
+		    enum i915_cache_level cache_level,
+		    u32 flags)
+{
+	return vma->vm->vma_ops.bind_vma(vma, cache_level, flags);
+}
+
+static void vma_unbind(struct i915_vma *vma)
+{
+	vma->vm->vma_ops.unbind_vma(vma);
+}
+
+static const struct i915_vma_ops proxy_vma_ops = {
+	.set_pages = vma_set_pages,
+	.clear_pages = vma_clear_pages,
+	.bind_vma = vma_bind,
+	.unbind_vma = vma_unbind,
+};
+
+static struct i915_sleeve *create_sleeve(struct i915_address_space *vm,
+					 struct drm_i915_gem_object *obj,
+					 struct sg_table *pages,
+					 struct i915_page_sizes *page_sizes)
+{
+	struct i915_sleeve *sleeve;
+	struct i915_vma *vma;
+	int err;
+
+	sleeve = kzalloc(sizeof(*sleeve), GFP_KERNEL);
+	if (!sleeve)
+		return ERR_PTR(-ENOMEM);
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_free;
+	}
+
+	vma->private = sleeve;
+	vma->ops = &proxy_vma_ops;
+
+	sleeve->vma = vma;
+	sleeve->obj = i915_gem_object_get(obj);
+	sleeve->pages = pages;
+	sleeve->page_sizes = *page_sizes;
+
+	return sleeve;
+
+err_free:
+	kfree(sleeve);
+	return ERR_PTR(err);
+}
+
+static void destroy_sleeve(struct i915_sleeve *sleeve)
+{
+	i915_gem_object_put(sleeve->obj);
+	kfree(sleeve);
+}
+
+struct clear_pages_work {
+	struct dma_fence dma;
+	struct dma_fence_cb cb;
+	struct i915_sw_fence wait;
+	struct work_struct work;
+	struct irq_work irq_work;
+	struct i915_sleeve *sleeve;
+	struct intel_context *ce;
+	u32 value;
+};
+
+static const char *clear_pages_work_driver_name(struct dma_fence *fence)
+{
+	return DRIVER_NAME;
+}
+
+static const char *clear_pages_work_timeline_name(struct dma_fence *fence)
+{
+	return "clear";
+}
+
+static void clear_pages_work_release(struct dma_fence *fence)
+{
+	struct clear_pages_work *w = container_of(fence, typeof(*w), dma);
+
+	destroy_sleeve(w->sleeve);
+
+	i915_sw_fence_fini(&w->wait);
+
+	BUILD_BUG_ON(offsetof(typeof(*w), dma));
+	dma_fence_free(&w->dma);
+}
+
+static const struct dma_fence_ops clear_pages_work_ops = {
+	.get_driver_name = clear_pages_work_driver_name,
+	.get_timeline_name = clear_pages_work_timeline_name,
+	.release = clear_pages_work_release,
+};
+
+static void clear_pages_signal_irq_worker(struct irq_work *work)
+{
+	struct clear_pages_work *w = container_of(work, typeof(*w), irq_work);
+
+	dma_fence_signal(&w->dma);
+	dma_fence_put(&w->dma);
+}
+
+static void clear_pages_dma_fence_cb(struct dma_fence *fence,
+				     struct dma_fence_cb *cb)
+{
+	struct clear_pages_work *w = container_of(cb, typeof(*w), cb);
+
+	if (fence->error)
+		dma_fence_set_error(&w->dma, fence->error);
+
+	/*
+	 * Push the signalling of the fence into yet another worker to avoid
+	 * the nightmare locking around the fence spinlock.
+	 */
+	irq_work_queue(&w->irq_work);
+}
+
+static void clear_pages_worker(struct work_struct *work)
+{
+	struct clear_pages_work *w = container_of(work, typeof(*w), work);
+	struct drm_i915_private *i915 = w->ce->gem_context->i915;
+	struct drm_i915_gem_object *obj = w->sleeve->obj;
+	struct i915_vma *vma = w->sleeve->vma;
+	struct i915_request *rq;
+	int err = w->dma.error;
+
+	if (unlikely(err))
+		goto out_signal;
+
+	if (obj->cache_dirty) {
+		obj->write_domain = 0;
+		if (i915_gem_object_has_struct_page(obj))
+			drm_clflush_sg(w->sleeve->pages);
+		obj->cache_dirty = false;
+	}
+
+	/* XXX: we need to kill this */
+	mutex_lock(&i915->drm.struct_mutex);
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (unlikely(err))
+		goto out_unlock;
+
+	rq = i915_request_create(w->ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_unpin;
+	}
+
+	/* There's no way the fence has signalled */
+	if (dma_fence_add_callback(&rq->fence, &w->cb,
+				   clear_pages_dma_fence_cb))
+		GEM_BUG_ON(1);
+
+	if (w->ce->engine->emit_init_breadcrumb) {
+		err = w->ce->engine->emit_init_breadcrumb(rq);
+		if (unlikely(err))
+			goto out_request;
+	}
+
+	/* XXX: more feverish nightmares await */
+	i915_vma_lock(vma);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
+	if (err)
+		goto out_request;
+
+	err = intel_emit_vma_fill_blt(rq, vma, w->value);
+out_request:
+	if (unlikely(err)) {
+		i915_request_skip(rq, err);
+		err = 0;
+	}
+
+	i915_request_add(rq);
+out_unpin:
+	i915_vma_unpin(vma);
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+out_signal:
+	if (unlikely(err)) {
+		dma_fence_set_error(&w->dma, err);
+		dma_fence_signal(&w->dma);
+		dma_fence_put(&w->dma);
+	}
+}
+
+static int __i915_sw_fence_call
+clear_pages_work_notify(struct i915_sw_fence *fence,
+			enum i915_sw_fence_notify state)
+{
+	struct clear_pages_work *w = container_of(fence, typeof(*w), wait);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		schedule_work(&w->work);
+		break;
+
+	case FENCE_FREE:
+		dma_fence_put(&w->dma);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static DEFINE_SPINLOCK(fence_lock);
+
+/* XXX: better name please */
+int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
+				     struct intel_context *ce,
+				     struct sg_table *pages,
+				     struct i915_page_sizes *page_sizes,
+				     u32 value)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_gem_context *ctx = ce->gem_context;
+	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+	struct clear_pages_work *work;
+	struct i915_sleeve *sleeve;
+	int err;
+
+	sleeve = create_sleeve(vm, obj, pages, page_sizes);
+	if (IS_ERR(sleeve))
+		return PTR_ERR(sleeve);
+
+	work = kmalloc(sizeof(*work), GFP_KERNEL);
+	if (!work) {
+		destroy_sleeve(sleeve);
+		return -ENOMEM;
+	}
+
+	work->value = value;
+	work->sleeve = sleeve;
+	work->ce = ce;
+
+	INIT_WORK(&work->work, clear_pages_worker);
+
+	init_irq_work(&work->irq_work, clear_pages_signal_irq_worker);
+
+	dma_fence_init(&work->dma,
+		       &clear_pages_work_ops,
+		       &fence_lock,
+		       i915->mm.unordered_timeline,
+		       0);
+	i915_sw_fence_init(&work->wait, clear_pages_work_notify);
+
+	i915_gem_object_lock(obj);
+	err = i915_sw_fence_await_reservation(&work->wait,
+					      obj->base.resv, NULL,
+					      true, I915_FENCE_TIMEOUT,
+					      I915_FENCE_GFP);
+	if (err < 0) {
+		dma_fence_set_error(&work->dma, err);
+	} else {
+		reservation_object_add_excl_fence(obj->base.resv, &work->dma);
+		err = 0;
+	}
+	i915_gem_object_unlock(obj);
+
+	dma_fence_get(&work->dma);
+	i915_sw_fence_commit(&work->wait);
+
+	return err;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_client_blt.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h
new file mode 100644
index 000000000000..3dbd28c22ff5
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+#ifndef __I915_GEM_CLIENT_BLT_H__
+#define __I915_GEM_CLIENT_BLT_H__
+
+#include <linux/types.h>
+
+struct drm_i915_gem_object;
+struct i915_page_sizes;
+struct intel_context;
+struct sg_table;
+
+int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
+				     struct intel_context *ce,
+				     struct sg_table *pages,
+				     struct i915_page_sizes *page_sizes,
+				     u32 value);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
new file mode 100644
index 000000000000..0f2c22a3bcb6
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -0,0 +1,2466 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2011-2012 Intel Corporation
+ */
+
+/*
+ * This file implements HW context support. On gen5+ a HW context consists of an
+ * opaque GPU object which is referenced at times of context saves and restores.
+ * With RC6 enabled, the context is also referenced as the GPU enters and exists
+ * from RC6 (GPU has it's own internal power context, except on gen5). Though
+ * something like a context does exist for the media ring, the code only
+ * supports contexts for the render ring.
+ *
+ * In software, there is a distinction between contexts created by the user,
+ * and the default HW context. The default HW context is used by GPU clients
+ * that do not request setup of their own hardware context. The default
+ * context's state is never restored to help prevent programming errors. This
+ * would happen if a client ran and piggy-backed off another clients GPU state.
+ * The default context only exists to give the GPU some offset to load as the
+ * current to invoke a save of the context we actually care about. In fact, the
+ * code could likely be constructed, albeit in a more complicated fashion, to
+ * never use the default context, though that limits the driver's ability to
+ * swap out, and/or destroy other contexts.
+ *
+ * All other contexts are created as a request by the GPU client. These contexts
+ * store GPU state, and thus allow GPU clients to not re-emit state (and
+ * potentially query certain state) at any time. The kernel driver makes
+ * certain that the appropriate commands are inserted.
+ *
+ * The context life cycle is semi-complicated in that context BOs may live
+ * longer than the context itself because of the way the hardware, and object
+ * tracking works. Below is a very crude representation of the state machine
+ * describing the context life.
+ *                                         refcount     pincount     active
+ * S0: initial state                          0            0           0
+ * S1: context created                        1            0           0
+ * S2: context is currently running           2            1           X
+ * S3: GPU referenced, but not current        2            0           1
+ * S4: context is current, but destroyed      1            1           0
+ * S5: like S3, but destroyed                 1            0           1
+ *
+ * The most common (but not all) transitions:
+ * S0->S1: client creates a context
+ * S1->S2: client submits execbuf with context
+ * S2->S3: other clients submits execbuf with context
+ * S3->S1: context object was retired
+ * S3->S2: clients submits another execbuf
+ * S2->S4: context destroy called with current context
+ * S3->S5->S0: destroy path
+ * S4->S5->S0: destroy path on current context
+ *
+ * There are two confusing terms used above:
+ *  The "current context" means the context which is currently running on the
+ *  GPU. The GPU has loaded its state already and has stored away the gtt
+ *  offset of the BO. The GPU is not actively referencing the data at this
+ *  offset, but it will on the next context switch. The only way to avoid this
+ *  is to do a GPU reset.
+ *
+ *  An "active context' is one which was previously the "current context" and is
+ *  on the active list waiting for the next context switch to occur. Until this
+ *  happens, the object must remain at the same gtt offset. It is therefore
+ *  possible to destroy a context, but it is still active.
+ *
+ */
+
+#include <linux/log2.h>
+#include <linux/nospec.h>
+
+#include <drm/i915_drm.h>
+
+#include "gt/intel_lrc_reg.h"
+
+#include "i915_gem_context.h"
+#include "i915_globals.h"
+#include "i915_trace.h"
+#include "i915_user_extensions.h"
+
+#define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
+
+static struct i915_global_gem_context {
+	struct i915_global base;
+	struct kmem_cache *slab_luts;
+} global;
+
+struct i915_lut_handle *i915_lut_handle_alloc(void)
+{
+	return kmem_cache_alloc(global.slab_luts, GFP_KERNEL);
+}
+
+void i915_lut_handle_free(struct i915_lut_handle *lut)
+{
+	return kmem_cache_free(global.slab_luts, lut);
+}
+
+static void lut_close(struct i915_gem_context *ctx)
+{
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+
+	lockdep_assert_held(&ctx->mutex);
+
+	rcu_read_lock();
+	radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
+		struct i915_vma *vma = rcu_dereference_raw(*slot);
+		struct drm_i915_gem_object *obj = vma->obj;
+		struct i915_lut_handle *lut;
+
+		if (!kref_get_unless_zero(&obj->base.refcount))
+			continue;
+
+		rcu_read_unlock();
+		i915_gem_object_lock(obj);
+		list_for_each_entry(lut, &obj->lut_list, obj_link) {
+			if (lut->ctx != ctx)
+				continue;
+
+			if (lut->handle != iter.index)
+				continue;
+
+			list_del(&lut->obj_link);
+			break;
+		}
+		i915_gem_object_unlock(obj);
+		rcu_read_lock();
+
+		if (&lut->obj_link != &obj->lut_list) {
+			i915_lut_handle_free(lut);
+			radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
+			if (atomic_dec_and_test(&vma->open_count) &&
+			    !i915_vma_is_ggtt(vma))
+				i915_vma_close(vma);
+			i915_gem_object_put(obj);
+		}
+
+		i915_gem_object_put(obj);
+	}
+	rcu_read_unlock();
+}
+
+static struct intel_context *
+lookup_user_engine(struct i915_gem_context *ctx,
+		   unsigned long flags,
+		   const struct i915_engine_class_instance *ci)
+#define LOOKUP_USER_INDEX BIT(0)
+{
+	int idx;
+
+	if (!!(flags & LOOKUP_USER_INDEX) != i915_gem_context_user_engines(ctx))
+		return ERR_PTR(-EINVAL);
+
+	if (!i915_gem_context_user_engines(ctx)) {
+		struct intel_engine_cs *engine;
+
+		engine = intel_engine_lookup_user(ctx->i915,
+						  ci->engine_class,
+						  ci->engine_instance);
+		if (!engine)
+			return ERR_PTR(-EINVAL);
+
+		idx = engine->id;
+	} else {
+		idx = ci->engine_instance;
+	}
+
+	return i915_gem_context_get_engine(ctx, idx);
+}
+
+static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
+{
+	unsigned int max;
+
+	lockdep_assert_held(&i915->contexts.mutex);
+
+	if (INTEL_GEN(i915) >= 11)
+		max = GEN11_MAX_CONTEXT_HW_ID;
+	else if (USES_GUC_SUBMISSION(i915))
+		/*
+		 * When using GuC in proxy submission, GuC consumes the
+		 * highest bit in the context id to indicate proxy submission.
+		 */
+		max = MAX_GUC_CONTEXT_HW_ID;
+	else
+		max = MAX_CONTEXT_HW_ID;
+
+	return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp);
+}
+
+static int steal_hw_id(struct drm_i915_private *i915)
+{
+	struct i915_gem_context *ctx, *cn;
+	LIST_HEAD(pinned);
+	int id = -ENOSPC;
+
+	lockdep_assert_held(&i915->contexts.mutex);
+
+	list_for_each_entry_safe(ctx, cn,
+				 &i915->contexts.hw_id_list, hw_id_link) {
+		if (atomic_read(&ctx->hw_id_pin_count)) {
+			list_move_tail(&ctx->hw_id_link, &pinned);
+			continue;
+		}
+
+		GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */
+		list_del_init(&ctx->hw_id_link);
+		id = ctx->hw_id;
+		break;
+	}
+
+	/*
+	 * Remember how far we got up on the last repossesion scan, so the
+	 * list is kept in a "least recently scanned" order.
+	 */
+	list_splice_tail(&pinned, &i915->contexts.hw_id_list);
+	return id;
+}
+
+static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out)
+{
+	int ret;
+
+	lockdep_assert_held(&i915->contexts.mutex);
+
+	/*
+	 * We prefer to steal/stall ourselves and our users over that of the
+	 * entire system. That may be a little unfair to our users, and
+	 * even hurt high priority clients. The choice is whether to oomkill
+	 * something else, or steal a context id.
+	 */
+	ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+	if (unlikely(ret < 0)) {
+		ret = steal_hw_id(i915);
+		if (ret < 0) /* once again for the correct errno code */
+			ret = new_hw_id(i915, GFP_KERNEL);
+		if (ret < 0)
+			return ret;
+	}
+
+	*out = ret;
+	return 0;
+}
+
+static void release_hw_id(struct i915_gem_context *ctx)
+{
+	struct drm_i915_private *i915 = ctx->i915;
+
+	if (list_empty(&ctx->hw_id_link))
+		return;
+
+	mutex_lock(&i915->contexts.mutex);
+	if (!list_empty(&ctx->hw_id_link)) {
+		ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id);
+		list_del_init(&ctx->hw_id_link);
+	}
+	mutex_unlock(&i915->contexts.mutex);
+}
+
+static void __free_engines(struct i915_gem_engines *e, unsigned int count)
+{
+	while (count--) {
+		if (!e->engines[count])
+			continue;
+
+		intel_context_put(e->engines[count]);
+	}
+	kfree(e);
+}
+
+static void free_engines(struct i915_gem_engines *e)
+{
+	__free_engines(e, e->num_engines);
+}
+
+static void free_engines_rcu(struct rcu_head *rcu)
+{
+	free_engines(container_of(rcu, struct i915_gem_engines, rcu));
+}
+
+static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
+{
+	struct intel_engine_cs *engine;
+	struct i915_gem_engines *e;
+	enum intel_engine_id id;
+
+	e = kzalloc(struct_size(e, engines, I915_NUM_ENGINES), GFP_KERNEL);
+	if (!e)
+		return ERR_PTR(-ENOMEM);
+
+	init_rcu_head(&e->rcu);
+	for_each_engine(engine, ctx->i915, id) {
+		struct intel_context *ce;
+
+		ce = intel_context_create(ctx, engine);
+		if (IS_ERR(ce)) {
+			__free_engines(e, id);
+			return ERR_CAST(ce);
+		}
+
+		e->engines[id] = ce;
+	}
+	e->num_engines = id;
+
+	return e;
+}
+
+static void i915_gem_context_free(struct i915_gem_context *ctx)
+{
+	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
+	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
+
+	release_hw_id(ctx);
+	if (ctx->vm)
+		i915_vm_put(ctx->vm);
+
+	free_engines(rcu_access_pointer(ctx->engines));
+	mutex_destroy(&ctx->engines_mutex);
+
+	if (ctx->timeline)
+		i915_timeline_put(ctx->timeline);
+
+	kfree(ctx->name);
+	put_pid(ctx->pid);
+
+	list_del(&ctx->link);
+	mutex_destroy(&ctx->mutex);
+
+	kfree_rcu(ctx, rcu);
+}
+
+static void contexts_free(struct drm_i915_private *i915)
+{
+	struct llist_node *freed = llist_del_all(&i915->contexts.free_list);
+	struct i915_gem_context *ctx, *cn;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	llist_for_each_entry_safe(ctx, cn, freed, free_link)
+		i915_gem_context_free(ctx);
+}
+
+static void contexts_free_first(struct drm_i915_private *i915)
+{
+	struct i915_gem_context *ctx;
+	struct llist_node *freed;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	freed = llist_del_first(&i915->contexts.free_list);
+	if (!freed)
+		return;
+
+	ctx = container_of(freed, typeof(*ctx), free_link);
+	i915_gem_context_free(ctx);
+}
+
+static void contexts_free_worker(struct work_struct *work)
+{
+	struct drm_i915_private *i915 =
+		container_of(work, typeof(*i915), contexts.free_work);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	contexts_free(i915);
+	mutex_unlock(&i915->drm.struct_mutex);
+}
+
+void i915_gem_context_release(struct kref *ref)
+{
+	struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref);
+	struct drm_i915_private *i915 = ctx->i915;
+
+	trace_i915_context_free(ctx);
+	if (llist_add(&ctx->free_link, &i915->contexts.free_list))
+		queue_work(i915->wq, &i915->contexts.free_work);
+}
+
+static void context_close(struct i915_gem_context *ctx)
+{
+	mutex_lock(&ctx->mutex);
+
+	i915_gem_context_set_closed(ctx);
+	ctx->file_priv = ERR_PTR(-EBADF);
+
+	/*
+	 * This context will never again be assinged to HW, so we can
+	 * reuse its ID for the next context.
+	 */
+	release_hw_id(ctx);
+
+	/*
+	 * The LUT uses the VMA as a backpointer to unref the object,
+	 * so we need to clear the LUT before we close all the VMA (inside
+	 * the ppgtt).
+	 */
+	lut_close(ctx);
+
+	mutex_unlock(&ctx->mutex);
+	i915_gem_context_put(ctx);
+}
+
+static u32 default_desc_template(const struct drm_i915_private *i915,
+				 const struct i915_address_space *vm)
+{
+	u32 address_mode;
+	u32 desc;
+
+	desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
+
+	address_mode = INTEL_LEGACY_32B_CONTEXT;
+	if (vm && i915_vm_is_4lvl(vm))
+		address_mode = INTEL_LEGACY_64B_CONTEXT;
+	desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT;
+
+	if (IS_GEN(i915, 8))
+		desc |= GEN8_CTX_L3LLC_COHERENT;
+
+	/* TODO: WaDisableLiteRestore when we start using semaphore
+	 * signalling between Command Streamers
+	 * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE;
+	 */
+
+	return desc;
+}
+
+static struct i915_gem_context *
+__create_context(struct drm_i915_private *i915)
+{
+	struct i915_gem_context *ctx;
+	struct i915_gem_engines *e;
+	int err;
+	int i;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&ctx->ref);
+	list_add_tail(&ctx->link, &i915->contexts.list);
+	ctx->i915 = i915;
+	ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
+	mutex_init(&ctx->mutex);
+
+	mutex_init(&ctx->engines_mutex);
+	e = default_engines(ctx);
+	if (IS_ERR(e)) {
+		err = PTR_ERR(e);
+		goto err_free;
+	}
+	RCU_INIT_POINTER(ctx->engines, e);
+
+	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
+	INIT_LIST_HEAD(&ctx->hw_id_link);
+
+	/* NB: Mark all slices as needing a remap so that when the context first
+	 * loads it will restore whatever remap state already exists. If there
+	 * is no remap info, it will be a NOP. */
+	ctx->remap_slice = ALL_L3_SLICES(i915);
+
+	i915_gem_context_set_bannable(ctx);
+	i915_gem_context_set_recoverable(ctx);
+
+	ctx->ring_size = 4 * PAGE_SIZE;
+	ctx->desc_template =
+		default_desc_template(i915, &i915->mm.aliasing_ppgtt->vm);
+
+	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
+		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
+
+	return ctx;
+
+err_free:
+	kfree(ctx);
+	return ERR_PTR(err);
+}
+
+static struct i915_address_space *
+__set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm)
+{
+	struct i915_address_space *old = ctx->vm;
+
+	ctx->vm = i915_vm_get(vm);
+	ctx->desc_template = default_desc_template(ctx->i915, vm);
+
+	return old;
+}
+
+static void __assign_ppgtt(struct i915_gem_context *ctx,
+			   struct i915_address_space *vm)
+{
+	if (vm == ctx->vm)
+		return;
+
+	vm = __set_ppgtt(ctx, vm);
+	if (vm)
+		i915_vm_put(vm);
+}
+
+static struct i915_gem_context *
+i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
+{
+	struct i915_gem_context *ctx;
+
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
+	    !HAS_EXECLISTS(dev_priv))
+		return ERR_PTR(-EINVAL);
+
+	/* Reap the most stale context */
+	contexts_free_first(dev_priv);
+
+	ctx = __create_context(dev_priv);
+	if (IS_ERR(ctx))
+		return ctx;
+
+	if (HAS_FULL_PPGTT(dev_priv)) {
+		struct i915_ppgtt *ppgtt;
+
+		ppgtt = i915_ppgtt_create(dev_priv);
+		if (IS_ERR(ppgtt)) {
+			DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
+					 PTR_ERR(ppgtt));
+			context_close(ctx);
+			return ERR_CAST(ppgtt);
+		}
+
+		__assign_ppgtt(ctx, &ppgtt->vm);
+		i915_vm_put(&ppgtt->vm);
+	}
+
+	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
+		struct i915_timeline *timeline;
+
+		timeline = i915_timeline_create(dev_priv, NULL);
+		if (IS_ERR(timeline)) {
+			context_close(ctx);
+			return ERR_CAST(timeline);
+		}
+
+		ctx->timeline = timeline;
+	}
+
+	trace_i915_context_create(ctx);
+
+	return ctx;
+}
+
+/**
+ * i915_gem_context_create_gvt - create a GVT GEM context
+ * @dev: drm device *
+ *
+ * This function is used to create a GVT specific GEM context.
+ *
+ * Returns:
+ * pointer to i915_gem_context on success, error pointer if failed
+ *
+ */
+struct i915_gem_context *
+i915_gem_context_create_gvt(struct drm_device *dev)
+{
+	struct i915_gem_context *ctx;
+	int ret;
+
+	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
+		return ERR_PTR(-ENODEV);
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		return ERR_PTR(ret);
+
+	ctx = i915_gem_create_context(to_i915(dev), 0);
+	if (IS_ERR(ctx))
+		goto out;
+
+	ret = i915_gem_context_pin_hw_id(ctx);
+	if (ret) {
+		context_close(ctx);
+		ctx = ERR_PTR(ret);
+		goto out;
+	}
+
+	ctx->file_priv = ERR_PTR(-EBADF);
+	i915_gem_context_set_closed(ctx); /* not user accessible */
+	i915_gem_context_clear_bannable(ctx);
+	i915_gem_context_set_force_single_submission(ctx);
+	if (!USES_GUC_SUBMISSION(to_i915(dev)))
+		ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */
+
+	GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
+out:
+	mutex_unlock(&dev->struct_mutex);
+	return ctx;
+}
+
+static void
+destroy_kernel_context(struct i915_gem_context **ctxp)
+{
+	struct i915_gem_context *ctx;
+
+	/* Keep the context ref so that we can free it immediately ourselves */
+	ctx = i915_gem_context_get(fetch_and_zero(ctxp));
+	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
+
+	context_close(ctx);
+	i915_gem_context_free(ctx);
+}
+
+struct i915_gem_context *
+i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
+{
+	struct i915_gem_context *ctx;
+	int err;
+
+	ctx = i915_gem_create_context(i915, 0);
+	if (IS_ERR(ctx))
+		return ctx;
+
+	err = i915_gem_context_pin_hw_id(ctx);
+	if (err) {
+		destroy_kernel_context(&ctx);
+		return ERR_PTR(err);
+	}
+
+	i915_gem_context_clear_bannable(ctx);
+	ctx->sched.priority = I915_USER_PRIORITY(prio);
+	ctx->ring_size = PAGE_SIZE;
+
+	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
+
+	return ctx;
+}
+
+static void init_contexts(struct drm_i915_private *i915)
+{
+	mutex_init(&i915->contexts.mutex);
+	INIT_LIST_HEAD(&i915->contexts.list);
+
+	/* Using the simple ida interface, the max is limited by sizeof(int) */
+	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
+	BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX);
+	ida_init(&i915->contexts.hw_ida);
+	INIT_LIST_HEAD(&i915->contexts.hw_id_list);
+
+	INIT_WORK(&i915->contexts.free_work, contexts_free_worker);
+	init_llist_head(&i915->contexts.free_list);
+}
+
+static bool needs_preempt_context(struct drm_i915_private *i915)
+{
+	return HAS_EXECLISTS(i915);
+}
+
+int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
+{
+	struct i915_gem_context *ctx;
+
+	/* Reassure ourselves we are only called once */
+	GEM_BUG_ON(dev_priv->kernel_context);
+	GEM_BUG_ON(dev_priv->preempt_context);
+
+	intel_engine_init_ctx_wa(dev_priv->engine[RCS0]);
+	init_contexts(dev_priv);
+
+	/* lowest priority; idle task */
+	ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN);
+	if (IS_ERR(ctx)) {
+		DRM_ERROR("Failed to create default global context\n");
+		return PTR_ERR(ctx);
+	}
+	/*
+	 * For easy recognisablity, we want the kernel context to be 0 and then
+	 * all user contexts will have non-zero hw_id. Kernel contexts are
+	 * permanently pinned, so that we never suffer a stall and can
+	 * use them from any allocation context (e.g. for evicting other
+	 * contexts and from inside the shrinker).
+	 */
+	GEM_BUG_ON(ctx->hw_id);
+	GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count));
+	dev_priv->kernel_context = ctx;
+
+	/* highest priority; preempting task */
+	if (needs_preempt_context(dev_priv)) {
+		ctx = i915_gem_context_create_kernel(dev_priv, INT_MAX);
+		if (!IS_ERR(ctx))
+			dev_priv->preempt_context = ctx;
+		else
+			DRM_ERROR("Failed to create preempt context; disabling preemption\n");
+	}
+
+	DRM_DEBUG_DRIVER("%s context support initialized\n",
+			 DRIVER_CAPS(dev_priv)->has_logical_contexts ?
+			 "logical" : "fake");
+	return 0;
+}
+
+void i915_gem_contexts_fini(struct drm_i915_private *i915)
+{
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	if (i915->preempt_context)
+		destroy_kernel_context(&i915->preempt_context);
+	destroy_kernel_context(&i915->kernel_context);
+
+	/* Must free all deferred contexts (via flush_workqueue) first */
+	GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list));
+	ida_destroy(&i915->contexts.hw_ida);
+}
+
+static int context_idr_cleanup(int id, void *p, void *data)
+{
+	context_close(p);
+	return 0;
+}
+
+static int vm_idr_cleanup(int id, void *p, void *data)
+{
+	i915_vm_put(p);
+	return 0;
+}
+
+static int gem_context_register(struct i915_gem_context *ctx,
+				struct drm_i915_file_private *fpriv)
+{
+	int ret;
+
+	ctx->file_priv = fpriv;
+	if (ctx->vm)
+		ctx->vm->file = fpriv;
+
+	ctx->pid = get_task_pid(current, PIDTYPE_PID);
+	ctx->name = kasprintf(GFP_KERNEL, "%s[%d]",
+			      current->comm, pid_nr(ctx->pid));
+	if (!ctx->name) {
+		ret = -ENOMEM;
+		goto err_pid;
+	}
+
+	/* And finally expose ourselves to userspace via the idr */
+	mutex_lock(&fpriv->context_idr_lock);
+	ret = idr_alloc(&fpriv->context_idr, ctx, 0, 0, GFP_KERNEL);
+	mutex_unlock(&fpriv->context_idr_lock);
+	if (ret >= 0)
+		goto out;
+
+	kfree(fetch_and_zero(&ctx->name));
+err_pid:
+	put_pid(fetch_and_zero(&ctx->pid));
+out:
+	return ret;
+}
+
+int i915_gem_context_open(struct drm_i915_private *i915,
+			  struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct i915_gem_context *ctx;
+	int err;
+
+	mutex_init(&file_priv->context_idr_lock);
+	mutex_init(&file_priv->vm_idr_lock);
+
+	idr_init(&file_priv->context_idr);
+	idr_init_base(&file_priv->vm_idr, 1);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	ctx = i915_gem_create_context(i915, 0);
+	mutex_unlock(&i915->drm.struct_mutex);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto err;
+	}
+
+	err = gem_context_register(ctx, file_priv);
+	if (err < 0)
+		goto err_ctx;
+
+	GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
+	GEM_BUG_ON(err > 0);
+
+	return 0;
+
+err_ctx:
+	context_close(ctx);
+err:
+	idr_destroy(&file_priv->vm_idr);
+	idr_destroy(&file_priv->context_idr);
+	mutex_destroy(&file_priv->vm_idr_lock);
+	mutex_destroy(&file_priv->context_idr_lock);
+	return err;
+}
+
+void i915_gem_context_close(struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+
+	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
+	idr_destroy(&file_priv->context_idr);
+	mutex_destroy(&file_priv->context_idr_lock);
+
+	idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, NULL);
+	idr_destroy(&file_priv->vm_idr);
+	mutex_destroy(&file_priv->vm_idr_lock);
+}
+
+int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct drm_i915_gem_vm_control *args = data;
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct i915_ppgtt *ppgtt;
+	int err;
+
+	if (!HAS_FULL_PPGTT(i915))
+		return -ENODEV;
+
+	if (args->flags)
+		return -EINVAL;
+
+	ppgtt = i915_ppgtt_create(i915);
+	if (IS_ERR(ppgtt))
+		return PTR_ERR(ppgtt);
+
+	ppgtt->vm.file = file_priv;
+
+	if (args->extensions) {
+		err = i915_user_extensions(u64_to_user_ptr(args->extensions),
+					   NULL, 0,
+					   ppgtt);
+		if (err)
+			goto err_put;
+	}
+
+	err = mutex_lock_interruptible(&file_priv->vm_idr_lock);
+	if (err)
+		goto err_put;
+
+	err = idr_alloc(&file_priv->vm_idr, &ppgtt->vm, 0, 0, GFP_KERNEL);
+	if (err < 0)
+		goto err_unlock;
+
+	GEM_BUG_ON(err == 0); /* reserved for invalid/unassigned ppgtt */
+
+	mutex_unlock(&file_priv->vm_idr_lock);
+
+	args->vm_id = err;
+	return 0;
+
+err_unlock:
+	mutex_unlock(&file_priv->vm_idr_lock);
+err_put:
+	i915_vm_put(&ppgtt->vm);
+	return err;
+}
+
+int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct drm_i915_gem_vm_control *args = data;
+	struct i915_address_space *vm;
+	int err;
+	u32 id;
+
+	if (args->flags)
+		return -EINVAL;
+
+	if (args->extensions)
+		return -EINVAL;
+
+	id = args->vm_id;
+	if (!id)
+		return -ENOENT;
+
+	err = mutex_lock_interruptible(&file_priv->vm_idr_lock);
+	if (err)
+		return err;
+
+	vm = idr_remove(&file_priv->vm_idr, id);
+
+	mutex_unlock(&file_priv->vm_idr_lock);
+	if (!vm)
+		return -ENOENT;
+
+	i915_vm_put(vm);
+	return 0;
+}
+
+struct context_barrier_task {
+	struct i915_active base;
+	void (*task)(void *data);
+	void *data;
+};
+
+static void cb_retire(struct i915_active *base)
+{
+	struct context_barrier_task *cb = container_of(base, typeof(*cb), base);
+
+	if (cb->task)
+		cb->task(cb->data);
+
+	i915_active_fini(&cb->base);
+	kfree(cb);
+}
+
+I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault);
+static int context_barrier_task(struct i915_gem_context *ctx,
+				intel_engine_mask_t engines,
+				bool (*skip)(struct intel_context *ce, void *data),
+				int (*emit)(struct i915_request *rq, void *data),
+				void (*task)(void *data),
+				void *data)
+{
+	struct drm_i915_private *i915 = ctx->i915;
+	struct context_barrier_task *cb;
+	struct i915_gem_engines_iter it;
+	struct intel_context *ce;
+	int err = 0;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	GEM_BUG_ON(!task);
+
+	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
+	if (!cb)
+		return -ENOMEM;
+
+	i915_active_init(i915, &cb->base, cb_retire);
+	i915_active_acquire(&cb->base);
+
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		struct i915_request *rq;
+
+		if (I915_SELFTEST_ONLY(context_barrier_inject_fault &
+				       ce->engine->mask)) {
+			err = -ENXIO;
+			break;
+		}
+
+		if (!(ce->engine->mask & engines))
+			continue;
+
+		if (skip && skip(ce, data))
+			continue;
+
+		rq = intel_context_create_request(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		err = 0;
+		if (emit)
+			err = emit(rq, data);
+		if (err == 0)
+			err = i915_active_ref(&cb->base, rq->fence.context, rq);
+
+		i915_request_add(rq);
+		if (err)
+			break;
+	}
+	i915_gem_context_unlock_engines(ctx);
+
+	cb->task = err ? NULL : task; /* caller needs to unwind instead */
+	cb->data = data;
+
+	i915_active_release(&cb->base);
+
+	return err;
+}
+
+static int get_ppgtt(struct drm_i915_file_private *file_priv,
+		     struct i915_gem_context *ctx,
+		     struct drm_i915_gem_context_param *args)
+{
+	struct i915_address_space *vm;
+	int ret;
+
+	if (!ctx->vm)
+		return -ENODEV;
+
+	/* XXX rcu acquire? */
+	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	if (ret)
+		return ret;
+
+	vm = i915_vm_get(ctx->vm);
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+	ret = mutex_lock_interruptible(&file_priv->vm_idr_lock);
+	if (ret)
+		goto err_put;
+
+	ret = idr_alloc(&file_priv->vm_idr, vm, 0, 0, GFP_KERNEL);
+	GEM_BUG_ON(!ret);
+	if (ret < 0)
+		goto err_unlock;
+
+	i915_vm_get(vm);
+
+	args->size = 0;
+	args->value = ret;
+
+	ret = 0;
+err_unlock:
+	mutex_unlock(&file_priv->vm_idr_lock);
+err_put:
+	i915_vm_put(vm);
+	return ret;
+}
+
+static void set_ppgtt_barrier(void *data)
+{
+	struct i915_address_space *old = data;
+
+	if (INTEL_GEN(old->i915) < 8)
+		gen6_ppgtt_unpin_all(i915_vm_to_ppgtt(old));
+
+	i915_vm_put(old);
+}
+
+static int emit_ppgtt_update(struct i915_request *rq, void *data)
+{
+	struct i915_address_space *vm = rq->gem_context->vm;
+	struct intel_engine_cs *engine = rq->engine;
+	u32 base = engine->mmio_base;
+	u32 *cs;
+	int i;
+
+	if (i915_vm_is_4lvl(vm)) {
+		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+		const dma_addr_t pd_daddr = px_dma(ppgtt->pd);
+
+		cs = intel_ring_begin(rq, 6);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		*cs++ = MI_LOAD_REGISTER_IMM(2);
+
+		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
+		*cs++ = upper_32_bits(pd_daddr);
+		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
+		*cs++ = lower_32_bits(pd_daddr);
+
+		*cs++ = MI_NOOP;
+		intel_ring_advance(rq, cs);
+	} else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) {
+		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+
+		cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		*cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES);
+		for (i = GEN8_3LVL_PDPES; i--; ) {
+			const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
+
+			*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
+			*cs++ = upper_32_bits(pd_daddr);
+			*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
+			*cs++ = lower_32_bits(pd_daddr);
+		}
+		*cs++ = MI_NOOP;
+		intel_ring_advance(rq, cs);
+	} else {
+		/* ppGTT is not part of the legacy context image */
+		gen6_ppgtt_pin(i915_vm_to_ppgtt(vm));
+	}
+
+	return 0;
+}
+
+static bool skip_ppgtt_update(struct intel_context *ce, void *data)
+{
+	if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915))
+		return !ce->state;
+	else
+		return !atomic_read(&ce->pin_count);
+}
+
+static int set_ppgtt(struct drm_i915_file_private *file_priv,
+		     struct i915_gem_context *ctx,
+		     struct drm_i915_gem_context_param *args)
+{
+	struct i915_address_space *vm, *old;
+	int err;
+
+	if (args->size)
+		return -EINVAL;
+
+	if (!ctx->vm)
+		return -ENODEV;
+
+	if (upper_32_bits(args->value))
+		return -ENOENT;
+
+	err = mutex_lock_interruptible(&file_priv->vm_idr_lock);
+	if (err)
+		return err;
+
+	vm = idr_find(&file_priv->vm_idr, args->value);
+	if (vm)
+		i915_vm_get(vm);
+	mutex_unlock(&file_priv->vm_idr_lock);
+	if (!vm)
+		return -ENOENT;
+
+	err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	if (err)
+		goto out;
+
+	if (vm == ctx->vm)
+		goto unlock;
+
+	/* Teardown the existing obj:vma cache, it will have to be rebuilt. */
+	mutex_lock(&ctx->mutex);
+	lut_close(ctx);
+	mutex_unlock(&ctx->mutex);
+
+	old = __set_ppgtt(ctx, vm);
+
+	/*
+	 * We need to flush any requests using the current ppgtt before
+	 * we release it as the requests do not hold a reference themselves,
+	 * only indirectly through the context.
+	 */
+	err = context_barrier_task(ctx, ALL_ENGINES,
+				   skip_ppgtt_update,
+				   emit_ppgtt_update,
+				   set_ppgtt_barrier,
+				   old);
+	if (err) {
+		ctx->vm = old;
+		ctx->desc_template = default_desc_template(ctx->i915, old);
+		i915_vm_put(vm);
+	}
+
+unlock:
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+out:
+	i915_vm_put(vm);
+	return err;
+}
+
+static int gen8_emit_rpcs_config(struct i915_request *rq,
+				 struct intel_context *ce,
+				 struct intel_sseu sseu)
+{
+	u64 offset;
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	offset = i915_ggtt_offset(ce->state) +
+		 LRC_STATE_PN * PAGE_SIZE +
+		 (CTX_R_PWR_CLK_STATE + 1) * 4;
+
+	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+	*cs++ = lower_32_bits(offset);
+	*cs++ = upper_32_bits(offset);
+	*cs++ = intel_sseu_make_rpcs(rq->i915, &sseu);
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int
+gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
+{
+	struct i915_request *rq;
+	int ret;
+
+	lockdep_assert_held(&ce->pin_mutex);
+
+	/*
+	 * If the context is not idle, we have to submit an ordered request to
+	 * modify its context image via the kernel context (writing to our own
+	 * image, or into the registers directory, does not stick). Pristine
+	 * and idle contexts will be configured on pinning.
+	 */
+	if (!intel_context_is_pinned(ce))
+		return 0;
+
+	rq = i915_request_create(ce->engine->kernel_context);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	/* Queue this switch after all other activity by this context. */
+	ret = i915_active_request_set(&ce->ring->timeline->last_request, rq);
+	if (ret)
+		goto out_add;
+
+	/*
+	 * Guarantee context image and the timeline remains pinned until the
+	 * modifying request is retired by setting the ce activity tracker.
+	 *
+	 * But we only need to take one pin on the account of it. Or in other
+	 * words transfer the pinned ce object to tracked active request.
+	 */
+	GEM_BUG_ON(i915_active_is_idle(&ce->active));
+	ret = i915_active_ref(&ce->active, rq->fence.context, rq);
+	if (ret)
+		goto out_add;
+
+	ret = gen8_emit_rpcs_config(rq, ce, sseu);
+
+out_add:
+	i915_request_add(rq);
+	return ret;
+}
+
+static int
+__intel_context_reconfigure_sseu(struct intel_context *ce,
+				 struct intel_sseu sseu)
+{
+	int ret;
+
+	GEM_BUG_ON(INTEL_GEN(ce->gem_context->i915) < 8);
+
+	ret = intel_context_lock_pinned(ce);
+	if (ret)
+		return ret;
+
+	/* Nothing to do if unmodified. */
+	if (!memcmp(&ce->sseu, &sseu, sizeof(sseu)))
+		goto unlock;
+
+	ret = gen8_modify_rpcs(ce, sseu);
+	if (!ret)
+		ce->sseu = sseu;
+
+unlock:
+	intel_context_unlock_pinned(ce);
+	return ret;
+}
+
+static int
+intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu)
+{
+	struct drm_i915_private *i915 = ce->gem_context->i915;
+	int ret;
+
+	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
+	if (ret)
+		return ret;
+
+	ret = __intel_context_reconfigure_sseu(ce, sseu);
+
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	return ret;
+}
+
+static int
+user_to_context_sseu(struct drm_i915_private *i915,
+		     const struct drm_i915_gem_context_param_sseu *user,
+		     struct intel_sseu *context)
+{
+	const struct sseu_dev_info *device = &RUNTIME_INFO(i915)->sseu;
+
+	/* No zeros in any field. */
+	if (!user->slice_mask || !user->subslice_mask ||
+	    !user->min_eus_per_subslice || !user->max_eus_per_subslice)
+		return -EINVAL;
+
+	/* Max > min. */
+	if (user->max_eus_per_subslice < user->min_eus_per_subslice)
+		return -EINVAL;
+
+	/*
+	 * Some future proofing on the types since the uAPI is wider than the
+	 * current internal implementation.
+	 */
+	if (overflows_type(user->slice_mask, context->slice_mask) ||
+	    overflows_type(user->subslice_mask, context->subslice_mask) ||
+	    overflows_type(user->min_eus_per_subslice,
+			   context->min_eus_per_subslice) ||
+	    overflows_type(user->max_eus_per_subslice,
+			   context->max_eus_per_subslice))
+		return -EINVAL;
+
+	/* Check validity against hardware. */
+	if (user->slice_mask & ~device->slice_mask)
+		return -EINVAL;
+
+	if (user->subslice_mask & ~device->subslice_mask[0])
+		return -EINVAL;
+
+	if (user->max_eus_per_subslice > device->max_eus_per_subslice)
+		return -EINVAL;
+
+	context->slice_mask = user->slice_mask;
+	context->subslice_mask = user->subslice_mask;
+	context->min_eus_per_subslice = user->min_eus_per_subslice;
+	context->max_eus_per_subslice = user->max_eus_per_subslice;
+
+	/* Part specific restrictions. */
+	if (IS_GEN(i915, 11)) {
+		unsigned int hw_s = hweight8(device->slice_mask);
+		unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]);
+		unsigned int req_s = hweight8(context->slice_mask);
+		unsigned int req_ss = hweight8(context->subslice_mask);
+
+		/*
+		 * Only full subslice enablement is possible if more than one
+		 * slice is turned on.
+		 */
+		if (req_s > 1 && req_ss != hw_ss_per_s)
+			return -EINVAL;
+
+		/*
+		 * If more than four (SScount bitfield limit) subslices are
+		 * requested then the number has to be even.
+		 */
+		if (req_ss > 4 && (req_ss & 1))
+			return -EINVAL;
+
+		/*
+		 * If only one slice is enabled and subslice count is below the
+		 * device full enablement, it must be at most half of the all
+		 * available subslices.
+		 */
+		if (req_s == 1 && req_ss < hw_ss_per_s &&
+		    req_ss > (hw_ss_per_s / 2))
+			return -EINVAL;
+
+		/* ABI restriction - VME use case only. */
+
+		/* All slices or one slice only. */
+		if (req_s != 1 && req_s != hw_s)
+			return -EINVAL;
+
+		/*
+		 * Half subslices or full enablement only when one slice is
+		 * enabled.
+		 */
+		if (req_s == 1 &&
+		    (req_ss != hw_ss_per_s && req_ss != (hw_ss_per_s / 2)))
+			return -EINVAL;
+
+		/* No EU configuration changes. */
+		if ((user->min_eus_per_subslice !=
+		     device->max_eus_per_subslice) ||
+		    (user->max_eus_per_subslice !=
+		     device->max_eus_per_subslice))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int set_sseu(struct i915_gem_context *ctx,
+		    struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_private *i915 = ctx->i915;
+	struct drm_i915_gem_context_param_sseu user_sseu;
+	struct intel_context *ce;
+	struct intel_sseu sseu;
+	unsigned long lookup;
+	int ret;
+
+	if (args->size < sizeof(user_sseu))
+		return -EINVAL;
+
+	if (!IS_GEN(i915, 11))
+		return -ENODEV;
+
+	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+			   sizeof(user_sseu)))
+		return -EFAULT;
+
+	if (user_sseu.rsvd)
+		return -EINVAL;
+
+	if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX))
+		return -EINVAL;
+
+	lookup = 0;
+	if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)
+		lookup |= LOOKUP_USER_INDEX;
+
+	ce = lookup_user_engine(ctx, lookup, &user_sseu.engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	/* Only render engine supports RPCS configuration. */
+	if (ce->engine->class != RENDER_CLASS) {
+		ret = -ENODEV;
+		goto out_ce;
+	}
+
+	ret = user_to_context_sseu(i915, &user_sseu, &sseu);
+	if (ret)
+		goto out_ce;
+
+	ret = intel_context_reconfigure_sseu(ce, sseu);
+	if (ret)
+		goto out_ce;
+
+	args->size = sizeof(user_sseu);
+
+out_ce:
+	intel_context_put(ce);
+	return ret;
+}
+
+struct set_engines {
+	struct i915_gem_context *ctx;
+	struct i915_gem_engines *engines;
+};
+
+static int
+set_engines__load_balance(struct i915_user_extension __user *base, void *data)
+{
+	struct i915_context_engines_load_balance __user *ext =
+		container_of_user(base, typeof(*ext), base);
+	const struct set_engines *set = data;
+	struct intel_engine_cs *stack[16];
+	struct intel_engine_cs **siblings;
+	struct intel_context *ce;
+	u16 num_siblings, idx;
+	unsigned int n;
+	int err;
+
+	if (!HAS_EXECLISTS(set->ctx->i915))
+		return -ENODEV;
+
+	if (USES_GUC_SUBMISSION(set->ctx->i915))
+		return -ENODEV; /* not implement yet */
+
+	if (get_user(idx, &ext->engine_index))
+		return -EFAULT;
+
+	if (idx >= set->engines->num_engines) {
+		DRM_DEBUG("Invalid placement value, %d >= %d\n",
+			  idx, set->engines->num_engines);
+		return -EINVAL;
+	}
+
+	idx = array_index_nospec(idx, set->engines->num_engines);
+	if (set->engines->engines[idx]) {
+		DRM_DEBUG("Invalid placement[%d], already occupied\n", idx);
+		return -EEXIST;
+	}
+
+	if (get_user(num_siblings, &ext->num_siblings))
+		return -EFAULT;
+
+	err = check_user_mbz(&ext->flags);
+	if (err)
+		return err;
+
+	err = check_user_mbz(&ext->mbz64);
+	if (err)
+		return err;
+
+	siblings = stack;
+	if (num_siblings > ARRAY_SIZE(stack)) {
+		siblings = kmalloc_array(num_siblings,
+					 sizeof(*siblings),
+					 GFP_KERNEL);
+		if (!siblings)
+			return -ENOMEM;
+	}
+
+	for (n = 0; n < num_siblings; n++) {
+		struct i915_engine_class_instance ci;
+
+		if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
+			err = -EFAULT;
+			goto out_siblings;
+		}
+
+		siblings[n] = intel_engine_lookup_user(set->ctx->i915,
+						       ci.engine_class,
+						       ci.engine_instance);
+		if (!siblings[n]) {
+			DRM_DEBUG("Invalid sibling[%d]: { class:%d, inst:%d }\n",
+				  n, ci.engine_class, ci.engine_instance);
+			err = -EINVAL;
+			goto out_siblings;
+		}
+	}
+
+	ce = intel_execlists_create_virtual(set->ctx, siblings, n);
+	if (IS_ERR(ce)) {
+		err = PTR_ERR(ce);
+		goto out_siblings;
+	}
+
+	if (cmpxchg(&set->engines->engines[idx], NULL, ce)) {
+		intel_context_put(ce);
+		err = -EEXIST;
+		goto out_siblings;
+	}
+
+out_siblings:
+	if (siblings != stack)
+		kfree(siblings);
+
+	return err;
+}
+
+static int
+set_engines__bond(struct i915_user_extension __user *base, void *data)
+{
+	struct i915_context_engines_bond __user *ext =
+		container_of_user(base, typeof(*ext), base);
+	const struct set_engines *set = data;
+	struct i915_engine_class_instance ci;
+	struct intel_engine_cs *virtual;
+	struct intel_engine_cs *master;
+	u16 idx, num_bonds;
+	int err, n;
+
+	if (get_user(idx, &ext->virtual_index))
+		return -EFAULT;
+
+	if (idx >= set->engines->num_engines) {
+		DRM_DEBUG("Invalid index for virtual engine: %d >= %d\n",
+			  idx, set->engines->num_engines);
+		return -EINVAL;
+	}
+
+	idx = array_index_nospec(idx, set->engines->num_engines);
+	if (!set->engines->engines[idx]) {
+		DRM_DEBUG("Invalid engine at %d\n", idx);
+		return -EINVAL;
+	}
+	virtual = set->engines->engines[idx]->engine;
+
+	err = check_user_mbz(&ext->flags);
+	if (err)
+		return err;
+
+	for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
+		err = check_user_mbz(&ext->mbz64[n]);
+		if (err)
+			return err;
+	}
+
+	if (copy_from_user(&ci, &ext->master, sizeof(ci)))
+		return -EFAULT;
+
+	master = intel_engine_lookup_user(set->ctx->i915,
+					  ci.engine_class, ci.engine_instance);
+	if (!master) {
+		DRM_DEBUG("Unrecognised master engine: { class:%u, instance:%u }\n",
+			  ci.engine_class, ci.engine_instance);
+		return -EINVAL;
+	}
+
+	if (get_user(num_bonds, &ext->num_bonds))
+		return -EFAULT;
+
+	for (n = 0; n < num_bonds; n++) {
+		struct intel_engine_cs *bond;
+
+		if (copy_from_user(&ci, &ext->engines[n], sizeof(ci)))
+			return -EFAULT;
+
+		bond = intel_engine_lookup_user(set->ctx->i915,
+						ci.engine_class,
+						ci.engine_instance);
+		if (!bond) {
+			DRM_DEBUG("Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n",
+				  n, ci.engine_class, ci.engine_instance);
+			return -EINVAL;
+		}
+
+		/*
+		 * A non-virtual engine has no siblings to choose between; and
+		 * a submit fence will always be directed to the one engine.
+		 */
+		if (intel_engine_is_virtual(virtual)) {
+			err = intel_virtual_engine_attach_bond(virtual,
+							       master,
+							       bond);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+static const i915_user_extension_fn set_engines__extensions[] = {
+	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
+	[I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
+};
+
+static int
+set_engines(struct i915_gem_context *ctx,
+	    const struct drm_i915_gem_context_param *args)
+{
+	struct i915_context_param_engines __user *user =
+		u64_to_user_ptr(args->value);
+	struct set_engines set = { .ctx = ctx };
+	unsigned int num_engines, n;
+	u64 extensions;
+	int err;
+
+	if (!args->size) { /* switch back to legacy user_ring_map */
+		if (!i915_gem_context_user_engines(ctx))
+			return 0;
+
+		set.engines = default_engines(ctx);
+		if (IS_ERR(set.engines))
+			return PTR_ERR(set.engines);
+
+		goto replace;
+	}
+
+	BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->engines)));
+	if (args->size < sizeof(*user) ||
+	    !IS_ALIGNED(args->size, sizeof(*user->engines))) {
+		DRM_DEBUG("Invalid size for engine array: %d\n",
+			  args->size);
+		return -EINVAL;
+	}
+
+	/*
+	 * Note that I915_EXEC_RING_MASK limits execbuf to only using the
+	 * first 64 engines defined here.
+	 */
+	num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines);
+
+	set.engines = kmalloc(struct_size(set.engines, engines, num_engines),
+			      GFP_KERNEL);
+	if (!set.engines)
+		return -ENOMEM;
+
+	init_rcu_head(&set.engines->rcu);
+	for (n = 0; n < num_engines; n++) {
+		struct i915_engine_class_instance ci;
+		struct intel_engine_cs *engine;
+
+		if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) {
+			__free_engines(set.engines, n);
+			return -EFAULT;
+		}
+
+		if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID &&
+		    ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) {
+			set.engines->engines[n] = NULL;
+			continue;
+		}
+
+		engine = intel_engine_lookup_user(ctx->i915,
+						  ci.engine_class,
+						  ci.engine_instance);
+		if (!engine) {
+			DRM_DEBUG("Invalid engine[%d]: { class:%d, instance:%d }\n",
+				  n, ci.engine_class, ci.engine_instance);
+			__free_engines(set.engines, n);
+			return -ENOENT;
+		}
+
+		set.engines->engines[n] = intel_context_create(ctx, engine);
+		if (!set.engines->engines[n]) {
+			__free_engines(set.engines, n);
+			return -ENOMEM;
+		}
+	}
+	set.engines->num_engines = num_engines;
+
+	err = -EFAULT;
+	if (!get_user(extensions, &user->extensions))
+		err = i915_user_extensions(u64_to_user_ptr(extensions),
+					   set_engines__extensions,
+					   ARRAY_SIZE(set_engines__extensions),
+					   &set);
+	if (err) {
+		free_engines(set.engines);
+		return err;
+	}
+
+replace:
+	mutex_lock(&ctx->engines_mutex);
+	if (args->size)
+		i915_gem_context_set_user_engines(ctx);
+	else
+		i915_gem_context_clear_user_engines(ctx);
+	rcu_swap_protected(ctx->engines, set.engines, 1);
+	mutex_unlock(&ctx->engines_mutex);
+
+	call_rcu(&set.engines->rcu, free_engines_rcu);
+
+	return 0;
+}
+
+static struct i915_gem_engines *
+__copy_engines(struct i915_gem_engines *e)
+{
+	struct i915_gem_engines *copy;
+	unsigned int n;
+
+	copy = kmalloc(struct_size(e, engines, e->num_engines), GFP_KERNEL);
+	if (!copy)
+		return ERR_PTR(-ENOMEM);
+
+	init_rcu_head(&copy->rcu);
+	for (n = 0; n < e->num_engines; n++) {
+		if (e->engines[n])
+			copy->engines[n] = intel_context_get(e->engines[n]);
+		else
+			copy->engines[n] = NULL;
+	}
+	copy->num_engines = n;
+
+	return copy;
+}
+
+static int
+get_engines(struct i915_gem_context *ctx,
+	    struct drm_i915_gem_context_param *args)
+{
+	struct i915_context_param_engines __user *user;
+	struct i915_gem_engines *e;
+	size_t n, count, size;
+	int err = 0;
+
+	err = mutex_lock_interruptible(&ctx->engines_mutex);
+	if (err)
+		return err;
+
+	e = NULL;
+	if (i915_gem_context_user_engines(ctx))
+		e = __copy_engines(i915_gem_context_engines(ctx));
+	mutex_unlock(&ctx->engines_mutex);
+	if (IS_ERR_OR_NULL(e)) {
+		args->size = 0;
+		return PTR_ERR_OR_ZERO(e);
+	}
+
+	count = e->num_engines;
+
+	/* Be paranoid in case we have an impedance mismatch */
+	if (!check_struct_size(user, engines, count, &size)) {
+		err = -EINVAL;
+		goto err_free;
+	}
+	if (overflows_type(size, args->size)) {
+		err = -EINVAL;
+		goto err_free;
+	}
+
+	if (!args->size) {
+		args->size = size;
+		goto err_free;
+	}
+
+	if (args->size < size) {
+		err = -EINVAL;
+		goto err_free;
+	}
+
+	user = u64_to_user_ptr(args->value);
+	if (!access_ok(user, size)) {
+		err = -EFAULT;
+		goto err_free;
+	}
+
+	if (put_user(0, &user->extensions)) {
+		err = -EFAULT;
+		goto err_free;
+	}
+
+	for (n = 0; n < count; n++) {
+		struct i915_engine_class_instance ci = {
+			.engine_class = I915_ENGINE_CLASS_INVALID,
+			.engine_instance = I915_ENGINE_CLASS_INVALID_NONE,
+		};
+
+		if (e->engines[n]) {
+			ci.engine_class = e->engines[n]->engine->uabi_class;
+			ci.engine_instance = e->engines[n]->engine->instance;
+		}
+
+		if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) {
+			err = -EFAULT;
+			goto err_free;
+		}
+	}
+
+	args->size = size;
+
+err_free:
+	free_engines(e);
+	return err;
+}
+
+static int ctx_setparam(struct drm_i915_file_private *fpriv,
+			struct i915_gem_context *ctx,
+			struct drm_i915_gem_context_param *args)
+{
+	int ret = 0;
+
+	switch (args->param) {
+	case I915_CONTEXT_PARAM_NO_ZEROMAP:
+		if (args->size)
+			ret = -EINVAL;
+		else if (args->value)
+			set_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
+		else
+			clear_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
+		break;
+
+	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+		if (args->size)
+			ret = -EINVAL;
+		else if (args->value)
+			i915_gem_context_set_no_error_capture(ctx);
+		else
+			i915_gem_context_clear_no_error_capture(ctx);
+		break;
+
+	case I915_CONTEXT_PARAM_BANNABLE:
+		if (args->size)
+			ret = -EINVAL;
+		else if (!capable(CAP_SYS_ADMIN) && !args->value)
+			ret = -EPERM;
+		else if (args->value)
+			i915_gem_context_set_bannable(ctx);
+		else
+			i915_gem_context_clear_bannable(ctx);
+		break;
+
+	case I915_CONTEXT_PARAM_RECOVERABLE:
+		if (args->size)
+			ret = -EINVAL;
+		else if (args->value)
+			i915_gem_context_set_recoverable(ctx);
+		else
+			i915_gem_context_clear_recoverable(ctx);
+		break;
+
+	case I915_CONTEXT_PARAM_PRIORITY:
+		{
+			s64 priority = args->value;
+
+			if (args->size)
+				ret = -EINVAL;
+			else if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
+				ret = -ENODEV;
+			else if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
+				 priority < I915_CONTEXT_MIN_USER_PRIORITY)
+				ret = -EINVAL;
+			else if (priority > I915_CONTEXT_DEFAULT_PRIORITY &&
+				 !capable(CAP_SYS_NICE))
+				ret = -EPERM;
+			else
+				ctx->sched.priority =
+					I915_USER_PRIORITY(priority);
+		}
+		break;
+
+	case I915_CONTEXT_PARAM_SSEU:
+		ret = set_sseu(ctx, args);
+		break;
+
+	case I915_CONTEXT_PARAM_VM:
+		ret = set_ppgtt(fpriv, ctx, args);
+		break;
+
+	case I915_CONTEXT_PARAM_ENGINES:
+		ret = set_engines(ctx, args);
+		break;
+
+	case I915_CONTEXT_PARAM_BAN_PERIOD:
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+struct create_ext {
+	struct i915_gem_context *ctx;
+	struct drm_i915_file_private *fpriv;
+};
+
+static int create_setparam(struct i915_user_extension __user *ext, void *data)
+{
+	struct drm_i915_gem_context_create_ext_setparam local;
+	const struct create_ext *arg = data;
+
+	if (copy_from_user(&local, ext, sizeof(local)))
+		return -EFAULT;
+
+	if (local.param.ctx_id)
+		return -EINVAL;
+
+	return ctx_setparam(arg->fpriv, arg->ctx, &local.param);
+}
+
+static int clone_engines(struct i915_gem_context *dst,
+			 struct i915_gem_context *src)
+{
+	struct i915_gem_engines *e = i915_gem_context_lock_engines(src);
+	struct i915_gem_engines *clone;
+	bool user_engines;
+	unsigned long n;
+
+	clone = kmalloc(struct_size(e, engines, e->num_engines), GFP_KERNEL);
+	if (!clone)
+		goto err_unlock;
+
+	init_rcu_head(&clone->rcu);
+	for (n = 0; n < e->num_engines; n++) {
+		struct intel_engine_cs *engine;
+
+		if (!e->engines[n]) {
+			clone->engines[n] = NULL;
+			continue;
+		}
+		engine = e->engines[n]->engine;
+
+		/*
+		 * Virtual engines are singletons; they can only exist
+		 * inside a single context, because they embed their
+		 * HW context... As each virtual context implies a single
+		 * timeline (each engine can only dequeue a single request
+		 * at any time), it would be surprising for two contexts
+		 * to use the same engine. So let's create a copy of
+		 * the virtual engine instead.
+		 */
+		if (intel_engine_is_virtual(engine))
+			clone->engines[n] =
+				intel_execlists_clone_virtual(dst, engine);
+		else
+			clone->engines[n] = intel_context_create(dst, engine);
+		if (IS_ERR_OR_NULL(clone->engines[n])) {
+			__free_engines(clone, n);
+			goto err_unlock;
+		}
+	}
+	clone->num_engines = n;
+
+	user_engines = i915_gem_context_user_engines(src);
+	i915_gem_context_unlock_engines(src);
+
+	free_engines(dst->engines);
+	RCU_INIT_POINTER(dst->engines, clone);
+	if (user_engines)
+		i915_gem_context_set_user_engines(dst);
+	else
+		i915_gem_context_clear_user_engines(dst);
+	return 0;
+
+err_unlock:
+	i915_gem_context_unlock_engines(src);
+	return -ENOMEM;
+}
+
+static int clone_flags(struct i915_gem_context *dst,
+		       struct i915_gem_context *src)
+{
+	dst->user_flags = src->user_flags;
+	return 0;
+}
+
+static int clone_schedattr(struct i915_gem_context *dst,
+			   struct i915_gem_context *src)
+{
+	dst->sched = src->sched;
+	return 0;
+}
+
+static int clone_sseu(struct i915_gem_context *dst,
+		      struct i915_gem_context *src)
+{
+	struct i915_gem_engines *e = i915_gem_context_lock_engines(src);
+	struct i915_gem_engines *clone;
+	unsigned long n;
+	int err;
+
+	clone = dst->engines; /* no locking required; sole access */
+	if (e->num_engines != clone->num_engines) {
+		err = -EINVAL;
+		goto unlock;
+	}
+
+	for (n = 0; n < e->num_engines; n++) {
+		struct intel_context *ce = e->engines[n];
+
+		if (clone->engines[n]->engine->class != ce->engine->class) {
+			/* Must have compatible engine maps! */
+			err = -EINVAL;
+			goto unlock;
+		}
+
+		/* serialises with set_sseu */
+		err = intel_context_lock_pinned(ce);
+		if (err)
+			goto unlock;
+
+		clone->engines[n]->sseu = ce->sseu;
+		intel_context_unlock_pinned(ce);
+	}
+
+	err = 0;
+unlock:
+	i915_gem_context_unlock_engines(src);
+	return err;
+}
+
+static int clone_timeline(struct i915_gem_context *dst,
+			  struct i915_gem_context *src)
+{
+	if (src->timeline) {
+		GEM_BUG_ON(src->timeline == dst->timeline);
+
+		if (dst->timeline)
+			i915_timeline_put(dst->timeline);
+		dst->timeline = i915_timeline_get(src->timeline);
+	}
+
+	return 0;
+}
+
+static int clone_vm(struct i915_gem_context *dst,
+		    struct i915_gem_context *src)
+{
+	struct i915_address_space *vm;
+
+	rcu_read_lock();
+	do {
+		vm = READ_ONCE(src->vm);
+		if (!vm)
+			break;
+
+		if (!kref_get_unless_zero(&vm->ref))
+			continue;
+
+		/*
+		 * This ppgtt may have be reallocated between
+		 * the read and the kref, and reassigned to a third
+		 * context. In order to avoid inadvertent sharing
+		 * of this ppgtt with that third context (and not
+		 * src), we have to confirm that we have the same
+		 * ppgtt after passing through the strong memory
+		 * barrier implied by a successful
+		 * kref_get_unless_zero().
+		 *
+		 * Once we have acquired the current ppgtt of src,
+		 * we no longer care if it is released from src, as
+		 * it cannot be reallocated elsewhere.
+		 */
+
+		if (vm == READ_ONCE(src->vm))
+			break;
+
+		i915_vm_put(vm);
+	} while (1);
+	rcu_read_unlock();
+
+	if (vm) {
+		__assign_ppgtt(dst, vm);
+		i915_vm_put(vm);
+	}
+
+	return 0;
+}
+
+static int create_clone(struct i915_user_extension __user *ext, void *data)
+{
+	static int (* const fn[])(struct i915_gem_context *dst,
+				  struct i915_gem_context *src) = {
+#define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y
+		MAP(ENGINES, clone_engines),
+		MAP(FLAGS, clone_flags),
+		MAP(SCHEDATTR, clone_schedattr),
+		MAP(SSEU, clone_sseu),
+		MAP(TIMELINE, clone_timeline),
+		MAP(VM, clone_vm),
+#undef MAP
+	};
+	struct drm_i915_gem_context_create_ext_clone local;
+	const struct create_ext *arg = data;
+	struct i915_gem_context *dst = arg->ctx;
+	struct i915_gem_context *src;
+	int err, bit;
+
+	if (copy_from_user(&local, ext, sizeof(local)))
+		return -EFAULT;
+
+	BUILD_BUG_ON(GENMASK(BITS_PER_TYPE(local.flags) - 1, ARRAY_SIZE(fn)) !=
+		     I915_CONTEXT_CLONE_UNKNOWN);
+
+	if (local.flags & I915_CONTEXT_CLONE_UNKNOWN)
+		return -EINVAL;
+
+	if (local.rsvd)
+		return -EINVAL;
+
+	rcu_read_lock();
+	src = __i915_gem_context_lookup_rcu(arg->fpriv, local.clone_id);
+	rcu_read_unlock();
+	if (!src)
+		return -ENOENT;
+
+	GEM_BUG_ON(src == dst);
+
+	for (bit = 0; bit < ARRAY_SIZE(fn); bit++) {
+		if (!(local.flags & BIT(bit)))
+			continue;
+
+		err = fn[bit](dst, src);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static const i915_user_extension_fn create_extensions[] = {
+	[I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam,
+	[I915_CONTEXT_CREATE_EXT_CLONE] = create_clone,
+};
+
+static bool client_is_banned(struct drm_i915_file_private *file_priv)
+{
+	return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
+}
+
+int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
+				  struct drm_file *file)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct drm_i915_gem_context_create_ext *args = data;
+	struct create_ext ext_data;
+	int ret;
+
+	if (!DRIVER_CAPS(i915)->has_logical_contexts)
+		return -ENODEV;
+
+	if (args->flags & I915_CONTEXT_CREATE_FLAGS_UNKNOWN)
+		return -EINVAL;
+
+	ret = i915_terminally_wedged(i915);
+	if (ret)
+		return ret;
+
+	ext_data.fpriv = file->driver_priv;
+	if (client_is_banned(ext_data.fpriv)) {
+		DRM_DEBUG("client %s[%d] banned from creating ctx\n",
+			  current->comm,
+			  pid_nr(get_task_pid(current, PIDTYPE_PID)));
+		return -EIO;
+	}
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		return ret;
+
+	ext_data.ctx = i915_gem_create_context(i915, args->flags);
+	mutex_unlock(&dev->struct_mutex);
+	if (IS_ERR(ext_data.ctx))
+		return PTR_ERR(ext_data.ctx);
+
+	if (args->flags & I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS) {
+		ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
+					   create_extensions,
+					   ARRAY_SIZE(create_extensions),
+					   &ext_data);
+		if (ret)
+			goto err_ctx;
+	}
+
+	ret = gem_context_register(ext_data.ctx, ext_data.fpriv);
+	if (ret < 0)
+		goto err_ctx;
+
+	args->ctx_id = ret;
+	DRM_DEBUG("HW context %d created\n", args->ctx_id);
+
+	return 0;
+
+err_ctx:
+	context_close(ext_data.ctx);
+	return ret;
+}
+
+int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
+				   struct drm_file *file)
+{
+	struct drm_i915_gem_context_destroy *args = data;
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct i915_gem_context *ctx;
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	if (!args->ctx_id)
+		return -ENOENT;
+
+	if (mutex_lock_interruptible(&file_priv->context_idr_lock))
+		return -EINTR;
+
+	ctx = idr_remove(&file_priv->context_idr, args->ctx_id);
+	mutex_unlock(&file_priv->context_idr_lock);
+	if (!ctx)
+		return -ENOENT;
+
+	context_close(ctx);
+	return 0;
+}
+
+static int get_sseu(struct i915_gem_context *ctx,
+		    struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_gem_context_param_sseu user_sseu;
+	struct intel_context *ce;
+	unsigned long lookup;
+	int err;
+
+	if (args->size == 0)
+		goto out;
+	else if (args->size < sizeof(user_sseu))
+		return -EINVAL;
+
+	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+			   sizeof(user_sseu)))
+		return -EFAULT;
+
+	if (user_sseu.rsvd)
+		return -EINVAL;
+
+	if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX))
+		return -EINVAL;
+
+	lookup = 0;
+	if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)
+		lookup |= LOOKUP_USER_INDEX;
+
+	ce = lookup_user_engine(ctx, lookup, &user_sseu.engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_lock_pinned(ce); /* serialises with set_sseu */
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
+	user_sseu.slice_mask = ce->sseu.slice_mask;
+	user_sseu.subslice_mask = ce->sseu.subslice_mask;
+	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
+	user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
+
+	intel_context_unlock_pinned(ce);
+	intel_context_put(ce);
+
+	if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
+			 sizeof(user_sseu)))
+		return -EFAULT;
+
+out:
+	args->size = sizeof(user_sseu);
+
+	return 0;
+}
+
+int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct drm_i915_gem_context_param *args = data;
+	struct i915_gem_context *ctx;
+	int ret = 0;
+
+	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
+	if (!ctx)
+		return -ENOENT;
+
+	switch (args->param) {
+	case I915_CONTEXT_PARAM_NO_ZEROMAP:
+		args->size = 0;
+		args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
+		break;
+
+	case I915_CONTEXT_PARAM_GTT_SIZE:
+		args->size = 0;
+		if (ctx->vm)
+			args->value = ctx->vm->total;
+		else if (to_i915(dev)->mm.aliasing_ppgtt)
+			args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total;
+		else
+			args->value = to_i915(dev)->ggtt.vm.total;
+		break;
+
+	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+		args->size = 0;
+		args->value = i915_gem_context_no_error_capture(ctx);
+		break;
+
+	case I915_CONTEXT_PARAM_BANNABLE:
+		args->size = 0;
+		args->value = i915_gem_context_is_bannable(ctx);
+		break;
+
+	case I915_CONTEXT_PARAM_RECOVERABLE:
+		args->size = 0;
+		args->value = i915_gem_context_is_recoverable(ctx);
+		break;
+
+	case I915_CONTEXT_PARAM_PRIORITY:
+		args->size = 0;
+		args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
+		break;
+
+	case I915_CONTEXT_PARAM_SSEU:
+		ret = get_sseu(ctx, args);
+		break;
+
+	case I915_CONTEXT_PARAM_VM:
+		ret = get_ppgtt(file_priv, ctx, args);
+		break;
+
+	case I915_CONTEXT_PARAM_ENGINES:
+		ret = get_engines(ctx, args);
+		break;
+
+	case I915_CONTEXT_PARAM_BAN_PERIOD:
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	i915_gem_context_put(ctx);
+	return ret;
+}
+
+int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct drm_i915_gem_context_param *args = data;
+	struct i915_gem_context *ctx;
+	int ret;
+
+	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
+	if (!ctx)
+		return -ENOENT;
+
+	ret = ctx_setparam(file_priv, ctx, args);
+
+	i915_gem_context_put(ctx);
+	return ret;
+}
+
+int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
+				       void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_reset_stats *args = data;
+	struct i915_gem_context *ctx;
+	int ret;
+
+	if (args->flags || args->pad)
+		return -EINVAL;
+
+	ret = -ENOENT;
+	rcu_read_lock();
+	ctx = __i915_gem_context_lookup_rcu(file->driver_priv, args->ctx_id);
+	if (!ctx)
+		goto out;
+
+	/*
+	 * We opt for unserialised reads here. This may result in tearing
+	 * in the extremely unlikely event of a GPU hang on this context
+	 * as we are querying them. If we need that extra layer of protection,
+	 * we should wrap the hangstats with a seqlock.
+	 */
+
+	if (capable(CAP_SYS_ADMIN))
+		args->reset_count = i915_reset_count(&dev_priv->gpu_error);
+	else
+		args->reset_count = 0;
+
+	args->batch_active = atomic_read(&ctx->guilty_count);
+	args->batch_pending = atomic_read(&ctx->active_count);
+
+	ret = 0;
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
+int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
+{
+	struct drm_i915_private *i915 = ctx->i915;
+	int err = 0;
+
+	mutex_lock(&i915->contexts.mutex);
+
+	GEM_BUG_ON(i915_gem_context_is_closed(ctx));
+
+	if (list_empty(&ctx->hw_id_link)) {
+		GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count));
+
+		err = assign_hw_id(i915, &ctx->hw_id);
+		if (err)
+			goto out_unlock;
+
+		list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list);
+	}
+
+	GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u);
+	atomic_inc(&ctx->hw_id_pin_count);
+
+out_unlock:
+	mutex_unlock(&i915->contexts.mutex);
+	return err;
+}
+
+/* GEM context-engines iterator: for_each_gem_engine() */
+struct intel_context *
+i915_gem_engines_iter_next(struct i915_gem_engines_iter *it)
+{
+	const struct i915_gem_engines *e = it->engines;
+	struct intel_context *ctx;
+
+	do {
+		if (it->idx >= e->num_engines)
+			return NULL;
+
+		ctx = e->engines[it->idx++];
+	} while (!ctx);
+
+	return ctx;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/mock_context.c"
+#include "selftests/i915_gem_context.c"
+#endif
+
+static void i915_global_gem_context_shrink(void)
+{
+	kmem_cache_shrink(global.slab_luts);
+}
+
+static void i915_global_gem_context_exit(void)
+{
+	kmem_cache_destroy(global.slab_luts);
+}
+
+static struct i915_global_gem_context global = { {
+	.shrink = i915_global_gem_context_shrink,
+	.exit = i915_global_gem_context_exit,
+} };
+
+int __init i915_global_gem_context_init(void)
+{
+	global.slab_luts = KMEM_CACHE(i915_lut_handle, 0);
+	if (!global.slab_luts)
+		return -ENOMEM;
+
+	i915_global_register(&global.base);
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
new file mode 100644
index 000000000000..9691dd062f72
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -0,0 +1,239 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __I915_GEM_CONTEXT_H__
+#define __I915_GEM_CONTEXT_H__
+
+#include "i915_gem_context_types.h"
+
+#include "gt/intel_context.h"
+
+#include "i915_gem.h"
+#include "i915_scheduler.h"
+#include "intel_device_info.h"
+
+struct drm_device;
+struct drm_file;
+
+static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
+{
+	return test_bit(CONTEXT_CLOSED, &ctx->flags);
+}
+
+static inline void i915_gem_context_set_closed(struct i915_gem_context *ctx)
+{
+	GEM_BUG_ON(i915_gem_context_is_closed(ctx));
+	set_bit(CONTEXT_CLOSED, &ctx->flags);
+}
+
+static inline bool i915_gem_context_no_error_capture(const struct i915_gem_context *ctx)
+{
+	return test_bit(UCONTEXT_NO_ERROR_CAPTURE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_set_no_error_capture(struct i915_gem_context *ctx)
+{
+	set_bit(UCONTEXT_NO_ERROR_CAPTURE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_clear_no_error_capture(struct i915_gem_context *ctx)
+{
+	clear_bit(UCONTEXT_NO_ERROR_CAPTURE, &ctx->user_flags);
+}
+
+static inline bool i915_gem_context_is_bannable(const struct i915_gem_context *ctx)
+{
+	return test_bit(UCONTEXT_BANNABLE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_set_bannable(struct i915_gem_context *ctx)
+{
+	set_bit(UCONTEXT_BANNABLE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_clear_bannable(struct i915_gem_context *ctx)
+{
+	clear_bit(UCONTEXT_BANNABLE, &ctx->user_flags);
+}
+
+static inline bool i915_gem_context_is_recoverable(const struct i915_gem_context *ctx)
+{
+	return test_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_set_recoverable(struct i915_gem_context *ctx)
+{
+	set_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *ctx)
+{
+	clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
+}
+
+static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx)
+{
+	return test_bit(CONTEXT_BANNED, &ctx->flags);
+}
+
+static inline void i915_gem_context_set_banned(struct i915_gem_context *ctx)
+{
+	set_bit(CONTEXT_BANNED, &ctx->flags);
+}
+
+static inline bool i915_gem_context_force_single_submission(const struct i915_gem_context *ctx)
+{
+	return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags);
+}
+
+static inline void i915_gem_context_set_force_single_submission(struct i915_gem_context *ctx)
+{
+	__set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags);
+}
+
+static inline bool
+i915_gem_context_user_engines(const struct i915_gem_context *ctx)
+{
+	return test_bit(CONTEXT_USER_ENGINES, &ctx->flags);
+}
+
+static inline void
+i915_gem_context_set_user_engines(struct i915_gem_context *ctx)
+{
+	set_bit(CONTEXT_USER_ENGINES, &ctx->flags);
+}
+
+static inline void
+i915_gem_context_clear_user_engines(struct i915_gem_context *ctx)
+{
+	clear_bit(CONTEXT_USER_ENGINES, &ctx->flags);
+}
+
+int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx);
+static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
+{
+	if (atomic_inc_not_zero(&ctx->hw_id_pin_count))
+		return 0;
+
+	return __i915_gem_context_pin_hw_id(ctx);
+}
+
+static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx)
+{
+	GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u);
+	atomic_dec(&ctx->hw_id_pin_count);
+}
+
+static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
+{
+	return !ctx->file_priv;
+}
+
+/* i915_gem_context.c */
+int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv);
+void i915_gem_contexts_fini(struct drm_i915_private *dev_priv);
+
+int i915_gem_context_open(struct drm_i915_private *i915,
+			  struct drm_file *file);
+void i915_gem_context_close(struct drm_file *file);
+
+void i915_gem_context_release(struct kref *ctx_ref);
+struct i915_gem_context *
+i915_gem_context_create_gvt(struct drm_device *dev);
+
+int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file);
+int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+
+int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
+				  struct drm_file *file);
+int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
+				   struct drm_file *file);
+int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *file_priv);
+int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *file_priv);
+int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
+				       struct drm_file *file);
+
+struct i915_gem_context *
+i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
+
+static inline struct i915_gem_context *
+i915_gem_context_get(struct i915_gem_context *ctx)
+{
+	kref_get(&ctx->ref);
+	return ctx;
+}
+
+static inline void i915_gem_context_put(struct i915_gem_context *ctx)
+{
+	kref_put(&ctx->ref, i915_gem_context_release);
+}
+
+static inline struct i915_gem_engines *
+i915_gem_context_engines(struct i915_gem_context *ctx)
+{
+	return rcu_dereference_protected(ctx->engines,
+					 lockdep_is_held(&ctx->engines_mutex));
+}
+
+static inline struct i915_gem_engines *
+i915_gem_context_lock_engines(struct i915_gem_context *ctx)
+	__acquires(&ctx->engines_mutex)
+{
+	mutex_lock(&ctx->engines_mutex);
+	return i915_gem_context_engines(ctx);
+}
+
+static inline void
+i915_gem_context_unlock_engines(struct i915_gem_context *ctx)
+	__releases(&ctx->engines_mutex)
+{
+	mutex_unlock(&ctx->engines_mutex);
+}
+
+static inline struct intel_context *
+i915_gem_context_lookup_engine(struct i915_gem_context *ctx, unsigned int idx)
+{
+	return i915_gem_context_engines(ctx)->engines[idx];
+}
+
+static inline struct intel_context *
+i915_gem_context_get_engine(struct i915_gem_context *ctx, unsigned int idx)
+{
+	struct intel_context *ce = ERR_PTR(-EINVAL);
+
+	rcu_read_lock(); {
+		struct i915_gem_engines *e = rcu_dereference(ctx->engines);
+		if (likely(idx < e->num_engines && e->engines[idx]))
+			ce = intel_context_get(e->engines[idx]);
+	} rcu_read_unlock();
+
+	return ce;
+}
+
+static inline void
+i915_gem_engines_iter_init(struct i915_gem_engines_iter *it,
+			   struct i915_gem_engines *engines)
+{
+	GEM_BUG_ON(!engines);
+	it->engines = engines;
+	it->idx = 0;
+}
+
+struct intel_context *
+i915_gem_engines_iter_next(struct i915_gem_engines_iter *it);
+
+#define for_each_gem_engine(ce, engines, it) \
+	for (i915_gem_engines_iter_init(&(it), (engines)); \
+	     ((ce) = i915_gem_engines_iter_next(&(it)));)
+
+struct i915_lut_handle *i915_lut_handle_alloc(void);
+void i915_lut_handle_free(struct i915_lut_handle *lut);
+
+#endif /* !__I915_GEM_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
new file mode 100644
index 000000000000..cc513410eeef
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -0,0 +1,202 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_CONTEXT_TYPES_H__
+#define __I915_GEM_CONTEXT_TYPES_H__
+
+#include <linux/atomic.h>
+#include <linux/list.h>
+#include <linux/llist.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/radix-tree.h>
+#include <linux/rbtree.h>
+#include <linux/rcupdate.h>
+#include <linux/types.h>
+
+#include "gt/intel_context_types.h"
+
+#include "i915_scheduler.h"
+
+struct pid;
+
+struct drm_i915_private;
+struct drm_i915_file_private;
+struct i915_address_space;
+struct i915_timeline;
+struct intel_ring;
+
+struct i915_gem_engines {
+	struct rcu_head rcu;
+	unsigned int num_engines;
+	struct intel_context *engines[];
+};
+
+struct i915_gem_engines_iter {
+	unsigned int idx;
+	const struct i915_gem_engines *engines;
+};
+
+/**
+ * struct i915_gem_context - client state
+ *
+ * The struct i915_gem_context represents the combined view of the driver and
+ * logical hardware state for a particular client.
+ */
+struct i915_gem_context {
+	/** i915: i915 device backpointer */
+	struct drm_i915_private *i915;
+
+	/** file_priv: owning file descriptor */
+	struct drm_i915_file_private *file_priv;
+
+	/**
+	 * @engines: User defined engines for this context
+	 *
+	 * Various uAPI offer the ability to lookup up an
+	 * index from this array to select an engine operate on.
+	 *
+	 * Multiple logically distinct instances of the same engine
+	 * may be defined in the array, as well as composite virtual
+	 * engines.
+	 *
+	 * Execbuf uses the I915_EXEC_RING_MASK as an index into this
+	 * array to select which HW context + engine to execute on. For
+	 * the default array, the user_ring_map[] is used to translate
+	 * the legacy uABI onto the approprate index (e.g. both
+	 * I915_EXEC_DEFAULT and I915_EXEC_RENDER select the same
+	 * context, and I915_EXEC_BSD is weird). For a use defined
+	 * array, execbuf uses I915_EXEC_RING_MASK as a plain index.
+	 *
+	 * User defined by I915_CONTEXT_PARAM_ENGINE (when the
+	 * CONTEXT_USER_ENGINES flag is set).
+	 */
+	struct i915_gem_engines __rcu *engines;
+	struct mutex engines_mutex; /* guards writes to engines */
+
+	struct i915_timeline *timeline;
+
+	/**
+	 * @vm: unique address space (GTT)
+	 *
+	 * In full-ppgtt mode, each context has its own address space ensuring
+	 * complete seperation of one client from all others.
+	 *
+	 * In other modes, this is a NULL pointer with the expectation that
+	 * the caller uses the shared global GTT.
+	 */
+	struct i915_address_space *vm;
+
+	/**
+	 * @pid: process id of creator
+	 *
+	 * Note that who created the context may not be the principle user,
+	 * as the context may be shared across a local socket. However,
+	 * that should only affect the default context, all contexts created
+	 * explicitly by the client are expected to be isolated.
+	 */
+	struct pid *pid;
+
+	/**
+	 * @name: arbitrary name
+	 *
+	 * A name is constructed for the context from the creator's process
+	 * name, pid and user handle in order to uniquely identify the
+	 * context in messages.
+	 */
+	const char *name;
+
+	/** link: place with &drm_i915_private.context_list */
+	struct list_head link;
+	struct llist_node free_link;
+
+	/**
+	 * @ref: reference count
+	 *
+	 * A reference to a context is held by both the client who created it
+	 * and on each request submitted to the hardware using the request
+	 * (to ensure the hardware has access to the state until it has
+	 * finished all pending writes). See i915_gem_context_get() and
+	 * i915_gem_context_put() for access.
+	 */
+	struct kref ref;
+
+	/**
+	 * @rcu: rcu_head for deferred freeing.
+	 */
+	struct rcu_head rcu;
+
+	/**
+	 * @user_flags: small set of booleans controlled by the user
+	 */
+	unsigned long user_flags;
+#define UCONTEXT_NO_ZEROMAP		0
+#define UCONTEXT_NO_ERROR_CAPTURE	1
+#define UCONTEXT_BANNABLE		2
+#define UCONTEXT_RECOVERABLE		3
+
+	/**
+	 * @flags: small set of booleans
+	 */
+	unsigned long flags;
+#define CONTEXT_BANNED			0
+#define CONTEXT_CLOSED			1
+#define CONTEXT_FORCE_SINGLE_SUBMISSION	2
+#define CONTEXT_USER_ENGINES		3
+
+	/**
+	 * @hw_id: - unique identifier for the context
+	 *
+	 * The hardware needs to uniquely identify the context for a few
+	 * functions like fault reporting, PASID, scheduling. The
+	 * &drm_i915_private.context_hw_ida is used to assign a unqiue
+	 * id for the lifetime of the context.
+	 *
+	 * @hw_id_pin_count: - number of times this context had been pinned
+	 * for use (should be, at most, once per engine).
+	 *
+	 * @hw_id_link: - all contexts with an assigned id are tracked
+	 * for possible repossession.
+	 */
+	unsigned int hw_id;
+	atomic_t hw_id_pin_count;
+	struct list_head hw_id_link;
+
+	struct mutex mutex;
+
+	struct i915_sched_attr sched;
+
+	/** ring_size: size for allocating the per-engine ring buffer */
+	u32 ring_size;
+	/** desc_template: invariant fields for the HW context descriptor */
+	u32 desc_template;
+
+	/** guilty_count: How many times this context has caused a GPU hang. */
+	atomic_t guilty_count;
+	/**
+	 * @active_count: How many times this context was active during a GPU
+	 * hang, but did not cause it.
+	 */
+	atomic_t active_count;
+
+	/**
+	 * @hang_timestamp: The last time(s) this context caused a GPU hang
+	 */
+	unsigned long hang_timestamp[2];
+#define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */
+
+	/** remap_slice: Bitmask of cache lines that need remapping */
+	u8 remap_slice;
+
+	/**
+	 * handles_vma: rbtree to look up our context specific obj/vma for
+	 * the user handle. (user handles are per fd, but the binding is
+	 * per vm, which may be one per context or shared with the global GTT)
+	 */
+	struct radix_tree_root handles_vma;
+};
+
+#endif /* __I915_GEM_CONTEXT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
new file mode 100644
index 000000000000..cbf1701d3acc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -0,0 +1,317 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright 2012 Red Hat Inc
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/highmem.h>
+#include <linux/reservation.h>
+
+#include "i915_drv.h"
+#include "i915_gem_object.h"
+#include "i915_scatterlist.h"
+
+static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
+{
+	return to_intel_bo(buf->priv);
+}
+
+static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment,
+					     enum dma_data_direction dir)
+{
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf);
+	struct sg_table *st;
+	struct scatterlist *src, *dst;
+	int ret, i;
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		goto err;
+
+	/* Copy sg so that we make an independent mapping */
+	st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+	if (st == NULL) {
+		ret = -ENOMEM;
+		goto err_unpin_pages;
+	}
+
+	ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL);
+	if (ret)
+		goto err_free;
+
+	src = obj->mm.pages->sgl;
+	dst = st->sgl;
+	for (i = 0; i < obj->mm.pages->nents; i++) {
+		sg_set_page(dst, sg_page(src), src->length, 0);
+		dst = sg_next(dst);
+		src = sg_next(src);
+	}
+
+	if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) {
+		ret = -ENOMEM;
+		goto err_free_sg;
+	}
+
+	return st;
+
+err_free_sg:
+	sg_free_table(st);
+err_free:
+	kfree(st);
+err_unpin_pages:
+	i915_gem_object_unpin_pages(obj);
+err:
+	return ERR_PTR(ret);
+}
+
+static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment,
+				   struct sg_table *sg,
+				   enum dma_data_direction dir)
+{
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf);
+
+	dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, dir);
+	sg_free_table(sg);
+	kfree(sg);
+
+	i915_gem_object_unpin_pages(obj);
+}
+
+static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
+{
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+
+	return i915_gem_object_pin_map(obj, I915_MAP_WB);
+}
+
+static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
+{
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+
+	i915_gem_object_flush_map(obj);
+	i915_gem_object_unpin_map(obj);
+}
+
+static void *i915_gem_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num)
+{
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+	struct page *page;
+
+	if (page_num >= obj->base.size >> PAGE_SHIFT)
+		return NULL;
+
+	if (!i915_gem_object_has_struct_page(obj))
+		return NULL;
+
+	if (i915_gem_object_pin_pages(obj))
+		return NULL;
+
+	/* Synchronisation is left to the caller (via .begin_cpu_access()) */
+	page = i915_gem_object_get_page(obj, page_num);
+	if (IS_ERR(page))
+		goto err_unpin;
+
+	return kmap(page);
+
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
+	return NULL;
+}
+
+static void i915_gem_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr)
+{
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+
+	kunmap(virt_to_page(addr));
+	i915_gem_object_unpin_pages(obj);
+}
+
+static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
+{
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+	int ret;
+
+	if (obj->base.size < vma->vm_end - vma->vm_start)
+		return -EINVAL;
+
+	if (!obj->base.filp)
+		return -ENODEV;
+
+	ret = call_mmap(obj->base.filp, vma);
+	if (ret)
+		return ret;
+
+	fput(vma->vm_file);
+	vma->vm_file = get_file(obj->base.filp);
+
+	return 0;
+}
+
+static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
+{
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+	bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE);
+	int err;
+
+	err = i915_gem_object_pin_pages(obj);
+	if (err)
+		return err;
+
+	err = i915_gem_object_lock_interruptible(obj);
+	if (err)
+		goto out;
+
+	err = i915_gem_object_set_to_cpu_domain(obj, write);
+	i915_gem_object_unlock(obj);
+
+out:
+	i915_gem_object_unpin_pages(obj);
+	return err;
+}
+
+static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
+{
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+	int err;
+
+	err = i915_gem_object_pin_pages(obj);
+	if (err)
+		return err;
+
+	err = i915_gem_object_lock_interruptible(obj);
+	if (err)
+		goto out;
+
+	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	i915_gem_object_unlock(obj);
+
+out:
+	i915_gem_object_unpin_pages(obj);
+	return err;
+}
+
+static const struct dma_buf_ops i915_dmabuf_ops =  {
+	.map_dma_buf = i915_gem_map_dma_buf,
+	.unmap_dma_buf = i915_gem_unmap_dma_buf,
+	.release = drm_gem_dmabuf_release,
+	.map = i915_gem_dmabuf_kmap,
+	.unmap = i915_gem_dmabuf_kunmap,
+	.mmap = i915_gem_dmabuf_mmap,
+	.vmap = i915_gem_dmabuf_vmap,
+	.vunmap = i915_gem_dmabuf_vunmap,
+	.begin_cpu_access = i915_gem_begin_cpu_access,
+	.end_cpu_access = i915_gem_end_cpu_access,
+};
+
+struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
+				      struct drm_gem_object *gem_obj, int flags)
+{
+	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+	exp_info.ops = &i915_dmabuf_ops;
+	exp_info.size = gem_obj->size;
+	exp_info.flags = flags;
+	exp_info.priv = gem_obj;
+	exp_info.resv = obj->base.resv;
+
+	if (obj->ops->dmabuf_export) {
+		int ret = obj->ops->dmabuf_export(obj);
+		if (ret)
+			return ERR_PTR(ret);
+	}
+
+	return drm_gem_dmabuf_export(dev, &exp_info);
+}
+
+static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
+{
+	struct sg_table *pages;
+	unsigned int sg_page_sizes;
+
+	pages = dma_buf_map_attachment(obj->base.import_attach,
+				       DMA_BIDIRECTIONAL);
+	if (IS_ERR(pages))
+		return PTR_ERR(pages);
+
+	sg_page_sizes = i915_sg_page_sizes(pages->sgl);
+
+	__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
+
+	return 0;
+}
+
+static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj,
+					     struct sg_table *pages)
+{
+	dma_buf_unmap_attachment(obj->base.import_attach, pages,
+				 DMA_BIDIRECTIONAL);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = {
+	.get_pages = i915_gem_object_get_pages_dmabuf,
+	.put_pages = i915_gem_object_put_pages_dmabuf,
+};
+
+struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
+					     struct dma_buf *dma_buf)
+{
+	struct dma_buf_attachment *attach;
+	struct drm_i915_gem_object *obj;
+	int ret;
+
+	/* is this one of own objects? */
+	if (dma_buf->ops == &i915_dmabuf_ops) {
+		obj = dma_buf_to_obj(dma_buf);
+		/* is it from our device? */
+		if (obj->base.dev == dev) {
+			/*
+			 * Importing dmabuf exported from out own gem increases
+			 * refcount on gem itself instead of f_count of dmabuf.
+			 */
+			return &i915_gem_object_get(obj)->base;
+		}
+	}
+
+	/* need to attach */
+	attach = dma_buf_attach(dma_buf, dev->dev);
+	if (IS_ERR(attach))
+		return ERR_CAST(attach);
+
+	get_dma_buf(dma_buf);
+
+	obj = i915_gem_object_alloc();
+	if (obj == NULL) {
+		ret = -ENOMEM;
+		goto fail_detach;
+	}
+
+	drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
+	i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
+	obj->base.import_attach = attach;
+	obj->base.resv = dma_buf->resv;
+
+	/* We use GTT as shorthand for a coherent domain, one that is
+	 * neither in the GPU cache nor in the CPU cache, where all
+	 * writes are immediately visible in memory. (That's not strictly
+	 * true, but it's close! There are internal buffers such as the
+	 * write-combined buffer or a delay through the chipset for GTT
+	 * writes that do require us to treat GTT as a separate cache domain.)
+	 */
+	obj->read_domains = I915_GEM_DOMAIN_GTT;
+	obj->write_domain = 0;
+
+	return &obj->base;
+
+fail_detach:
+	dma_buf_detach(dma_buf, attach);
+	dma_buf_put(dma_buf);
+
+	return ERR_PTR(ret);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/mock_dmabuf.c"
+#include "selftests/i915_gem_dmabuf.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
new file mode 100644
index 000000000000..2e3ce2a69653
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -0,0 +1,796 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include "display/intel_frontbuffer.h"
+
+#include "i915_drv.h"
+#include "i915_gem_clflush.h"
+#include "i915_gem_gtt.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+#include "i915_vma.h"
+
+static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
+{
+	/*
+	 * We manually flush the CPU domain so that we can override and
+	 * force the flush for the display, and perform it asyncrhonously.
+	 */
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	if (obj->cache_dirty)
+		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
+	obj->write_domain = 0;
+}
+
+void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
+{
+	if (!READ_ONCE(obj->pin_global))
+		return;
+
+	i915_gem_object_lock(obj);
+	__i915_gem_object_flush_for_display(obj);
+	i915_gem_object_unlock(obj);
+}
+
+/**
+ * Moves a single object to the WC read, and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	int ret;
+
+	assert_object_held(obj);
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+
+	if (obj->write_domain == I915_GEM_DOMAIN_WC)
+		return 0;
+
+	/* Flush and acquire obj->pages so that we are coherent through
+	 * direct access in memory with previous cached writes through
+	 * shmemfs and that our cache domain tracking remains valid.
+	 * For example, if the obj->filp was moved to swap without us
+	 * being notified and releasing the pages, we would mistakenly
+	 * continue to assume that the obj remained out of the CPU cached
+	 * domain.
+	 */
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+
+	/* Serialise direct access to this object with the barriers for
+	 * coherent writes from the GPU, by effectively invalidating the
+	 * WC domain upon first access.
+	 */
+	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
+		mb();
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
+	obj->read_domains |= I915_GEM_DOMAIN_WC;
+	if (write) {
+		obj->read_domains = I915_GEM_DOMAIN_WC;
+		obj->write_domain = I915_GEM_DOMAIN_WC;
+		obj->mm.dirty = true;
+	}
+
+	i915_gem_object_unpin_pages(obj);
+	return 0;
+}
+
+/**
+ * Moves a single object to the GTT read, and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	int ret;
+
+	assert_object_held(obj);
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+
+	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
+		return 0;
+
+	/* Flush and acquire obj->pages so that we are coherent through
+	 * direct access in memory with previous cached writes through
+	 * shmemfs and that our cache domain tracking remains valid.
+	 * For example, if the obj->filp was moved to swap without us
+	 * being notified and releasing the pages, we would mistakenly
+	 * continue to assume that the obj remained out of the CPU cached
+	 * domain.
+	 */
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
+
+	/* Serialise direct access to this object with the barriers for
+	 * coherent writes from the GPU, by effectively invalidating the
+	 * GTT domain upon first access.
+	 */
+	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
+		mb();
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
+	obj->read_domains |= I915_GEM_DOMAIN_GTT;
+	if (write) {
+		obj->read_domains = I915_GEM_DOMAIN_GTT;
+		obj->write_domain = I915_GEM_DOMAIN_GTT;
+		obj->mm.dirty = true;
+	}
+
+	i915_gem_object_unpin_pages(obj);
+	return 0;
+}
+
+/**
+ * Changes the cache-level of an object across all VMA.
+ * @obj: object to act on
+ * @cache_level: new cache level to set for the object
+ *
+ * After this function returns, the object will be in the new cache-level
+ * across all GTT and the contents of the backing storage will be coherent,
+ * with respect to the new cache-level. In order to keep the backing storage
+ * coherent for all users, we only allow a single cache level to be set
+ * globally on the object and prevent it from being changed whilst the
+ * hardware is reading from the object. That is if the object is currently
+ * on the scanout it will be set to uncached (or equivalent display
+ * cache coherency) and all non-MOCS GPU access will also be uncached so
+ * that all direct access to the scanout remains coherent.
+ */
+int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+				    enum i915_cache_level cache_level)
+{
+	struct i915_vma *vma;
+	int ret;
+
+	assert_object_held(obj);
+
+	if (obj->cache_level == cache_level)
+		return 0;
+
+	/* Inspect the list of currently bound VMA and unbind any that would
+	 * be invalid given the new cache-level. This is principally to
+	 * catch the issue of the CS prefetch crossing page boundaries and
+	 * reading an invalid PTE on older architectures.
+	 */
+restart:
+	list_for_each_entry(vma, &obj->vma.list, obj_link) {
+		if (!drm_mm_node_allocated(&vma->node))
+			continue;
+
+		if (i915_vma_is_pinned(vma)) {
+			DRM_DEBUG("can not change the cache level of pinned objects\n");
+			return -EBUSY;
+		}
+
+		if (!i915_vma_is_closed(vma) &&
+		    i915_gem_valid_gtt_space(vma, cache_level))
+			continue;
+
+		ret = i915_vma_unbind(vma);
+		if (ret)
+			return ret;
+
+		/* As unbinding may affect other elements in the
+		 * obj->vma_list (due to side-effects from retiring
+		 * an active vma), play safe and restart the iterator.
+		 */
+		goto restart;
+	}
+
+	/* We can reuse the existing drm_mm nodes but need to change the
+	 * cache-level on the PTE. We could simply unbind them all and
+	 * rebind with the correct cache-level on next use. However since
+	 * we already have a valid slot, dma mapping, pages etc, we may as
+	 * rewrite the PTE in the belief that doing so tramples upon less
+	 * state and so involves less work.
+	 */
+	if (atomic_read(&obj->bind_count)) {
+		/* Before we change the PTE, the GPU must not be accessing it.
+		 * If we wait upon the object, we know that all the bound
+		 * VMA are no longer active.
+		 */
+		ret = i915_gem_object_wait(obj,
+					   I915_WAIT_INTERRUPTIBLE |
+					   I915_WAIT_ALL,
+					   MAX_SCHEDULE_TIMEOUT);
+		if (ret)
+			return ret;
+
+		if (!HAS_LLC(to_i915(obj->base.dev)) &&
+		    cache_level != I915_CACHE_NONE) {
+			/* Access to snoopable pages through the GTT is
+			 * incoherent and on some machines causes a hard
+			 * lockup. Relinquish the CPU mmaping to force
+			 * userspace to refault in the pages and we can
+			 * then double check if the GTT mapping is still
+			 * valid for that pointer access.
+			 */
+			i915_gem_object_release_mmap(obj);
+
+			/* As we no longer need a fence for GTT access,
+			 * we can relinquish it now (and so prevent having
+			 * to steal a fence from someone else on the next
+			 * fence request). Note GPU activity would have
+			 * dropped the fence as all snoopable access is
+			 * supposed to be linear.
+			 */
+			for_each_ggtt_vma(vma, obj) {
+				ret = i915_vma_put_fence(vma);
+				if (ret)
+					return ret;
+			}
+		} else {
+			/* We either have incoherent backing store and
+			 * so no GTT access or the architecture is fully
+			 * coherent. In such cases, existing GTT mmaps
+			 * ignore the cache bit in the PTE and we can
+			 * rewrite it without confusing the GPU or having
+			 * to force userspace to fault back in its mmaps.
+			 */
+		}
+
+		list_for_each_entry(vma, &obj->vma.list, obj_link) {
+			if (!drm_mm_node_allocated(&vma->node))
+				continue;
+
+			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
+			if (ret)
+				return ret;
+		}
+	}
+
+	list_for_each_entry(vma, &obj->vma.list, obj_link)
+		vma->node.color = cache_level;
+	i915_gem_object_set_cache_coherency(obj, cache_level);
+	obj->cache_dirty = true; /* Always invalidate stale cachelines */
+
+	return 0;
+}
+
+int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file)
+{
+	struct drm_i915_gem_caching *args = data;
+	struct drm_i915_gem_object *obj;
+	int err = 0;
+
+	rcu_read_lock();
+	obj = i915_gem_object_lookup_rcu(file, args->handle);
+	if (!obj) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	switch (obj->cache_level) {
+	case I915_CACHE_LLC:
+	case I915_CACHE_L3_LLC:
+		args->caching = I915_CACHING_CACHED;
+		break;
+
+	case I915_CACHE_WT:
+		args->caching = I915_CACHING_DISPLAY;
+		break;
+
+	default:
+		args->caching = I915_CACHING_NONE;
+		break;
+	}
+out:
+	rcu_read_unlock();
+	return err;
+}
+
+int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct drm_i915_gem_caching *args = data;
+	struct drm_i915_gem_object *obj;
+	enum i915_cache_level level;
+	int ret = 0;
+
+	switch (args->caching) {
+	case I915_CACHING_NONE:
+		level = I915_CACHE_NONE;
+		break;
+	case I915_CACHING_CACHED:
+		/*
+		 * Due to a HW issue on BXT A stepping, GPU stores via a
+		 * snooped mapping may leave stale data in a corresponding CPU
+		 * cacheline, whereas normally such cachelines would get
+		 * invalidated.
+		 */
+		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
+			return -ENODEV;
+
+		level = I915_CACHE_LLC;
+		break;
+	case I915_CACHING_DISPLAY:
+		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	obj = i915_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	/*
+	 * The caching mode of proxy object is handled by its generator, and
+	 * not allowed to be changed by userspace.
+	 */
+	if (i915_gem_object_is_proxy(obj)) {
+		ret = -ENXIO;
+		goto out;
+	}
+
+	if (obj->cache_level == level)
+		goto out;
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE,
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		goto out;
+
+	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
+	if (ret)
+		goto out;
+
+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret == 0) {
+		ret = i915_gem_object_set_cache_level(obj, level);
+		i915_gem_object_unlock(obj);
+	}
+	mutex_unlock(&i915->drm.struct_mutex);
+
+out:
+	i915_gem_object_put(obj);
+	return ret;
+}
+
+/*
+ * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
+ * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
+ * (for pageflips). We only flush the caches while preparing the buffer for
+ * display, the callers are responsible for frontbuffer flush.
+ */
+struct i915_vma *
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+				     u32 alignment,
+				     const struct i915_ggtt_view *view,
+				     unsigned int flags)
+{
+	struct i915_vma *vma;
+	int ret;
+
+	assert_object_held(obj);
+
+	/* Mark the global pin early so that we account for the
+	 * display coherency whilst setting up the cache domains.
+	 */
+	obj->pin_global++;
+
+	/* The display engine is not coherent with the LLC cache on gen6.  As
+	 * a result, we make sure that the pinning that is about to occur is
+	 * done with uncached PTEs. This is lowest common denominator for all
+	 * chipsets.
+	 *
+	 * However for gen6+, we could do better by using the GFDT bit instead
+	 * of uncaching, which would allow us to flush all the LLC-cached data
+	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
+	 */
+	ret = i915_gem_object_set_cache_level(obj,
+					      HAS_WT(to_i915(obj->base.dev)) ?
+					      I915_CACHE_WT : I915_CACHE_NONE);
+	if (ret) {
+		vma = ERR_PTR(ret);
+		goto err_unpin_global;
+	}
+
+	/* As the user may map the buffer once pinned in the display plane
+	 * (e.g. libkms for the bootup splash), we have to ensure that we
+	 * always use map_and_fenceable for all scanout buffers. However,
+	 * it may simply be too big to fit into mappable, in which case
+	 * put it anyway and hope that userspace can cope (but always first
+	 * try to preserve the existing ABI).
+	 */
+	vma = ERR_PTR(-ENOSPC);
+	if ((flags & PIN_MAPPABLE) == 0 &&
+	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
+		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
+					       flags |
+					       PIN_MAPPABLE |
+					       PIN_NONBLOCK);
+	if (IS_ERR(vma))
+		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
+	if (IS_ERR(vma))
+		goto err_unpin_global;
+
+	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
+
+	__i915_gem_object_flush_for_display(obj);
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	obj->read_domains |= I915_GEM_DOMAIN_GTT;
+
+	return vma;
+
+err_unpin_global:
+	obj->pin_global--;
+	return vma;
+}
+
+static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_vma *vma;
+
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+	mutex_lock(&i915->ggtt.vm.mutex);
+	for_each_ggtt_vma(vma, obj) {
+		if (!drm_mm_node_allocated(&vma->node))
+			continue;
+
+		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+	}
+	mutex_unlock(&i915->ggtt.vm.mutex);
+
+	if (i915_gem_object_is_shrinkable(obj)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&i915->mm.obj_lock, flags);
+
+		if (obj->mm.madv == I915_MADV_WILLNEED)
+			list_move_tail(&obj->mm.link, &i915->mm.shrink_list);
+
+		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+	}
+}
+
+void
+i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+
+	assert_object_held(obj);
+
+	if (WARN_ON(obj->pin_global == 0))
+		return;
+
+	if (--obj->pin_global == 0)
+		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
+
+	/* Bump the LRU to try and avoid premature eviction whilst flipping  */
+	i915_gem_object_bump_inactive_ggtt(obj);
+
+	i915_vma_unpin(vma);
+}
+
+/**
+ * Moves a single object to the CPU read, and possibly write domain.
+ * @obj: object to act on
+ * @write: requesting write or read-only access
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	int ret;
+
+	assert_object_held(obj);
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+	/* Flush the CPU cache if it's still invalid. */
+	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
+		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+		obj->read_domains |= I915_GEM_DOMAIN_CPU;
+	}
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
+
+	/* If we're writing through the CPU, then the GPU read domains will
+	 * need to be invalidated at next use.
+	 */
+	if (write)
+		__start_cpu_write(obj);
+
+	return 0;
+}
+
+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+	return (domain == I915_GEM_DOMAIN_GTT ?
+		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
+/**
+ * Called when user space prepares to use an object with the CPU, either
+ * through the mmap ioctl's mapping or a GTT mapping.
+ * @dev: drm device
+ * @data: ioctl data blob
+ * @file: drm file
+ */
+int
+i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file)
+{
+	struct drm_i915_gem_set_domain *args = data;
+	struct drm_i915_gem_object *obj;
+	u32 read_domains = args->read_domains;
+	u32 write_domain = args->write_domain;
+	int err;
+
+	/* Only handle setting domains to types used by the CPU. */
+	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
+		return -EINVAL;
+
+	/*
+	 * Having something in the write domain implies it's in the read
+	 * domain, and only that read domain.  Enforce that in the request.
+	 */
+	if (write_domain && read_domains != write_domain)
+		return -EINVAL;
+
+	if (!read_domains)
+		return 0;
+
+	obj = i915_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	/*
+	 * Already in the desired write domain? Nothing for us to do!
+	 *
+	 * We apply a little bit of cunning here to catch a broader set of
+	 * no-ops. If obj->write_domain is set, we must be in the same
+	 * obj->read_domains, and only that domain. Therefore, if that
+	 * obj->write_domain matches the request read_domains, we are
+	 * already in the same read/write domain and can skip the operation,
+	 * without having to further check the requested write_domain.
+	 */
+	if (READ_ONCE(obj->write_domain) == read_domains) {
+		err = 0;
+		goto out;
+	}
+
+	/*
+	 * Try to flush the object off the GPU without holding the lock.
+	 * We will repeat the flush holding the lock in the normal manner
+	 * to catch cases where we are gazumped.
+	 */
+	err = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
+				   (write_domain ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (err)
+		goto out;
+
+	/*
+	 * Proxy objects do not control access to the backing storage, ergo
+	 * they cannot be used as a means to manipulate the cache domain
+	 * tracking for that backing storage. The proxy object is always
+	 * considered to be outside of any cache domain.
+	 */
+	if (i915_gem_object_is_proxy(obj)) {
+		err = -ENXIO;
+		goto out;
+	}
+
+	/*
+	 * Flush and acquire obj->pages so that we are coherent through
+	 * direct access in memory with previous cached writes through
+	 * shmemfs and that our cache domain tracking remains valid.
+	 * For example, if the obj->filp was moved to swap without us
+	 * being notified and releasing the pages, we would mistakenly
+	 * continue to assume that the obj remained out of the CPU cached
+	 * domain.
+	 */
+	err = i915_gem_object_pin_pages(obj);
+	if (err)
+		goto out;
+
+	err = i915_gem_object_lock_interruptible(obj);
+	if (err)
+		goto out_unpin;
+
+	if (read_domains & I915_GEM_DOMAIN_WC)
+		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
+	else if (read_domains & I915_GEM_DOMAIN_GTT)
+		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
+	else
+		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
+
+	/* And bump the LRU for this access */
+	i915_gem_object_bump_inactive_ggtt(obj);
+
+	i915_gem_object_unlock(obj);
+
+	if (write_domain != 0)
+		intel_fb_obj_invalidate(obj,
+					fb_write_origin(obj, write_domain));
+
+out_unpin:
+	i915_gem_object_unpin_pages(obj);
+out:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+/*
+ * Pins the specified object's pages and synchronizes the object with
+ * GPU accesses. Sets needs_clflush to non-zero if the caller should
+ * flush the object from the CPU cache.
+ */
+int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
+				 unsigned int *needs_clflush)
+{
+	int ret;
+
+	*needs_clflush = 0;
+	if (!i915_gem_object_has_struct_page(obj))
+		return -ENODEV;
+
+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE,
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		goto err_unlock;
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		goto err_unlock;
+
+	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
+	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+		ret = i915_gem_object_set_to_cpu_domain(obj, false);
+		if (ret)
+			goto err_unpin;
+		else
+			goto out;
+	}
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+	/* If we're not in the cpu read domain, set ourself into the gtt
+	 * read domain and manually flush cachelines (if required). This
+	 * optimizes for the case when the gpu will dirty the data
+	 * anyway again before the next pread happens.
+	 */
+	if (!obj->cache_dirty &&
+	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
+		*needs_clflush = CLFLUSH_BEFORE;
+
+out:
+	/* return with the pages pinned */
+	return 0;
+
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
+err_unlock:
+	i915_gem_object_unlock(obj);
+	return ret;
+}
+
+int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
+				  unsigned int *needs_clflush)
+{
+	int ret;
+
+	*needs_clflush = 0;
+	if (!i915_gem_object_has_struct_page(obj))
+		return -ENODEV;
+
+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_ALL,
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		goto err_unlock;
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		goto err_unlock;
+
+	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
+	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+		ret = i915_gem_object_set_to_cpu_domain(obj, true);
+		if (ret)
+			goto err_unpin;
+		else
+			goto out;
+	}
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+	/* If we're not in the cpu write domain, set ourself into the
+	 * gtt write domain and manually flush cachelines (as required).
+	 * This optimizes for the case when the gpu will use the data
+	 * right away and we therefore have to clflush anyway.
+	 */
+	if (!obj->cache_dirty) {
+		*needs_clflush |= CLFLUSH_AFTER;
+
+		/*
+		 * Same trick applies to invalidate partially written
+		 * cachelines read before writing.
+		 */
+		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
+			*needs_clflush |= CLFLUSH_BEFORE;
+	}
+
+out:
+	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
+	obj->mm.dirty = true;
+	/* return with the pages pinned */
+	return 0;
+
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
+err_unlock:
+	i915_gem_object_unlock(obj);
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
new file mode 100644
index 000000000000..5fae0e50aad0
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -0,0 +1,2823 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008,2010 Intel Corporation
+ */
+
+#include <linux/intel-iommu.h>
+#include <linux/reservation.h>
+#include <linux/sync_file.h>
+#include <linux/uaccess.h>
+
+#include <drm/drm_syncobj.h>
+#include <drm/i915_drm.h>
+
+#include "display/intel_frontbuffer.h"
+
+#include "gem/i915_gem_ioctls.h"
+#include "gt/intel_context.h"
+#include "gt/intel_gt_pm.h"
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_clflush.h"
+#include "i915_gem_context.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+
+enum {
+	FORCE_CPU_RELOC = 1,
+	FORCE_GTT_RELOC,
+	FORCE_GPU_RELOC,
+#define DBG_FORCE_RELOC 0 /* choose one of the above! */
+};
+
+#define __EXEC_OBJECT_HAS_REF		BIT(31)
+#define __EXEC_OBJECT_HAS_PIN		BIT(30)
+#define __EXEC_OBJECT_HAS_FENCE		BIT(29)
+#define __EXEC_OBJECT_NEEDS_MAP		BIT(28)
+#define __EXEC_OBJECT_NEEDS_BIAS	BIT(27)
+#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 27) /* all of the above */
+#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
+
+#define __EXEC_HAS_RELOC	BIT(31)
+#define __EXEC_VALIDATED	BIT(30)
+#define __EXEC_INTERNAL_FLAGS	(~0u << 30)
+#define UPDATE			PIN_OFFSET_FIXED
+
+#define BATCH_OFFSET_BIAS (256*1024)
+
+#define __I915_EXEC_ILLEGAL_FLAGS \
+	(__I915_EXEC_UNKNOWN_FLAGS | \
+	 I915_EXEC_CONSTANTS_MASK  | \
+	 I915_EXEC_RESOURCE_STREAMER)
+
+/* Catch emission of unexpected errors for CI! */
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+#undef EINVAL
+#define EINVAL ({ \
+	DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \
+	22; \
+})
+#endif
+
+/**
+ * DOC: User command execution
+ *
+ * Userspace submits commands to be executed on the GPU as an instruction
+ * stream within a GEM object we call a batchbuffer. This instructions may
+ * refer to other GEM objects containing auxiliary state such as kernels,
+ * samplers, render targets and even secondary batchbuffers. Userspace does
+ * not know where in the GPU memory these objects reside and so before the
+ * batchbuffer is passed to the GPU for execution, those addresses in the
+ * batchbuffer and auxiliary objects are updated. This is known as relocation,
+ * or patching. To try and avoid having to relocate each object on the next
+ * execution, userspace is told the location of those objects in this pass,
+ * but this remains just a hint as the kernel may choose a new location for
+ * any object in the future.
+ *
+ * At the level of talking to the hardware, submitting a batchbuffer for the
+ * GPU to execute is to add content to a buffer from which the HW
+ * command streamer is reading.
+ *
+ * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e.
+ *    Execlists, this command is not placed on the same buffer as the
+ *    remaining items.
+ *
+ * 2. Add a command to invalidate caches to the buffer.
+ *
+ * 3. Add a batchbuffer start command to the buffer; the start command is
+ *    essentially a token together with the GPU address of the batchbuffer
+ *    to be executed.
+ *
+ * 4. Add a pipeline flush to the buffer.
+ *
+ * 5. Add a memory write command to the buffer to record when the GPU
+ *    is done executing the batchbuffer. The memory write writes the
+ *    global sequence number of the request, ``i915_request::global_seqno``;
+ *    the i915 driver uses the current value in the register to determine
+ *    if the GPU has completed the batchbuffer.
+ *
+ * 6. Add a user interrupt command to the buffer. This command instructs
+ *    the GPU to issue an interrupt when the command, pipeline flush and
+ *    memory write are completed.
+ *
+ * 7. Inform the hardware of the additional commands added to the buffer
+ *    (by updating the tail pointer).
+ *
+ * Processing an execbuf ioctl is conceptually split up into a few phases.
+ *
+ * 1. Validation - Ensure all the pointers, handles and flags are valid.
+ * 2. Reservation - Assign GPU address space for every object
+ * 3. Relocation - Update any addresses to point to the final locations
+ * 4. Serialisation - Order the request with respect to its dependencies
+ * 5. Construction - Construct a request to execute the batchbuffer
+ * 6. Submission (at some point in the future execution)
+ *
+ * Reserving resources for the execbuf is the most complicated phase. We
+ * neither want to have to migrate the object in the address space, nor do
+ * we want to have to update any relocations pointing to this object. Ideally,
+ * we want to leave the object where it is and for all the existing relocations
+ * to match. If the object is given a new address, or if userspace thinks the
+ * object is elsewhere, we have to parse all the relocation entries and update
+ * the addresses. Userspace can set the I915_EXEC_NORELOC flag to hint that
+ * all the target addresses in all of its objects match the value in the
+ * relocation entries and that they all match the presumed offsets given by the
+ * list of execbuffer objects. Using this knowledge, we know that if we haven't
+ * moved any buffers, all the relocation entries are valid and we can skip
+ * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
+ * hang.) The requirement for using I915_EXEC_NO_RELOC are:
+ *
+ *      The addresses written in the objects must match the corresponding
+ *      reloc.presumed_offset which in turn must match the corresponding
+ *      execobject.offset.
+ *
+ *      Any render targets written to in the batch must be flagged with
+ *      EXEC_OBJECT_WRITE.
+ *
+ *      To avoid stalling, execobject.offset should match the current
+ *      address of that object within the active context.
+ *
+ * The reservation is done is multiple phases. First we try and keep any
+ * object already bound in its current location - so as long as meets the
+ * constraints imposed by the new execbuffer. Any object left unbound after the
+ * first pass is then fitted into any available idle space. If an object does
+ * not fit, all objects are removed from the reservation and the process rerun
+ * after sorting the objects into a priority order (more difficult to fit
+ * objects are tried first). Failing that, the entire VM is cleared and we try
+ * to fit the execbuf once last time before concluding that it simply will not
+ * fit.
+ *
+ * A small complication to all of this is that we allow userspace not only to
+ * specify an alignment and a size for the object in the address space, but
+ * we also allow userspace to specify the exact offset. This objects are
+ * simpler to place (the location is known a priori) all we have to do is make
+ * sure the space is available.
+ *
+ * Once all the objects are in place, patching up the buried pointers to point
+ * to the final locations is a fairly simple job of walking over the relocation
+ * entry arrays, looking up the right address and rewriting the value into
+ * the object. Simple! ... The relocation entries are stored in user memory
+ * and so to access them we have to copy them into a local buffer. That copy
+ * has to avoid taking any pagefaults as they may lead back to a GEM object
+ * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
+ * the relocation into multiple passes. First we try to do everything within an
+ * atomic context (avoid the pagefaults) which requires that we never wait. If
+ * we detect that we may wait, or if we need to fault, then we have to fallback
+ * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
+ * bells yet?) Dropping the mutex means that we lose all the state we have
+ * built up so far for the execbuf and we must reset any global data. However,
+ * we do leave the objects pinned in their final locations - which is a
+ * potential issue for concurrent execbufs. Once we have left the mutex, we can
+ * allocate and copy all the relocation entries into a large array at our
+ * leisure, reacquire the mutex, reclaim all the objects and other state and
+ * then proceed to update any incorrect addresses with the objects.
+ *
+ * As we process the relocation entries, we maintain a record of whether the
+ * object is being written to. Using NORELOC, we expect userspace to provide
+ * this information instead. We also check whether we can skip the relocation
+ * by comparing the expected value inside the relocation entry with the target's
+ * final address. If they differ, we have to map the current object and rewrite
+ * the 4 or 8 byte pointer within.
+ *
+ * Serialising an execbuf is quite simple according to the rules of the GEM
+ * ABI. Execution within each context is ordered by the order of submission.
+ * Writes to any GEM object are in order of submission and are exclusive. Reads
+ * from a GEM object are unordered with respect to other reads, but ordered by
+ * writes. A write submitted after a read cannot occur before the read, and
+ * similarly any read submitted after a write cannot occur before the write.
+ * Writes are ordered between engines such that only one write occurs at any
+ * time (completing any reads beforehand) - using semaphores where available
+ * and CPU serialisation otherwise. Other GEM access obey the same rules, any
+ * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
+ * reads before starting, and any read (either using set-domain or pread) must
+ * flush all GPU writes before starting. (Note we only employ a barrier before,
+ * we currently rely on userspace not concurrently starting a new execution
+ * whilst reading or writing to an object. This may be an advantage or not
+ * depending on how much you trust userspace not to shoot themselves in the
+ * foot.) Serialisation may just result in the request being inserted into
+ * a DAG awaiting its turn, but most simple is to wait on the CPU until
+ * all dependencies are resolved.
+ *
+ * After all of that, is just a matter of closing the request and handing it to
+ * the hardware (well, leaving it in a queue to be executed). However, we also
+ * offer the ability for batchbuffers to be run with elevated privileges so
+ * that they access otherwise hidden registers. (Used to adjust L3 cache etc.)
+ * Before any batch is given extra privileges we first must check that it
+ * contains no nefarious instructions, we check that each instruction is from
+ * our whitelist and all registers are also from an allowed list. We first
+ * copy the user's batchbuffer to a shadow (so that the user doesn't have
+ * access to it, either by the CPU or GPU as we scan it) and then parse each
+ * instruction. If everything is ok, we set a flag telling the hardware to run
+ * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
+ */
+
+struct i915_execbuffer {
+	struct drm_i915_private *i915; /** i915 backpointer */
+	struct drm_file *file; /** per-file lookup tables and limits */
+	struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
+	struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
+	struct i915_vma **vma;
+	unsigned int *flags;
+
+	struct intel_engine_cs *engine; /** engine to queue the request to */
+	struct intel_context *context; /* logical state for the request */
+	struct i915_gem_context *gem_context; /** caller's context */
+	struct i915_address_space *vm; /** GTT and vma for the request */
+
+	struct i915_request *request; /** our request to build */
+	struct i915_vma *batch; /** identity of the batch obj/vma */
+
+	/** actual size of execobj[] as we may extend it for the cmdparser */
+	unsigned int buffer_count;
+
+	/** list of vma not yet bound during reservation phase */
+	struct list_head unbound;
+
+	/** list of vma that have execobj.relocation_count */
+	struct list_head relocs;
+
+	/**
+	 * Track the most recently used object for relocations, as we
+	 * frequently have to perform multiple relocations within the same
+	 * obj/page
+	 */
+	struct reloc_cache {
+		struct drm_mm_node node; /** temporary GTT binding */
+		unsigned long vaddr; /** Current kmap address */
+		unsigned long page; /** Currently mapped page index */
+		unsigned int gen; /** Cached value of INTEL_GEN */
+		bool use_64bit_reloc : 1;
+		bool has_llc : 1;
+		bool has_fence : 1;
+		bool needs_unfenced : 1;
+
+		struct i915_request *rq;
+		u32 *rq_cmd;
+		unsigned int rq_size;
+	} reloc_cache;
+
+	u64 invalid_flags; /** Set of execobj.flags that are invalid */
+	u32 context_flags; /** Set of execobj.flags to insert from the ctx */
+
+	u32 batch_start_offset; /** Location within object of batch */
+	u32 batch_len; /** Length of batch within object */
+	u32 batch_flags; /** Flags composed for emit_bb_start() */
+
+	/**
+	 * Indicate either the size of the hastable used to resolve
+	 * relocation handles, or if negative that we are using a direct
+	 * index into the execobj[].
+	 */
+	int lut_size;
+	struct hlist_head *buckets; /** ht for relocation handles */
+};
+
+#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
+
+/*
+ * Used to convert any address to canonical form.
+ * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
+ * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
+ * addresses to be in a canonical form:
+ * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
+ * canonical form [63:48] == [47]."
+ */
+#define GEN8_HIGH_ADDRESS_BIT 47
+static inline u64 gen8_canonical_addr(u64 address)
+{
+	return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
+}
+
+static inline u64 gen8_noncanonical_addr(u64 address)
+{
+	return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0);
+}
+
+static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
+{
+	return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len;
+}
+
+static int eb_create(struct i915_execbuffer *eb)
+{
+	if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
+		unsigned int size = 1 + ilog2(eb->buffer_count);
+
+		/*
+		 * Without a 1:1 association between relocation handles and
+		 * the execobject[] index, we instead create a hashtable.
+		 * We size it dynamically based on available memory, starting
+		 * first with 1:1 assocative hash and scaling back until
+		 * the allocation succeeds.
+		 *
+		 * Later on we use a positive lut_size to indicate we are
+		 * using this hashtable, and a negative value to indicate a
+		 * direct lookup.
+		 */
+		do {
+			gfp_t flags;
+
+			/* While we can still reduce the allocation size, don't
+			 * raise a warning and allow the allocation to fail.
+			 * On the last pass though, we want to try as hard
+			 * as possible to perform the allocation and warn
+			 * if it fails.
+			 */
+			flags = GFP_KERNEL;
+			if (size > 1)
+				flags |= __GFP_NORETRY | __GFP_NOWARN;
+
+			eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
+					      flags);
+			if (eb->buckets)
+				break;
+		} while (--size);
+
+		if (unlikely(!size))
+			return -ENOMEM;
+
+		eb->lut_size = size;
+	} else {
+		eb->lut_size = -eb->buffer_count;
+	}
+
+	return 0;
+}
+
+static bool
+eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
+		 const struct i915_vma *vma,
+		 unsigned int flags)
+{
+	if (vma->node.size < entry->pad_to_size)
+		return true;
+
+	if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
+		return true;
+
+	if (flags & EXEC_OBJECT_PINNED &&
+	    vma->node.start != entry->offset)
+		return true;
+
+	if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
+	    vma->node.start < BATCH_OFFSET_BIAS)
+		return true;
+
+	if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
+	    (vma->node.start + vma->node.size - 1) >> 32)
+		return true;
+
+	if (flags & __EXEC_OBJECT_NEEDS_MAP &&
+	    !i915_vma_is_map_and_fenceable(vma))
+		return true;
+
+	return false;
+}
+
+static inline bool
+eb_pin_vma(struct i915_execbuffer *eb,
+	   const struct drm_i915_gem_exec_object2 *entry,
+	   struct i915_vma *vma)
+{
+	unsigned int exec_flags = *vma->exec_flags;
+	u64 pin_flags;
+
+	if (vma->node.size)
+		pin_flags = vma->node.start;
+	else
+		pin_flags = entry->offset & PIN_OFFSET_MASK;
+
+	pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
+	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_GTT))
+		pin_flags |= PIN_GLOBAL;
+
+	if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags)))
+		return false;
+
+	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
+		if (unlikely(i915_vma_pin_fence(vma))) {
+			i915_vma_unpin(vma);
+			return false;
+		}
+
+		if (vma->fence)
+			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
+	}
+
+	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
+	return !eb_vma_misplaced(entry, vma, exec_flags);
+}
+
+static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
+{
+	GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
+
+	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
+		__i915_vma_unpin_fence(vma);
+
+	__i915_vma_unpin(vma);
+}
+
+static inline void
+eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags)
+{
+	if (!(*flags & __EXEC_OBJECT_HAS_PIN))
+		return;
+
+	__eb_unreserve_vma(vma, *flags);
+	*flags &= ~__EXEC_OBJECT_RESERVED;
+}
+
+static int
+eb_validate_vma(struct i915_execbuffer *eb,
+		struct drm_i915_gem_exec_object2 *entry,
+		struct i915_vma *vma)
+{
+	if (unlikely(entry->flags & eb->invalid_flags))
+		return -EINVAL;
+
+	if (unlikely(entry->alignment && !is_power_of_2(entry->alignment)))
+		return -EINVAL;
+
+	/*
+	 * Offset can be used as input (EXEC_OBJECT_PINNED), reject
+	 * any non-page-aligned or non-canonical addresses.
+	 */
+	if (unlikely(entry->flags & EXEC_OBJECT_PINNED &&
+		     entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK)))
+		return -EINVAL;
+
+	/* pad_to_size was once a reserved field, so sanitize it */
+	if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) {
+		if (unlikely(offset_in_page(entry->pad_to_size)))
+			return -EINVAL;
+	} else {
+		entry->pad_to_size = 0;
+	}
+
+	if (unlikely(vma->exec_flags)) {
+		DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
+			  entry->handle, (int)(entry - eb->exec));
+		return -EINVAL;
+	}
+
+	/*
+	 * From drm_mm perspective address space is continuous,
+	 * so from this point we're always using non-canonical
+	 * form internally.
+	 */
+	entry->offset = gen8_noncanonical_addr(entry->offset);
+
+	if (!eb->reloc_cache.has_fence) {
+		entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
+	} else {
+		if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
+		     eb->reloc_cache.needs_unfenced) &&
+		    i915_gem_object_is_tiled(vma->obj))
+			entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
+	}
+
+	if (!(entry->flags & EXEC_OBJECT_PINNED))
+		entry->flags |= eb->context_flags;
+
+	return 0;
+}
+
+static int
+eb_add_vma(struct i915_execbuffer *eb,
+	   unsigned int i, unsigned batch_idx,
+	   struct i915_vma *vma)
+{
+	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+	int err;
+
+	GEM_BUG_ON(i915_vma_is_closed(vma));
+
+	if (!(eb->args->flags & __EXEC_VALIDATED)) {
+		err = eb_validate_vma(eb, entry, vma);
+		if (unlikely(err))
+			return err;
+	}
+
+	if (eb->lut_size > 0) {
+		vma->exec_handle = entry->handle;
+		hlist_add_head(&vma->exec_node,
+			       &eb->buckets[hash_32(entry->handle,
+						    eb->lut_size)]);
+	}
+
+	if (entry->relocation_count)
+		list_add_tail(&vma->reloc_link, &eb->relocs);
+
+	/*
+	 * Stash a pointer from the vma to execobj, so we can query its flags,
+	 * size, alignment etc as provided by the user. Also we stash a pointer
+	 * to the vma inside the execobj so that we can use a direct lookup
+	 * to find the right target VMA when doing relocations.
+	 */
+	eb->vma[i] = vma;
+	eb->flags[i] = entry->flags;
+	vma->exec_flags = &eb->flags[i];
+
+	/*
+	 * SNA is doing fancy tricks with compressing batch buffers, which leads
+	 * to negative relocation deltas. Usually that works out ok since the
+	 * relocate address is still positive, except when the batch is placed
+	 * very low in the GTT. Ensure this doesn't happen.
+	 *
+	 * Note that actual hangs have only been observed on gen7, but for
+	 * paranoia do it everywhere.
+	 */
+	if (i == batch_idx) {
+		if (entry->relocation_count &&
+		    !(eb->flags[i] & EXEC_OBJECT_PINNED))
+			eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
+		if (eb->reloc_cache.has_fence)
+			eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;
+
+		eb->batch = vma;
+	}
+
+	err = 0;
+	if (eb_pin_vma(eb, entry, vma)) {
+		if (entry->offset != vma->node.start) {
+			entry->offset = vma->node.start | UPDATE;
+			eb->args->flags |= __EXEC_HAS_RELOC;
+		}
+	} else {
+		eb_unreserve_vma(vma, vma->exec_flags);
+
+		list_add_tail(&vma->exec_link, &eb->unbound);
+		if (drm_mm_node_allocated(&vma->node))
+			err = i915_vma_unbind(vma);
+		if (unlikely(err))
+			vma->exec_flags = NULL;
+	}
+	return err;
+}
+
+static inline int use_cpu_reloc(const struct reloc_cache *cache,
+				const struct drm_i915_gem_object *obj)
+{
+	if (!i915_gem_object_has_struct_page(obj))
+		return false;
+
+	if (DBG_FORCE_RELOC == FORCE_CPU_RELOC)
+		return true;
+
+	if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
+		return false;
+
+	return (cache->has_llc ||
+		obj->cache_dirty ||
+		obj->cache_level != I915_CACHE_NONE);
+}
+
+static int eb_reserve_vma(const struct i915_execbuffer *eb,
+			  struct i915_vma *vma)
+{
+	struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
+	unsigned int exec_flags = *vma->exec_flags;
+	u64 pin_flags;
+	int err;
+
+	pin_flags = PIN_USER | PIN_NONBLOCK;
+	if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
+		pin_flags |= PIN_GLOBAL;
+
+	/*
+	 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
+	 * limit address to the first 4GBs for unflagged objects.
+	 */
+	if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+		pin_flags |= PIN_ZONE_4G;
+
+	if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
+		pin_flags |= PIN_MAPPABLE;
+
+	if (exec_flags & EXEC_OBJECT_PINNED) {
+		pin_flags |= entry->offset | PIN_OFFSET_FIXED;
+		pin_flags &= ~PIN_NONBLOCK; /* force overlapping checks */
+	} else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) {
+		pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+	}
+
+	err = i915_vma_pin(vma,
+			   entry->pad_to_size, entry->alignment,
+			   pin_flags);
+	if (err)
+		return err;
+
+	if (entry->offset != vma->node.start) {
+		entry->offset = vma->node.start | UPDATE;
+		eb->args->flags |= __EXEC_HAS_RELOC;
+	}
+
+	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
+		err = i915_vma_pin_fence(vma);
+		if (unlikely(err)) {
+			i915_vma_unpin(vma);
+			return err;
+		}
+
+		if (vma->fence)
+			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
+	}
+
+	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
+	GEM_BUG_ON(eb_vma_misplaced(entry, vma, exec_flags));
+
+	return 0;
+}
+
+static int eb_reserve(struct i915_execbuffer *eb)
+{
+	const unsigned int count = eb->buffer_count;
+	struct list_head last;
+	struct i915_vma *vma;
+	unsigned int i, pass;
+	int err;
+
+	/*
+	 * Attempt to pin all of the buffers into the GTT.
+	 * This is done in 3 phases:
+	 *
+	 * 1a. Unbind all objects that do not match the GTT constraints for
+	 *     the execbuffer (fenceable, mappable, alignment etc).
+	 * 1b. Increment pin count for already bound objects.
+	 * 2.  Bind new objects.
+	 * 3.  Decrement pin count.
+	 *
+	 * This avoid unnecessary unbinding of later objects in order to make
+	 * room for the earlier objects *unless* we need to defragment.
+	 */
+
+	pass = 0;
+	err = 0;
+	do {
+		list_for_each_entry(vma, &eb->unbound, exec_link) {
+			err = eb_reserve_vma(eb, vma);
+			if (err)
+				break;
+		}
+		if (err != -ENOSPC)
+			return err;
+
+		/* Resort *all* the objects into priority order */
+		INIT_LIST_HEAD(&eb->unbound);
+		INIT_LIST_HEAD(&last);
+		for (i = 0; i < count; i++) {
+			unsigned int flags = eb->flags[i];
+			struct i915_vma *vma = eb->vma[i];
+
+			if (flags & EXEC_OBJECT_PINNED &&
+			    flags & __EXEC_OBJECT_HAS_PIN)
+				continue;
+
+			eb_unreserve_vma(vma, &eb->flags[i]);
+
+			if (flags & EXEC_OBJECT_PINNED)
+				/* Pinned must have their slot */
+				list_add(&vma->exec_link, &eb->unbound);
+			else if (flags & __EXEC_OBJECT_NEEDS_MAP)
+				/* Map require the lowest 256MiB (aperture) */
+				list_add_tail(&vma->exec_link, &eb->unbound);
+			else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+				/* Prioritise 4GiB region for restricted bo */
+				list_add(&vma->exec_link, &last);
+			else
+				list_add_tail(&vma->exec_link, &last);
+		}
+		list_splice_tail(&last, &eb->unbound);
+
+		switch (pass++) {
+		case 0:
+			break;
+
+		case 1:
+			/* Too fragmented, unbind everything and retry */
+			err = i915_gem_evict_vm(eb->vm);
+			if (err)
+				return err;
+			break;
+
+		default:
+			return -ENOSPC;
+		}
+	} while (1);
+}
+
+static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
+{
+	if (eb->args->flags & I915_EXEC_BATCH_FIRST)
+		return 0;
+	else
+		return eb->buffer_count - 1;
+}
+
+static int eb_select_context(struct i915_execbuffer *eb)
+{
+	struct i915_gem_context *ctx;
+
+	ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
+	if (unlikely(!ctx))
+		return -ENOENT;
+
+	eb->gem_context = ctx;
+	if (ctx->vm) {
+		eb->vm = ctx->vm;
+		eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
+	} else {
+		eb->vm = &eb->i915->ggtt.vm;
+	}
+
+	eb->context_flags = 0;
+	if (test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags))
+		eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS;
+
+	return 0;
+}
+
+static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
+{
+	struct i915_request *rq;
+
+	/*
+	 * Completely unscientific finger-in-the-air estimates for suitable
+	 * maximum user request size (to avoid blocking) and then backoff.
+	 */
+	if (intel_ring_update_space(ring) >= PAGE_SIZE)
+		return NULL;
+
+	/*
+	 * Find a request that after waiting upon, there will be at least half
+	 * the ring available. The hysteresis allows us to compete for the
+	 * shared ring and should mean that we sleep less often prior to
+	 * claiming our resources, but not so long that the ring completely
+	 * drains before we can submit our next request.
+	 */
+	list_for_each_entry(rq, &ring->request_list, ring_link) {
+		if (__intel_ring_space(rq->postfix,
+				       ring->emit, ring->size) > ring->size / 2)
+			break;
+	}
+	if (&rq->ring_link == &ring->request_list)
+		return NULL; /* weird, we will check again later for real */
+
+	return i915_request_get(rq);
+}
+
+static int eb_wait_for_ring(const struct i915_execbuffer *eb)
+{
+	struct i915_request *rq;
+	int ret = 0;
+
+	/*
+	 * Apply a light amount of backpressure to prevent excessive hogs
+	 * from blocking waiting for space whilst holding struct_mutex and
+	 * keeping all of their resources pinned.
+	 */
+
+	rq = __eb_wait_for_ring(eb->context->ring);
+	if (rq) {
+		mutex_unlock(&eb->i915->drm.struct_mutex);
+
+		if (i915_request_wait(rq,
+				      I915_WAIT_INTERRUPTIBLE,
+				      MAX_SCHEDULE_TIMEOUT) < 0)
+			ret = -EINTR;
+
+		i915_request_put(rq);
+
+		mutex_lock(&eb->i915->drm.struct_mutex);
+	}
+
+	return ret;
+}
+
+static int eb_lookup_vmas(struct i915_execbuffer *eb)
+{
+	struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma;
+	struct drm_i915_gem_object *obj;
+	unsigned int i, batch;
+	int err;
+
+	if (unlikely(i915_gem_context_is_banned(eb->gem_context)))
+		return -EIO;
+
+	INIT_LIST_HEAD(&eb->relocs);
+	INIT_LIST_HEAD(&eb->unbound);
+
+	batch = eb_batch_index(eb);
+
+	mutex_lock(&eb->gem_context->mutex);
+	if (unlikely(i915_gem_context_is_closed(eb->gem_context))) {
+		err = -ENOENT;
+		goto err_ctx;
+	}
+
+	for (i = 0; i < eb->buffer_count; i++) {
+		u32 handle = eb->exec[i].handle;
+		struct i915_lut_handle *lut;
+		struct i915_vma *vma;
+
+		vma = radix_tree_lookup(handles_vma, handle);
+		if (likely(vma))
+			goto add_vma;
+
+		obj = i915_gem_object_lookup(eb->file, handle);
+		if (unlikely(!obj)) {
+			err = -ENOENT;
+			goto err_vma;
+		}
+
+		vma = i915_vma_instance(obj, eb->vm, NULL);
+		if (IS_ERR(vma)) {
+			err = PTR_ERR(vma);
+			goto err_obj;
+		}
+
+		lut = i915_lut_handle_alloc();
+		if (unlikely(!lut)) {
+			err = -ENOMEM;
+			goto err_obj;
+		}
+
+		err = radix_tree_insert(handles_vma, handle, vma);
+		if (unlikely(err)) {
+			i915_lut_handle_free(lut);
+			goto err_obj;
+		}
+
+		/* transfer ref to lut */
+		if (!atomic_fetch_inc(&vma->open_count))
+			i915_vma_reopen(vma);
+		lut->handle = handle;
+		lut->ctx = eb->gem_context;
+
+		i915_gem_object_lock(obj);
+		list_add(&lut->obj_link, &obj->lut_list);
+		i915_gem_object_unlock(obj);
+
+add_vma:
+		err = eb_add_vma(eb, i, batch, vma);
+		if (unlikely(err))
+			goto err_vma;
+
+		GEM_BUG_ON(vma != eb->vma[i]);
+		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
+		GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
+			   eb_vma_misplaced(&eb->exec[i], vma, eb->flags[i]));
+	}
+
+	mutex_unlock(&eb->gem_context->mutex);
+
+	eb->args->flags |= __EXEC_VALIDATED;
+	return eb_reserve(eb);
+
+err_obj:
+	i915_gem_object_put(obj);
+err_vma:
+	eb->vma[i] = NULL;
+err_ctx:
+	mutex_unlock(&eb->gem_context->mutex);
+	return err;
+}
+
+static struct i915_vma *
+eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
+{
+	if (eb->lut_size < 0) {
+		if (handle >= -eb->lut_size)
+			return NULL;
+		return eb->vma[handle];
+	} else {
+		struct hlist_head *head;
+		struct i915_vma *vma;
+
+		head = &eb->buckets[hash_32(handle, eb->lut_size)];
+		hlist_for_each_entry(vma, head, exec_node) {
+			if (vma->exec_handle == handle)
+				return vma;
+		}
+		return NULL;
+	}
+}
+
+static void eb_release_vmas(const struct i915_execbuffer *eb)
+{
+	const unsigned int count = eb->buffer_count;
+	unsigned int i;
+
+	for (i = 0; i < count; i++) {
+		struct i915_vma *vma = eb->vma[i];
+		unsigned int flags = eb->flags[i];
+
+		if (!vma)
+			break;
+
+		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
+		vma->exec_flags = NULL;
+		eb->vma[i] = NULL;
+
+		if (flags & __EXEC_OBJECT_HAS_PIN)
+			__eb_unreserve_vma(vma, flags);
+
+		if (flags & __EXEC_OBJECT_HAS_REF)
+			i915_vma_put(vma);
+	}
+}
+
+static void eb_reset_vmas(const struct i915_execbuffer *eb)
+{
+	eb_release_vmas(eb);
+	if (eb->lut_size > 0)
+		memset(eb->buckets, 0,
+		       sizeof(struct hlist_head) << eb->lut_size);
+}
+
+static void eb_destroy(const struct i915_execbuffer *eb)
+{
+	GEM_BUG_ON(eb->reloc_cache.rq);
+
+	if (eb->lut_size > 0)
+		kfree(eb->buckets);
+}
+
+static inline u64
+relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
+		  const struct i915_vma *target)
+{
+	return gen8_canonical_addr((int)reloc->delta + target->node.start);
+}
+
+static void reloc_cache_init(struct reloc_cache *cache,
+			     struct drm_i915_private *i915)
+{
+	cache->page = -1;
+	cache->vaddr = 0;
+	/* Must be a variable in the struct to allow GCC to unroll. */
+	cache->gen = INTEL_GEN(i915);
+	cache->has_llc = HAS_LLC(i915);
+	cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
+	cache->has_fence = cache->gen < 4;
+	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
+	cache->node.allocated = false;
+	cache->rq = NULL;
+	cache->rq_size = 0;
+}
+
+static inline void *unmask_page(unsigned long p)
+{
+	return (void *)(uintptr_t)(p & PAGE_MASK);
+}
+
+static inline unsigned int unmask_flags(unsigned long p)
+{
+	return p & ~PAGE_MASK;
+}
+
+#define KMAP 0x4 /* after CLFLUSH_FLAGS */
+
+static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
+{
+	struct drm_i915_private *i915 =
+		container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
+	return &i915->ggtt;
+}
+
+static void reloc_gpu_flush(struct reloc_cache *cache)
+{
+	GEM_BUG_ON(cache->rq_size >= cache->rq->batch->obj->base.size / sizeof(u32));
+	cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
+
+	__i915_gem_object_flush_map(cache->rq->batch->obj, 0, cache->rq_size);
+	i915_gem_object_unpin_map(cache->rq->batch->obj);
+
+	i915_gem_chipset_flush(cache->rq->i915);
+
+	i915_request_add(cache->rq);
+	cache->rq = NULL;
+}
+
+static void reloc_cache_reset(struct reloc_cache *cache)
+{
+	void *vaddr;
+
+	if (cache->rq)
+		reloc_gpu_flush(cache);
+
+	if (!cache->vaddr)
+		return;
+
+	vaddr = unmask_page(cache->vaddr);
+	if (cache->vaddr & KMAP) {
+		if (cache->vaddr & CLFLUSH_AFTER)
+			mb();
+
+		kunmap_atomic(vaddr);
+		i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
+	} else {
+		wmb();
+		io_mapping_unmap_atomic((void __iomem *)vaddr);
+		if (cache->node.allocated) {
+			struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+
+			ggtt->vm.clear_range(&ggtt->vm,
+					     cache->node.start,
+					     cache->node.size);
+			drm_mm_remove_node(&cache->node);
+		} else {
+			i915_vma_unpin((struct i915_vma *)cache->node.mm);
+		}
+	}
+
+	cache->vaddr = 0;
+	cache->page = -1;
+}
+
+static void *reloc_kmap(struct drm_i915_gem_object *obj,
+			struct reloc_cache *cache,
+			unsigned long page)
+{
+	void *vaddr;
+
+	if (cache->vaddr) {
+		kunmap_atomic(unmask_page(cache->vaddr));
+	} else {
+		unsigned int flushes;
+		int err;
+
+		err = i915_gem_object_prepare_write(obj, &flushes);
+		if (err)
+			return ERR_PTR(err);
+
+		BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
+		BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
+
+		cache->vaddr = flushes | KMAP;
+		cache->node.mm = (void *)obj;
+		if (flushes)
+			mb();
+	}
+
+	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
+	cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
+	cache->page = page;
+
+	return vaddr;
+}
+
+static void *reloc_iomap(struct drm_i915_gem_object *obj,
+			 struct reloc_cache *cache,
+			 unsigned long page)
+{
+	struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+	unsigned long offset;
+	void *vaddr;
+
+	if (cache->vaddr) {
+		io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
+	} else {
+		struct i915_vma *vma;
+		int err;
+
+		if (use_cpu_reloc(cache, obj))
+			return NULL;
+
+		i915_gem_object_lock(obj);
+		err = i915_gem_object_set_to_gtt_domain(obj, true);
+		i915_gem_object_unlock(obj);
+		if (err)
+			return ERR_PTR(err);
+
+		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+					       PIN_MAPPABLE |
+					       PIN_NONBLOCK |
+					       PIN_NONFAULT);
+		if (IS_ERR(vma)) {
+			memset(&cache->node, 0, sizeof(cache->node));
+			err = drm_mm_insert_node_in_range
+				(&ggtt->vm.mm, &cache->node,
+				 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
+				 0, ggtt->mappable_end,
+				 DRM_MM_INSERT_LOW);
+			if (err) /* no inactive aperture space, use cpu reloc */
+				return NULL;
+		} else {
+			err = i915_vma_put_fence(vma);
+			if (err) {
+				i915_vma_unpin(vma);
+				return ERR_PTR(err);
+			}
+
+			cache->node.start = vma->node.start;
+			cache->node.mm = (void *)vma;
+		}
+	}
+
+	offset = cache->node.start;
+	if (cache->node.allocated) {
+		wmb();
+		ggtt->vm.insert_page(&ggtt->vm,
+				     i915_gem_object_get_dma_address(obj, page),
+				     offset, I915_CACHE_NONE, 0);
+	} else {
+		offset += page << PAGE_SHIFT;
+	}
+
+	vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap,
+							 offset);
+	cache->page = page;
+	cache->vaddr = (unsigned long)vaddr;
+
+	return vaddr;
+}
+
+static void *reloc_vaddr(struct drm_i915_gem_object *obj,
+			 struct reloc_cache *cache,
+			 unsigned long page)
+{
+	void *vaddr;
+
+	if (cache->page == page) {
+		vaddr = unmask_page(cache->vaddr);
+	} else {
+		vaddr = NULL;
+		if ((cache->vaddr & KMAP) == 0)
+			vaddr = reloc_iomap(obj, cache, page);
+		if (!vaddr)
+			vaddr = reloc_kmap(obj, cache, page);
+	}
+
+	return vaddr;
+}
+
+static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
+{
+	if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
+		if (flushes & CLFLUSH_BEFORE) {
+			clflushopt(addr);
+			mb();
+		}
+
+		*addr = value;
+
+		/*
+		 * Writes to the same cacheline are serialised by the CPU
+		 * (including clflush). On the write path, we only require
+		 * that it hits memory in an orderly fashion and place
+		 * mb barriers at the start and end of the relocation phase
+		 * to ensure ordering of clflush wrt to the system.
+		 */
+		if (flushes & CLFLUSH_AFTER)
+			clflushopt(addr);
+	} else
+		*addr = value;
+}
+
+static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	int err;
+
+	i915_vma_lock(vma);
+
+	if (obj->cache_dirty & ~obj->cache_coherent)
+		i915_gem_clflush_object(obj, 0);
+	obj->write_domain = 0;
+
+	err = i915_request_await_object(rq, vma->obj, true);
+	if (err == 0)
+		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+
+	i915_vma_unlock(vma);
+
+	return err;
+}
+
+static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
+			     struct i915_vma *vma,
+			     unsigned int len)
+{
+	struct reloc_cache *cache = &eb->reloc_cache;
+	struct drm_i915_gem_object *obj;
+	struct i915_request *rq;
+	struct i915_vma *batch;
+	u32 *cmd;
+	int err;
+
+	obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	cmd = i915_gem_object_pin_map(obj,
+				      cache->has_llc ?
+				      I915_MAP_FORCE_WB :
+				      I915_MAP_FORCE_WC);
+	i915_gem_object_unpin_pages(obj);
+	if (IS_ERR(cmd))
+		return PTR_ERR(cmd);
+
+	batch = i915_vma_instance(obj, vma->vm, NULL);
+	if (IS_ERR(batch)) {
+		err = PTR_ERR(batch);
+		goto err_unmap;
+	}
+
+	err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
+	if (err)
+		goto err_unmap;
+
+	rq = i915_request_create(eb->context);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_unpin;
+	}
+
+	err = reloc_move_to_gpu(rq, vma);
+	if (err)
+		goto err_request;
+
+	err = eb->engine->emit_bb_start(rq,
+					batch->node.start, PAGE_SIZE,
+					cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
+	if (err)
+		goto skip_request;
+
+	i915_vma_lock(batch);
+	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
+	err = i915_vma_move_to_active(batch, rq, 0);
+	i915_vma_unlock(batch);
+	if (err)
+		goto skip_request;
+
+	rq->batch = batch;
+	i915_vma_unpin(batch);
+
+	cache->rq = rq;
+	cache->rq_cmd = cmd;
+	cache->rq_size = 0;
+
+	/* Return with batch mapping (cmd) still pinned */
+	return 0;
+
+skip_request:
+	i915_request_skip(rq, err);
+err_request:
+	i915_request_add(rq);
+err_unpin:
+	i915_vma_unpin(batch);
+err_unmap:
+	i915_gem_object_unpin_map(obj);
+	return err;
+}
+
+static u32 *reloc_gpu(struct i915_execbuffer *eb,
+		      struct i915_vma *vma,
+		      unsigned int len)
+{
+	struct reloc_cache *cache = &eb->reloc_cache;
+	u32 *cmd;
+
+	if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
+		reloc_gpu_flush(cache);
+
+	if (unlikely(!cache->rq)) {
+		int err;
+
+		/* If we need to copy for the cmdparser, we will stall anyway */
+		if (eb_use_cmdparser(eb))
+			return ERR_PTR(-EWOULDBLOCK);
+
+		if (!intel_engine_can_store_dword(eb->engine))
+			return ERR_PTR(-ENODEV);
+
+		err = __reloc_gpu_alloc(eb, vma, len);
+		if (unlikely(err))
+			return ERR_PTR(err);
+	}
+
+	cmd = cache->rq_cmd + cache->rq_size;
+	cache->rq_size += len;
+
+	return cmd;
+}
+
+static u64
+relocate_entry(struct i915_vma *vma,
+	       const struct drm_i915_gem_relocation_entry *reloc,
+	       struct i915_execbuffer *eb,
+	       const struct i915_vma *target)
+{
+	u64 offset = reloc->offset;
+	u64 target_offset = relocation_target(reloc, target);
+	bool wide = eb->reloc_cache.use_64bit_reloc;
+	void *vaddr;
+
+	if (!eb->reloc_cache.vaddr &&
+	    (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
+	     !reservation_object_test_signaled_rcu(vma->resv, true))) {
+		const unsigned int gen = eb->reloc_cache.gen;
+		unsigned int len;
+		u32 *batch;
+		u64 addr;
+
+		if (wide)
+			len = offset & 7 ? 8 : 5;
+		else if (gen >= 4)
+			len = 4;
+		else
+			len = 3;
+
+		batch = reloc_gpu(eb, vma, len);
+		if (IS_ERR(batch))
+			goto repeat;
+
+		addr = gen8_canonical_addr(vma->node.start + offset);
+		if (wide) {
+			if (offset & 7) {
+				*batch++ = MI_STORE_DWORD_IMM_GEN4;
+				*batch++ = lower_32_bits(addr);
+				*batch++ = upper_32_bits(addr);
+				*batch++ = lower_32_bits(target_offset);
+
+				addr = gen8_canonical_addr(addr + 4);
+
+				*batch++ = MI_STORE_DWORD_IMM_GEN4;
+				*batch++ = lower_32_bits(addr);
+				*batch++ = upper_32_bits(addr);
+				*batch++ = upper_32_bits(target_offset);
+			} else {
+				*batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
+				*batch++ = lower_32_bits(addr);
+				*batch++ = upper_32_bits(addr);
+				*batch++ = lower_32_bits(target_offset);
+				*batch++ = upper_32_bits(target_offset);
+			}
+		} else if (gen >= 6) {
+			*batch++ = MI_STORE_DWORD_IMM_GEN4;
+			*batch++ = 0;
+			*batch++ = addr;
+			*batch++ = target_offset;
+		} else if (gen >= 4) {
+			*batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+			*batch++ = 0;
+			*batch++ = addr;
+			*batch++ = target_offset;
+		} else {
+			*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+			*batch++ = addr;
+			*batch++ = target_offset;
+		}
+
+		goto out;
+	}
+
+repeat:
+	vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
+	if (IS_ERR(vaddr))
+		return PTR_ERR(vaddr);
+
+	clflush_write32(vaddr + offset_in_page(offset),
+			lower_32_bits(target_offset),
+			eb->reloc_cache.vaddr);
+
+	if (wide) {
+		offset += sizeof(u32);
+		target_offset >>= 32;
+		wide = false;
+		goto repeat;
+	}
+
+out:
+	return target->node.start | UPDATE;
+}
+
+static u64
+eb_relocate_entry(struct i915_execbuffer *eb,
+		  struct i915_vma *vma,
+		  const struct drm_i915_gem_relocation_entry *reloc)
+{
+	struct i915_vma *target;
+	int err;
+
+	/* we've already hold a reference to all valid objects */
+	target = eb_get_vma(eb, reloc->target_handle);
+	if (unlikely(!target))
+		return -ENOENT;
+
+	/* Validate that the target is in a valid r/w GPU domain */
+	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
+		DRM_DEBUG("reloc with multiple write domains: "
+			  "target %d offset %d "
+			  "read %08x write %08x",
+			  reloc->target_handle,
+			  (int) reloc->offset,
+			  reloc->read_domains,
+			  reloc->write_domain);
+		return -EINVAL;
+	}
+	if (unlikely((reloc->write_domain | reloc->read_domains)
+		     & ~I915_GEM_GPU_DOMAINS)) {
+		DRM_DEBUG("reloc with read/write non-GPU domains: "
+			  "target %d offset %d "
+			  "read %08x write %08x",
+			  reloc->target_handle,
+			  (int) reloc->offset,
+			  reloc->read_domains,
+			  reloc->write_domain);
+		return -EINVAL;
+	}
+
+	if (reloc->write_domain) {
+		*target->exec_flags |= EXEC_OBJECT_WRITE;
+
+		/*
+		 * Sandybridge PPGTT errata: We need a global gtt mapping
+		 * for MI and pipe_control writes because the gpu doesn't
+		 * properly redirect them through the ppgtt for non_secure
+		 * batchbuffers.
+		 */
+		if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
+		    IS_GEN(eb->i915, 6)) {
+			err = i915_vma_bind(target, target->obj->cache_level,
+					    PIN_GLOBAL);
+			if (WARN_ONCE(err,
+				      "Unexpected failure to bind target VMA!"))
+				return err;
+		}
+	}
+
+	/*
+	 * If the relocation already has the right value in it, no
+	 * more work needs to be done.
+	 */
+	if (!DBG_FORCE_RELOC &&
+	    gen8_canonical_addr(target->node.start) == reloc->presumed_offset)
+		return 0;
+
+	/* Check that the relocation address is valid... */
+	if (unlikely(reloc->offset >
+		     vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
+		DRM_DEBUG("Relocation beyond object bounds: "
+			  "target %d offset %d size %d.\n",
+			  reloc->target_handle,
+			  (int)reloc->offset,
+			  (int)vma->size);
+		return -EINVAL;
+	}
+	if (unlikely(reloc->offset & 3)) {
+		DRM_DEBUG("Relocation not 4-byte aligned: "
+			  "target %d offset %d.\n",
+			  reloc->target_handle,
+			  (int)reloc->offset);
+		return -EINVAL;
+	}
+
+	/*
+	 * If we write into the object, we need to force the synchronisation
+	 * barrier, either with an asynchronous clflush or if we executed the
+	 * patching using the GPU (though that should be serialised by the
+	 * timeline). To be completely sure, and since we are required to
+	 * do relocations we are already stalling, disable the user's opt
+	 * out of our synchronisation.
+	 */
+	*vma->exec_flags &= ~EXEC_OBJECT_ASYNC;
+
+	/* and update the user's relocation entry */
+	return relocate_entry(vma, reloc, eb, target);
+}
+
+static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
+{
+#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
+	struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
+	struct drm_i915_gem_relocation_entry __user *urelocs;
+	const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
+	unsigned int remain;
+
+	urelocs = u64_to_user_ptr(entry->relocs_ptr);
+	remain = entry->relocation_count;
+	if (unlikely(remain > N_RELOC(ULONG_MAX)))
+		return -EINVAL;
+
+	/*
+	 * We must check that the entire relocation array is safe
+	 * to read. However, if the array is not writable the user loses
+	 * the updated relocation values.
+	 */
+	if (unlikely(!access_ok(urelocs, remain*sizeof(*urelocs))))
+		return -EFAULT;
+
+	do {
+		struct drm_i915_gem_relocation_entry *r = stack;
+		unsigned int count =
+			min_t(unsigned int, remain, ARRAY_SIZE(stack));
+		unsigned int copied;
+
+		/*
+		 * This is the fast path and we cannot handle a pagefault
+		 * whilst holding the struct mutex lest the user pass in the
+		 * relocations contained within a mmaped bo. For in such a case
+		 * we, the page fault handler would call i915_gem_fault() and
+		 * we would try to acquire the struct mutex again. Obviously
+		 * this is bad and so lockdep complains vehemently.
+		 */
+		pagefault_disable();
+		copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
+		pagefault_enable();
+		if (unlikely(copied)) {
+			remain = -EFAULT;
+			goto out;
+		}
+
+		remain -= count;
+		do {
+			u64 offset = eb_relocate_entry(eb, vma, r);
+
+			if (likely(offset == 0)) {
+			} else if ((s64)offset < 0) {
+				remain = (int)offset;
+				goto out;
+			} else {
+				/*
+				 * Note that reporting an error now
+				 * leaves everything in an inconsistent
+				 * state as we have *already* changed
+				 * the relocation value inside the
+				 * object. As we have not changed the
+				 * reloc.presumed_offset or will not
+				 * change the execobject.offset, on the
+				 * call we may not rewrite the value
+				 * inside the object, leaving it
+				 * dangling and causing a GPU hang. Unless
+				 * userspace dynamically rebuilds the
+				 * relocations on each execbuf rather than
+				 * presume a static tree.
+				 *
+				 * We did previously check if the relocations
+				 * were writable (access_ok), an error now
+				 * would be a strange race with mprotect,
+				 * having already demonstrated that we
+				 * can read from this userspace address.
+				 */
+				offset = gen8_canonical_addr(offset & ~UPDATE);
+				if (unlikely(__put_user(offset, &urelocs[r-stack].presumed_offset))) {
+					remain = -EFAULT;
+					goto out;
+				}
+			}
+		} while (r++, --count);
+		urelocs += ARRAY_SIZE(stack);
+	} while (remain);
+out:
+	reloc_cache_reset(&eb->reloc_cache);
+	return remain;
+}
+
+static int
+eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma)
+{
+	const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
+	struct drm_i915_gem_relocation_entry *relocs =
+		u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < entry->relocation_count; i++) {
+		u64 offset = eb_relocate_entry(eb, vma, &relocs[i]);
+
+		if ((s64)offset < 0) {
+			err = (int)offset;
+			goto err;
+		}
+	}
+	err = 0;
+err:
+	reloc_cache_reset(&eb->reloc_cache);
+	return err;
+}
+
+static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
+{
+	const char __user *addr, *end;
+	unsigned long size;
+	char __maybe_unused c;
+
+	size = entry->relocation_count;
+	if (size == 0)
+		return 0;
+
+	if (size > N_RELOC(ULONG_MAX))
+		return -EINVAL;
+
+	addr = u64_to_user_ptr(entry->relocs_ptr);
+	size *= sizeof(struct drm_i915_gem_relocation_entry);
+	if (!access_ok(addr, size))
+		return -EFAULT;
+
+	end = addr + size;
+	for (; addr < end; addr += PAGE_SIZE) {
+		int err = __get_user(c, addr);
+		if (err)
+			return err;
+	}
+	return __get_user(c, end - 1);
+}
+
+static int eb_copy_relocations(const struct i915_execbuffer *eb)
+{
+	const unsigned int count = eb->buffer_count;
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < count; i++) {
+		const unsigned int nreloc = eb->exec[i].relocation_count;
+		struct drm_i915_gem_relocation_entry __user *urelocs;
+		struct drm_i915_gem_relocation_entry *relocs;
+		unsigned long size;
+		unsigned long copied;
+
+		if (nreloc == 0)
+			continue;
+
+		err = check_relocations(&eb->exec[i]);
+		if (err)
+			goto err;
+
+		urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
+		size = nreloc * sizeof(*relocs);
+
+		relocs = kvmalloc_array(size, 1, GFP_KERNEL);
+		if (!relocs) {
+			err = -ENOMEM;
+			goto err;
+		}
+
+		/* copy_from_user is limited to < 4GiB */
+		copied = 0;
+		do {
+			unsigned int len =
+				min_t(u64, BIT_ULL(31), size - copied);
+
+			if (__copy_from_user((char *)relocs + copied,
+					     (char __user *)urelocs + copied,
+					     len)) {
+end_user:
+				user_access_end();
+end:
+				kvfree(relocs);
+				err = -EFAULT;
+				goto err;
+			}
+
+			copied += len;
+		} while (copied < size);
+
+		/*
+		 * As we do not update the known relocation offsets after
+		 * relocating (due to the complexities in lock handling),
+		 * we need to mark them as invalid now so that we force the
+		 * relocation processing next time. Just in case the target
+		 * object is evicted and then rebound into its old
+		 * presumed_offset before the next execbuffer - if that
+		 * happened we would make the mistake of assuming that the
+		 * relocations were valid.
+		 */
+		if (!user_access_begin(urelocs, size))
+			goto end;
+
+		for (copied = 0; copied < nreloc; copied++)
+			unsafe_put_user(-1,
+					&urelocs[copied].presumed_offset,
+					end_user);
+		user_access_end();
+
+		eb->exec[i].relocs_ptr = (uintptr_t)relocs;
+	}
+
+	return 0;
+
+err:
+	while (i--) {
+		struct drm_i915_gem_relocation_entry *relocs =
+			u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
+		if (eb->exec[i].relocation_count)
+			kvfree(relocs);
+	}
+	return err;
+}
+
+static int eb_prefault_relocations(const struct i915_execbuffer *eb)
+{
+	const unsigned int count = eb->buffer_count;
+	unsigned int i;
+
+	if (unlikely(i915_modparams.prefault_disable))
+		return 0;
+
+	for (i = 0; i < count; i++) {
+		int err;
+
+		err = check_relocations(&eb->exec[i]);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
+{
+	struct drm_device *dev = &eb->i915->drm;
+	bool have_copy = false;
+	struct i915_vma *vma;
+	int err = 0;
+
+repeat:
+	if (signal_pending(current)) {
+		err = -ERESTARTSYS;
+		goto out;
+	}
+
+	/* We may process another execbuffer during the unlock... */
+	eb_reset_vmas(eb);
+	mutex_unlock(&dev->struct_mutex);
+
+	/*
+	 * We take 3 passes through the slowpatch.
+	 *
+	 * 1 - we try to just prefault all the user relocation entries and
+	 * then attempt to reuse the atomic pagefault disabled fast path again.
+	 *
+	 * 2 - we copy the user entries to a local buffer here outside of the
+	 * local and allow ourselves to wait upon any rendering before
+	 * relocations
+	 *
+	 * 3 - we already have a local copy of the relocation entries, but
+	 * were interrupted (EAGAIN) whilst waiting for the objects, try again.
+	 */
+	if (!err) {
+		err = eb_prefault_relocations(eb);
+	} else if (!have_copy) {
+		err = eb_copy_relocations(eb);
+		have_copy = err == 0;
+	} else {
+		cond_resched();
+		err = 0;
+	}
+	if (err) {
+		mutex_lock(&dev->struct_mutex);
+		goto out;
+	}
+
+	/* A frequent cause for EAGAIN are currently unavailable client pages */
+	flush_workqueue(eb->i915->mm.userptr_wq);
+
+	err = i915_mutex_lock_interruptible(dev);
+	if (err) {
+		mutex_lock(&dev->struct_mutex);
+		goto out;
+	}
+
+	/* reacquire the objects */
+	err = eb_lookup_vmas(eb);
+	if (err)
+		goto err;
+
+	GEM_BUG_ON(!eb->batch);
+
+	list_for_each_entry(vma, &eb->relocs, reloc_link) {
+		if (!have_copy) {
+			pagefault_disable();
+			err = eb_relocate_vma(eb, vma);
+			pagefault_enable();
+			if (err)
+				goto repeat;
+		} else {
+			err = eb_relocate_vma_slow(eb, vma);
+			if (err)
+				goto err;
+		}
+	}
+
+	/*
+	 * Leave the user relocations as are, this is the painfully slow path,
+	 * and we want to avoid the complication of dropping the lock whilst
+	 * having buffers reserved in the aperture and so causing spurious
+	 * ENOSPC for random operations.
+	 */
+
+err:
+	if (err == -EAGAIN)
+		goto repeat;
+
+out:
+	if (have_copy) {
+		const unsigned int count = eb->buffer_count;
+		unsigned int i;
+
+		for (i = 0; i < count; i++) {
+			const struct drm_i915_gem_exec_object2 *entry =
+				&eb->exec[i];
+			struct drm_i915_gem_relocation_entry *relocs;
+
+			if (!entry->relocation_count)
+				continue;
+
+			relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
+			kvfree(relocs);
+		}
+	}
+
+	return err;
+}
+
+static int eb_relocate(struct i915_execbuffer *eb)
+{
+	if (eb_lookup_vmas(eb))
+		goto slow;
+
+	/* The objects are in their final locations, apply the relocations. */
+	if (eb->args->flags & __EXEC_HAS_RELOC) {
+		struct i915_vma *vma;
+
+		list_for_each_entry(vma, &eb->relocs, reloc_link) {
+			if (eb_relocate_vma(eb, vma))
+				goto slow;
+		}
+	}
+
+	return 0;
+
+slow:
+	return eb_relocate_slow(eb);
+}
+
+static int eb_move_to_gpu(struct i915_execbuffer *eb)
+{
+	const unsigned int count = eb->buffer_count;
+	struct ww_acquire_ctx acquire;
+	unsigned int i;
+	int err = 0;
+
+	ww_acquire_init(&acquire, &reservation_ww_class);
+
+	for (i = 0; i < count; i++) {
+		struct i915_vma *vma = eb->vma[i];
+
+		err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire);
+		if (!err)
+			continue;
+
+		GEM_BUG_ON(err == -EALREADY); /* No duplicate vma */
+
+		if (err == -EDEADLK) {
+			GEM_BUG_ON(i == 0);
+			do {
+				int j = i - 1;
+
+				ww_mutex_unlock(&eb->vma[j]->resv->lock);
+
+				swap(eb->flags[i], eb->flags[j]);
+				swap(eb->vma[i],  eb->vma[j]);
+				eb->vma[i]->exec_flags = &eb->flags[i];
+			} while (--i);
+			GEM_BUG_ON(vma != eb->vma[0]);
+			vma->exec_flags = &eb->flags[0];
+
+			err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
+							       &acquire);
+		}
+		if (err)
+			break;
+	}
+	ww_acquire_done(&acquire);
+
+	while (i--) {
+		unsigned int flags = eb->flags[i];
+		struct i915_vma *vma = eb->vma[i];
+		struct drm_i915_gem_object *obj = vma->obj;
+
+		assert_vma_held(vma);
+
+		if (flags & EXEC_OBJECT_CAPTURE) {
+			struct i915_capture_list *capture;
+
+			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
+			if (capture) {
+				capture->next = eb->request->capture_list;
+				capture->vma = vma;
+				eb->request->capture_list = capture;
+			}
+		}
+
+		/*
+		 * If the GPU is not _reading_ through the CPU cache, we need
+		 * to make sure that any writes (both previous GPU writes from
+		 * before a change in snooping levels and normal CPU writes)
+		 * caught in that cache are flushed to main memory.
+		 *
+		 * We want to say
+		 *   obj->cache_dirty &&
+		 *   !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
+		 * but gcc's optimiser doesn't handle that as well and emits
+		 * two jumps instead of one. Maybe one day...
+		 */
+		if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
+			if (i915_gem_clflush_object(obj, 0))
+				flags &= ~EXEC_OBJECT_ASYNC;
+		}
+
+		if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) {
+			err = i915_request_await_object
+				(eb->request, obj, flags & EXEC_OBJECT_WRITE);
+		}
+
+		if (err == 0)
+			err = i915_vma_move_to_active(vma, eb->request, flags);
+
+		i915_vma_unlock(vma);
+
+		__eb_unreserve_vma(vma, flags);
+		vma->exec_flags = NULL;
+
+		if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
+			i915_vma_put(vma);
+	}
+	ww_acquire_fini(&acquire);
+
+	if (unlikely(err))
+		goto err_skip;
+
+	eb->exec = NULL;
+
+	/* Unconditionally flush any chipset caches (for streaming writes). */
+	i915_gem_chipset_flush(eb->i915);
+	return 0;
+
+err_skip:
+	i915_request_skip(eb->request, err);
+	return err;
+}
+
+static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
+{
+	if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
+		return false;
+
+	/* Kernel clipping was a DRI1 misfeature */
+	if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {
+		if (exec->num_cliprects || exec->cliprects_ptr)
+			return false;
+	}
+
+	if (exec->DR4 == 0xffffffff) {
+		DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
+		exec->DR4 = 0;
+	}
+	if (exec->DR1 || exec->DR4)
+		return false;
+
+	if ((exec->batch_start_offset | exec->batch_len) & 0x7)
+		return false;
+
+	return true;
+}
+
+static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
+{
+	u32 *cs;
+	int i;
+
+	if (!IS_GEN(rq->i915, 7) || rq->engine->id != RCS0) {
+		DRM_DEBUG("sol reset is gen7/rcs only\n");
+		return -EINVAL;
+	}
+
+	cs = intel_ring_begin(rq, 4 * 2 + 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_LOAD_REGISTER_IMM(4);
+	for (i = 0; i < 4; i++) {
+		*cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
+		*cs++ = 0;
+	}
+	*cs++ = MI_NOOP;
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
+{
+	struct drm_i915_gem_object *shadow_batch_obj;
+	struct i915_vma *vma;
+	int err;
+
+	shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool,
+						   PAGE_ALIGN(eb->batch_len));
+	if (IS_ERR(shadow_batch_obj))
+		return ERR_CAST(shadow_batch_obj);
+
+	err = intel_engine_cmd_parser(eb->engine,
+				      eb->batch->obj,
+				      shadow_batch_obj,
+				      eb->batch_start_offset,
+				      eb->batch_len,
+				      is_master);
+	if (err) {
+		if (err == -EACCES) /* unhandled chained batch */
+			vma = NULL;
+		else
+			vma = ERR_PTR(err);
+		goto out;
+	}
+
+	vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
+	if (IS_ERR(vma))
+		goto out;
+
+	eb->vma[eb->buffer_count] = i915_vma_get(vma);
+	eb->flags[eb->buffer_count] =
+		__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
+	vma->exec_flags = &eb->flags[eb->buffer_count];
+	eb->buffer_count++;
+
+out:
+	i915_gem_object_unpin_pages(shadow_batch_obj);
+	return vma;
+}
+
+static void
+add_to_client(struct i915_request *rq, struct drm_file *file)
+{
+	rq->file_priv = file->driver_priv;
+	list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list);
+}
+
+static int eb_submit(struct i915_execbuffer *eb)
+{
+	int err;
+
+	err = eb_move_to_gpu(eb);
+	if (err)
+		return err;
+
+	if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) {
+		err = i915_reset_gen7_sol_offsets(eb->request);
+		if (err)
+			return err;
+	}
+
+	/*
+	 * After we completed waiting for other engines (using HW semaphores)
+	 * then we can signal that this request/batch is ready to run. This
+	 * allows us to determine if the batch is still waiting on the GPU
+	 * or actually running by checking the breadcrumb.
+	 */
+	if (eb->engine->emit_init_breadcrumb) {
+		err = eb->engine->emit_init_breadcrumb(eb->request);
+		if (err)
+			return err;
+	}
+
+	err = eb->engine->emit_bb_start(eb->request,
+					eb->batch->node.start +
+					eb->batch_start_offset,
+					eb->batch_len,
+					eb->batch_flags);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/*
+ * Find one BSD ring to dispatch the corresponding BSD command.
+ * The engine index is returned.
+ */
+static unsigned int
+gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
+			 struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+
+	/* Check whether the file_priv has already selected one ring. */
+	if ((int)file_priv->bsd_engine < 0)
+		file_priv->bsd_engine = atomic_fetch_xor(1,
+			 &dev_priv->mm.bsd_engine_dispatch_index);
+
+	return file_priv->bsd_engine;
+}
+
+static const enum intel_engine_id user_ring_map[] = {
+	[I915_EXEC_DEFAULT]	= RCS0,
+	[I915_EXEC_RENDER]	= RCS0,
+	[I915_EXEC_BLT]		= BCS0,
+	[I915_EXEC_BSD]		= VCS0,
+	[I915_EXEC_VEBOX]	= VECS0
+};
+
+static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
+{
+	int err;
+
+	/*
+	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
+	 * EIO if the GPU is already wedged.
+	 */
+	err = i915_terminally_wedged(eb->i915);
+	if (err)
+		return err;
+
+	/*
+	 * Pinning the contexts may generate requests in order to acquire
+	 * GGTT space, so do this first before we reserve a seqno for
+	 * ourselves.
+	 */
+	err = intel_context_pin(ce);
+	if (err)
+		return err;
+
+	eb->engine = ce->engine;
+	eb->context = ce;
+	return 0;
+}
+
+static void eb_unpin_context(struct i915_execbuffer *eb)
+{
+	intel_context_unpin(eb->context);
+}
+
+static unsigned int
+eb_select_legacy_ring(struct i915_execbuffer *eb,
+		      struct drm_file *file,
+		      struct drm_i915_gem_execbuffer2 *args)
+{
+	struct drm_i915_private *i915 = eb->i915;
+	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
+
+	if (user_ring_id != I915_EXEC_BSD &&
+	    (args->flags & I915_EXEC_BSD_MASK)) {
+		DRM_DEBUG("execbuf with non bsd ring but with invalid "
+			  "bsd dispatch flags: %d\n", (int)(args->flags));
+		return -1;
+	}
+
+	if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(i915, VCS1)) {
+		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
+
+		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
+			bsd_idx = gen8_dispatch_bsd_engine(i915, file);
+		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
+			   bsd_idx <= I915_EXEC_BSD_RING2) {
+			bsd_idx >>= I915_EXEC_BSD_SHIFT;
+			bsd_idx--;
+		} else {
+			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
+				  bsd_idx);
+			return -1;
+		}
+
+		return _VCS(bsd_idx);
+	}
+
+	if (user_ring_id >= ARRAY_SIZE(user_ring_map)) {
+		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
+		return -1;
+	}
+
+	return user_ring_map[user_ring_id];
+}
+
+static int
+eb_select_engine(struct i915_execbuffer *eb,
+		 struct drm_file *file,
+		 struct drm_i915_gem_execbuffer2 *args)
+{
+	struct intel_context *ce;
+	unsigned int idx;
+	int err;
+
+	if (i915_gem_context_user_engines(eb->gem_context))
+		idx = args->flags & I915_EXEC_RING_MASK;
+	else
+		idx = eb_select_legacy_ring(eb, file, args);
+
+	ce = i915_gem_context_get_engine(eb->gem_context, idx);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = eb_pin_context(eb, ce);
+	intel_context_put(ce);
+
+	return err;
+}
+
+static void
+__free_fence_array(struct drm_syncobj **fences, unsigned int n)
+{
+	while (n--)
+		drm_syncobj_put(ptr_mask_bits(fences[n], 2));
+	kvfree(fences);
+}
+
+static struct drm_syncobj **
+get_fence_array(struct drm_i915_gem_execbuffer2 *args,
+		struct drm_file *file)
+{
+	const unsigned long nfences = args->num_cliprects;
+	struct drm_i915_gem_exec_fence __user *user;
+	struct drm_syncobj **fences;
+	unsigned long n;
+	int err;
+
+	if (!(args->flags & I915_EXEC_FENCE_ARRAY))
+		return NULL;
+
+	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
+	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
+	if (nfences > min_t(unsigned long,
+			    ULONG_MAX / sizeof(*user),
+			    SIZE_MAX / sizeof(*fences)))
+		return ERR_PTR(-EINVAL);
+
+	user = u64_to_user_ptr(args->cliprects_ptr);
+	if (!access_ok(user, nfences * sizeof(*user)))
+		return ERR_PTR(-EFAULT);
+
+	fences = kvmalloc_array(nfences, sizeof(*fences),
+				__GFP_NOWARN | GFP_KERNEL);
+	if (!fences)
+		return ERR_PTR(-ENOMEM);
+
+	for (n = 0; n < nfences; n++) {
+		struct drm_i915_gem_exec_fence fence;
+		struct drm_syncobj *syncobj;
+
+		if (__copy_from_user(&fence, user++, sizeof(fence))) {
+			err = -EFAULT;
+			goto err;
+		}
+
+		if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
+			err = -EINVAL;
+			goto err;
+		}
+
+		syncobj = drm_syncobj_find(file, fence.handle);
+		if (!syncobj) {
+			DRM_DEBUG("Invalid syncobj handle provided\n");
+			err = -ENOENT;
+			goto err;
+		}
+
+		BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
+			     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
+
+		fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
+	}
+
+	return fences;
+
+err:
+	__free_fence_array(fences, n);
+	return ERR_PTR(err);
+}
+
+static void
+put_fence_array(struct drm_i915_gem_execbuffer2 *args,
+		struct drm_syncobj **fences)
+{
+	if (fences)
+		__free_fence_array(fences, args->num_cliprects);
+}
+
+static int
+await_fence_array(struct i915_execbuffer *eb,
+		  struct drm_syncobj **fences)
+{
+	const unsigned int nfences = eb->args->num_cliprects;
+	unsigned int n;
+	int err;
+
+	for (n = 0; n < nfences; n++) {
+		struct drm_syncobj *syncobj;
+		struct dma_fence *fence;
+		unsigned int flags;
+
+		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+		if (!(flags & I915_EXEC_FENCE_WAIT))
+			continue;
+
+		fence = drm_syncobj_fence_get(syncobj);
+		if (!fence)
+			return -EINVAL;
+
+		err = i915_request_await_dma_fence(eb->request, fence);
+		dma_fence_put(fence);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+static void
+signal_fence_array(struct i915_execbuffer *eb,
+		   struct drm_syncobj **fences)
+{
+	const unsigned int nfences = eb->args->num_cliprects;
+	struct dma_fence * const fence = &eb->request->fence;
+	unsigned int n;
+
+	for (n = 0; n < nfences; n++) {
+		struct drm_syncobj *syncobj;
+		unsigned int flags;
+
+		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+		if (!(flags & I915_EXEC_FENCE_SIGNAL))
+			continue;
+
+		drm_syncobj_replace_fence(syncobj, fence);
+	}
+}
+
+static int
+i915_gem_do_execbuffer(struct drm_device *dev,
+		       struct drm_file *file,
+		       struct drm_i915_gem_execbuffer2 *args,
+		       struct drm_i915_gem_exec_object2 *exec,
+		       struct drm_syncobj **fences)
+{
+	struct i915_execbuffer eb;
+	struct dma_fence *in_fence = NULL;
+	struct dma_fence *exec_fence = NULL;
+	struct sync_file *out_fence = NULL;
+	int out_fence_fd = -1;
+	int err;
+
+	BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
+	BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
+		     ~__EXEC_OBJECT_UNKNOWN_FLAGS);
+
+	eb.i915 = to_i915(dev);
+	eb.file = file;
+	eb.args = args;
+	if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
+		args->flags |= __EXEC_HAS_RELOC;
+
+	eb.exec = exec;
+	eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1);
+	eb.vma[0] = NULL;
+	eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1);
+
+	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
+	reloc_cache_init(&eb.reloc_cache, eb.i915);
+
+	eb.buffer_count = args->buffer_count;
+	eb.batch_start_offset = args->batch_start_offset;
+	eb.batch_len = args->batch_len;
+
+	eb.batch_flags = 0;
+	if (args->flags & I915_EXEC_SECURE) {
+		if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
+		    return -EPERM;
+
+		eb.batch_flags |= I915_DISPATCH_SECURE;
+	}
+	if (args->flags & I915_EXEC_IS_PINNED)
+		eb.batch_flags |= I915_DISPATCH_PINNED;
+
+	if (args->flags & I915_EXEC_FENCE_IN) {
+		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
+		if (!in_fence)
+			return -EINVAL;
+	}
+
+	if (args->flags & I915_EXEC_FENCE_SUBMIT) {
+		if (in_fence) {
+			err = -EINVAL;
+			goto err_in_fence;
+		}
+
+		exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
+		if (!exec_fence) {
+			err = -EINVAL;
+			goto err_in_fence;
+		}
+	}
+
+	if (args->flags & I915_EXEC_FENCE_OUT) {
+		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
+		if (out_fence_fd < 0) {
+			err = out_fence_fd;
+			goto err_exec_fence;
+		}
+	}
+
+	err = eb_create(&eb);
+	if (err)
+		goto err_out_fence;
+
+	GEM_BUG_ON(!eb.lut_size);
+
+	err = eb_select_context(&eb);
+	if (unlikely(err))
+		goto err_destroy;
+
+	/*
+	 * Take a local wakeref for preparing to dispatch the execbuf as
+	 * we expect to access the hardware fairly frequently in the
+	 * process. Upon first dispatch, we acquire another prolonged
+	 * wakeref that we hold until the GPU has been idle for at least
+	 * 100ms.
+	 */
+	intel_gt_pm_get(eb.i915);
+
+	err = i915_mutex_lock_interruptible(dev);
+	if (err)
+		goto err_rpm;
+
+	err = eb_select_engine(&eb, file, args);
+	if (unlikely(err))
+		goto err_unlock;
+
+	err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
+	if (unlikely(err))
+		goto err_engine;
+
+	err = eb_relocate(&eb);
+	if (err) {
+		/*
+		 * If the user expects the execobject.offset and
+		 * reloc.presumed_offset to be an exact match,
+		 * as for using NO_RELOC, then we cannot update
+		 * the execobject.offset until we have completed
+		 * relocation.
+		 */
+		args->flags &= ~__EXEC_HAS_RELOC;
+		goto err_vma;
+	}
+
+	if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) {
+		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
+		err = -EINVAL;
+		goto err_vma;
+	}
+	if (eb.batch_start_offset > eb.batch->size ||
+	    eb.batch_len > eb.batch->size - eb.batch_start_offset) {
+		DRM_DEBUG("Attempting to use out-of-bounds batch\n");
+		err = -EINVAL;
+		goto err_vma;
+	}
+
+	if (eb_use_cmdparser(&eb)) {
+		struct i915_vma *vma;
+
+		vma = eb_parse(&eb, drm_is_current_master(file));
+		if (IS_ERR(vma)) {
+			err = PTR_ERR(vma);
+			goto err_vma;
+		}
+
+		if (vma) {
+			/*
+			 * Batch parsed and accepted:
+			 *
+			 * Set the DISPATCH_SECURE bit to remove the NON_SECURE
+			 * bit from MI_BATCH_BUFFER_START commands issued in
+			 * the dispatch_execbuffer implementations. We
+			 * specifically don't want that set on batches the
+			 * command parser has accepted.
+			 */
+			eb.batch_flags |= I915_DISPATCH_SECURE;
+			eb.batch_start_offset = 0;
+			eb.batch = vma;
+		}
+	}
+
+	if (eb.batch_len == 0)
+		eb.batch_len = eb.batch->size - eb.batch_start_offset;
+
+	/*
+	 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
+	 * batch" bit. Hence we need to pin secure batches into the global gtt.
+	 * hsw should have this fixed, but bdw mucks it up again. */
+	if (eb.batch_flags & I915_DISPATCH_SECURE) {
+		struct i915_vma *vma;
+
+		/*
+		 * So on first glance it looks freaky that we pin the batch here
+		 * outside of the reservation loop. But:
+		 * - The batch is already pinned into the relevant ppgtt, so we
+		 *   already have the backing storage fully allocated.
+		 * - No other BO uses the global gtt (well contexts, but meh),
+		 *   so we don't really have issues with multiple objects not
+		 *   fitting due to fragmentation.
+		 * So this is actually safe.
+		 */
+		vma = i915_gem_object_ggtt_pin(eb.batch->obj, NULL, 0, 0, 0);
+		if (IS_ERR(vma)) {
+			err = PTR_ERR(vma);
+			goto err_vma;
+		}
+
+		eb.batch = vma;
+	}
+
+	/* All GPU relocation batches must be submitted prior to the user rq */
+	GEM_BUG_ON(eb.reloc_cache.rq);
+
+	/* Allocate a request for this batch buffer nice and early. */
+	eb.request = i915_request_create(eb.context);
+	if (IS_ERR(eb.request)) {
+		err = PTR_ERR(eb.request);
+		goto err_batch_unpin;
+	}
+
+	if (in_fence) {
+		err = i915_request_await_dma_fence(eb.request, in_fence);
+		if (err < 0)
+			goto err_request;
+	}
+
+	if (exec_fence) {
+		err = i915_request_await_execution(eb.request, exec_fence,
+						   eb.engine->bond_execute);
+		if (err < 0)
+			goto err_request;
+	}
+
+	if (fences) {
+		err = await_fence_array(&eb, fences);
+		if (err)
+			goto err_request;
+	}
+
+	if (out_fence_fd != -1) {
+		out_fence = sync_file_create(&eb.request->fence);
+		if (!out_fence) {
+			err = -ENOMEM;
+			goto err_request;
+		}
+	}
+
+	/*
+	 * Whilst this request exists, batch_obj will be on the
+	 * active_list, and so will hold the active reference. Only when this
+	 * request is retired will the the batch_obj be moved onto the
+	 * inactive_list and lose its active reference. Hence we do not need
+	 * to explicitly hold another reference here.
+	 */
+	eb.request->batch = eb.batch;
+
+	trace_i915_request_queue(eb.request, eb.batch_flags);
+	err = eb_submit(&eb);
+err_request:
+	add_to_client(eb.request, file);
+	i915_request_add(eb.request);
+
+	if (fences)
+		signal_fence_array(&eb, fences);
+
+	if (out_fence) {
+		if (err == 0) {
+			fd_install(out_fence_fd, out_fence->file);
+			args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
+			args->rsvd2 |= (u64)out_fence_fd << 32;
+			out_fence_fd = -1;
+		} else {
+			fput(out_fence->file);
+		}
+	}
+
+err_batch_unpin:
+	if (eb.batch_flags & I915_DISPATCH_SECURE)
+		i915_vma_unpin(eb.batch);
+err_vma:
+	if (eb.exec)
+		eb_release_vmas(&eb);
+err_engine:
+	eb_unpin_context(&eb);
+err_unlock:
+	mutex_unlock(&dev->struct_mutex);
+err_rpm:
+	intel_gt_pm_put(eb.i915);
+	i915_gem_context_put(eb.gem_context);
+err_destroy:
+	eb_destroy(&eb);
+err_out_fence:
+	if (out_fence_fd != -1)
+		put_unused_fd(out_fence_fd);
+err_exec_fence:
+	dma_fence_put(exec_fence);
+err_in_fence:
+	dma_fence_put(in_fence);
+	return err;
+}
+
+static size_t eb_element_size(void)
+{
+	return (sizeof(struct drm_i915_gem_exec_object2) +
+		sizeof(struct i915_vma *) +
+		sizeof(unsigned int));
+}
+
+static bool check_buffer_count(size_t count)
+{
+	const size_t sz = eb_element_size();
+
+	/*
+	 * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup
+	 * array size (see eb_create()). Otherwise, we can accept an array as
+	 * large as can be addressed (though use large arrays at your peril)!
+	 */
+
+	return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1);
+}
+
+/*
+ * Legacy execbuffer just creates an exec2 list from the original exec object
+ * list array and passes it to the real function.
+ */
+int
+i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file)
+{
+	struct drm_i915_gem_execbuffer *args = data;
+	struct drm_i915_gem_execbuffer2 exec2;
+	struct drm_i915_gem_exec_object *exec_list = NULL;
+	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
+	const size_t count = args->buffer_count;
+	unsigned int i;
+	int err;
+
+	if (!check_buffer_count(count)) {
+		DRM_DEBUG("execbuf2 with %zd buffers\n", count);
+		return -EINVAL;
+	}
+
+	exec2.buffers_ptr = args->buffers_ptr;
+	exec2.buffer_count = args->buffer_count;
+	exec2.batch_start_offset = args->batch_start_offset;
+	exec2.batch_len = args->batch_len;
+	exec2.DR1 = args->DR1;
+	exec2.DR4 = args->DR4;
+	exec2.num_cliprects = args->num_cliprects;
+	exec2.cliprects_ptr = args->cliprects_ptr;
+	exec2.flags = I915_EXEC_RENDER;
+	i915_execbuffer2_set_context_id(exec2, 0);
+
+	if (!i915_gem_check_execbuffer(&exec2))
+		return -EINVAL;
+
+	/* Copy in the exec list from userland */
+	exec_list = kvmalloc_array(count, sizeof(*exec_list),
+				   __GFP_NOWARN | GFP_KERNEL);
+	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
+				    __GFP_NOWARN | GFP_KERNEL);
+	if (exec_list == NULL || exec2_list == NULL) {
+		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
+			  args->buffer_count);
+		kvfree(exec_list);
+		kvfree(exec2_list);
+		return -ENOMEM;
+	}
+	err = copy_from_user(exec_list,
+			     u64_to_user_ptr(args->buffers_ptr),
+			     sizeof(*exec_list) * count);
+	if (err) {
+		DRM_DEBUG("copy %d exec entries failed %d\n",
+			  args->buffer_count, err);
+		kvfree(exec_list);
+		kvfree(exec2_list);
+		return -EFAULT;
+	}
+
+	for (i = 0; i < args->buffer_count; i++) {
+		exec2_list[i].handle = exec_list[i].handle;
+		exec2_list[i].relocation_count = exec_list[i].relocation_count;
+		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
+		exec2_list[i].alignment = exec_list[i].alignment;
+		exec2_list[i].offset = exec_list[i].offset;
+		if (INTEL_GEN(to_i915(dev)) < 4)
+			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
+		else
+			exec2_list[i].flags = 0;
+	}
+
+	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
+	if (exec2.flags & __EXEC_HAS_RELOC) {
+		struct drm_i915_gem_exec_object __user *user_exec_list =
+			u64_to_user_ptr(args->buffers_ptr);
+
+		/* Copy the new buffer offsets back to the user's exec list. */
+		for (i = 0; i < args->buffer_count; i++) {
+			if (!(exec2_list[i].offset & UPDATE))
+				continue;
+
+			exec2_list[i].offset =
+				gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
+			exec2_list[i].offset &= PIN_OFFSET_MASK;
+			if (__copy_to_user(&user_exec_list[i].offset,
+					   &exec2_list[i].offset,
+					   sizeof(user_exec_list[i].offset)))
+				break;
+		}
+	}
+
+	kvfree(exec_list);
+	kvfree(exec2_list);
+	return err;
+}
+
+int
+i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file)
+{
+	struct drm_i915_gem_execbuffer2 *args = data;
+	struct drm_i915_gem_exec_object2 *exec2_list;
+	struct drm_syncobj **fences = NULL;
+	const size_t count = args->buffer_count;
+	int err;
+
+	if (!check_buffer_count(count)) {
+		DRM_DEBUG("execbuf2 with %zd buffers\n", count);
+		return -EINVAL;
+	}
+
+	if (!i915_gem_check_execbuffer(args))
+		return -EINVAL;
+
+	/* Allocate an extra slot for use by the command parser */
+	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
+				    __GFP_NOWARN | GFP_KERNEL);
+	if (exec2_list == NULL) {
+		DRM_DEBUG("Failed to allocate exec list for %zd buffers\n",
+			  count);
+		return -ENOMEM;
+	}
+	if (copy_from_user(exec2_list,
+			   u64_to_user_ptr(args->buffers_ptr),
+			   sizeof(*exec2_list) * count)) {
+		DRM_DEBUG("copy %zd exec entries failed\n", count);
+		kvfree(exec2_list);
+		return -EFAULT;
+	}
+
+	if (args->flags & I915_EXEC_FENCE_ARRAY) {
+		fences = get_fence_array(args, file);
+		if (IS_ERR(fences)) {
+			kvfree(exec2_list);
+			return PTR_ERR(fences);
+		}
+	}
+
+	err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
+
+	/*
+	 * Now that we have begun execution of the batchbuffer, we ignore
+	 * any new error after this point. Also given that we have already
+	 * updated the associated relocations, we try to write out the current
+	 * object locations irrespective of any error.
+	 */
+	if (args->flags & __EXEC_HAS_RELOC) {
+		struct drm_i915_gem_exec_object2 __user *user_exec_list =
+			u64_to_user_ptr(args->buffers_ptr);
+		unsigned int i;
+
+		/* Copy the new buffer offsets back to the user's exec list. */
+		/*
+		 * Note: count * sizeof(*user_exec_list) does not overflow,
+		 * because we checked 'count' in check_buffer_count().
+		 *
+		 * And this range already got effectively checked earlier
+		 * when we did the "copy_from_user()" above.
+		 */
+		if (!user_access_begin(user_exec_list, count * sizeof(*user_exec_list)))
+			goto end;
+
+		for (i = 0; i < args->buffer_count; i++) {
+			if (!(exec2_list[i].offset & UPDATE))
+				continue;
+
+			exec2_list[i].offset =
+				gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
+			unsafe_put_user(exec2_list[i].offset,
+					&user_exec_list[i].offset,
+					end_user);
+		}
+end_user:
+		user_access_end();
+end:;
+	}
+
+	args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
+	put_fence_array(args, fences);
+	kvfree(exec2_list);
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_fence.c b/drivers/gpu/drm/i915/gem/i915_gem_fence.c
new file mode 100644
index 000000000000..cf0439e6be83
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_fence.c
@@ -0,0 +1,96 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_gem_object.h"
+
+struct stub_fence {
+	struct dma_fence dma;
+	struct i915_sw_fence chain;
+};
+
+static int __i915_sw_fence_call
+stub_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+	struct stub_fence *stub = container_of(fence, typeof(*stub), chain);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		dma_fence_signal(&stub->dma);
+		break;
+
+	case FENCE_FREE:
+		dma_fence_put(&stub->dma);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static const char *stub_driver_name(struct dma_fence *fence)
+{
+	return DRIVER_NAME;
+}
+
+static const char *stub_timeline_name(struct dma_fence *fence)
+{
+	return "object";
+}
+
+static void stub_release(struct dma_fence *fence)
+{
+	struct stub_fence *stub = container_of(fence, typeof(*stub), dma);
+
+	i915_sw_fence_fini(&stub->chain);
+
+	BUILD_BUG_ON(offsetof(typeof(*stub), dma));
+	dma_fence_free(&stub->dma);
+}
+
+static const struct dma_fence_ops stub_fence_ops = {
+	.get_driver_name = stub_driver_name,
+	.get_timeline_name = stub_timeline_name,
+	.release = stub_release,
+};
+
+struct dma_fence *
+i915_gem_object_lock_fence(struct drm_i915_gem_object *obj)
+{
+	struct stub_fence *stub;
+
+	assert_object_held(obj);
+
+	stub = kmalloc(sizeof(*stub), GFP_KERNEL);
+	if (!stub)
+		return NULL;
+
+	i915_sw_fence_init(&stub->chain, stub_notify);
+	dma_fence_init(&stub->dma, &stub_fence_ops, &stub->chain.wait.lock,
+		       to_i915(obj->base.dev)->mm.unordered_timeline,
+		       0);
+
+	if (i915_sw_fence_await_reservation(&stub->chain,
+					    obj->base.resv, NULL,
+					    true, I915_FENCE_TIMEOUT,
+					    I915_FENCE_GFP) < 0)
+		goto err;
+
+	reservation_object_add_excl_fence(obj->base.resv, &stub->dma);
+
+	return &stub->dma;
+
+err:
+	stub_release(&stub->dma);
+	return NULL;
+}
+
+void i915_gem_object_unlock_fence(struct drm_i915_gem_object *obj,
+				  struct dma_fence *fence)
+{
+	struct stub_fence *stub = container_of(fence, typeof(*stub), dma);
+
+	i915_sw_fence_commit(&stub->chain);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
new file mode 100644
index 000000000000..0c41e04ab8fa
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -0,0 +1,198 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/swiotlb.h>
+
+#include <drm/i915_drm.h>
+
+#include "i915_drv.h"
+#include "i915_gem.h"
+#include "i915_gem_object.h"
+#include "i915_scatterlist.h"
+#include "i915_utils.h"
+
+#define QUIET (__GFP_NORETRY | __GFP_NOWARN)
+#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN)
+
+static void internal_free_pages(struct sg_table *st)
+{
+	struct scatterlist *sg;
+
+	for (sg = st->sgl; sg; sg = __sg_next(sg)) {
+		if (sg_page(sg))
+			__free_pages(sg_page(sg), get_order(sg->length));
+	}
+
+	sg_free_table(st);
+	kfree(st);
+}
+
+static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct sg_table *st;
+	struct scatterlist *sg;
+	unsigned int sg_page_sizes;
+	unsigned int npages;
+	int max_order;
+	gfp_t gfp;
+
+	max_order = MAX_ORDER;
+#ifdef CONFIG_SWIOTLB
+	if (swiotlb_nr_tbl()) {
+		unsigned int max_segment;
+
+		max_segment = swiotlb_max_segment();
+		if (max_segment) {
+			max_segment = max_t(unsigned int, max_segment,
+					    PAGE_SIZE) >> PAGE_SHIFT;
+			max_order = min(max_order, ilog2(max_segment));
+		}
+	}
+#endif
+
+	gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
+	if (IS_I965GM(i915) || IS_I965G(i915)) {
+		/* 965gm cannot relocate objects above 4GiB. */
+		gfp &= ~__GFP_HIGHMEM;
+		gfp |= __GFP_DMA32;
+	}
+
+create_st:
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return -ENOMEM;
+
+	npages = obj->base.size / PAGE_SIZE;
+	if (sg_alloc_table(st, npages, GFP_KERNEL)) {
+		kfree(st);
+		return -ENOMEM;
+	}
+
+	sg = st->sgl;
+	st->nents = 0;
+	sg_page_sizes = 0;
+
+	do {
+		int order = min(fls(npages) - 1, max_order);
+		struct page *page;
+
+		do {
+			page = alloc_pages(gfp | (order ? QUIET : MAYFAIL),
+					   order);
+			if (page)
+				break;
+			if (!order--)
+				goto err;
+
+			/* Limit subsequent allocations as well */
+			max_order = order;
+		} while (1);
+
+		sg_set_page(sg, page, PAGE_SIZE << order, 0);
+		sg_page_sizes |= PAGE_SIZE << order;
+		st->nents++;
+
+		npages -= 1 << order;
+		if (!npages) {
+			sg_mark_end(sg);
+			break;
+		}
+
+		sg = __sg_next(sg);
+	} while (1);
+
+	if (i915_gem_gtt_prepare_pages(obj, st)) {
+		/* Failed to dma-map try again with single page sg segments */
+		if (get_order(st->sgl->length)) {
+			internal_free_pages(st);
+			max_order = 0;
+			goto create_st;
+		}
+		goto err;
+	}
+
+	/* Mark the pages as dontneed whilst they are still pinned. As soon
+	 * as they are unpinned they are allowed to be reaped by the shrinker,
+	 * and the caller is expected to repopulate - the contents of this
+	 * object are only valid whilst active and pinned.
+	 */
+	obj->mm.madv = I915_MADV_DONTNEED;
+
+	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
+
+	return 0;
+
+err:
+	sg_set_page(sg, NULL, 0, 0);
+	sg_mark_end(sg);
+	internal_free_pages(st);
+
+	return -ENOMEM;
+}
+
+static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj,
+					       struct sg_table *pages)
+{
+	i915_gem_gtt_finish_pages(obj, pages);
+	internal_free_pages(pages);
+
+	obj->mm.dirty = false;
+	obj->mm.madv = I915_MADV_WILLNEED;
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
+	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
+		 I915_GEM_OBJECT_IS_SHRINKABLE,
+	.get_pages = i915_gem_object_get_pages_internal,
+	.put_pages = i915_gem_object_put_pages_internal,
+};
+
+/**
+ * i915_gem_object_create_internal: create an object with volatile pages
+ * @i915: the i915 device
+ * @size: the size in bytes of backing storage to allocate for the object
+ *
+ * Creates a new object that wraps some internal memory for private use.
+ * This object is not backed by swappable storage, and as such its contents
+ * are volatile and only valid whilst pinned. If the object is reaped by the
+ * shrinker, its pages and data will be discarded. Equally, it is not a full
+ * GEM object and so not valid for access from userspace. This makes it useful
+ * for hardware interfaces like ringbuffers (which are pinned from the time
+ * the request is written to the time the hardware stops accessing it), but
+ * not for contexts (which need to be preserved when not active for later
+ * reuse). Note that it is not cleared upon allocation.
+ */
+struct drm_i915_gem_object *
+i915_gem_object_create_internal(struct drm_i915_private *i915,
+				phys_addr_t size)
+{
+	struct drm_i915_gem_object *obj;
+	unsigned int cache_level;
+
+	GEM_BUG_ON(!size);
+	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
+
+	if (overflows_type(size, obj->base.size))
+		return ERR_PTR(-E2BIG);
+
+	obj = i915_gem_object_alloc();
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+
+	drm_gem_private_object_init(&i915->drm, &obj->base, size);
+	i915_gem_object_init(obj, &i915_gem_object_internal_ops);
+
+	obj->read_domains = I915_GEM_DOMAIN_CPU;
+	obj->write_domain = I915_GEM_DOMAIN_CPU;
+
+	cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
+	i915_gem_object_set_cache_coherency(obj, cache_level);
+
+	return obj;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
new file mode 100644
index 000000000000..ddc7f2a52b3e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
@@ -0,0 +1,52 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef I915_GEM_IOCTLS_H
+#define I915_GEM_IOCTLS_H
+
+struct drm_device;
+struct drm_file;
+
+int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+int i915_gem_create_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file);
+int i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+int i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file);
+int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file);
+int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file);
+int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
+int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file);
+int i915_gem_pread_ioctl(struct drm_device *dev, void *data,
+			 struct drm_file *file);
+int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file);
+int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file);
+int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file);
+int i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
+int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
new file mode 100644
index 000000000000..391621ee3cbb
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -0,0 +1,508 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/mman.h>
+#include <linux/sizes.h>
+
+#include "i915_drv.h"
+#include "i915_gem_gtt.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+#include "i915_vma.h"
+#include "intel_drv.h"
+
+static inline bool
+__vma_matches(struct vm_area_struct *vma, struct file *filp,
+	      unsigned long addr, unsigned long size)
+{
+	if (vma->vm_file != filp)
+		return false;
+
+	return vma->vm_start == addr &&
+	       (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
+}
+
+/**
+ * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
+ *			 it is mapped to.
+ * @dev: drm device
+ * @data: ioctl data blob
+ * @file: drm file
+ *
+ * While the mapping holds a reference on the contents of the object, it doesn't
+ * imply a ref on the object itself.
+ *
+ * IMPORTANT:
+ *
+ * DRM driver writers who look a this function as an example for how to do GEM
+ * mmap support, please don't implement mmap support like here. The modern way
+ * to implement DRM mmap support is with an mmap offset ioctl (like
+ * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
+ * That way debug tooling like valgrind will understand what's going on, hiding
+ * the mmap call in a driver private ioctl will break that. The i915 driver only
+ * does cpu mmaps this way because we didn't know better.
+ */
+int
+i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
+		    struct drm_file *file)
+{
+	struct drm_i915_gem_mmap *args = data;
+	struct drm_i915_gem_object *obj;
+	unsigned long addr;
+
+	if (args->flags & ~(I915_MMAP_WC))
+		return -EINVAL;
+
+	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
+		return -ENODEV;
+
+	obj = i915_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	/* prime objects have no backing filp to GEM mmap
+	 * pages from.
+	 */
+	if (!obj->base.filp) {
+		addr = -ENXIO;
+		goto err;
+	}
+
+	if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
+		addr = -EINVAL;
+		goto err;
+	}
+
+	addr = vm_mmap(obj->base.filp, 0, args->size,
+		       PROT_READ | PROT_WRITE, MAP_SHARED,
+		       args->offset);
+	if (IS_ERR_VALUE(addr))
+		goto err;
+
+	if (args->flags & I915_MMAP_WC) {
+		struct mm_struct *mm = current->mm;
+		struct vm_area_struct *vma;
+
+		if (down_write_killable(&mm->mmap_sem)) {
+			addr = -EINTR;
+			goto err;
+		}
+		vma = find_vma(mm, addr);
+		if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
+			vma->vm_page_prot =
+				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+		else
+			addr = -ENOMEM;
+		up_write(&mm->mmap_sem);
+		if (IS_ERR_VALUE(addr))
+			goto err;
+
+		/* This may race, but that's ok, it only gets set */
+		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
+	}
+	i915_gem_object_put(obj);
+
+	args->addr_ptr = (u64)addr;
+	return 0;
+
+err:
+	i915_gem_object_put(obj);
+	return addr;
+}
+
+static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
+{
+	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
+}
+
+/**
+ * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
+ *
+ * A history of the GTT mmap interface:
+ *
+ * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
+ *     aligned and suitable for fencing, and still fit into the available
+ *     mappable space left by the pinned display objects. A classic problem
+ *     we called the page-fault-of-doom where we would ping-pong between
+ *     two objects that could not fit inside the GTT and so the memcpy
+ *     would page one object in at the expense of the other between every
+ *     single byte.
+ *
+ * 1 - Objects can be any size, and have any compatible fencing (X Y, or none
+ *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
+ *     object is too large for the available space (or simply too large
+ *     for the mappable aperture!), a view is created instead and faulted
+ *     into userspace. (This view is aligned and sized appropriately for
+ *     fenced access.)
+ *
+ * 2 - Recognise WC as a separate cache domain so that we can flush the
+ *     delayed writes via GTT before performing direct access via WC.
+ *
+ * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
+ *     pagefault; swapin remains transparent.
+ *
+ * Restrictions:
+ *
+ *  * snoopable objects cannot be accessed via the GTT. It can cause machine
+ *    hangs on some architectures, corruption on others. An attempt to service
+ *    a GTT page fault from a snoopable object will generate a SIGBUS.
+ *
+ *  * the object must be able to fit into RAM (physical memory, though no
+ *    limited to the mappable aperture).
+ *
+ *
+ * Caveats:
+ *
+ *  * a new GTT page fault will synchronize rendering from the GPU and flush
+ *    all data to system memory. Subsequent access will not be synchronized.
+ *
+ *  * all mappings are revoked on runtime device suspend.
+ *
+ *  * there are only 8, 16 or 32 fence registers to share between all users
+ *    (older machines require fence register for display and blitter access
+ *    as well). Contention of the fence registers will cause the previous users
+ *    to be unmapped and any new access will generate new page faults.
+ *
+ *  * running out of memory while servicing a fault may generate a SIGBUS,
+ *    rather than the expected SIGSEGV.
+ */
+int i915_gem_mmap_gtt_version(void)
+{
+	return 3;
+}
+
+static inline struct i915_ggtt_view
+compute_partial_view(const struct drm_i915_gem_object *obj,
+		     pgoff_t page_offset,
+		     unsigned int chunk)
+{
+	struct i915_ggtt_view view;
+
+	if (i915_gem_object_is_tiled(obj))
+		chunk = roundup(chunk, tile_row_pages(obj));
+
+	view.type = I915_GGTT_VIEW_PARTIAL;
+	view.partial.offset = rounddown(page_offset, chunk);
+	view.partial.size =
+		min_t(unsigned int, chunk,
+		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
+
+	/* If the partial covers the entire object, just create a normal VMA. */
+	if (chunk >= obj->base.size >> PAGE_SHIFT)
+		view.type = I915_GGTT_VIEW_NORMAL;
+
+	return view;
+}
+
+/**
+ * i915_gem_fault - fault a page into the GTT
+ * @vmf: fault info
+ *
+ * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
+ * from userspace.  The fault handler takes care of binding the object to
+ * the GTT (if needed), allocating and programming a fence register (again,
+ * only if needed based on whether the old reg is still valid or the object
+ * is tiled) and inserting a new PTE into the faulting process.
+ *
+ * Note that the faulting process may involve evicting existing objects
+ * from the GTT and/or fence registers to make room.  So performance may
+ * suffer if the GTT working set is large or there are few fence registers
+ * left.
+ *
+ * The current feature set supported by i915_gem_fault() and thus GTT mmaps
+ * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
+ */
+vm_fault_t i915_gem_fault(struct vm_fault *vmf)
+{
+#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
+	struct vm_area_struct *area = vmf->vma;
+	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct intel_runtime_pm *rpm = &i915->runtime_pm;
+	struct i915_ggtt *ggtt = &i915->ggtt;
+	bool write = area->vm_flags & VM_WRITE;
+	intel_wakeref_t wakeref;
+	struct i915_vma *vma;
+	pgoff_t page_offset;
+	int srcu;
+	int ret;
+
+	/* Sanity check that we allow writing into this object */
+	if (i915_gem_object_is_readonly(obj) && write)
+		return VM_FAULT_SIGBUS;
+
+	/* We don't use vmf->pgoff since that has the fake offset */
+	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
+
+	trace_i915_gem_object_fault(obj, page_offset, true, write);
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		goto err;
+
+	wakeref = intel_runtime_pm_get(rpm);
+
+	srcu = i915_reset_trylock(i915);
+	if (srcu < 0) {
+		ret = srcu;
+		goto err_rpm;
+	}
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		goto err_reset;
+
+	/* Access to snoopable pages through the GTT is incoherent. */
+	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
+		ret = -EFAULT;
+		goto err_unlock;
+	}
+
+	/* Now pin it into the GTT as needed */
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+				       PIN_MAPPABLE |
+				       PIN_NONBLOCK |
+				       PIN_NONFAULT);
+	if (IS_ERR(vma)) {
+		/* Use a partial view if it is bigger than available space */
+		struct i915_ggtt_view view =
+			compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
+		unsigned int flags;
+
+		flags = PIN_MAPPABLE;
+		if (view.type == I915_GGTT_VIEW_NORMAL)
+			flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
+
+		/*
+		 * Userspace is now writing through an untracked VMA, abandon
+		 * all hope that the hardware is able to track future writes.
+		 */
+		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
+
+		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+		if (IS_ERR(vma) && !view.type) {
+			flags = PIN_MAPPABLE;
+			view.type = I915_GGTT_VIEW_PARTIAL;
+			vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+		}
+	}
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_unlock;
+	}
+
+	ret = i915_vma_pin_fence(vma);
+	if (ret)
+		goto err_unpin;
+
+	/* Finally, remap it using the new GTT offset */
+	ret = remap_io_mapping(area,
+			       area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
+			       (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
+			       min_t(u64, vma->size, area->vm_end - area->vm_start),
+			       &ggtt->iomap);
+	if (ret)
+		goto err_fence;
+
+	/* Mark as being mmapped into userspace for later revocation */
+	assert_rpm_wakelock_held(rpm);
+	if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
+		list_add(&obj->userfault_link, &i915->ggtt.userfault_list);
+	if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
+		intel_wakeref_auto(&i915->ggtt.userfault_wakeref,
+				   msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
+	GEM_BUG_ON(!obj->userfault_count);
+
+	i915_vma_set_ggtt_write(vma);
+
+err_fence:
+	i915_vma_unpin_fence(vma);
+err_unpin:
+	__i915_vma_unpin(vma);
+err_unlock:
+	mutex_unlock(&dev->struct_mutex);
+err_reset:
+	i915_reset_unlock(i915, srcu);
+err_rpm:
+	intel_runtime_pm_put(rpm, wakeref);
+	i915_gem_object_unpin_pages(obj);
+err:
+	switch (ret) {
+	case -EIO:
+		/*
+		 * We eat errors when the gpu is terminally wedged to avoid
+		 * userspace unduly crashing (gl has no provisions for mmaps to
+		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
+		 * and so needs to be reported.
+		 */
+		if (!i915_terminally_wedged(i915))
+			return VM_FAULT_SIGBUS;
+		/* else: fall through */
+	case -EAGAIN:
+		/*
+		 * EAGAIN means the gpu is hung and we'll wait for the error
+		 * handler to reset everything when re-faulting in
+		 * i915_mutex_lock_interruptible.
+		 */
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+	case -EBUSY:
+		/*
+		 * EBUSY is ok: this just means that another thread
+		 * already did the job.
+		 */
+		return VM_FAULT_NOPAGE;
+	case -ENOMEM:
+		return VM_FAULT_OOM;
+	case -ENOSPC:
+	case -EFAULT:
+		return VM_FAULT_SIGBUS;
+	default:
+		WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret);
+		return VM_FAULT_SIGBUS;
+	}
+}
+
+void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma;
+
+	GEM_BUG_ON(!obj->userfault_count);
+
+	obj->userfault_count = 0;
+	list_del(&obj->userfault_link);
+	drm_vma_node_unmap(&obj->base.vma_node,
+			   obj->base.dev->anon_inode->i_mapping);
+
+	for_each_ggtt_vma(vma, obj)
+		i915_vma_unset_userfault(vma);
+}
+
+/**
+ * i915_gem_object_release_mmap - remove physical page mappings
+ * @obj: obj in question
+ *
+ * Preserve the reservation of the mmapping with the DRM core code, but
+ * relinquish ownership of the pages back to the system.
+ *
+ * It is vital that we remove the page mapping if we have mapped a tiled
+ * object through the GTT and then lose the fence register due to
+ * resource pressure. Similarly if the object has been moved out of the
+ * aperture, than pages mapped into userspace must be revoked. Removing the
+ * mapping will then trigger a page fault on the next user access, allowing
+ * fixup by i915_gem_fault().
+ */
+void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	intel_wakeref_t wakeref;
+
+	/* Serialisation between user GTT access and our code depends upon
+	 * revoking the CPU's PTE whilst the mutex is held. The next user
+	 * pagefault then has to wait until we release the mutex.
+	 *
+	 * Note that RPM complicates somewhat by adding an additional
+	 * requirement that operations to the GGTT be made holding the RPM
+	 * wakeref.
+	 */
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+	if (!obj->userfault_count)
+		goto out;
+
+	__i915_gem_object_release_mmap(obj);
+
+	/* Ensure that the CPU's PTE are revoked and there are not outstanding
+	 * memory transactions from userspace before we return. The TLB
+	 * flushing implied above by changing the PTE above *should* be
+	 * sufficient, an extra barrier here just provides us with a bit
+	 * of paranoid documentation about our requirement to serialise
+	 * memory writes before touching registers / GSM.
+	 */
+	wmb();
+
+out:
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+}
+
+static int create_mmap_offset(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	int err;
+
+	err = drm_gem_create_mmap_offset(&obj->base);
+	if (likely(!err))
+		return 0;
+
+	/* Attempt to reap some mmap space from dead objects */
+	do {
+		err = i915_gem_wait_for_idle(i915,
+					     I915_WAIT_INTERRUPTIBLE,
+					     MAX_SCHEDULE_TIMEOUT);
+		if (err)
+			break;
+
+		i915_gem_drain_freed_objects(i915);
+		err = drm_gem_create_mmap_offset(&obj->base);
+		if (!err)
+			break;
+
+	} while (flush_delayed_work(&i915->gem.retire_work));
+
+	return err;
+}
+
+int
+i915_gem_mmap_gtt(struct drm_file *file,
+		  struct drm_device *dev,
+		  u32 handle,
+		  u64 *offset)
+{
+	struct drm_i915_gem_object *obj;
+	int ret;
+
+	obj = i915_gem_object_lookup(file, handle);
+	if (!obj)
+		return -ENOENT;
+
+	ret = create_mmap_offset(obj);
+	if (ret == 0)
+		*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
+
+	i915_gem_object_put(obj);
+	return ret;
+}
+
+/**
+ * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
+ * @dev: DRM device
+ * @data: GTT mapping ioctl data
+ * @file: GEM object info
+ *
+ * Simply returns the fake offset to userspace so it can mmap it.
+ * The mmap call will end up in drm_gem_mmap(), which will set things
+ * up so we can get faults in the handler above.
+ *
+ * The fault handler will take care of binding the object into the GTT
+ * (since it may have been evicted to make room for something), allocating
+ * a fence register, and mapping the appropriate aperture address into
+ * userspace.
+ */
+int
+i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file)
+{
+	struct drm_i915_gem_mmap_gtt *args = data;
+
+	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_mman.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
new file mode 100644
index 000000000000..be6caccce0c5
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "display/intel_frontbuffer.h"
+
+#include "i915_drv.h"
+#include "i915_gem_clflush.h"
+#include "i915_gem_context.h"
+#include "i915_gem_object.h"
+#include "i915_globals.h"
+
+static struct i915_global_object {
+	struct i915_global base;
+	struct kmem_cache *slab_objects;
+} global;
+
+struct drm_i915_gem_object *i915_gem_object_alloc(void)
+{
+	return kmem_cache_zalloc(global.slab_objects, GFP_KERNEL);
+}
+
+void i915_gem_object_free(struct drm_i915_gem_object *obj)
+{
+	return kmem_cache_free(global.slab_objects, obj);
+}
+
+static void
+frontbuffer_retire(struct i915_active_request *active,
+		   struct i915_request *request)
+{
+	struct drm_i915_gem_object *obj =
+		container_of(active, typeof(*obj), frontbuffer_write);
+
+	intel_fb_obj_flush(obj, ORIGIN_CS);
+}
+
+void i915_gem_object_init(struct drm_i915_gem_object *obj,
+			  const struct drm_i915_gem_object_ops *ops)
+{
+	mutex_init(&obj->mm.lock);
+
+	spin_lock_init(&obj->vma.lock);
+	INIT_LIST_HEAD(&obj->vma.list);
+
+	INIT_LIST_HEAD(&obj->lut_list);
+	INIT_LIST_HEAD(&obj->batch_pool_link);
+
+	init_rcu_head(&obj->rcu);
+
+	obj->ops = ops;
+
+	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
+	i915_active_request_init(&obj->frontbuffer_write,
+				 NULL, frontbuffer_retire);
+
+	obj->mm.madv = I915_MADV_WILLNEED;
+	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
+	mutex_init(&obj->mm.get_page.lock);
+}
+
+/**
+ * Mark up the object's coherency levels for a given cache_level
+ * @obj: #drm_i915_gem_object
+ * @cache_level: cache level
+ */
+void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
+					 unsigned int cache_level)
+{
+	obj->cache_level = cache_level;
+
+	if (cache_level != I915_CACHE_NONE)
+		obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
+				       I915_BO_CACHE_COHERENT_FOR_WRITE);
+	else if (HAS_LLC(to_i915(obj->base.dev)))
+		obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
+	else
+		obj->cache_coherent = 0;
+
+	obj->cache_dirty =
+		!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
+}
+
+void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
+{
+	struct drm_i915_gem_object *obj = to_intel_bo(gem);
+	struct drm_i915_file_private *fpriv = file->driver_priv;
+	struct i915_lut_handle *lut, *ln;
+	LIST_HEAD(close);
+
+	i915_gem_object_lock(obj);
+	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
+		struct i915_gem_context *ctx = lut->ctx;
+
+		if (ctx->file_priv != fpriv)
+			continue;
+
+		i915_gem_context_get(ctx);
+		list_move(&lut->obj_link, &close);
+	}
+	i915_gem_object_unlock(obj);
+
+	list_for_each_entry_safe(lut, ln, &close, obj_link) {
+		struct i915_gem_context *ctx = lut->ctx;
+		struct i915_vma *vma;
+
+		/*
+		 * We allow the process to have multiple handles to the same
+		 * vma, in the same fd namespace, by virtue of flink/open.
+		 */
+
+		mutex_lock(&ctx->mutex);
+		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
+		if (vma) {
+			GEM_BUG_ON(vma->obj != obj);
+			GEM_BUG_ON(!atomic_read(&vma->open_count));
+			if (atomic_dec_and_test(&vma->open_count) &&
+			    !i915_vma_is_ggtt(vma))
+				i915_vma_close(vma);
+		}
+		mutex_unlock(&ctx->mutex);
+
+		i915_gem_context_put(lut->ctx);
+		i915_lut_handle_free(lut);
+		i915_gem_object_put(obj);
+	}
+}
+
+static void __i915_gem_free_objects(struct drm_i915_private *i915,
+				    struct llist_node *freed)
+{
+	struct drm_i915_gem_object *obj, *on;
+	intel_wakeref_t wakeref;
+
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+	llist_for_each_entry_safe(obj, on, freed, freed) {
+		struct i915_vma *vma, *vn;
+
+		trace_i915_gem_object_destroy(obj);
+
+		mutex_lock(&i915->drm.struct_mutex);
+
+		GEM_BUG_ON(i915_gem_object_is_active(obj));
+		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
+			GEM_BUG_ON(i915_vma_is_active(vma));
+			vma->flags &= ~I915_VMA_PIN_MASK;
+			i915_vma_destroy(vma);
+		}
+		GEM_BUG_ON(!list_empty(&obj->vma.list));
+		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
+
+		/*
+		 * This serializes freeing with the shrinker. Since the free
+		 * is delayed, first by RCU then by the workqueue, we want the
+		 * shrinker to be able to free pages of unreferenced objects,
+		 * or else we may oom whilst there are plenty of deferred
+		 * freed objects.
+		 */
+		if (i915_gem_object_has_pages(obj) &&
+		    i915_gem_object_is_shrinkable(obj)) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&i915->mm.obj_lock, flags);
+			list_del_init(&obj->mm.link);
+			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+		}
+
+		mutex_unlock(&i915->drm.struct_mutex);
+
+		GEM_BUG_ON(atomic_read(&obj->bind_count));
+		GEM_BUG_ON(obj->userfault_count);
+		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
+		GEM_BUG_ON(!list_empty(&obj->lut_list));
+
+		if (obj->ops->release)
+			obj->ops->release(obj);
+
+		atomic_set(&obj->mm.pages_pin_count, 0);
+		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+		GEM_BUG_ON(i915_gem_object_has_pages(obj));
+
+		if (obj->base.import_attach)
+			drm_prime_gem_destroy(&obj->base, NULL);
+
+		drm_gem_object_release(&obj->base);
+
+		bitmap_free(obj->bit_17);
+		i915_gem_object_free(obj);
+
+		GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
+		atomic_dec(&i915->mm.free_count);
+
+		cond_resched();
+	}
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+}
+
+void i915_gem_flush_free_objects(struct drm_i915_private *i915)
+{
+	struct llist_node *freed;
+
+	/* Free the oldest, most stale object to keep the free_list short */
+	freed = NULL;
+	if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
+		/* Only one consumer of llist_del_first() allowed */
+		spin_lock(&i915->mm.free_lock);
+		freed = llist_del_first(&i915->mm.free_list);
+		spin_unlock(&i915->mm.free_lock);
+	}
+	if (unlikely(freed)) {
+		freed->next = NULL;
+		__i915_gem_free_objects(i915, freed);
+	}
+}
+
+static void __i915_gem_free_work(struct work_struct *work)
+{
+	struct drm_i915_private *i915 =
+		container_of(work, struct drm_i915_private, mm.free_work);
+	struct llist_node *freed;
+
+	/*
+	 * All file-owned VMA should have been released by this point through
+	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
+	 * However, the object may also be bound into the global GTT (e.g.
+	 * older GPUs without per-process support, or for direct access through
+	 * the GTT either for the user or for scanout). Those VMA still need to
+	 * unbound now.
+	 */
+
+	spin_lock(&i915->mm.free_lock);
+	while ((freed = llist_del_all(&i915->mm.free_list))) {
+		spin_unlock(&i915->mm.free_lock);
+
+		__i915_gem_free_objects(i915, freed);
+		if (need_resched())
+			return;
+
+		spin_lock(&i915->mm.free_lock);
+	}
+	spin_unlock(&i915->mm.free_lock);
+}
+
+static void __i915_gem_free_object_rcu(struct rcu_head *head)
+{
+	struct drm_i915_gem_object *obj =
+		container_of(head, typeof(*obj), rcu);
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+	/*
+	 * We reuse obj->rcu for the freed list, so we had better not treat
+	 * it like a rcu_head from this point forwards. And we expect all
+	 * objects to be freed via this path.
+	 */
+	destroy_rcu_head(&obj->rcu);
+
+	/*
+	 * Since we require blocking on struct_mutex to unbind the freed
+	 * object from the GPU before releasing resources back to the
+	 * system, we can not do that directly from the RCU callback (which may
+	 * be a softirq context), but must instead then defer that work onto a
+	 * kthread. We use the RCU callback rather than move the freed object
+	 * directly onto the work queue so that we can mix between using the
+	 * worker and performing frees directly from subsequent allocations for
+	 * crude but effective memory throttling.
+	 */
+	if (llist_add(&obj->freed, &i915->mm.free_list))
+		queue_work(i915->wq, &i915->mm.free_work);
+}
+
+void i915_gem_free_object(struct drm_gem_object *gem_obj)
+{
+	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
+
+	/*
+	 * Before we free the object, make sure any pure RCU-only
+	 * read-side critical sections are complete, e.g.
+	 * i915_gem_busy_ioctl(). For the corresponding synchronized
+	 * lookup see i915_gem_object_lookup_rcu().
+	 */
+	atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
+	call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
+}
+
+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+	return (domain == I915_GEM_DOMAIN_GTT ?
+		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
+static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+	return !(obj->cache_level == I915_CACHE_NONE ||
+		 obj->cache_level == I915_CACHE_WT);
+}
+
+void
+i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
+				   unsigned int flush_domains)
+{
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	struct i915_vma *vma;
+
+	assert_object_held(obj);
+
+	if (!(obj->write_domain & flush_domains))
+		return;
+
+	switch (obj->write_domain) {
+	case I915_GEM_DOMAIN_GTT:
+		i915_gem_flush_ggtt_writes(dev_priv);
+
+		intel_fb_obj_flush(obj,
+				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+
+		for_each_ggtt_vma(vma, obj) {
+			if (vma->iomap)
+				continue;
+
+			i915_vma_unset_ggtt_write(vma);
+		}
+		break;
+
+	case I915_GEM_DOMAIN_WC:
+		wmb();
+		break;
+
+	case I915_GEM_DOMAIN_CPU:
+		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+		break;
+
+	case I915_GEM_DOMAIN_RENDER:
+		if (gpu_write_needs_clflush(obj))
+			obj->cache_dirty = true;
+		break;
+	}
+
+	obj->write_domain = 0;
+}
+
+void i915_gem_init__objects(struct drm_i915_private *i915)
+{
+	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
+}
+
+static void i915_global_objects_shrink(void)
+{
+	kmem_cache_shrink(global.slab_objects);
+}
+
+static void i915_global_objects_exit(void)
+{
+	kmem_cache_destroy(global.slab_objects);
+}
+
+static struct i915_global_object global = { {
+	.shrink = i915_global_objects_shrink,
+	.exit = i915_global_objects_exit,
+} };
+
+int __init i915_global_objects_init(void)
+{
+	global.slab_objects =
+		KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
+	if (!global.slab_objects)
+		return -ENOMEM;
+
+	i915_global_register(&global.base);
+	return 0;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/huge_gem_object.c"
+#include "selftests/huge_pages.c"
+#include "selftests/i915_gem_object.c"
+#include "selftests/i915_gem_coherency.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
new file mode 100644
index 000000000000..dfebd5706f16
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -0,0 +1,430 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __I915_GEM_OBJECT_H__
+#define __I915_GEM_OBJECT_H__
+
+#include <drm/drm_gem.h>
+#include <drm/drm_file.h>
+#include <drm/drm_device.h>
+
+#include <drm/i915_drm.h>
+
+#include "i915_gem_object_types.h"
+
+#include "i915_gem_gtt.h"
+
+void i915_gem_init__objects(struct drm_i915_private *i915);
+
+struct drm_i915_gem_object *i915_gem_object_alloc(void);
+void i915_gem_object_free(struct drm_i915_gem_object *obj);
+
+void i915_gem_object_init(struct drm_i915_gem_object *obj,
+			  const struct drm_i915_gem_object_ops *ops);
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size);
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
+				       const void *data, size_t size);
+
+extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops;
+void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
+				     struct sg_table *pages,
+				     bool needs_clflush);
+
+int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align);
+
+void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
+void i915_gem_free_object(struct drm_gem_object *obj);
+
+void i915_gem_flush_free_objects(struct drm_i915_private *i915);
+
+struct sg_table *
+__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj);
+void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
+
+/**
+ * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
+ * @filp: DRM file private date
+ * @handle: userspace handle
+ *
+ * Returns:
+ *
+ * A pointer to the object named by the handle if such exists on @filp, NULL
+ * otherwise. This object is only valid whilst under the RCU read lock, and
+ * note carefully the object may be in the process of being destroyed.
+ */
+static inline struct drm_i915_gem_object *
+i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle)
+{
+#ifdef CONFIG_LOCKDEP
+	WARN_ON(debug_locks && !lock_is_held(&rcu_lock_map));
+#endif
+	return idr_find(&file->object_idr, handle);
+}
+
+static inline struct drm_i915_gem_object *
+i915_gem_object_lookup(struct drm_file *file, u32 handle)
+{
+	struct drm_i915_gem_object *obj;
+
+	rcu_read_lock();
+	obj = i915_gem_object_lookup_rcu(file, handle);
+	if (obj && !kref_get_unless_zero(&obj->base.refcount))
+		obj = NULL;
+	rcu_read_unlock();
+
+	return obj;
+}
+
+__deprecated
+extern struct drm_gem_object *
+drm_gem_object_lookup(struct drm_file *file, u32 handle);
+
+__attribute__((nonnull))
+static inline struct drm_i915_gem_object *
+i915_gem_object_get(struct drm_i915_gem_object *obj)
+{
+	drm_gem_object_get(&obj->base);
+	return obj;
+}
+
+__attribute__((nonnull))
+static inline void
+i915_gem_object_put(struct drm_i915_gem_object *obj)
+{
+	__drm_gem_object_put(&obj->base);
+}
+
+#define assert_object_held(obj) reservation_object_assert_held((obj)->base.resv)
+
+static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
+{
+	reservation_object_lock(obj->base.resv, NULL);
+}
+
+static inline int
+i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj)
+{
+	return reservation_object_lock_interruptible(obj->base.resv, NULL);
+}
+
+static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
+{
+	reservation_object_unlock(obj->base.resv);
+}
+
+struct dma_fence *
+i915_gem_object_lock_fence(struct drm_i915_gem_object *obj);
+void i915_gem_object_unlock_fence(struct drm_i915_gem_object *obj,
+				  struct dma_fence *fence);
+
+static inline void
+i915_gem_object_set_readonly(struct drm_i915_gem_object *obj)
+{
+	obj->base.vma_node.readonly = true;
+}
+
+static inline bool
+i915_gem_object_is_readonly(const struct drm_i915_gem_object *obj)
+{
+	return obj->base.vma_node.readonly;
+}
+
+static inline bool
+i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
+{
+	return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
+}
+
+static inline bool
+i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj)
+{
+	return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE;
+}
+
+static inline bool
+i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj)
+{
+	return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY;
+}
+
+static inline bool
+i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
+{
+	return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL;
+}
+
+static inline bool
+i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
+{
+	return READ_ONCE(obj->active_count);
+}
+
+static inline bool
+i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj)
+{
+	return READ_ONCE(obj->framebuffer_references);
+}
+
+static inline unsigned int
+i915_gem_object_get_tiling(const struct drm_i915_gem_object *obj)
+{
+	return obj->tiling_and_stride & TILING_MASK;
+}
+
+static inline bool
+i915_gem_object_is_tiled(const struct drm_i915_gem_object *obj)
+{
+	return i915_gem_object_get_tiling(obj) != I915_TILING_NONE;
+}
+
+static inline unsigned int
+i915_gem_object_get_stride(const struct drm_i915_gem_object *obj)
+{
+	return obj->tiling_and_stride & STRIDE_MASK;
+}
+
+static inline unsigned int
+i915_gem_tile_height(unsigned int tiling)
+{
+	GEM_BUG_ON(!tiling);
+	return tiling == I915_TILING_Y ? 32 : 8;
+}
+
+static inline unsigned int
+i915_gem_object_get_tile_height(const struct drm_i915_gem_object *obj)
+{
+	return i915_gem_tile_height(i915_gem_object_get_tiling(obj));
+}
+
+static inline unsigned int
+i915_gem_object_get_tile_row_size(const struct drm_i915_gem_object *obj)
+{
+	return (i915_gem_object_get_stride(obj) *
+		i915_gem_object_get_tile_height(obj));
+}
+
+int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
+			       unsigned int tiling, unsigned int stride);
+
+struct scatterlist *
+i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+		       unsigned int n, unsigned int *offset);
+
+struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj,
+			 unsigned int n);
+
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+			       unsigned int n);
+
+dma_addr_t
+i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
+				    unsigned long n,
+				    unsigned int *len);
+
+dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
+				unsigned long n);
+
+void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+				 struct sg_table *pages,
+				 unsigned int sg_page_sizes);
+
+int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
+int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
+
+static inline int __must_check
+i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
+{
+	might_lock(&obj->mm.lock);
+
+	if (atomic_inc_not_zero(&obj->mm.pages_pin_count))
+		return 0;
+
+	return __i915_gem_object_get_pages(obj);
+}
+
+static inline bool
+i915_gem_object_has_pages(struct drm_i915_gem_object *obj)
+{
+	return !IS_ERR_OR_NULL(READ_ONCE(obj->mm.pages));
+}
+
+static inline void
+__i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
+{
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+
+	atomic_inc(&obj->mm.pages_pin_count);
+}
+
+static inline bool
+i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
+{
+	return atomic_read(&obj->mm.pages_pin_count);
+}
+
+static inline void
+__i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
+{
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+	atomic_dec(&obj->mm.pages_pin_count);
+}
+
+static inline void
+i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
+{
+	__i915_gem_object_unpin_pages(obj);
+}
+
+enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
+	I915_MM_NORMAL = 0,
+	I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
+};
+
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+				enum i915_mm_subclass subclass);
+void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
+void i915_gem_object_writeback(struct drm_i915_gem_object *obj);
+
+enum i915_map_type {
+	I915_MAP_WB = 0,
+	I915_MAP_WC,
+#define I915_MAP_OVERRIDE BIT(31)
+	I915_MAP_FORCE_WB = I915_MAP_WB | I915_MAP_OVERRIDE,
+	I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE,
+};
+
+/**
+ * i915_gem_object_pin_map - return a contiguous mapping of the entire object
+ * @obj: the object to map into kernel address space
+ * @type: the type of mapping, used to select pgprot_t
+ *
+ * Calls i915_gem_object_pin_pages() to prevent reaping of the object's
+ * pages and then returns a contiguous mapping of the backing storage into
+ * the kernel address space. Based on the @type of mapping, the PTE will be
+ * set to either WriteBack or WriteCombine (via pgprot_t).
+ *
+ * The caller is responsible for calling i915_gem_object_unpin_map() when the
+ * mapping is no longer required.
+ *
+ * Returns the pointer through which to access the mapped object, or an
+ * ERR_PTR() on error.
+ */
+void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
+					   enum i915_map_type type);
+
+void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
+				 unsigned long offset,
+				 unsigned long size);
+static inline void i915_gem_object_flush_map(struct drm_i915_gem_object *obj)
+{
+	__i915_gem_object_flush_map(obj, 0, obj->base.size);
+}
+
+/**
+ * i915_gem_object_unpin_map - releases an earlier mapping
+ * @obj: the object to unmap
+ *
+ * After pinning the object and mapping its pages, once you are finished
+ * with your access, call i915_gem_object_unpin_map() to release the pin
+ * upon the mapping. Once the pin count reaches zero, that mapping may be
+ * removed.
+ */
+static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_unpin_pages(obj);
+}
+
+void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj);
+void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj);
+
+void
+i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
+				   unsigned int flush_domains);
+
+int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
+				 unsigned int *needs_clflush);
+int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
+				  unsigned int *needs_clflush);
+#define CLFLUSH_BEFORE	BIT(0)
+#define CLFLUSH_AFTER	BIT(1)
+#define CLFLUSH_FLAGS	(CLFLUSH_BEFORE | CLFLUSH_AFTER)
+
+static inline void
+i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_unpin_pages(obj);
+	i915_gem_object_unlock(obj);
+}
+
+static inline struct intel_engine_cs *
+i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
+{
+	struct intel_engine_cs *engine = NULL;
+	struct dma_fence *fence;
+
+	rcu_read_lock();
+	fence = reservation_object_get_excl_rcu(obj->base.resv);
+	rcu_read_unlock();
+
+	if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence))
+		engine = to_request(fence)->engine;
+	dma_fence_put(fence);
+
+	return engine;
+}
+
+void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
+					 unsigned int cache_level);
+void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
+
+int __must_check
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
+i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
+i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
+struct i915_vma * __must_check
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+				     u32 alignment,
+				     const struct i915_ggtt_view *view,
+				     unsigned int flags);
+void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
+
+static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+	if (obj->cache_dirty)
+		return false;
+
+	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+		return true;
+
+	return obj->pin_global; /* currently in use by HW, keep flushed */
+}
+
+static inline void __start_cpu_write(struct drm_i915_gem_object *obj)
+{
+	obj->read_domains = I915_GEM_DOMAIN_CPU;
+	obj->write_domain = I915_GEM_DOMAIN_CPU;
+	if (cpu_write_needs_clflush(obj))
+		obj->cache_dirty = true;
+}
+
+int i915_gem_object_wait(struct drm_i915_gem_object *obj,
+			 unsigned int flags,
+			 long timeout);
+int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
+				  unsigned int flags,
+				  const struct i915_sched_attr *attr);
+#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
new file mode 100644
index 000000000000..cb42e3a312e2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_gem_object_blt.h"
+
+#include "i915_gem_clflush.h"
+#include "intel_drv.h"
+
+int intel_emit_vma_fill_blt(struct i915_request *rq,
+			    struct i915_vma *vma,
+			    u32 value)
+{
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 8);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	if (INTEL_GEN(rq->i915) >= 8) {
+		*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
+		*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
+		*cs++ = 0;
+		*cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
+		*cs++ = lower_32_bits(vma->node.start);
+		*cs++ = upper_32_bits(vma->node.start);
+		*cs++ = value;
+		*cs++ = MI_NOOP;
+	} else {
+		*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
+		*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
+		*cs++ = 0;
+		*cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
+		*cs++ = vma->node.start;
+		*cs++ = value;
+		*cs++ = MI_NOOP;
+		*cs++ = MI_NOOP;
+	}
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
+			     struct intel_context *ce,
+			     u32 value)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_gem_context *ctx = ce->gem_context;
+	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+	struct i915_request *rq;
+	struct i915_vma *vma;
+	int err;
+
+	/* XXX: ce->vm please */
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (unlikely(err))
+		return err;
+
+	if (obj->cache_dirty & ~obj->cache_coherent) {
+		i915_gem_object_lock(obj);
+		i915_gem_clflush_object(obj, 0);
+		i915_gem_object_unlock(obj);
+	}
+
+	rq = i915_request_create(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_unpin;
+	}
+
+	err = i915_request_await_object(rq, obj, true);
+	if (unlikely(err))
+		goto out_request;
+
+	if (ce->engine->emit_init_breadcrumb) {
+		err = ce->engine->emit_init_breadcrumb(rq);
+		if (unlikely(err))
+			goto out_request;
+	}
+
+	i915_vma_lock(vma);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
+	if (unlikely(err))
+		goto out_request;
+
+	err = intel_emit_vma_fill_blt(rq, vma, value);
+out_request:
+	if (unlikely(err))
+		i915_request_skip(rq, err);
+
+	i915_request_add(rq);
+out_unpin:
+	i915_vma_unpin(vma);
+	return err;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_object_blt.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
new file mode 100644
index 000000000000..7ec7de6ac0c0
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_OBJECT_BLT_H__
+#define __I915_GEM_OBJECT_BLT_H__
+
+#include <linux/types.h>
+
+struct drm_i915_gem_object;
+struct intel_context;
+struct i915_request;
+struct i915_vma;
+
+int intel_emit_vma_fill_blt(struct i915_request *rq,
+			    struct i915_vma *vma,
+			    u32 value);
+
+int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
+			     struct intel_context *ce,
+			     u32 value);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
new file mode 100644
index 000000000000..18bf4f8d6d80
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -0,0 +1,262 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __I915_GEM_OBJECT_TYPES_H__
+#define __I915_GEM_OBJECT_TYPES_H__
+
+#include <drm/drm_gem.h>
+
+#include "i915_active.h"
+#include "i915_selftest.h"
+
+struct drm_i915_gem_object;
+
+/*
+ * struct i915_lut_handle tracks the fast lookups from handle to vma used
+ * for execbuf. Although we use a radixtree for that mapping, in order to
+ * remove them as the object or context is closed, we need a secondary list
+ * and a translation entry (i915_lut_handle).
+ */
+struct i915_lut_handle {
+	struct list_head obj_link;
+	struct i915_gem_context *ctx;
+	u32 handle;
+};
+
+struct drm_i915_gem_object_ops {
+	unsigned int flags;
+#define I915_GEM_OBJECT_HAS_STRUCT_PAGE	BIT(0)
+#define I915_GEM_OBJECT_IS_SHRINKABLE	BIT(1)
+#define I915_GEM_OBJECT_IS_PROXY	BIT(2)
+#define I915_GEM_OBJECT_ASYNC_CANCEL	BIT(3)
+
+	/* Interface between the GEM object and its backing storage.
+	 * get_pages() is called once prior to the use of the associated set
+	 * of pages before to binding them into the GTT, and put_pages() is
+	 * called after we no longer need them. As we expect there to be
+	 * associated cost with migrating pages between the backing storage
+	 * and making them available for the GPU (e.g. clflush), we may hold
+	 * onto the pages after they are no longer referenced by the GPU
+	 * in case they may be used again shortly (for example migrating the
+	 * pages to a different memory domain within the GTT). put_pages()
+	 * will therefore most likely be called when the object itself is
+	 * being released or under memory pressure (where we attempt to
+	 * reap pages for the shrinker).
+	 */
+	int (*get_pages)(struct drm_i915_gem_object *obj);
+	void (*put_pages)(struct drm_i915_gem_object *obj,
+			  struct sg_table *pages);
+	void (*truncate)(struct drm_i915_gem_object *obj);
+	void (*writeback)(struct drm_i915_gem_object *obj);
+
+	int (*pwrite)(struct drm_i915_gem_object *obj,
+		      const struct drm_i915_gem_pwrite *arg);
+
+	int (*dmabuf_export)(struct drm_i915_gem_object *obj);
+	void (*release)(struct drm_i915_gem_object *obj);
+};
+
+struct drm_i915_gem_object {
+	struct drm_gem_object base;
+
+	const struct drm_i915_gem_object_ops *ops;
+
+	struct {
+		/**
+		 * @vma.lock: protect the list/tree of vmas
+		 */
+		spinlock_t lock;
+
+		/**
+		 * @vma.list: List of VMAs backed by this object
+		 *
+		 * The VMA on this list are ordered by type, all GGTT vma are
+		 * placed at the head and all ppGTT vma are placed at the tail.
+		 * The different types of GGTT vma are unordered between
+		 * themselves, use the @vma.tree (which has a defined order
+		 * between all VMA) to quickly find an exact match.
+		 */
+		struct list_head list;
+
+		/**
+		 * @vma.tree: Ordered tree of VMAs backed by this object
+		 *
+		 * All VMA created for this object are placed in the @vma.tree
+		 * for fast retrieval via a binary search in
+		 * i915_vma_instance(). They are also added to @vma.list for
+		 * easy iteration.
+		 */
+		struct rb_root tree;
+	} vma;
+
+	/**
+	 * @lut_list: List of vma lookup entries in use for this object.
+	 *
+	 * If this object is closed, we need to remove all of its VMA from
+	 * the fast lookup index in associated contexts; @lut_list provides
+	 * this translation from object to context->handles_vma.
+	 */
+	struct list_head lut_list;
+
+	/** Stolen memory for this object, instead of being backed by shmem. */
+	struct drm_mm_node *stolen;
+	union {
+		struct rcu_head rcu;
+		struct llist_node freed;
+	};
+
+	/**
+	 * Whether the object is currently in the GGTT mmap.
+	 */
+	unsigned int userfault_count;
+	struct list_head userfault_link;
+
+	struct list_head batch_pool_link;
+	I915_SELFTEST_DECLARE(struct list_head st_link);
+
+	/*
+	 * Is the object to be mapped as read-only to the GPU
+	 * Only honoured if hardware has relevant pte bit
+	 */
+	unsigned int cache_level:3;
+	unsigned int cache_coherent:2;
+#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
+#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
+	unsigned int cache_dirty:1;
+
+	/**
+	 * @read_domains: Read memory domains.
+	 *
+	 * These monitor which caches contain read/write data related to the
+	 * object. When transitioning from one set of domains to another,
+	 * the driver is called to ensure that caches are suitably flushed and
+	 * invalidated.
+	 */
+	u16 read_domains;
+
+	/**
+	 * @write_domain: Corresponding unique write memory domain.
+	 */
+	u16 write_domain;
+
+	atomic_t frontbuffer_bits;
+	unsigned int frontbuffer_ggtt_origin; /* write once */
+	struct i915_active_request frontbuffer_write;
+
+	/** Current tiling stride for the object, if it's tiled. */
+	unsigned int tiling_and_stride;
+#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
+#define TILING_MASK (FENCE_MINIMUM_STRIDE - 1)
+#define STRIDE_MASK (~TILING_MASK)
+
+	/** Count of VMA actually bound by this object */
+	atomic_t bind_count;
+	unsigned int active_count;
+	/** Count of how many global VMA are currently pinned for use by HW */
+	unsigned int pin_global;
+
+	struct {
+		struct mutex lock; /* protects the pages and their use */
+		atomic_t pages_pin_count;
+
+		struct sg_table *pages;
+		void *mapping;
+
+		/* TODO: whack some of this into the error state */
+		struct i915_page_sizes {
+			/**
+			 * The sg mask of the pages sg_table. i.e the mask of
+			 * of the lengths for each sg entry.
+			 */
+			unsigned int phys;
+
+			/**
+			 * The gtt page sizes we are allowed to use given the
+			 * sg mask and the supported page sizes. This will
+			 * express the smallest unit we can use for the whole
+			 * object, as well as the larger sizes we may be able
+			 * to use opportunistically.
+			 */
+			unsigned int sg;
+
+			/**
+			 * The actual gtt page size usage. Since we can have
+			 * multiple vma associated with this object we need to
+			 * prevent any trampling of state, hence a copy of this
+			 * struct also lives in each vma, therefore the gtt
+			 * value here should only be read/write through the vma.
+			 */
+			unsigned int gtt;
+		} page_sizes;
+
+		I915_SELFTEST_DECLARE(unsigned int page_mask);
+
+		struct i915_gem_object_page_iter {
+			struct scatterlist *sg_pos;
+			unsigned int sg_idx; /* in pages, but 32bit eek! */
+
+			struct radix_tree_root radix;
+			struct mutex lock; /* protects this cache */
+		} get_page;
+
+		/**
+		 * Element within i915->mm.unbound_list or i915->mm.bound_list,
+		 * locked by i915->mm.obj_lock.
+		 */
+		struct list_head link;
+
+		/**
+		 * Advice: are the backing pages purgeable?
+		 */
+		unsigned int madv:2;
+
+		/**
+		 * This is set if the object has been written to since the
+		 * pages were last acquired.
+		 */
+		bool dirty:1;
+
+		/**
+		 * This is set if the object has been pinned due to unknown
+		 * swizzling.
+		 */
+		bool quirked:1;
+	} mm;
+
+	/** References from framebuffers, locks out tiling changes. */
+	unsigned int framebuffer_references;
+
+	/** Record of address bit 17 of each page at last unbind. */
+	unsigned long *bit_17;
+
+	union {
+		struct i915_gem_userptr {
+			uintptr_t ptr;
+
+			struct i915_mm_struct *mm;
+			struct i915_mmu_object *mmu_object;
+			struct work_struct *work;
+		} userptr;
+
+		unsigned long scratch;
+
+		void *gvt_info;
+	};
+
+	/** for phys allocated objects */
+	struct drm_dma_handle *phys_handle;
+};
+
+static inline struct drm_i915_gem_object *
+to_intel_bo(struct drm_gem_object *gem)
+{
+	/* Assert that to_intel_bo(NULL) == NULL */
+	BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
+
+	return container_of(gem, struct drm_i915_gem_object, base);
+}
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
new file mode 100644
index 000000000000..b36ad269f4ea
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -0,0 +1,544 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_gem_object.h"
+#include "i915_scatterlist.h"
+
+void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+				 struct sg_table *pages,
+				 unsigned int sg_page_sizes)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	unsigned long supported = INTEL_INFO(i915)->page_sizes;
+	int i;
+
+	lockdep_assert_held(&obj->mm.lock);
+
+	/* Make the pages coherent with the GPU (flushing any swapin). */
+	if (obj->cache_dirty) {
+		obj->write_domain = 0;
+		if (i915_gem_object_has_struct_page(obj))
+			drm_clflush_sg(pages);
+		obj->cache_dirty = false;
+	}
+
+	obj->mm.get_page.sg_pos = pages->sgl;
+	obj->mm.get_page.sg_idx = 0;
+
+	obj->mm.pages = pages;
+
+	if (i915_gem_object_is_tiled(obj) &&
+	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
+		GEM_BUG_ON(obj->mm.quirked);
+		__i915_gem_object_pin_pages(obj);
+		obj->mm.quirked = true;
+	}
+
+	GEM_BUG_ON(!sg_page_sizes);
+	obj->mm.page_sizes.phys = sg_page_sizes;
+
+	/*
+	 * Calculate the supported page-sizes which fit into the given
+	 * sg_page_sizes. This will give us the page-sizes which we may be able
+	 * to use opportunistically when later inserting into the GTT. For
+	 * example if phys=2G, then in theory we should be able to use 1G, 2M,
+	 * 64K or 4K pages, although in practice this will depend on a number of
+	 * other factors.
+	 */
+	obj->mm.page_sizes.sg = 0;
+	for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
+		if (obj->mm.page_sizes.phys & ~0u << i)
+			obj->mm.page_sizes.sg |= BIT(i);
+	}
+	GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
+
+	if (i915_gem_object_is_shrinkable(obj)) {
+		struct list_head *list;
+		unsigned long flags;
+
+		spin_lock_irqsave(&i915->mm.obj_lock, flags);
+
+		i915->mm.shrink_count++;
+		i915->mm.shrink_memory += obj->base.size;
+
+		if (obj->mm.madv != I915_MADV_WILLNEED)
+			list = &i915->mm.purge_list;
+		else
+			list = &i915->mm.shrink_list;
+		list_add_tail(&obj->mm.link, list);
+
+		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+	}
+}
+
+int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+{
+	int err;
+
+	if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
+		DRM_DEBUG("Attempting to obtain a purgeable object\n");
+		return -EFAULT;
+	}
+
+	err = obj->ops->get_pages(obj);
+	GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
+
+	return err;
+}
+
+/* Ensure that the associated pages are gathered from the backing storage
+ * and pinned into our object. i915_gem_object_pin_pages() may be called
+ * multiple times before they are released by a single call to
+ * i915_gem_object_unpin_pages() - once the pages are no longer referenced
+ * either as a result of memory pressure (reaping pages under the shrinker)
+ * or as the object is itself released.
+ */
+int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+{
+	int err;
+
+	err = mutex_lock_interruptible(&obj->mm.lock);
+	if (err)
+		return err;
+
+	if (unlikely(!i915_gem_object_has_pages(obj))) {
+		GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
+
+		err = ____i915_gem_object_get_pages(obj);
+		if (err)
+			goto unlock;
+
+		smp_mb__before_atomic();
+	}
+	atomic_inc(&obj->mm.pages_pin_count);
+
+unlock:
+	mutex_unlock(&obj->mm.lock);
+	return err;
+}
+
+/* Immediately discard the backing storage */
+void i915_gem_object_truncate(struct drm_i915_gem_object *obj)
+{
+	drm_gem_free_mmap_offset(&obj->base);
+	if (obj->ops->truncate)
+		obj->ops->truncate(obj);
+}
+
+/* Try to discard unwanted pages */
+void i915_gem_object_writeback(struct drm_i915_gem_object *obj)
+{
+	lockdep_assert_held(&obj->mm.lock);
+	GEM_BUG_ON(i915_gem_object_has_pages(obj));
+
+	if (obj->ops->writeback)
+		obj->ops->writeback(obj);
+}
+
+static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
+{
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+
+	rcu_read_lock();
+	radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
+		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
+	rcu_read_unlock();
+}
+
+struct sg_table *
+__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct sg_table *pages;
+
+	pages = fetch_and_zero(&obj->mm.pages);
+	if (IS_ERR_OR_NULL(pages))
+		return pages;
+
+	if (i915_gem_object_is_shrinkable(obj)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&i915->mm.obj_lock, flags);
+
+		list_del(&obj->mm.link);
+		i915->mm.shrink_count--;
+		i915->mm.shrink_memory -= obj->base.size;
+
+		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+	}
+
+	if (obj->mm.mapping) {
+		void *ptr;
+
+		ptr = page_mask_bits(obj->mm.mapping);
+		if (is_vmalloc_addr(ptr))
+			vunmap(ptr);
+		else
+			kunmap(kmap_to_page(ptr));
+
+		obj->mm.mapping = NULL;
+	}
+
+	__i915_gem_object_reset_page_iter(obj);
+	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
+
+	return pages;
+}
+
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+				enum i915_mm_subclass subclass)
+{
+	struct sg_table *pages;
+	int err;
+
+	if (i915_gem_object_has_pinned_pages(obj))
+		return -EBUSY;
+
+	GEM_BUG_ON(atomic_read(&obj->bind_count));
+
+	/* May be called by shrinker from within get_pages() (on another bo) */
+	mutex_lock_nested(&obj->mm.lock, subclass);
+	if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
+		err = -EBUSY;
+		goto unlock;
+	}
+
+	/*
+	 * ->put_pages might need to allocate memory for the bit17 swizzle
+	 * array, hence protect them from being reaped by removing them from gtt
+	 * lists early.
+	 */
+	pages = __i915_gem_object_unset_pages(obj);
+
+	/*
+	 * XXX Temporary hijinx to avoid updating all backends to handle
+	 * NULL pages. In the future, when we have more asynchronous
+	 * get_pages backends we should be better able to handle the
+	 * cancellation of the async task in a more uniform manner.
+	 */
+	if (!pages && !i915_gem_object_needs_async_cancel(obj))
+		pages = ERR_PTR(-EINVAL);
+
+	if (!IS_ERR(pages))
+		obj->ops->put_pages(obj, pages);
+
+	err = 0;
+unlock:
+	mutex_unlock(&obj->mm.lock);
+
+	return err;
+}
+
+/* The 'mapping' part of i915_gem_object_pin_map() below */
+static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
+				 enum i915_map_type type)
+{
+	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
+	struct sg_table *sgt = obj->mm.pages;
+	struct sgt_iter sgt_iter;
+	struct page *page;
+	struct page *stack_pages[32];
+	struct page **pages = stack_pages;
+	unsigned long i = 0;
+	pgprot_t pgprot;
+	void *addr;
+
+	/* A single page can always be kmapped */
+	if (n_pages == 1 && type == I915_MAP_WB)
+		return kmap(sg_page(sgt->sgl));
+
+	if (n_pages > ARRAY_SIZE(stack_pages)) {
+		/* Too big for stack -- allocate temporary array instead */
+		pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
+		if (!pages)
+			return NULL;
+	}
+
+	for_each_sgt_page(page, sgt_iter, sgt)
+		pages[i++] = page;
+
+	/* Check that we have the expected number of pages */
+	GEM_BUG_ON(i != n_pages);
+
+	switch (type) {
+	default:
+		MISSING_CASE(type);
+		/* fallthrough to use PAGE_KERNEL anyway */
+	case I915_MAP_WB:
+		pgprot = PAGE_KERNEL;
+		break;
+	case I915_MAP_WC:
+		pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
+		break;
+	}
+	addr = vmap(pages, n_pages, 0, pgprot);
+
+	if (pages != stack_pages)
+		kvfree(pages);
+
+	return addr;
+}
+
+/* get, pin, and map the pages of the object into kernel space */
+void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
+			      enum i915_map_type type)
+{
+	enum i915_map_type has_type;
+	bool pinned;
+	void *ptr;
+	int err;
+
+	if (unlikely(!i915_gem_object_has_struct_page(obj)))
+		return ERR_PTR(-ENXIO);
+
+	err = mutex_lock_interruptible(&obj->mm.lock);
+	if (err)
+		return ERR_PTR(err);
+
+	pinned = !(type & I915_MAP_OVERRIDE);
+	type &= ~I915_MAP_OVERRIDE;
+
+	if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
+		if (unlikely(!i915_gem_object_has_pages(obj))) {
+			GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
+
+			err = ____i915_gem_object_get_pages(obj);
+			if (err)
+				goto err_unlock;
+
+			smp_mb__before_atomic();
+		}
+		atomic_inc(&obj->mm.pages_pin_count);
+		pinned = false;
+	}
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+
+	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
+	if (ptr && has_type != type) {
+		if (pinned) {
+			err = -EBUSY;
+			goto err_unpin;
+		}
+
+		if (is_vmalloc_addr(ptr))
+			vunmap(ptr);
+		else
+			kunmap(kmap_to_page(ptr));
+
+		ptr = obj->mm.mapping = NULL;
+	}
+
+	if (!ptr) {
+		ptr = i915_gem_object_map(obj, type);
+		if (!ptr) {
+			err = -ENOMEM;
+			goto err_unpin;
+		}
+
+		obj->mm.mapping = page_pack_bits(ptr, type);
+	}
+
+out_unlock:
+	mutex_unlock(&obj->mm.lock);
+	return ptr;
+
+err_unpin:
+	atomic_dec(&obj->mm.pages_pin_count);
+err_unlock:
+	ptr = ERR_PTR(err);
+	goto out_unlock;
+}
+
+void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
+				 unsigned long offset,
+				 unsigned long size)
+{
+	enum i915_map_type has_type;
+	void *ptr;
+
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+	GEM_BUG_ON(range_overflows_t(typeof(obj->base.size),
+				     offset, size, obj->base.size));
+
+	obj->mm.dirty = true;
+
+	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)
+		return;
+
+	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
+	if (has_type == I915_MAP_WC)
+		return;
+
+	drm_clflush_virt_range(ptr + offset, size);
+	if (size == obj->base.size) {
+		obj->write_domain &= ~I915_GEM_DOMAIN_CPU;
+		obj->cache_dirty = false;
+	}
+}
+
+struct scatterlist *
+i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+		       unsigned int n,
+		       unsigned int *offset)
+{
+	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
+	struct scatterlist *sg;
+	unsigned int idx, count;
+
+	might_sleep();
+	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+	/* As we iterate forward through the sg, we record each entry in a
+	 * radixtree for quick repeated (backwards) lookups. If we have seen
+	 * this index previously, we will have an entry for it.
+	 *
+	 * Initial lookup is O(N), but this is amortized to O(1) for
+	 * sequential page access (where each new request is consecutive
+	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
+	 * i.e. O(1) with a large constant!
+	 */
+	if (n < READ_ONCE(iter->sg_idx))
+		goto lookup;
+
+	mutex_lock(&iter->lock);
+
+	/* We prefer to reuse the last sg so that repeated lookup of this
+	 * (or the subsequent) sg are fast - comparing against the last
+	 * sg is faster than going through the radixtree.
+	 */
+
+	sg = iter->sg_pos;
+	idx = iter->sg_idx;
+	count = __sg_page_count(sg);
+
+	while (idx + count <= n) {
+		void *entry;
+		unsigned long i;
+		int ret;
+
+		/* If we cannot allocate and insert this entry, or the
+		 * individual pages from this range, cancel updating the
+		 * sg_idx so that on this lookup we are forced to linearly
+		 * scan onwards, but on future lookups we will try the
+		 * insertion again (in which case we need to be careful of
+		 * the error return reporting that we have already inserted
+		 * this index).
+		 */
+		ret = radix_tree_insert(&iter->radix, idx, sg);
+		if (ret && ret != -EEXIST)
+			goto scan;
+
+		entry = xa_mk_value(idx);
+		for (i = 1; i < count; i++) {
+			ret = radix_tree_insert(&iter->radix, idx + i, entry);
+			if (ret && ret != -EEXIST)
+				goto scan;
+		}
+
+		idx += count;
+		sg = ____sg_next(sg);
+		count = __sg_page_count(sg);
+	}
+
+scan:
+	iter->sg_pos = sg;
+	iter->sg_idx = idx;
+
+	mutex_unlock(&iter->lock);
+
+	if (unlikely(n < idx)) /* insertion completed by another thread */
+		goto lookup;
+
+	/* In case we failed to insert the entry into the radixtree, we need
+	 * to look beyond the current sg.
+	 */
+	while (idx + count <= n) {
+		idx += count;
+		sg = ____sg_next(sg);
+		count = __sg_page_count(sg);
+	}
+
+	*offset = n - idx;
+	return sg;
+
+lookup:
+	rcu_read_lock();
+
+	sg = radix_tree_lookup(&iter->radix, n);
+	GEM_BUG_ON(!sg);
+
+	/* If this index is in the middle of multi-page sg entry,
+	 * the radix tree will contain a value entry that points
+	 * to the start of that range. We will return the pointer to
+	 * the base page and the offset of this page within the
+	 * sg entry's range.
+	 */
+	*offset = 0;
+	if (unlikely(xa_is_value(sg))) {
+		unsigned long base = xa_to_value(sg);
+
+		sg = radix_tree_lookup(&iter->radix, base);
+		GEM_BUG_ON(!sg);
+
+		*offset = n - base;
+	}
+
+	rcu_read_unlock();
+
+	return sg;
+}
+
+struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
+{
+	struct scatterlist *sg;
+	unsigned int offset;
+
+	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
+
+	sg = i915_gem_object_get_sg(obj, n, &offset);
+	return nth_page(sg_page(sg), offset);
+}
+
+/* Like i915_gem_object_get_page(), but mark the returned page dirty */
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+			       unsigned int n)
+{
+	struct page *page;
+
+	page = i915_gem_object_get_page(obj, n);
+	if (!obj->mm.dirty)
+		set_page_dirty(page);
+
+	return page;
+}
+
+dma_addr_t
+i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
+				    unsigned long n,
+				    unsigned int *len)
+{
+	struct scatterlist *sg;
+	unsigned int offset;
+
+	sg = i915_gem_object_get_sg(obj, n, &offset);
+
+	if (len)
+		*len = sg_dma_len(sg) - (offset << PAGE_SHIFT);
+
+	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
+}
+
+dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
+				unsigned long n)
+{
+	return i915_gem_object_get_dma_address_len(obj, n, NULL);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
new file mode 100644
index 000000000000..2deac933cf59
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -0,0 +1,212 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/highmem.h>
+#include <linux/shmem_fs.h>
+#include <linux/swap.h>
+
+#include <drm/drm.h> /* for drm_legacy.h! */
+#include <drm/drm_cache.h>
+#include <drm/drm_legacy.h> /* for drm_pci.h! */
+#include <drm/drm_pci.h>
+
+#include "i915_drv.h"
+#include "i915_gem_object.h"
+#include "i915_scatterlist.h"
+
+static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
+{
+	struct address_space *mapping = obj->base.filp->f_mapping;
+	struct drm_dma_handle *phys;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	char *vaddr;
+	int i;
+	int err;
+
+	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
+		return -EINVAL;
+
+	/* Always aligning to the object size, allows a single allocation
+	 * to handle all possible callers, and given typical object sizes,
+	 * the alignment of the buddy allocation will naturally match.
+	 */
+	phys = drm_pci_alloc(obj->base.dev,
+			     roundup_pow_of_two(obj->base.size),
+			     roundup_pow_of_two(obj->base.size));
+	if (!phys)
+		return -ENOMEM;
+
+	vaddr = phys->vaddr;
+	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+		struct page *page;
+		char *src;
+
+		page = shmem_read_mapping_page(mapping, i);
+		if (IS_ERR(page)) {
+			err = PTR_ERR(page);
+			goto err_phys;
+		}
+
+		src = kmap_atomic(page);
+		memcpy(vaddr, src, PAGE_SIZE);
+		drm_clflush_virt_range(vaddr, PAGE_SIZE);
+		kunmap_atomic(src);
+
+		put_page(page);
+		vaddr += PAGE_SIZE;
+	}
+
+	i915_gem_chipset_flush(to_i915(obj->base.dev));
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st) {
+		err = -ENOMEM;
+		goto err_phys;
+	}
+
+	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
+		kfree(st);
+		err = -ENOMEM;
+		goto err_phys;
+	}
+
+	sg = st->sgl;
+	sg->offset = 0;
+	sg->length = obj->base.size;
+
+	sg_dma_address(sg) = phys->busaddr;
+	sg_dma_len(sg) = obj->base.size;
+
+	obj->phys_handle = phys;
+
+	__i915_gem_object_set_pages(obj, st, sg->length);
+
+	return 0;
+
+err_phys:
+	drm_pci_free(obj->base.dev, phys);
+
+	return err;
+}
+
+static void
+i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
+			       struct sg_table *pages)
+{
+	__i915_gem_object_release_shmem(obj, pages, false);
+
+	if (obj->mm.dirty) {
+		struct address_space *mapping = obj->base.filp->f_mapping;
+		char *vaddr = obj->phys_handle->vaddr;
+		int i;
+
+		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+			struct page *page;
+			char *dst;
+
+			page = shmem_read_mapping_page(mapping, i);
+			if (IS_ERR(page))
+				continue;
+
+			dst = kmap_atomic(page);
+			drm_clflush_virt_range(vaddr, PAGE_SIZE);
+			memcpy(dst, vaddr, PAGE_SIZE);
+			kunmap_atomic(dst);
+
+			set_page_dirty(page);
+			if (obj->mm.madv == I915_MADV_WILLNEED)
+				mark_page_accessed(page);
+			put_page(page);
+			vaddr += PAGE_SIZE;
+		}
+		obj->mm.dirty = false;
+	}
+
+	sg_free_table(pages);
+	kfree(pages);
+
+	drm_pci_free(obj->base.dev, obj->phys_handle);
+}
+
+static void
+i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_unpin_pages(obj);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
+	.get_pages = i915_gem_object_get_pages_phys,
+	.put_pages = i915_gem_object_put_pages_phys,
+	.release = i915_gem_object_release_phys,
+};
+
+int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
+{
+	struct sg_table *pages;
+	int err;
+
+	if (align > obj->base.size)
+		return -EINVAL;
+
+	if (obj->ops == &i915_gem_phys_ops)
+		return 0;
+
+	if (obj->ops != &i915_gem_shmem_ops)
+		return -EINVAL;
+
+	err = i915_gem_object_unbind(obj);
+	if (err)
+		return err;
+
+	mutex_lock(&obj->mm.lock);
+
+	if (obj->mm.madv != I915_MADV_WILLNEED) {
+		err = -EFAULT;
+		goto err_unlock;
+	}
+
+	if (obj->mm.quirked) {
+		err = -EFAULT;
+		goto err_unlock;
+	}
+
+	if (obj->mm.mapping) {
+		err = -EBUSY;
+		goto err_unlock;
+	}
+
+	pages = __i915_gem_object_unset_pages(obj);
+
+	obj->ops = &i915_gem_phys_ops;
+
+	err = ____i915_gem_object_get_pages(obj);
+	if (err)
+		goto err_xfer;
+
+	/* Perma-pin (until release) the physical set of pages */
+	__i915_gem_object_pin_pages(obj);
+
+	if (!IS_ERR_OR_NULL(pages))
+		i915_gem_shmem_ops.put_pages(obj, pages);
+	mutex_unlock(&obj->mm.lock);
+	return 0;
+
+err_xfer:
+	obj->ops = &i915_gem_shmem_ops;
+	if (!IS_ERR_OR_NULL(pages)) {
+		unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
+
+		__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
+	}
+err_unlock:
+	mutex_unlock(&obj->mm.lock);
+	return err;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_phys.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
new file mode 100644
index 000000000000..05011d4a3b88
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -0,0 +1,294 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_gt_pm.h"
+
+#include "i915_drv.h"
+#include "i915_globals.h"
+
+static void call_idle_barriers(struct intel_engine_cs *engine)
+{
+	struct llist_node *node, *next;
+
+	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
+		struct i915_active_request *active =
+			container_of((struct list_head *)node,
+				     typeof(*active), link);
+
+		INIT_LIST_HEAD(&active->link);
+		RCU_INIT_POINTER(active->request, NULL);
+
+		active->retire(active, NULL);
+	}
+}
+
+static void i915_gem_park(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	for_each_engine(engine, i915, id) {
+		call_idle_barriers(engine); /* cleanup after wedging */
+		i915_gem_batch_pool_fini(&engine->batch_pool);
+	}
+
+	i915_timelines_park(i915);
+	i915_vma_parked(i915);
+
+	i915_globals_park();
+}
+
+static void idle_work_handler(struct work_struct *work)
+{
+	struct drm_i915_private *i915 =
+		container_of(work, typeof(*i915), gem.idle_work);
+	bool park;
+
+	cancel_delayed_work_sync(&i915->gem.retire_work);
+	mutex_lock(&i915->drm.struct_mutex);
+
+	intel_wakeref_lock(&i915->gt.wakeref);
+	park = !intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work);
+	intel_wakeref_unlock(&i915->gt.wakeref);
+	if (park)
+		i915_gem_park(i915);
+	else
+		queue_delayed_work(i915->wq,
+				   &i915->gem.retire_work,
+				   round_jiffies_up_relative(HZ));
+
+	mutex_unlock(&i915->drm.struct_mutex);
+}
+
+static void retire_work_handler(struct work_struct *work)
+{
+	struct drm_i915_private *i915 =
+		container_of(work, typeof(*i915), gem.retire_work.work);
+
+	/* Come back later if the device is busy... */
+	if (mutex_trylock(&i915->drm.struct_mutex)) {
+		i915_retire_requests(i915);
+		mutex_unlock(&i915->drm.struct_mutex);
+	}
+
+	queue_delayed_work(i915->wq,
+			   &i915->gem.retire_work,
+			   round_jiffies_up_relative(HZ));
+}
+
+static int pm_notifier(struct notifier_block *nb,
+		       unsigned long action,
+		       void *data)
+{
+	struct drm_i915_private *i915 =
+		container_of(nb, typeof(*i915), gem.pm_notifier);
+
+	switch (action) {
+	case INTEL_GT_UNPARK:
+		i915_globals_unpark();
+		queue_delayed_work(i915->wq,
+				   &i915->gem.retire_work,
+				   round_jiffies_up_relative(HZ));
+		break;
+
+	case INTEL_GT_PARK:
+		queue_work(i915->wq, &i915->gem.idle_work);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
+{
+	bool result = !i915_terminally_wedged(i915);
+
+	do {
+		if (i915_gem_wait_for_idle(i915,
+					   I915_WAIT_LOCKED |
+					   I915_WAIT_FOR_IDLE_BOOST,
+					   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+			/* XXX hide warning from gem_eio */
+			if (i915_modparams.reset) {
+				dev_err(i915->drm.dev,
+					"Failed to idle engines, declaring wedged!\n");
+				GEM_TRACE_DUMP();
+			}
+
+			/*
+			 * Forcibly cancel outstanding work and leave
+			 * the gpu quiet.
+			 */
+			i915_gem_set_wedged(i915);
+			result = false;
+		}
+	} while (i915_retire_requests(i915) && result);
+
+	GEM_BUG_ON(i915->gt.awake);
+	return result;
+}
+
+bool i915_gem_load_power_context(struct drm_i915_private *i915)
+{
+	return switch_to_kernel_context_sync(i915);
+}
+
+void i915_gem_suspend(struct drm_i915_private *i915)
+{
+	GEM_TRACE("\n");
+
+	intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0);
+	flush_workqueue(i915->wq);
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	/*
+	 * We have to flush all the executing contexts to main memory so
+	 * that they can saved in the hibernation image. To ensure the last
+	 * context image is coherent, we have to switch away from it. That
+	 * leaves the i915->kernel_context still active when
+	 * we actually suspend, and its image in memory may not match the GPU
+	 * state. Fortunately, the kernel_context is disposable and we do
+	 * not rely on its state.
+	 */
+	switch_to_kernel_context_sync(i915);
+
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	/*
+	 * Assert that we successfully flushed all the work and
+	 * reset the GPU back to its idle, low power state.
+	 */
+	GEM_BUG_ON(i915->gt.awake);
+	flush_work(&i915->gem.idle_work);
+
+	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
+
+	i915_gem_drain_freed_objects(i915);
+
+	intel_uc_suspend(i915);
+}
+
+static struct drm_i915_gem_object *first_mm_object(struct list_head *list)
+{
+	return list_first_entry_or_null(list,
+					struct drm_i915_gem_object,
+					mm.link);
+}
+
+void i915_gem_suspend_late(struct drm_i915_private *i915)
+{
+	struct drm_i915_gem_object *obj;
+	struct list_head *phases[] = {
+		&i915->mm.shrink_list,
+		&i915->mm.purge_list,
+		NULL
+	}, **phase;
+	unsigned long flags;
+
+	/*
+	 * Neither the BIOS, ourselves or any other kernel
+	 * expects the system to be in execlists mode on startup,
+	 * so we need to reset the GPU back to legacy mode. And the only
+	 * known way to disable logical contexts is through a GPU reset.
+	 *
+	 * So in order to leave the system in a known default configuration,
+	 * always reset the GPU upon unload and suspend. Afterwards we then
+	 * clean up the GEM state tracking, flushing off the requests and
+	 * leaving the system in a known idle state.
+	 *
+	 * Note that is of the upmost importance that the GPU is idle and
+	 * all stray writes are flushed *before* we dismantle the backing
+	 * storage for the pinned objects.
+	 *
+	 * However, since we are uncertain that resetting the GPU on older
+	 * machines is a good idea, we don't - just in case it leaves the
+	 * machine in an unusable condition.
+	 */
+
+	spin_lock_irqsave(&i915->mm.obj_lock, flags);
+	for (phase = phases; *phase; phase++) {
+		LIST_HEAD(keep);
+
+		while ((obj = first_mm_object(*phase))) {
+			list_move_tail(&obj->mm.link, &keep);
+
+			/* Beware the background _i915_gem_free_objects */
+			if (!kref_get_unless_zero(&obj->base.refcount))
+				continue;
+
+			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+
+			i915_gem_object_lock(obj);
+			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
+			i915_gem_object_unlock(obj);
+			i915_gem_object_put(obj);
+
+			spin_lock_irqsave(&i915->mm.obj_lock, flags);
+		}
+
+		list_splice_tail(&keep, *phase);
+	}
+	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+
+	intel_uc_sanitize(i915);
+	i915_gem_sanitize(i915);
+}
+
+void i915_gem_resume(struct drm_i915_private *i915)
+{
+	GEM_TRACE("\n");
+
+	WARN_ON(i915->gt.awake);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
+
+	i915_gem_restore_gtt_mappings(i915);
+	i915_gem_restore_fences(i915);
+
+	/*
+	 * As we didn't flush the kernel context before suspend, we cannot
+	 * guarantee that the context image is complete. So let's just reset
+	 * it and start again.
+	 */
+	intel_gt_resume(i915);
+
+	if (i915_gem_init_hw(i915))
+		goto err_wedged;
+
+	intel_uc_resume(i915);
+
+	/* Always reload a context for powersaving. */
+	if (!i915_gem_load_power_context(i915))
+		goto err_wedged;
+
+out_unlock:
+	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return;
+
+err_wedged:
+	if (!i915_reset_failed(i915)) {
+		dev_err(i915->drm.dev,
+			"Failed to re-initialize GPU, declaring it wedged!\n");
+		i915_gem_set_wedged(i915);
+	}
+	goto out_unlock;
+}
+
+void i915_gem_init__pm(struct drm_i915_private *i915)
+{
+	INIT_WORK(&i915->gem.idle_work, idle_work_handler);
+	INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler);
+
+	i915->gem.pm_notifier.notifier_call = pm_notifier;
+	blocking_notifier_chain_register(&i915->gt.pm_notifications,
+					 &i915->gem.pm_notifier);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.h b/drivers/gpu/drm/i915/gem/i915_gem_pm.h
new file mode 100644
index 000000000000..6f7d5d11ac3b
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.h
@@ -0,0 +1,25 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_PM_H__
+#define __I915_GEM_PM_H__
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct work_struct;
+
+void i915_gem_init__pm(struct drm_i915_private *i915);
+
+bool i915_gem_load_power_context(struct drm_i915_private *i915);
+void i915_gem_resume(struct drm_i915_private *i915);
+
+void i915_gem_idle_work_handler(struct work_struct *work);
+
+void i915_gem_suspend(struct drm_i915_private *i915);
+void i915_gem_suspend_late(struct drm_i915_private *i915);
+
+#endif /* __I915_GEM_PM_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
new file mode 100644
index 000000000000..19d9ecdb2894
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -0,0 +1,571 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/pagevec.h>
+#include <linux/swap.h>
+
+#include "i915_drv.h"
+#include "i915_gem_object.h"
+#include "i915_scatterlist.h"
+
+/*
+ * Move pages to appropriate lru and release the pagevec, decrementing the
+ * ref count of those pages.
+ */
+static void check_release_pagevec(struct pagevec *pvec)
+{
+	check_move_unevictable_pages(pvec);
+	__pagevec_release(pvec);
+	cond_resched();
+}
+
+static int shmem_get_pages(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	const unsigned long page_count = obj->base.size / PAGE_SIZE;
+	unsigned long i;
+	struct address_space *mapping;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	struct sgt_iter sgt_iter;
+	struct page *page;
+	unsigned long last_pfn = 0;	/* suppress gcc warning */
+	unsigned int max_segment = i915_sg_segment_size();
+	unsigned int sg_page_sizes;
+	struct pagevec pvec;
+	gfp_t noreclaim;
+	int ret;
+
+	/*
+	 * Assert that the object is not currently in any GPU domain. As it
+	 * wasn't in the GTT, there shouldn't be any way it could have been in
+	 * a GPU cache
+	 */
+	GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
+	GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
+
+	/*
+	 * If there's no chance of allocating enough pages for the whole
+	 * object, bail early.
+	 */
+	if (page_count > totalram_pages())
+		return -ENOMEM;
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return -ENOMEM;
+
+rebuild_st:
+	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
+		kfree(st);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Get the list of pages out of our struct file.  They'll be pinned
+	 * at this point until we release them.
+	 *
+	 * Fail silently without starting the shrinker
+	 */
+	mapping = obj->base.filp->f_mapping;
+	mapping_set_unevictable(mapping);
+	noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
+	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
+
+	sg = st->sgl;
+	st->nents = 0;
+	sg_page_sizes = 0;
+	for (i = 0; i < page_count; i++) {
+		const unsigned int shrink[] = {
+			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
+			0,
+		}, *s = shrink;
+		gfp_t gfp = noreclaim;
+
+		do {
+			cond_resched();
+			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
+			if (!IS_ERR(page))
+				break;
+
+			if (!*s) {
+				ret = PTR_ERR(page);
+				goto err_sg;
+			}
+
+			i915_gem_shrink(i915, 2 * page_count, NULL, *s++);
+
+			/*
+			 * We've tried hard to allocate the memory by reaping
+			 * our own buffer, now let the real VM do its job and
+			 * go down in flames if truly OOM.
+			 *
+			 * However, since graphics tend to be disposable,
+			 * defer the oom here by reporting the ENOMEM back
+			 * to userspace.
+			 */
+			if (!*s) {
+				/* reclaim and warn, but no oom */
+				gfp = mapping_gfp_mask(mapping);
+
+				/*
+				 * Our bo are always dirty and so we require
+				 * kswapd to reclaim our pages (direct reclaim
+				 * does not effectively begin pageout of our
+				 * buffers on its own). However, direct reclaim
+				 * only waits for kswapd when under allocation
+				 * congestion. So as a result __GFP_RECLAIM is
+				 * unreliable and fails to actually reclaim our
+				 * dirty pages -- unless you try over and over
+				 * again with !__GFP_NORETRY. However, we still
+				 * want to fail this allocation rather than
+				 * trigger the out-of-memory killer and for
+				 * this we want __GFP_RETRY_MAYFAIL.
+				 */
+				gfp |= __GFP_RETRY_MAYFAIL;
+			}
+		} while (1);
+
+		if (!i ||
+		    sg->length >= max_segment ||
+		    page_to_pfn(page) != last_pfn + 1) {
+			if (i) {
+				sg_page_sizes |= sg->length;
+				sg = sg_next(sg);
+			}
+			st->nents++;
+			sg_set_page(sg, page, PAGE_SIZE, 0);
+		} else {
+			sg->length += PAGE_SIZE;
+		}
+		last_pfn = page_to_pfn(page);
+
+		/* Check that the i965g/gm workaround works. */
+		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
+	}
+	if (sg) { /* loop terminated early; short sg table */
+		sg_page_sizes |= sg->length;
+		sg_mark_end(sg);
+	}
+
+	/* Trim unused sg entries to avoid wasting memory. */
+	i915_sg_trim(st);
+
+	ret = i915_gem_gtt_prepare_pages(obj, st);
+	if (ret) {
+		/*
+		 * DMA remapping failed? One possible cause is that
+		 * it could not reserve enough large entries, asking
+		 * for PAGE_SIZE chunks instead may be helpful.
+		 */
+		if (max_segment > PAGE_SIZE) {
+			for_each_sgt_page(page, sgt_iter, st)
+				put_page(page);
+			sg_free_table(st);
+
+			max_segment = PAGE_SIZE;
+			goto rebuild_st;
+		} else {
+			dev_warn(&i915->drm.pdev->dev,
+				 "Failed to DMA remap %lu pages\n",
+				 page_count);
+			goto err_pages;
+		}
+	}
+
+	if (i915_gem_object_needs_bit17_swizzle(obj))
+		i915_gem_object_do_bit_17_swizzle(obj, st);
+
+	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
+
+	return 0;
+
+err_sg:
+	sg_mark_end(sg);
+err_pages:
+	mapping_clear_unevictable(mapping);
+	pagevec_init(&pvec);
+	for_each_sgt_page(page, sgt_iter, st) {
+		if (!pagevec_add(&pvec, page))
+			check_release_pagevec(&pvec);
+	}
+	if (pagevec_count(&pvec))
+		check_release_pagevec(&pvec);
+	sg_free_table(st);
+	kfree(st);
+
+	/*
+	 * shmemfs first checks if there is enough memory to allocate the page
+	 * and reports ENOSPC should there be insufficient, along with the usual
+	 * ENOMEM for a genuine allocation failure.
+	 *
+	 * We use ENOSPC in our driver to mean that we have run out of aperture
+	 * space and so want to translate the error from shmemfs back to our
+	 * usual understanding of ENOMEM.
+	 */
+	if (ret == -ENOSPC)
+		ret = -ENOMEM;
+
+	return ret;
+}
+
+static void
+shmem_truncate(struct drm_i915_gem_object *obj)
+{
+	/*
+	 * Our goal here is to return as much of the memory as
+	 * is possible back to the system as we are called from OOM.
+	 * To do this we must instruct the shmfs to drop all of its
+	 * backing pages, *now*.
+	 */
+	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
+	obj->mm.madv = __I915_MADV_PURGED;
+	obj->mm.pages = ERR_PTR(-EFAULT);
+}
+
+static void
+shmem_writeback(struct drm_i915_gem_object *obj)
+{
+	struct address_space *mapping;
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_NONE,
+		.nr_to_write = SWAP_CLUSTER_MAX,
+		.range_start = 0,
+		.range_end = LLONG_MAX,
+		.for_reclaim = 1,
+	};
+	unsigned long i;
+
+	/*
+	 * Leave mmapings intact (GTT will have been revoked on unbinding,
+	 * leaving only CPU mmapings around) and add those pages to the LRU
+	 * instead of invoking writeback so they are aged and paged out
+	 * as normal.
+	 */
+	mapping = obj->base.filp->f_mapping;
+
+	/* Begin writeback on each dirty page */
+	for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
+		struct page *page;
+
+		page = find_lock_entry(mapping, i);
+		if (!page || xa_is_value(page))
+			continue;
+
+		if (!page_mapped(page) && clear_page_dirty_for_io(page)) {
+			int ret;
+
+			SetPageReclaim(page);
+			ret = mapping->a_ops->writepage(page, &wbc);
+			if (!PageWriteback(page))
+				ClearPageReclaim(page);
+			if (!ret)
+				goto put;
+		}
+		unlock_page(page);
+put:
+		put_page(page);
+	}
+}
+
+void
+__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
+				struct sg_table *pages,
+				bool needs_clflush)
+{
+	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
+
+	if (obj->mm.madv == I915_MADV_DONTNEED)
+		obj->mm.dirty = false;
+
+	if (needs_clflush &&
+	    (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
+	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
+		drm_clflush_sg(pages);
+
+	__start_cpu_write(obj);
+}
+
+static void
+shmem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages)
+{
+	struct sgt_iter sgt_iter;
+	struct pagevec pvec;
+	struct page *page;
+
+	__i915_gem_object_release_shmem(obj, pages, true);
+
+	i915_gem_gtt_finish_pages(obj, pages);
+
+	if (i915_gem_object_needs_bit17_swizzle(obj))
+		i915_gem_object_save_bit_17_swizzle(obj, pages);
+
+	mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
+
+	pagevec_init(&pvec);
+	for_each_sgt_page(page, sgt_iter, pages) {
+		if (obj->mm.dirty)
+			set_page_dirty(page);
+
+		if (obj->mm.madv == I915_MADV_WILLNEED)
+			mark_page_accessed(page);
+
+		if (!pagevec_add(&pvec, page))
+			check_release_pagevec(&pvec);
+	}
+	if (pagevec_count(&pvec))
+		check_release_pagevec(&pvec);
+	obj->mm.dirty = false;
+
+	sg_free_table(pages);
+	kfree(pages);
+}
+
+static int
+shmem_pwrite(struct drm_i915_gem_object *obj,
+	     const struct drm_i915_gem_pwrite *arg)
+{
+	struct address_space *mapping = obj->base.filp->f_mapping;
+	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
+	u64 remain, offset;
+	unsigned int pg;
+
+	/* Caller already validated user args */
+	GEM_BUG_ON(!access_ok(user_data, arg->size));
+
+	/*
+	 * Before we instantiate/pin the backing store for our use, we
+	 * can prepopulate the shmemfs filp efficiently using a write into
+	 * the pagecache. We avoid the penalty of instantiating all the
+	 * pages, important if the user is just writing to a few and never
+	 * uses the object on the GPU, and using a direct write into shmemfs
+	 * allows it to avoid the cost of retrieving a page (either swapin
+	 * or clearing-before-use) before it is overwritten.
+	 */
+	if (i915_gem_object_has_pages(obj))
+		return -ENODEV;
+
+	if (obj->mm.madv != I915_MADV_WILLNEED)
+		return -EFAULT;
+
+	/*
+	 * Before the pages are instantiated the object is treated as being
+	 * in the CPU domain. The pages will be clflushed as required before
+	 * use, and we can freely write into the pages directly. If userspace
+	 * races pwrite with any other operation; corruption will ensue -
+	 * that is userspace's prerogative!
+	 */
+
+	remain = arg->size;
+	offset = arg->offset;
+	pg = offset_in_page(offset);
+
+	do {
+		unsigned int len, unwritten;
+		struct page *page;
+		void *data, *vaddr;
+		int err;
+		char c;
+
+		len = PAGE_SIZE - pg;
+		if (len > remain)
+			len = remain;
+
+		/* Prefault the user page to reduce potential recursion */
+		err = __get_user(c, user_data);
+		if (err)
+			return err;
+
+		err = __get_user(c, user_data + len - 1);
+		if (err)
+			return err;
+
+		err = pagecache_write_begin(obj->base.filp, mapping,
+					    offset, len, 0,
+					    &page, &data);
+		if (err < 0)
+			return err;
+
+		vaddr = kmap_atomic(page);
+		unwritten = __copy_from_user_inatomic(vaddr + pg,
+						      user_data,
+						      len);
+		kunmap_atomic(vaddr);
+
+		err = pagecache_write_end(obj->base.filp, mapping,
+					  offset, len, len - unwritten,
+					  page, data);
+		if (err < 0)
+			return err;
+
+		/* We don't handle -EFAULT, leave it to the caller to check */
+		if (unwritten)
+			return -ENODEV;
+
+		remain -= len;
+		user_data += len;
+		offset += len;
+		pg = 0;
+	} while (remain);
+
+	return 0;
+}
+
+const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
+	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
+		 I915_GEM_OBJECT_IS_SHRINKABLE,
+
+	.get_pages = shmem_get_pages,
+	.put_pages = shmem_put_pages,
+	.truncate = shmem_truncate,
+	.writeback = shmem_writeback,
+
+	.pwrite = shmem_pwrite,
+};
+
+static int create_shmem(struct drm_i915_private *i915,
+			struct drm_gem_object *obj,
+			size_t size)
+{
+	unsigned long flags = VM_NORESERVE;
+	struct file *filp;
+
+	drm_gem_private_object_init(&i915->drm, obj, size);
+
+	if (i915->mm.gemfs)
+		filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
+						 flags);
+	else
+		filp = shmem_file_setup("i915", size, flags);
+	if (IS_ERR(filp))
+		return PTR_ERR(filp);
+
+	obj->filp = filp;
+	return 0;
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size)
+{
+	struct drm_i915_gem_object *obj;
+	struct address_space *mapping;
+	unsigned int cache_level;
+	gfp_t mask;
+	int ret;
+
+	/* There is a prevalence of the assumption that we fit the object's
+	 * page count inside a 32bit _signed_ variable. Let's document this and
+	 * catch if we ever need to fix it. In the meantime, if you do spot
+	 * such a local variable, please consider fixing!
+	 */
+	if (size >> PAGE_SHIFT > INT_MAX)
+		return ERR_PTR(-E2BIG);
+
+	if (overflows_type(size, obj->base.size))
+		return ERR_PTR(-E2BIG);
+
+	obj = i915_gem_object_alloc();
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+
+	ret = create_shmem(i915, &obj->base, size);
+	if (ret)
+		goto fail;
+
+	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
+	if (IS_I965GM(i915) || IS_I965G(i915)) {
+		/* 965gm cannot relocate objects above 4GiB. */
+		mask &= ~__GFP_HIGHMEM;
+		mask |= __GFP_DMA32;
+	}
+
+	mapping = obj->base.filp->f_mapping;
+	mapping_set_gfp_mask(mapping, mask);
+	GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
+
+	i915_gem_object_init(obj, &i915_gem_shmem_ops);
+
+	obj->write_domain = I915_GEM_DOMAIN_CPU;
+	obj->read_domains = I915_GEM_DOMAIN_CPU;
+
+	if (HAS_LLC(i915))
+		/* On some devices, we can have the GPU use the LLC (the CPU
+		 * cache) for about a 10% performance improvement
+		 * compared to uncached.  Graphics requests other than
+		 * display scanout are coherent with the CPU in
+		 * accessing this cache.  This means in this mode we
+		 * don't need to clflush on the CPU side, and on the
+		 * GPU side we only need to flush internal caches to
+		 * get data visible to the CPU.
+		 *
+		 * However, we maintain the display planes as UC, and so
+		 * need to rebind when first used as such.
+		 */
+		cache_level = I915_CACHE_LLC;
+	else
+		cache_level = I915_CACHE_NONE;
+
+	i915_gem_object_set_cache_coherency(obj, cache_level);
+
+	trace_i915_gem_object_create(obj);
+
+	return obj;
+
+fail:
+	i915_gem_object_free(obj);
+	return ERR_PTR(ret);
+}
+
+/* Allocate a new GEM object and fill it with the supplied data */
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
+				       const void *data, size_t size)
+{
+	struct drm_i915_gem_object *obj;
+	struct file *file;
+	size_t offset;
+	int err;
+
+	obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
+	if (IS_ERR(obj))
+		return obj;
+
+	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
+
+	file = obj->base.filp;
+	offset = 0;
+	do {
+		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
+		struct page *page;
+		void *pgdata, *vaddr;
+
+		err = pagecache_write_begin(file, file->f_mapping,
+					    offset, len, 0,
+					    &page, &pgdata);
+		if (err < 0)
+			goto fail;
+
+		vaddr = kmap(page);
+		memcpy(vaddr, data, len);
+		kunmap(page);
+
+		err = pagecache_write_end(file, file->f_mapping,
+					  offset, len, len,
+					  page, pgdata);
+		if (err < 0)
+			goto fail;
+
+		size -= len;
+		data += len;
+		offset += len;
+	} while (size);
+
+	return obj;
+
+fail:
+	i915_gem_object_put(obj);
+	return ERR_PTR(err);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
new file mode 100644
index 000000000000..3a926a8755c6
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -0,0 +1,535 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008-2015 Intel Corporation
+ */
+
+#include <linux/oom.h>
+#include <linux/sched/mm.h>
+#include <linux/shmem_fs.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/pci.h>
+#include <linux/dma-buf.h>
+#include <linux/vmalloc.h>
+#include <drm/i915_drm.h>
+
+#include "i915_trace.h"
+
+static bool shrinker_lock(struct drm_i915_private *i915,
+			  unsigned int flags,
+			  bool *unlock)
+{
+	struct mutex *m = &i915->drm.struct_mutex;
+
+	switch (mutex_trylock_recursive(m)) {
+	case MUTEX_TRYLOCK_RECURSIVE:
+		*unlock = false;
+		return true;
+
+	case MUTEX_TRYLOCK_FAILED:
+		*unlock = false;
+		if (flags & I915_SHRINK_ACTIVE &&
+		    mutex_lock_killable_nested(m, I915_MM_SHRINKER) == 0)
+			*unlock = true;
+		return *unlock;
+
+	case MUTEX_TRYLOCK_SUCCESS:
+		*unlock = true;
+		return true;
+	}
+
+	BUG();
+}
+
+static void shrinker_unlock(struct drm_i915_private *i915, bool unlock)
+{
+	if (!unlock)
+		return;
+
+	mutex_unlock(&i915->drm.struct_mutex);
+}
+
+static bool swap_available(void)
+{
+	return get_nr_swap_pages() > 0;
+}
+
+static bool can_release_pages(struct drm_i915_gem_object *obj)
+{
+	/* Consider only shrinkable ojects. */
+	if (!i915_gem_object_is_shrinkable(obj))
+		return false;
+
+	/* Only report true if by unbinding the object and putting its pages
+	 * we can actually make forward progress towards freeing physical
+	 * pages.
+	 *
+	 * If the pages are pinned for any other reason than being bound
+	 * to the GPU, simply unbinding from the GPU is not going to succeed
+	 * in releasing our pin count on the pages themselves.
+	 */
+	if (atomic_read(&obj->mm.pages_pin_count) > atomic_read(&obj->bind_count))
+		return false;
+
+	/* If any vma are "permanently" pinned, it will prevent us from
+	 * reclaiming the obj->mm.pages. We only allow scanout objects to claim
+	 * a permanent pin, along with a few others like the context objects.
+	 * To simplify the scan, and to avoid walking the list of vma under the
+	 * object, we just check the count of its permanently pinned.
+	 */
+	if (READ_ONCE(obj->pin_global))
+		return false;
+
+	/* We can only return physical pages to the system if we can either
+	 * discard the contents (because the user has marked them as being
+	 * purgeable) or if we can move their contents out to swap.
+	 */
+	return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
+}
+
+static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
+{
+	if (i915_gem_object_unbind(obj) == 0)
+		__i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
+	return !i915_gem_object_has_pages(obj);
+}
+
+static void try_to_writeback(struct drm_i915_gem_object *obj,
+			     unsigned int flags)
+{
+	switch (obj->mm.madv) {
+	case I915_MADV_DONTNEED:
+		i915_gem_object_truncate(obj);
+	case __I915_MADV_PURGED:
+		return;
+	}
+
+	if (flags & I915_SHRINK_WRITEBACK)
+		i915_gem_object_writeback(obj);
+}
+
+/**
+ * i915_gem_shrink - Shrink buffer object caches
+ * @i915: i915 device
+ * @target: amount of memory to make available, in pages
+ * @nr_scanned: optional output for number of pages scanned (incremental)
+ * @shrink: control flags for selecting cache types
+ *
+ * This function is the main interface to the shrinker. It will try to release
+ * up to @target pages of main memory backing storage from buffer objects.
+ * Selection of the specific caches can be done with @flags. This is e.g. useful
+ * when purgeable objects should be removed from caches preferentially.
+ *
+ * Note that it's not guaranteed that released amount is actually available as
+ * free system memory - the pages might still be in-used to due to other reasons
+ * (like cpu mmaps) or the mm core has reused them before we could grab them.
+ * Therefore code that needs to explicitly shrink buffer objects caches (e.g. to
+ * avoid deadlocks in memory reclaim) must fall back to i915_gem_shrink_all().
+ *
+ * Also note that any kind of pinning (both per-vma address space pins and
+ * backing storage pins at the buffer object level) result in the shrinker code
+ * having to skip the object.
+ *
+ * Returns:
+ * The number of pages of backing storage actually released.
+ */
+unsigned long
+i915_gem_shrink(struct drm_i915_private *i915,
+		unsigned long target,
+		unsigned long *nr_scanned,
+		unsigned int shrink)
+{
+	const struct {
+		struct list_head *list;
+		unsigned int bit;
+	} phases[] = {
+		{ &i915->mm.purge_list, ~0u },
+		{
+			&i915->mm.shrink_list,
+			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND
+		},
+		{ NULL, 0 },
+	}, *phase;
+	intel_wakeref_t wakeref = 0;
+	unsigned long count = 0;
+	unsigned long scanned = 0;
+	bool unlock;
+
+	if (!shrinker_lock(i915, shrink, &unlock))
+		return 0;
+
+	/*
+	 * When shrinking the active list, we should also consider active
+	 * contexts. Active contexts are pinned until they are retired, and
+	 * so can not be simply unbound to retire and unpin their pages. To
+	 * shrink the contexts, we must wait until the gpu is idle and
+	 * completed its switch to the kernel context. In short, we do
+	 * not have a good mechanism for idling a specific context.
+	 */
+
+	trace_i915_gem_shrink(i915, target, shrink);
+	i915_retire_requests(i915);
+
+	/*
+	 * Unbinding of objects will require HW access; Let us not wake the
+	 * device just to recover a little memory. If absolutely necessary,
+	 * we will force the wake during oom-notifier.
+	 */
+	if (shrink & I915_SHRINK_BOUND) {
+		wakeref = intel_runtime_pm_get_if_in_use(&i915->runtime_pm);
+		if (!wakeref)
+			shrink &= ~I915_SHRINK_BOUND;
+	}
+
+	/*
+	 * As we may completely rewrite the (un)bound list whilst unbinding
+	 * (due to retiring requests) we have to strictly process only
+	 * one element of the list at the time, and recheck the list
+	 * on every iteration.
+	 *
+	 * In particular, we must hold a reference whilst removing the
+	 * object as we may end up waiting for and/or retiring the objects.
+	 * This might release the final reference (held by the active list)
+	 * and result in the object being freed from under us. This is
+	 * similar to the precautions the eviction code must take whilst
+	 * removing objects.
+	 *
+	 * Also note that although these lists do not hold a reference to
+	 * the object we can safely grab one here: The final object
+	 * unreferencing and the bound_list are both protected by the
+	 * dev->struct_mutex and so we won't ever be able to observe an
+	 * object on the bound_list with a reference count equals 0.
+	 */
+	for (phase = phases; phase->list; phase++) {
+		struct list_head still_in_list;
+		struct drm_i915_gem_object *obj;
+		unsigned long flags;
+
+		if ((shrink & phase->bit) == 0)
+			continue;
+
+		INIT_LIST_HEAD(&still_in_list);
+
+		/*
+		 * We serialize our access to unreferenced objects through
+		 * the use of the struct_mutex. While the objects are not
+		 * yet freed (due to RCU then a workqueue) we still want
+		 * to be able to shrink their pages, so they remain on
+		 * the unbound/bound list until actually freed.
+		 */
+		spin_lock_irqsave(&i915->mm.obj_lock, flags);
+		while (count < target &&
+		       (obj = list_first_entry_or_null(phase->list,
+						       typeof(*obj),
+						       mm.link))) {
+			list_move_tail(&obj->mm.link, &still_in_list);
+
+			if (shrink & I915_SHRINK_VMAPS &&
+			    !is_vmalloc_addr(obj->mm.mapping))
+				continue;
+
+			if (!(shrink & I915_SHRINK_ACTIVE) &&
+			    (i915_gem_object_is_active(obj) ||
+			     i915_gem_object_is_framebuffer(obj)))
+				continue;
+
+			if (!(shrink & I915_SHRINK_BOUND) &&
+			    atomic_read(&obj->bind_count))
+				continue;
+
+			if (!can_release_pages(obj))
+				continue;
+
+			if (!kref_get_unless_zero(&obj->base.refcount))
+				continue;
+
+			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+
+			if (unsafe_drop_pages(obj)) {
+				/* May arrive from get_pages on another bo */
+				mutex_lock_nested(&obj->mm.lock,
+						  I915_MM_SHRINKER);
+				if (!i915_gem_object_has_pages(obj)) {
+					try_to_writeback(obj, shrink);
+					count += obj->base.size >> PAGE_SHIFT;
+				}
+				mutex_unlock(&obj->mm.lock);
+			}
+
+			scanned += obj->base.size >> PAGE_SHIFT;
+			i915_gem_object_put(obj);
+
+			spin_lock_irqsave(&i915->mm.obj_lock, flags);
+		}
+		list_splice_tail(&still_in_list, phase->list);
+		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+	}
+
+	if (shrink & I915_SHRINK_BOUND)
+		intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+
+	i915_retire_requests(i915);
+
+	shrinker_unlock(i915, unlock);
+
+	if (nr_scanned)
+		*nr_scanned += scanned;
+	return count;
+}
+
+/**
+ * i915_gem_shrink_all - Shrink buffer object caches completely
+ * @i915: i915 device
+ *
+ * This is a simple wraper around i915_gem_shrink() to aggressively shrink all
+ * caches completely. It also first waits for and retires all outstanding
+ * requests to also be able to release backing storage for active objects.
+ *
+ * This should only be used in code to intentionally quiescent the gpu or as a
+ * last-ditch effort when memory seems to have run out.
+ *
+ * Returns:
+ * The number of pages of backing storage actually released.
+ */
+unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
+{
+	intel_wakeref_t wakeref;
+	unsigned long freed = 0;
+
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+		freed = i915_gem_shrink(i915, -1UL, NULL,
+					I915_SHRINK_BOUND |
+					I915_SHRINK_UNBOUND |
+					I915_SHRINK_ACTIVE);
+	}
+
+	return freed;
+}
+
+static unsigned long
+i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
+{
+	struct drm_i915_private *i915 =
+		container_of(shrinker, struct drm_i915_private, mm.shrinker);
+	unsigned long num_objects;
+	unsigned long count;
+
+	count = READ_ONCE(i915->mm.shrink_memory) >> PAGE_SHIFT;
+	num_objects = READ_ONCE(i915->mm.shrink_count);
+
+	/*
+	 * Update our preferred vmscan batch size for the next pass.
+	 * Our rough guess for an effective batch size is roughly 2
+	 * available GEM objects worth of pages. That is we don't want
+	 * the shrinker to fire, until it is worth the cost of freeing an
+	 * entire GEM object.
+	 */
+	if (num_objects) {
+		unsigned long avg = 2 * count / num_objects;
+
+		i915->mm.shrinker.batch =
+			max((i915->mm.shrinker.batch + avg) >> 1,
+			    128ul /* default SHRINK_BATCH */);
+	}
+
+	return count;
+}
+
+static unsigned long
+i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
+{
+	struct drm_i915_private *i915 =
+		container_of(shrinker, struct drm_i915_private, mm.shrinker);
+	unsigned long freed;
+	bool unlock;
+
+	sc->nr_scanned = 0;
+
+	if (!shrinker_lock(i915, 0, &unlock))
+		return SHRINK_STOP;
+
+	freed = i915_gem_shrink(i915,
+				sc->nr_to_scan,
+				&sc->nr_scanned,
+				I915_SHRINK_BOUND |
+				I915_SHRINK_UNBOUND |
+				I915_SHRINK_WRITEBACK);
+	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
+		intel_wakeref_t wakeref;
+
+		with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+			freed += i915_gem_shrink(i915,
+						 sc->nr_to_scan - sc->nr_scanned,
+						 &sc->nr_scanned,
+						 I915_SHRINK_ACTIVE |
+						 I915_SHRINK_BOUND |
+						 I915_SHRINK_UNBOUND |
+						 I915_SHRINK_WRITEBACK);
+		}
+	}
+
+	shrinker_unlock(i915, unlock);
+
+	return sc->nr_scanned ? freed : SHRINK_STOP;
+}
+
+static int
+i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
+{
+	struct drm_i915_private *i915 =
+		container_of(nb, struct drm_i915_private, mm.oom_notifier);
+	struct drm_i915_gem_object *obj;
+	unsigned long unevictable, available, freed_pages;
+	intel_wakeref_t wakeref;
+	unsigned long flags;
+
+	freed_pages = 0;
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
+					       I915_SHRINK_BOUND |
+					       I915_SHRINK_UNBOUND |
+					       I915_SHRINK_WRITEBACK);
+
+	/* Because we may be allocating inside our own driver, we cannot
+	 * assert that there are no objects with pinned pages that are not
+	 * being pointed to by hardware.
+	 */
+	available = unevictable = 0;
+	spin_lock_irqsave(&i915->mm.obj_lock, flags);
+	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
+		if (!can_release_pages(obj))
+			unevictable += obj->base.size >> PAGE_SHIFT;
+		else
+			available += obj->base.size >> PAGE_SHIFT;
+	}
+	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+
+	if (freed_pages || available)
+		pr_info("Purging GPU memory, %lu pages freed, "
+			"%lu pages still pinned, %lu pages left available.\n",
+			freed_pages, unevictable, available);
+
+	*(unsigned long *)ptr += freed_pages;
+	return NOTIFY_DONE;
+}
+
+static int
+i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
+{
+	struct drm_i915_private *i915 =
+		container_of(nb, struct drm_i915_private, mm.vmap_notifier);
+	struct i915_vma *vma, *next;
+	unsigned long freed_pages = 0;
+	intel_wakeref_t wakeref;
+	bool unlock;
+
+	if (!shrinker_lock(i915, 0, &unlock))
+		return NOTIFY_DONE;
+
+	/* Force everything onto the inactive lists */
+	if (i915_gem_wait_for_idle(i915,
+				   I915_WAIT_LOCKED,
+				   MAX_SCHEDULE_TIMEOUT))
+		goto out;
+
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
+					       I915_SHRINK_BOUND |
+					       I915_SHRINK_UNBOUND |
+					       I915_SHRINK_VMAPS);
+
+	/* We also want to clear any cached iomaps as they wrap vmap */
+	mutex_lock(&i915->ggtt.vm.mutex);
+	list_for_each_entry_safe(vma, next,
+				 &i915->ggtt.vm.bound_list, vm_link) {
+		unsigned long count = vma->node.size >> PAGE_SHIFT;
+
+		if (!vma->iomap || i915_vma_is_active(vma))
+			continue;
+
+		mutex_unlock(&i915->ggtt.vm.mutex);
+		if (i915_vma_unbind(vma) == 0)
+			freed_pages += count;
+		mutex_lock(&i915->ggtt.vm.mutex);
+	}
+	mutex_unlock(&i915->ggtt.vm.mutex);
+
+out:
+	shrinker_unlock(i915, unlock);
+
+	*(unsigned long *)ptr += freed_pages;
+	return NOTIFY_DONE;
+}
+
+/**
+ * i915_gem_shrinker_register - Register the i915 shrinker
+ * @i915: i915 device
+ *
+ * This function registers and sets up the i915 shrinker and OOM handler.
+ */
+void i915_gem_shrinker_register(struct drm_i915_private *i915)
+{
+	i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
+	i915->mm.shrinker.count_objects = i915_gem_shrinker_count;
+	i915->mm.shrinker.seeks = DEFAULT_SEEKS;
+	i915->mm.shrinker.batch = 4096;
+	WARN_ON(register_shrinker(&i915->mm.shrinker));
+
+	i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
+	WARN_ON(register_oom_notifier(&i915->mm.oom_notifier));
+
+	i915->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap;
+	WARN_ON(register_vmap_purge_notifier(&i915->mm.vmap_notifier));
+}
+
+/**
+ * i915_gem_shrinker_unregister - Unregisters the i915 shrinker
+ * @i915: i915 device
+ *
+ * This function unregisters the i915 shrinker and OOM handler.
+ */
+void i915_gem_shrinker_unregister(struct drm_i915_private *i915)
+{
+	WARN_ON(unregister_vmap_purge_notifier(&i915->mm.vmap_notifier));
+	WARN_ON(unregister_oom_notifier(&i915->mm.oom_notifier));
+	unregister_shrinker(&i915->mm.shrinker);
+}
+
+void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
+				    struct mutex *mutex)
+{
+	bool unlock = false;
+
+	if (!IS_ENABLED(CONFIG_LOCKDEP))
+		return;
+
+	if (!lockdep_is_held_type(&i915->drm.struct_mutex, -1)) {
+		mutex_acquire(&i915->drm.struct_mutex.dep_map,
+			      I915_MM_NORMAL, 0, _RET_IP_);
+		unlock = true;
+	}
+
+	fs_reclaim_acquire(GFP_KERNEL);
+
+	/*
+	 * As we invariably rely on the struct_mutex within the shrinker,
+	 * but have a complicated recursion dance, taint all the mutexes used
+	 * within the shrinker with the struct_mutex. For completeness, we
+	 * taint with all subclass of struct_mutex, even though we should
+	 * only need tainting by I915_MM_NORMAL to catch possible ABBA
+	 * deadlocks from using struct_mutex inside @mutex.
+	 */
+	mutex_acquire(&i915->drm.struct_mutex.dep_map,
+		      I915_MM_SHRINKER, 0, _RET_IP_);
+
+	mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
+	mutex_release(&mutex->dep_map, 0, _RET_IP_);
+
+	mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
+
+	fs_reclaim_release(GFP_KERNEL);
+
+	if (unlock)
+		mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
new file mode 100644
index 000000000000..de1fab2058ec
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -0,0 +1,702 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008-2012 Intel Corporation
+ */
+
+#include <linux/errno.h>
+#include <linux/mutex.h>
+
+#include <drm/drm_mm.h>
+#include <drm/i915_drm.h>
+
+#include "i915_drv.h"
+
+/*
+ * The BIOS typically reserves some of the system's memory for the exclusive
+ * use of the integrated graphics. This memory is no longer available for
+ * use by the OS and so the user finds that his system has less memory
+ * available than he put in. We refer to this memory as stolen.
+ *
+ * The BIOS will allocate its framebuffer from the stolen memory. Our
+ * goal is try to reuse that object for our own fbcon which must always
+ * be available for panics. Anything else we can reuse the stolen memory
+ * for is a boon.
+ */
+
+int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
+					 struct drm_mm_node *node, u64 size,
+					 unsigned alignment, u64 start, u64 end)
+{
+	int ret;
+
+	if (!drm_mm_initialized(&dev_priv->mm.stolen))
+		return -ENODEV;
+
+	/* WaSkipStolenMemoryFirstPage:bdw+ */
+	if (INTEL_GEN(dev_priv) >= 8 && start < 4096)
+		start = 4096;
+
+	mutex_lock(&dev_priv->mm.stolen_lock);
+	ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node,
+					  size, alignment, 0,
+					  start, end, DRM_MM_INSERT_BEST);
+	mutex_unlock(&dev_priv->mm.stolen_lock);
+
+	return ret;
+}
+
+int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv,
+				struct drm_mm_node *node, u64 size,
+				unsigned alignment)
+{
+	return i915_gem_stolen_insert_node_in_range(dev_priv, node, size,
+						    alignment, 0, U64_MAX);
+}
+
+void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
+				 struct drm_mm_node *node)
+{
+	mutex_lock(&dev_priv->mm.stolen_lock);
+	drm_mm_remove_node(node);
+	mutex_unlock(&dev_priv->mm.stolen_lock);
+}
+
+static int i915_adjust_stolen(struct drm_i915_private *dev_priv,
+			      struct resource *dsm)
+{
+	struct i915_ggtt *ggtt = &dev_priv->ggtt;
+	struct resource *r;
+
+	if (dsm->start == 0 || dsm->end <= dsm->start)
+		return -EINVAL;
+
+	/*
+	 * TODO: We have yet too encounter the case where the GTT wasn't at the
+	 * end of stolen. With that assumption we could simplify this.
+	 */
+
+	/* Make sure we don't clobber the GTT if it's within stolen memory */
+	if (INTEL_GEN(dev_priv) <= 4 &&
+	    !IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) {
+		struct resource stolen[2] = {*dsm, *dsm};
+		struct resource ggtt_res;
+		resource_size_t ggtt_start;
+
+		ggtt_start = I915_READ(PGTBL_CTL);
+		if (IS_GEN(dev_priv, 4))
+			ggtt_start = (ggtt_start & PGTBL_ADDRESS_LO_MASK) |
+				     (ggtt_start & PGTBL_ADDRESS_HI_MASK) << 28;
+		else
+			ggtt_start &= PGTBL_ADDRESS_LO_MASK;
+
+		ggtt_res =
+			(struct resource) DEFINE_RES_MEM(ggtt_start,
+							 ggtt_total_entries(ggtt) * 4);
+
+		if (ggtt_res.start >= stolen[0].start && ggtt_res.start < stolen[0].end)
+			stolen[0].end = ggtt_res.start;
+		if (ggtt_res.end > stolen[1].start && ggtt_res.end <= stolen[1].end)
+			stolen[1].start = ggtt_res.end;
+
+		/* Pick the larger of the two chunks */
+		if (resource_size(&stolen[0]) > resource_size(&stolen[1]))
+			*dsm = stolen[0];
+		else
+			*dsm = stolen[1];
+
+		if (stolen[0].start != stolen[1].start ||
+		    stolen[0].end != stolen[1].end) {
+			DRM_DEBUG_DRIVER("GTT within stolen memory at %pR\n", &ggtt_res);
+			DRM_DEBUG_DRIVER("Stolen memory adjusted to %pR\n", dsm);
+		}
+	}
+
+	/*
+	 * Verify that nothing else uses this physical address. Stolen
+	 * memory should be reserved by the BIOS and hidden from the
+	 * kernel. So if the region is already marked as busy, something
+	 * is seriously wrong.
+	 */
+	r = devm_request_mem_region(dev_priv->drm.dev, dsm->start,
+				    resource_size(dsm),
+				    "Graphics Stolen Memory");
+	if (r == NULL) {
+		/*
+		 * One more attempt but this time requesting region from
+		 * start + 1, as we have seen that this resolves the region
+		 * conflict with the PCI Bus.
+		 * This is a BIOS w/a: Some BIOS wrap stolen in the root
+		 * PCI bus, but have an off-by-one error. Hence retry the
+		 * reservation starting from 1 instead of 0.
+		 * There's also BIOS with off-by-one on the other end.
+		 */
+		r = devm_request_mem_region(dev_priv->drm.dev, dsm->start + 1,
+					    resource_size(dsm) - 2,
+					    "Graphics Stolen Memory");
+		/*
+		 * GEN3 firmware likes to smash pci bridges into the stolen
+		 * range. Apparently this works.
+		 */
+		if (r == NULL && !IS_GEN(dev_priv, 3)) {
+			DRM_ERROR("conflict detected with stolen region: %pR\n",
+				  dsm);
+
+			return -EBUSY;
+		}
+	}
+
+	return 0;
+}
+
+void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv)
+{
+	if (!drm_mm_initialized(&dev_priv->mm.stolen))
+		return;
+
+	drm_mm_takedown(&dev_priv->mm.stolen);
+}
+
+static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv,
+				    resource_size_t *base,
+				    resource_size_t *size)
+{
+	u32 reg_val = I915_READ(IS_GM45(dev_priv) ?
+				CTG_STOLEN_RESERVED :
+				ELK_STOLEN_RESERVED);
+	resource_size_t stolen_top = dev_priv->dsm.end + 1;
+
+	DRM_DEBUG_DRIVER("%s_STOLEN_RESERVED = %08x\n",
+			 IS_GM45(dev_priv) ? "CTG" : "ELK", reg_val);
+
+	if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0)
+		return;
+
+	/*
+	 * Whether ILK really reuses the ELK register for this is unclear.
+	 * Let's see if we catch anyone with this supposedly enabled on ILK.
+	 */
+	WARN(IS_GEN(dev_priv, 5), "ILK stolen reserved found? 0x%08x\n",
+	     reg_val);
+
+	if (!(reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK))
+		return;
+
+	*base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16;
+	WARN_ON((reg_val & G4X_STOLEN_RESERVED_ADDR1_MASK) < *base);
+
+	*size = stolen_top - *base;
+}
+
+static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv,
+				     resource_size_t *base,
+				     resource_size_t *size)
+{
+	u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+
+	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
+
+	if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE))
+		return;
+
+	*base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK;
+
+	switch (reg_val & GEN6_STOLEN_RESERVED_SIZE_MASK) {
+	case GEN6_STOLEN_RESERVED_1M:
+		*size = 1024 * 1024;
+		break;
+	case GEN6_STOLEN_RESERVED_512K:
+		*size = 512 * 1024;
+		break;
+	case GEN6_STOLEN_RESERVED_256K:
+		*size = 256 * 1024;
+		break;
+	case GEN6_STOLEN_RESERVED_128K:
+		*size = 128 * 1024;
+		break;
+	default:
+		*size = 1024 * 1024;
+		MISSING_CASE(reg_val & GEN6_STOLEN_RESERVED_SIZE_MASK);
+	}
+}
+
+static void vlv_get_stolen_reserved(struct drm_i915_private *dev_priv,
+				    resource_size_t *base,
+				    resource_size_t *size)
+{
+	u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+	resource_size_t stolen_top = dev_priv->dsm.end + 1;
+
+	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
+
+	if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE))
+		return;
+
+	switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) {
+	default:
+		MISSING_CASE(reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK);
+		/* fall through */
+	case GEN7_STOLEN_RESERVED_1M:
+		*size = 1024 * 1024;
+		break;
+	}
+
+	/*
+	 * On vlv, the ADDR_MASK portion is left as 0 and HW deduces the
+	 * reserved location as (top - size).
+	 */
+	*base = stolen_top - *size;
+}
+
+static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv,
+				     resource_size_t *base,
+				     resource_size_t *size)
+{
+	u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+
+	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
+
+	if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE))
+		return;
+
+	*base = reg_val & GEN7_STOLEN_RESERVED_ADDR_MASK;
+
+	switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) {
+	case GEN7_STOLEN_RESERVED_1M:
+		*size = 1024 * 1024;
+		break;
+	case GEN7_STOLEN_RESERVED_256K:
+		*size = 256 * 1024;
+		break;
+	default:
+		*size = 1024 * 1024;
+		MISSING_CASE(reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK);
+	}
+}
+
+static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv,
+				    resource_size_t *base,
+				    resource_size_t *size)
+{
+	u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+
+	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
+
+	if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE))
+		return;
+
+	*base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK;
+
+	switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
+	case GEN8_STOLEN_RESERVED_1M:
+		*size = 1024 * 1024;
+		break;
+	case GEN8_STOLEN_RESERVED_2M:
+		*size = 2 * 1024 * 1024;
+		break;
+	case GEN8_STOLEN_RESERVED_4M:
+		*size = 4 * 1024 * 1024;
+		break;
+	case GEN8_STOLEN_RESERVED_8M:
+		*size = 8 * 1024 * 1024;
+		break;
+	default:
+		*size = 8 * 1024 * 1024;
+		MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
+	}
+}
+
+static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv,
+				    resource_size_t *base,
+				    resource_size_t *size)
+{
+	u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+	resource_size_t stolen_top = dev_priv->dsm.end + 1;
+
+	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
+
+	if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE))
+		return;
+
+	if (!(reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK))
+		return;
+
+	*base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK;
+	*size = stolen_top - *base;
+}
+
+static void icl_get_stolen_reserved(struct drm_i915_private *i915,
+				    resource_size_t *base,
+				    resource_size_t *size)
+{
+	u64 reg_val = intel_uncore_read64(&i915->uncore, GEN6_STOLEN_RESERVED);
+
+	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
+
+	*base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
+
+	switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
+	case GEN8_STOLEN_RESERVED_1M:
+		*size = 1024 * 1024;
+		break;
+	case GEN8_STOLEN_RESERVED_2M:
+		*size = 2 * 1024 * 1024;
+		break;
+	case GEN8_STOLEN_RESERVED_4M:
+		*size = 4 * 1024 * 1024;
+		break;
+	case GEN8_STOLEN_RESERVED_8M:
+		*size = 8 * 1024 * 1024;
+		break;
+	default:
+		*size = 8 * 1024 * 1024;
+		MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
+	}
+}
+
+int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
+{
+	resource_size_t reserved_base, stolen_top;
+	resource_size_t reserved_total, reserved_size;
+
+	mutex_init(&dev_priv->mm.stolen_lock);
+
+	if (intel_vgpu_active(dev_priv)) {
+		DRM_INFO("iGVT-g active, disabling use of stolen memory\n");
+		return 0;
+	}
+
+	if (intel_vtd_active() && INTEL_GEN(dev_priv) < 8) {
+		DRM_INFO("DMAR active, disabling use of stolen memory\n");
+		return 0;
+	}
+
+	if (resource_size(&intel_graphics_stolen_res) == 0)
+		return 0;
+
+	dev_priv->dsm = intel_graphics_stolen_res;
+
+	if (i915_adjust_stolen(dev_priv, &dev_priv->dsm))
+		return 0;
+
+	GEM_BUG_ON(dev_priv->dsm.start == 0);
+	GEM_BUG_ON(dev_priv->dsm.end <= dev_priv->dsm.start);
+
+	stolen_top = dev_priv->dsm.end + 1;
+	reserved_base = stolen_top;
+	reserved_size = 0;
+
+	switch (INTEL_GEN(dev_priv)) {
+	case 2:
+	case 3:
+		break;
+	case 4:
+		if (!IS_G4X(dev_priv))
+			break;
+		/* fall through */
+	case 5:
+		g4x_get_stolen_reserved(dev_priv,
+					&reserved_base, &reserved_size);
+		break;
+	case 6:
+		gen6_get_stolen_reserved(dev_priv,
+					 &reserved_base, &reserved_size);
+		break;
+	case 7:
+		if (IS_VALLEYVIEW(dev_priv))
+			vlv_get_stolen_reserved(dev_priv,
+						&reserved_base, &reserved_size);
+		else
+			gen7_get_stolen_reserved(dev_priv,
+						 &reserved_base, &reserved_size);
+		break;
+	case 8:
+	case 9:
+	case 10:
+		if (IS_LP(dev_priv))
+			chv_get_stolen_reserved(dev_priv,
+						&reserved_base, &reserved_size);
+		else
+			bdw_get_stolen_reserved(dev_priv,
+						&reserved_base, &reserved_size);
+		break;
+	case 11:
+	default:
+		icl_get_stolen_reserved(dev_priv, &reserved_base,
+					&reserved_size);
+		break;
+	}
+
+	/*
+	 * Our expectation is that the reserved space is at the top of the
+	 * stolen region and *never* at the bottom. If we see !reserved_base,
+	 * it likely means we failed to read the registers correctly.
+	 */
+	if (!reserved_base) {
+		DRM_ERROR("inconsistent reservation %pa + %pa; ignoring\n",
+			  &reserved_base, &reserved_size);
+		reserved_base = stolen_top;
+		reserved_size = 0;
+	}
+
+	dev_priv->dsm_reserved =
+		(struct resource) DEFINE_RES_MEM(reserved_base, reserved_size);
+
+	if (!resource_contains(&dev_priv->dsm, &dev_priv->dsm_reserved)) {
+		DRM_ERROR("Stolen reserved area %pR outside stolen memory %pR\n",
+			  &dev_priv->dsm_reserved, &dev_priv->dsm);
+		return 0;
+	}
+
+	/* It is possible for the reserved area to end before the end of stolen
+	 * memory, so just consider the start. */
+	reserved_total = stolen_top - reserved_base;
+
+	DRM_DEBUG_DRIVER("Memory reserved for graphics device: %lluK, usable: %lluK\n",
+			 (u64)resource_size(&dev_priv->dsm) >> 10,
+			 ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10);
+
+	dev_priv->stolen_usable_size =
+		resource_size(&dev_priv->dsm) - reserved_total;
+
+	/* Basic memrange allocator for stolen space. */
+	drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->stolen_usable_size);
+
+	return 0;
+}
+
+static struct sg_table *
+i915_pages_create_for_stolen(struct drm_device *dev,
+			     resource_size_t offset, resource_size_t size)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct sg_table *st;
+	struct scatterlist *sg;
+
+	GEM_BUG_ON(range_overflows(offset, size, resource_size(&dev_priv->dsm)));
+
+	/* We hide that we have no struct page backing our stolen object
+	 * by wrapping the contiguous physical allocation with a fake
+	 * dma mapping in a single scatterlist.
+	 */
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (st == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
+		kfree(st);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	sg = st->sgl;
+	sg->offset = 0;
+	sg->length = size;
+
+	sg_dma_address(sg) = (dma_addr_t)dev_priv->dsm.start + offset;
+	sg_dma_len(sg) = size;
+
+	return st;
+}
+
+static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
+{
+	struct sg_table *pages =
+		i915_pages_create_for_stolen(obj->base.dev,
+					     obj->stolen->start,
+					     obj->stolen->size);
+	if (IS_ERR(pages))
+		return PTR_ERR(pages);
+
+	__i915_gem_object_set_pages(obj, pages, obj->stolen->size);
+
+	return 0;
+}
+
+static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj,
+					     struct sg_table *pages)
+{
+	/* Should only be called from i915_gem_object_release_stolen() */
+	sg_free_table(pages);
+	kfree(pages);
+}
+
+static void
+i915_gem_object_release_stolen(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	struct drm_mm_node *stolen = fetch_and_zero(&obj->stolen);
+
+	GEM_BUG_ON(!stolen);
+
+	__i915_gem_object_unpin_pages(obj);
+
+	i915_gem_stolen_remove_node(dev_priv, stolen);
+	kfree(stolen);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = {
+	.get_pages = i915_gem_object_get_pages_stolen,
+	.put_pages = i915_gem_object_put_pages_stolen,
+	.release = i915_gem_object_release_stolen,
+};
+
+static struct drm_i915_gem_object *
+_i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
+			       struct drm_mm_node *stolen)
+{
+	struct drm_i915_gem_object *obj;
+	unsigned int cache_level;
+
+	obj = i915_gem_object_alloc();
+	if (obj == NULL)
+		return NULL;
+
+	drm_gem_private_object_init(&dev_priv->drm, &obj->base, stolen->size);
+	i915_gem_object_init(obj, &i915_gem_object_stolen_ops);
+
+	obj->stolen = stolen;
+	obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
+	cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE;
+	i915_gem_object_set_cache_coherency(obj, cache_level);
+
+	if (i915_gem_object_pin_pages(obj))
+		goto cleanup;
+
+	return obj;
+
+cleanup:
+	i915_gem_object_free(obj);
+	return NULL;
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
+			      resource_size_t size)
+{
+	struct drm_i915_gem_object *obj;
+	struct drm_mm_node *stolen;
+	int ret;
+
+	if (!drm_mm_initialized(&dev_priv->mm.stolen))
+		return NULL;
+
+	if (size == 0)
+		return NULL;
+
+	stolen = kzalloc(sizeof(*stolen), GFP_KERNEL);
+	if (!stolen)
+		return NULL;
+
+	ret = i915_gem_stolen_insert_node(dev_priv, stolen, size, 4096);
+	if (ret) {
+		kfree(stolen);
+		return NULL;
+	}
+
+	obj = _i915_gem_object_create_stolen(dev_priv, stolen);
+	if (obj)
+		return obj;
+
+	i915_gem_stolen_remove_node(dev_priv, stolen);
+	kfree(stolen);
+	return NULL;
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv,
+					       resource_size_t stolen_offset,
+					       resource_size_t gtt_offset,
+					       resource_size_t size)
+{
+	struct i915_ggtt *ggtt = &dev_priv->ggtt;
+	struct drm_i915_gem_object *obj;
+	struct drm_mm_node *stolen;
+	struct i915_vma *vma;
+	int ret;
+
+	if (!drm_mm_initialized(&dev_priv->mm.stolen))
+		return NULL;
+
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+	DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n",
+			 &stolen_offset, &gtt_offset, &size);
+
+	/* KISS and expect everything to be page-aligned */
+	if (WARN_ON(size == 0) ||
+	    WARN_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)) ||
+	    WARN_ON(!IS_ALIGNED(stolen_offset, I915_GTT_MIN_ALIGNMENT)))
+		return NULL;
+
+	stolen = kzalloc(sizeof(*stolen), GFP_KERNEL);
+	if (!stolen)
+		return NULL;
+
+	stolen->start = stolen_offset;
+	stolen->size = size;
+	mutex_lock(&dev_priv->mm.stolen_lock);
+	ret = drm_mm_reserve_node(&dev_priv->mm.stolen, stolen);
+	mutex_unlock(&dev_priv->mm.stolen_lock);
+	if (ret) {
+		DRM_DEBUG_DRIVER("failed to allocate stolen space\n");
+		kfree(stolen);
+		return NULL;
+	}
+
+	obj = _i915_gem_object_create_stolen(dev_priv, stolen);
+	if (obj == NULL) {
+		DRM_DEBUG_DRIVER("failed to allocate stolen object\n");
+		i915_gem_stolen_remove_node(dev_priv, stolen);
+		kfree(stolen);
+		return NULL;
+	}
+
+	/* Some objects just need physical mem from stolen space */
+	if (gtt_offset == I915_GTT_OFFSET_NONE)
+		return obj;
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		goto err;
+
+	vma = i915_vma_instance(obj, &ggtt->vm, NULL);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_pages;
+	}
+
+	/* To simplify the initialisation sequence between KMS and GTT,
+	 * we allow construction of the stolen object prior to
+	 * setting up the GTT space. The actual reservation will occur
+	 * later.
+	 */
+	ret = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
+				   size, gtt_offset, obj->cache_level,
+				   0);
+	if (ret) {
+		DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n");
+		goto err_pages;
+	}
+
+	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+
+	vma->pages = obj->mm.pages;
+	vma->flags |= I915_VMA_GLOBAL_BIND;
+	__i915_vma_set_map_and_fenceable(vma);
+
+	mutex_lock(&ggtt->vm.mutex);
+	list_move_tail(&vma->vm_link, &ggtt->vm.bound_list);
+	mutex_unlock(&ggtt->vm.mutex);
+
+	GEM_BUG_ON(i915_gem_object_is_shrinkable(obj));
+	atomic_inc(&obj->bind_count);
+
+	return obj;
+
+err_pages:
+	i915_gem_object_unpin_pages(obj);
+err:
+	i915_gem_object_put(obj);
+	return NULL;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
new file mode 100644
index 000000000000..adb3074d9ce2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
@@ -0,0 +1,73 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/jiffies.h>
+
+#include <drm/drm_file.h>
+
+#include "i915_drv.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+/*
+ * 20ms is a fairly arbitrary limit (greater than the average frame time)
+ * chosen to prevent the CPU getting more than a frame ahead of the GPU
+ * (when using lax throttling for the frontbuffer). We also use it to
+ * offer free GPU waitboosts for severely congested workloads.
+ */
+#define DRM_I915_THROTTLE_JIFFIES msecs_to_jiffies(20)
+
+/*
+ * Throttle our rendering by waiting until the ring has completed our requests
+ * emitted over 20 msec ago.
+ *
+ * Note that if we were to use the current jiffies each time around the loop,
+ * we wouldn't escape the function with any frames outstanding if the time to
+ * render a frame was over 20ms.
+ *
+ * This should get us reasonable parallelism between CPU and GPU but also
+ * relatively low latency when blocking on a particular request to finish.
+ */
+int
+i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
+	struct i915_request *request, *target = NULL;
+	long ret;
+
+	/* ABI: return -EIO if already wedged */
+	ret = i915_terminally_wedged(to_i915(dev));
+	if (ret)
+		return ret;
+
+	spin_lock(&file_priv->mm.lock);
+	list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
+		if (time_after_eq(request->emitted_jiffies, recent_enough))
+			break;
+
+		if (target) {
+			list_del(&target->client_link);
+			target->file_priv = NULL;
+		}
+
+		target = request;
+	}
+	if (target)
+		i915_request_get(target);
+	spin_unlock(&file_priv->mm.lock);
+
+	if (!target)
+		return 0;
+
+	ret = i915_request_wait(target,
+				I915_WAIT_INTERRUPTIBLE,
+				MAX_SCHEDULE_TIMEOUT);
+	i915_request_put(target);
+
+	return ret < 0 ? ret : 0;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
new file mode 100644
index 000000000000..ca0c2f451742
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -0,0 +1,440 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008 Intel Corporation
+ */
+
+#include <linux/string.h>
+#include <linux/bitops.h>
+#include <drm/i915_drm.h>
+
+#include "i915_drv.h"
+#include "i915_gem.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+/**
+ * DOC: buffer object tiling
+ *
+ * i915_gem_set_tiling_ioctl() and i915_gem_get_tiling_ioctl() is the userspace
+ * interface to declare fence register requirements.
+ *
+ * In principle GEM doesn't care at all about the internal data layout of an
+ * object, and hence it also doesn't care about tiling or swizzling. There's two
+ * exceptions:
+ *
+ * - For X and Y tiling the hardware provides detilers for CPU access, so called
+ *   fences. Since there's only a limited amount of them the kernel must manage
+ *   these, and therefore userspace must tell the kernel the object tiling if it
+ *   wants to use fences for detiling.
+ * - On gen3 and gen4 platforms have a swizzling pattern for tiled objects which
+ *   depends upon the physical page frame number. When swapping such objects the
+ *   page frame number might change and the kernel must be able to fix this up
+ *   and hence now the tiling. Note that on a subset of platforms with
+ *   asymmetric memory channel population the swizzling pattern changes in an
+ *   unknown way, and for those the kernel simply forbids swapping completely.
+ *
+ * Since neither of this applies for new tiling layouts on modern platforms like
+ * W, Ys and Yf tiling GEM only allows object tiling to be set to X or Y tiled.
+ * Anything else can be handled in userspace entirely without the kernel's
+ * invovlement.
+ */
+
+/**
+ * i915_gem_fence_size - required global GTT size for a fence
+ * @i915: i915 device
+ * @size: object size
+ * @tiling: tiling mode
+ * @stride: tiling stride
+ *
+ * Return the required global GTT size for a fence (view of a tiled object),
+ * taking into account potential fence register mapping.
+ */
+u32 i915_gem_fence_size(struct drm_i915_private *i915,
+			u32 size, unsigned int tiling, unsigned int stride)
+{
+	u32 ggtt_size;
+
+	GEM_BUG_ON(!size);
+
+	if (tiling == I915_TILING_NONE)
+		return size;
+
+	GEM_BUG_ON(!stride);
+
+	if (INTEL_GEN(i915) >= 4) {
+		stride *= i915_gem_tile_height(tiling);
+		GEM_BUG_ON(!IS_ALIGNED(stride, I965_FENCE_PAGE));
+		return roundup(size, stride);
+	}
+
+	/* Previous chips need a power-of-two fence region when tiling */
+	if (IS_GEN(i915, 3))
+		ggtt_size = 1024*1024;
+	else
+		ggtt_size = 512*1024;
+
+	while (ggtt_size < size)
+		ggtt_size <<= 1;
+
+	return ggtt_size;
+}
+
+/**
+ * i915_gem_fence_alignment - required global GTT alignment for a fence
+ * @i915: i915 device
+ * @size: object size
+ * @tiling: tiling mode
+ * @stride: tiling stride
+ *
+ * Return the required global GTT alignment for a fence (a view of a tiled
+ * object), taking into account potential fence register mapping.
+ */
+u32 i915_gem_fence_alignment(struct drm_i915_private *i915, u32 size,
+			     unsigned int tiling, unsigned int stride)
+{
+	GEM_BUG_ON(!size);
+
+	/*
+	 * Minimum alignment is 4k (GTT page size), but might be greater
+	 * if a fence register is needed for the object.
+	 */
+	if (tiling == I915_TILING_NONE)
+		return I915_GTT_MIN_ALIGNMENT;
+
+	if (INTEL_GEN(i915) >= 4)
+		return I965_FENCE_PAGE;
+
+	/*
+	 * Previous chips need to be aligned to the size of the smallest
+	 * fence register that can contain the object.
+	 */
+	return i915_gem_fence_size(i915, size, tiling, stride);
+}
+
+/* Check pitch constriants for all chips & tiling formats */
+static bool
+i915_tiling_ok(struct drm_i915_gem_object *obj,
+	       unsigned int tiling, unsigned int stride)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	unsigned int tile_width;
+
+	/* Linear is always fine */
+	if (tiling == I915_TILING_NONE)
+		return true;
+
+	if (tiling > I915_TILING_LAST)
+		return false;
+
+	/* check maximum stride & object size */
+	/* i965+ stores the end address of the gtt mapping in the fence
+	 * reg, so dont bother to check the size */
+	if (INTEL_GEN(i915) >= 7) {
+		if (stride / 128 > GEN7_FENCE_MAX_PITCH_VAL)
+			return false;
+	} else if (INTEL_GEN(i915) >= 4) {
+		if (stride / 128 > I965_FENCE_MAX_PITCH_VAL)
+			return false;
+	} else {
+		if (stride > 8192)
+			return false;
+
+		if (!is_power_of_2(stride))
+			return false;
+	}
+
+	if (IS_GEN(i915, 2) ||
+	    (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(i915)))
+		tile_width = 128;
+	else
+		tile_width = 512;
+
+	if (!stride || !IS_ALIGNED(stride, tile_width))
+		return false;
+
+	return true;
+}
+
+static bool i915_vma_fence_prepare(struct i915_vma *vma,
+				   int tiling_mode, unsigned int stride)
+{
+	struct drm_i915_private *i915 = vma->vm->i915;
+	u32 size, alignment;
+
+	if (!i915_vma_is_map_and_fenceable(vma))
+		return true;
+
+	size = i915_gem_fence_size(i915, vma->size, tiling_mode, stride);
+	if (vma->node.size < size)
+		return false;
+
+	alignment = i915_gem_fence_alignment(i915, vma->size, tiling_mode, stride);
+	if (!IS_ALIGNED(vma->node.start, alignment))
+		return false;
+
+	return true;
+}
+
+/* Make the current GTT allocation valid for the change in tiling. */
+static int
+i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj,
+			      int tiling_mode, unsigned int stride)
+{
+	struct i915_vma *vma;
+	int ret;
+
+	if (tiling_mode == I915_TILING_NONE)
+		return 0;
+
+	for_each_ggtt_vma(vma, obj) {
+		if (i915_vma_fence_prepare(vma, tiling_mode, stride))
+			continue;
+
+		ret = i915_vma_unbind(vma);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int
+i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
+			   unsigned int tiling, unsigned int stride)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_vma *vma;
+	int err;
+
+	/* Make sure we don't cross-contaminate obj->tiling_and_stride */
+	BUILD_BUG_ON(I915_TILING_LAST & STRIDE_MASK);
+
+	GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride));
+	GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE));
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	if ((tiling | stride) == obj->tiling_and_stride)
+		return 0;
+
+	if (i915_gem_object_is_framebuffer(obj))
+		return -EBUSY;
+
+	/* We need to rebind the object if its current allocation
+	 * no longer meets the alignment restrictions for its new
+	 * tiling mode. Otherwise we can just leave it alone, but
+	 * need to ensure that any fence register is updated before
+	 * the next fenced (either through the GTT or by the BLT unit
+	 * on older GPUs) access.
+	 *
+	 * After updating the tiling parameters, we then flag whether
+	 * we need to update an associated fence register. Note this
+	 * has to also include the unfenced register the GPU uses
+	 * whilst executing a fenced command for an untiled object.
+	 */
+
+	err = i915_gem_object_fence_prepare(obj, tiling, stride);
+	if (err)
+		return err;
+
+	i915_gem_object_lock(obj);
+	if (i915_gem_object_is_framebuffer(obj)) {
+		i915_gem_object_unlock(obj);
+		return -EBUSY;
+	}
+
+	/* If the memory has unknown (i.e. varying) swizzling, we pin the
+	 * pages to prevent them being swapped out and causing corruption
+	 * due to the change in swizzling.
+	 */
+	mutex_lock(&obj->mm.lock);
+	if (i915_gem_object_has_pages(obj) &&
+	    obj->mm.madv == I915_MADV_WILLNEED &&
+	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
+		if (tiling == I915_TILING_NONE) {
+			GEM_BUG_ON(!obj->mm.quirked);
+			__i915_gem_object_unpin_pages(obj);
+			obj->mm.quirked = false;
+		}
+		if (!i915_gem_object_is_tiled(obj)) {
+			GEM_BUG_ON(obj->mm.quirked);
+			__i915_gem_object_pin_pages(obj);
+			obj->mm.quirked = true;
+		}
+	}
+	mutex_unlock(&obj->mm.lock);
+
+	for_each_ggtt_vma(vma, obj) {
+		vma->fence_size =
+			i915_gem_fence_size(i915, vma->size, tiling, stride);
+		vma->fence_alignment =
+			i915_gem_fence_alignment(i915,
+						 vma->size, tiling, stride);
+
+		if (vma->fence)
+			vma->fence->dirty = true;
+	}
+
+	obj->tiling_and_stride = tiling | stride;
+	i915_gem_object_unlock(obj);
+
+	/* Force the fence to be reacquired for GTT access */
+	i915_gem_object_release_mmap(obj);
+
+	/* Try to preallocate memory required to save swizzling on put-pages */
+	if (i915_gem_object_needs_bit17_swizzle(obj)) {
+		if (!obj->bit_17) {
+			obj->bit_17 = bitmap_zalloc(obj->base.size >> PAGE_SHIFT,
+						    GFP_KERNEL);
+		}
+	} else {
+		bitmap_free(obj->bit_17);
+		obj->bit_17 = NULL;
+	}
+
+	return 0;
+}
+
+/**
+ * i915_gem_set_tiling_ioctl - IOCTL handler to set tiling mode
+ * @dev: DRM device
+ * @data: data pointer for the ioctl
+ * @file: DRM file for the ioctl call
+ *
+ * Sets the tiling mode of an object, returning the required swizzling of
+ * bit 6 of addresses in the object.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
+int
+i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file)
+{
+	struct drm_i915_gem_set_tiling *args = data;
+	struct drm_i915_gem_object *obj;
+	int err;
+
+	obj = i915_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	/*
+	 * The tiling mode of proxy objects is handled by its generator, and
+	 * not allowed to be changed by userspace.
+	 */
+	if (i915_gem_object_is_proxy(obj)) {
+		err = -ENXIO;
+		goto err;
+	}
+
+	if (!i915_tiling_ok(obj, args->tiling_mode, args->stride)) {
+		err = -EINVAL;
+		goto err;
+	}
+
+	if (args->tiling_mode == I915_TILING_NONE) {
+		args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
+		args->stride = 0;
+	} else {
+		if (args->tiling_mode == I915_TILING_X)
+			args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_x;
+		else
+			args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_y;
+
+		/* Hide bit 17 swizzling from the user.  This prevents old Mesa
+		 * from aborting the application on sw fallbacks to bit 17,
+		 * and we use the pread/pwrite bit17 paths to swizzle for it.
+		 * If there was a user that was relying on the swizzle
+		 * information for drm_intel_bo_map()ed reads/writes this would
+		 * break it, but we don't have any of those.
+		 */
+		if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17)
+			args->swizzle_mode = I915_BIT_6_SWIZZLE_9;
+		if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
+			args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;
+
+		/* If we can't handle the swizzling, make it untiled. */
+		if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) {
+			args->tiling_mode = I915_TILING_NONE;
+			args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
+			args->stride = 0;
+		}
+	}
+
+	err = mutex_lock_interruptible(&dev->struct_mutex);
+	if (err)
+		goto err;
+
+	err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride);
+	mutex_unlock(&dev->struct_mutex);
+
+	/* We have to maintain this existing ABI... */
+	args->stride = i915_gem_object_get_stride(obj);
+	args->tiling_mode = i915_gem_object_get_tiling(obj);
+
+err:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+/**
+ * i915_gem_get_tiling_ioctl - IOCTL handler to get tiling mode
+ * @dev: DRM device
+ * @data: data pointer for the ioctl
+ * @file: DRM file for the ioctl call
+ *
+ * Returns the current tiling mode and required bit 6 swizzling for the object.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
+int
+i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file)
+{
+	struct drm_i915_gem_get_tiling *args = data;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_gem_object *obj;
+	int err = -ENOENT;
+
+	rcu_read_lock();
+	obj = i915_gem_object_lookup_rcu(file, args->handle);
+	if (obj) {
+		args->tiling_mode =
+			READ_ONCE(obj->tiling_and_stride) & TILING_MASK;
+		err = 0;
+	}
+	rcu_read_unlock();
+	if (unlikely(err))
+		return err;
+
+	switch (args->tiling_mode) {
+	case I915_TILING_X:
+		args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
+		break;
+	case I915_TILING_Y:
+		args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y;
+		break;
+	default:
+	case I915_TILING_NONE:
+		args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
+		break;
+	}
+
+	/* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */
+	if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
+		args->phys_swizzle_mode = I915_BIT_6_SWIZZLE_UNKNOWN;
+	else
+		args->phys_swizzle_mode = args->swizzle_mode;
+	if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17)
+		args->swizzle_mode = I915_BIT_6_SWIZZLE_9;
+	if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
+		args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
new file mode 100644
index 000000000000..528b61678334
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -0,0 +1,833 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2012-2014 Intel Corporation
+ */
+
+#include <linux/mmu_context.h>
+#include <linux/mmu_notifier.h>
+#include <linux/mempolicy.h>
+#include <linux/swap.h>
+#include <linux/sched/mm.h>
+
+#include <drm/i915_drm.h>
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+#include "i915_scatterlist.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+
+struct i915_mm_struct {
+	struct mm_struct *mm;
+	struct drm_i915_private *i915;
+	struct i915_mmu_notifier *mn;
+	struct hlist_node node;
+	struct kref kref;
+	struct work_struct work;
+};
+
+#if defined(CONFIG_MMU_NOTIFIER)
+#include <linux/interval_tree.h>
+
+struct i915_mmu_notifier {
+	spinlock_t lock;
+	struct hlist_node node;
+	struct mmu_notifier mn;
+	struct rb_root_cached objects;
+	struct i915_mm_struct *mm;
+};
+
+struct i915_mmu_object {
+	struct i915_mmu_notifier *mn;
+	struct drm_i915_gem_object *obj;
+	struct interval_tree_node it;
+};
+
+static void add_object(struct i915_mmu_object *mo)
+{
+	GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb));
+	interval_tree_insert(&mo->it, &mo->mn->objects);
+}
+
+static void del_object(struct i915_mmu_object *mo)
+{
+	if (RB_EMPTY_NODE(&mo->it.rb))
+		return;
+
+	interval_tree_remove(&mo->it, &mo->mn->objects);
+	RB_CLEAR_NODE(&mo->it.rb);
+}
+
+static void
+__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
+{
+	struct i915_mmu_object *mo = obj->userptr.mmu_object;
+
+	/*
+	 * During mm_invalidate_range we need to cancel any userptr that
+	 * overlaps the range being invalidated. Doing so requires the
+	 * struct_mutex, and that risks recursion. In order to cause
+	 * recursion, the user must alias the userptr address space with
+	 * a GTT mmapping (possible with a MAP_FIXED) - then when we have
+	 * to invalidate that mmaping, mm_invalidate_range is called with
+	 * the userptr address *and* the struct_mutex held.  To prevent that
+	 * we set a flag under the i915_mmu_notifier spinlock to indicate
+	 * whether this object is valid.
+	 */
+	if (!mo)
+		return;
+
+	spin_lock(&mo->mn->lock);
+	if (value)
+		add_object(mo);
+	else
+		del_object(mo);
+	spin_unlock(&mo->mn->lock);
+}
+
+static int
+userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
+				  const struct mmu_notifier_range *range)
+{
+	struct i915_mmu_notifier *mn =
+		container_of(_mn, struct i915_mmu_notifier, mn);
+	struct interval_tree_node *it;
+	struct mutex *unlock = NULL;
+	unsigned long end;
+	int ret = 0;
+
+	if (RB_EMPTY_ROOT(&mn->objects.rb_root))
+		return 0;
+
+	/* interval ranges are inclusive, but invalidate range is exclusive */
+	end = range->end - 1;
+
+	spin_lock(&mn->lock);
+	it = interval_tree_iter_first(&mn->objects, range->start, end);
+	while (it) {
+		struct drm_i915_gem_object *obj;
+
+		if (!mmu_notifier_range_blockable(range)) {
+			ret = -EAGAIN;
+			break;
+		}
+
+		/*
+		 * The mmu_object is released late when destroying the
+		 * GEM object so it is entirely possible to gain a
+		 * reference on an object in the process of being freed
+		 * since our serialisation is via the spinlock and not
+		 * the struct_mutex - and consequently use it after it
+		 * is freed and then double free it. To prevent that
+		 * use-after-free we only acquire a reference on the
+		 * object if it is not in the process of being destroyed.
+		 */
+		obj = container_of(it, struct i915_mmu_object, it)->obj;
+		if (!kref_get_unless_zero(&obj->base.refcount)) {
+			it = interval_tree_iter_next(it, range->start, end);
+			continue;
+		}
+		spin_unlock(&mn->lock);
+
+		if (!unlock) {
+			unlock = &mn->mm->i915->drm.struct_mutex;
+
+			switch (mutex_trylock_recursive(unlock)) {
+			default:
+			case MUTEX_TRYLOCK_FAILED:
+				if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) {
+					i915_gem_object_put(obj);
+					return -EINTR;
+				}
+				/* fall through */
+			case MUTEX_TRYLOCK_SUCCESS:
+				break;
+
+			case MUTEX_TRYLOCK_RECURSIVE:
+				unlock = ERR_PTR(-EEXIST);
+				break;
+			}
+		}
+
+		ret = i915_gem_object_unbind(obj);
+		if (ret == 0)
+			ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
+		i915_gem_object_put(obj);
+		if (ret)
+			goto unlock;
+
+		spin_lock(&mn->lock);
+
+		/*
+		 * As we do not (yet) protect the mmu from concurrent insertion
+		 * over this range, there is no guarantee that this search will
+		 * terminate given a pathologic workload.
+		 */
+		it = interval_tree_iter_first(&mn->objects, range->start, end);
+	}
+	spin_unlock(&mn->lock);
+
+unlock:
+	if (!IS_ERR_OR_NULL(unlock))
+		mutex_unlock(unlock);
+
+	return ret;
+
+}
+
+static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
+	.invalidate_range_start = userptr_mn_invalidate_range_start,
+};
+
+static struct i915_mmu_notifier *
+i915_mmu_notifier_create(struct i915_mm_struct *mm)
+{
+	struct i915_mmu_notifier *mn;
+
+	mn = kmalloc(sizeof(*mn), GFP_KERNEL);
+	if (mn == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&mn->lock);
+	mn->mn.ops = &i915_gem_userptr_notifier;
+	mn->objects = RB_ROOT_CACHED;
+	mn->mm = mm;
+
+	return mn;
+}
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+	struct i915_mmu_object *mo;
+
+	mo = fetch_and_zero(&obj->userptr.mmu_object);
+	if (!mo)
+		return;
+
+	spin_lock(&mo->mn->lock);
+	del_object(mo);
+	spin_unlock(&mo->mn->lock);
+	kfree(mo);
+}
+
+static struct i915_mmu_notifier *
+i915_mmu_notifier_find(struct i915_mm_struct *mm)
+{
+	struct i915_mmu_notifier *mn;
+	int err = 0;
+
+	mn = mm->mn;
+	if (mn)
+		return mn;
+
+	mn = i915_mmu_notifier_create(mm);
+	if (IS_ERR(mn))
+		err = PTR_ERR(mn);
+
+	down_write(&mm->mm->mmap_sem);
+	mutex_lock(&mm->i915->mm_lock);
+	if (mm->mn == NULL && !err) {
+		/* Protected by mmap_sem (write-lock) */
+		err = __mmu_notifier_register(&mn->mn, mm->mm);
+		if (!err) {
+			/* Protected by mm_lock */
+			mm->mn = fetch_and_zero(&mn);
+		}
+	} else if (mm->mn) {
+		/*
+		 * Someone else raced and successfully installed the mmu
+		 * notifier, we can cancel our own errors.
+		 */
+		err = 0;
+	}
+	mutex_unlock(&mm->i915->mm_lock);
+	up_write(&mm->mm->mmap_sem);
+
+	if (mn && !IS_ERR(mn))
+		kfree(mn);
+
+	return err ? ERR_PTR(err) : mm->mn;
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	struct i915_mmu_notifier *mn;
+	struct i915_mmu_object *mo;
+
+	if (flags & I915_USERPTR_UNSYNCHRONIZED)
+		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
+
+	if (WARN_ON(obj->userptr.mm == NULL))
+		return -EINVAL;
+
+	mn = i915_mmu_notifier_find(obj->userptr.mm);
+	if (IS_ERR(mn))
+		return PTR_ERR(mn);
+
+	mo = kzalloc(sizeof(*mo), GFP_KERNEL);
+	if (!mo)
+		return -ENOMEM;
+
+	mo->mn = mn;
+	mo->obj = obj;
+	mo->it.start = obj->userptr.ptr;
+	mo->it.last = obj->userptr.ptr + obj->base.size - 1;
+	RB_CLEAR_NODE(&mo->it.rb);
+
+	obj->userptr.mmu_object = mo;
+	return 0;
+}
+
+static void
+i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
+		       struct mm_struct *mm)
+{
+	if (mn == NULL)
+		return;
+
+	mmu_notifier_unregister(&mn->mn, mm);
+	kfree(mn);
+}
+
+#else
+
+static void
+__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
+{
+}
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
+		return -ENODEV;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return 0;
+}
+
+static void
+i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
+		       struct mm_struct *mm)
+{
+}
+
+#endif
+
+static struct i915_mm_struct *
+__i915_mm_struct_find(struct drm_i915_private *dev_priv, struct mm_struct *real)
+{
+	struct i915_mm_struct *mm;
+
+	/* Protected by dev_priv->mm_lock */
+	hash_for_each_possible(dev_priv->mm_structs, mm, node, (unsigned long)real)
+		if (mm->mm == real)
+			return mm;
+
+	return NULL;
+}
+
+static int
+i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	struct i915_mm_struct *mm;
+	int ret = 0;
+
+	/* During release of the GEM object we hold the struct_mutex. This
+	 * precludes us from calling mmput() at that time as that may be
+	 * the last reference and so call exit_mmap(). exit_mmap() will
+	 * attempt to reap the vma, and if we were holding a GTT mmap
+	 * would then call drm_gem_vm_close() and attempt to reacquire
+	 * the struct mutex. So in order to avoid that recursion, we have
+	 * to defer releasing the mm reference until after we drop the
+	 * struct_mutex, i.e. we need to schedule a worker to do the clean
+	 * up.
+	 */
+	mutex_lock(&dev_priv->mm_lock);
+	mm = __i915_mm_struct_find(dev_priv, current->mm);
+	if (mm == NULL) {
+		mm = kmalloc(sizeof(*mm), GFP_KERNEL);
+		if (mm == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		kref_init(&mm->kref);
+		mm->i915 = to_i915(obj->base.dev);
+
+		mm->mm = current->mm;
+		mmgrab(current->mm);
+
+		mm->mn = NULL;
+
+		/* Protected by dev_priv->mm_lock */
+		hash_add(dev_priv->mm_structs,
+			 &mm->node, (unsigned long)mm->mm);
+	} else
+		kref_get(&mm->kref);
+
+	obj->userptr.mm = mm;
+out:
+	mutex_unlock(&dev_priv->mm_lock);
+	return ret;
+}
+
+static void
+__i915_mm_struct_free__worker(struct work_struct *work)
+{
+	struct i915_mm_struct *mm = container_of(work, typeof(*mm), work);
+	i915_mmu_notifier_free(mm->mn, mm->mm);
+	mmdrop(mm->mm);
+	kfree(mm);
+}
+
+static void
+__i915_mm_struct_free(struct kref *kref)
+{
+	struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref);
+
+	/* Protected by dev_priv->mm_lock */
+	hash_del(&mm->node);
+	mutex_unlock(&mm->i915->mm_lock);
+
+	INIT_WORK(&mm->work, __i915_mm_struct_free__worker);
+	queue_work(mm->i915->mm.userptr_wq, &mm->work);
+}
+
+static void
+i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj)
+{
+	if (obj->userptr.mm == NULL)
+		return;
+
+	kref_put_mutex(&obj->userptr.mm->kref,
+		       __i915_mm_struct_free,
+		       &to_i915(obj->base.dev)->mm_lock);
+	obj->userptr.mm = NULL;
+}
+
+struct get_pages_work {
+	struct work_struct work;
+	struct drm_i915_gem_object *obj;
+	struct task_struct *task;
+};
+
+static struct sg_table *
+__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
+			       struct page **pvec, int num_pages)
+{
+	unsigned int max_segment = i915_sg_segment_size();
+	struct sg_table *st;
+	unsigned int sg_page_sizes;
+	int ret;
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return ERR_PTR(-ENOMEM);
+
+alloc_table:
+	ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
+					  0, num_pages << PAGE_SHIFT,
+					  max_segment,
+					  GFP_KERNEL);
+	if (ret) {
+		kfree(st);
+		return ERR_PTR(ret);
+	}
+
+	ret = i915_gem_gtt_prepare_pages(obj, st);
+	if (ret) {
+		sg_free_table(st);
+
+		if (max_segment > PAGE_SIZE) {
+			max_segment = PAGE_SIZE;
+			goto alloc_table;
+		}
+
+		kfree(st);
+		return ERR_PTR(ret);
+	}
+
+	sg_page_sizes = i915_sg_page_sizes(st->sgl);
+
+	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
+
+	return st;
+}
+
+static void
+__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
+{
+	struct get_pages_work *work = container_of(_work, typeof(*work), work);
+	struct drm_i915_gem_object *obj = work->obj;
+	const int npages = obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	int pinned, ret;
+
+	ret = -ENOMEM;
+	pinned = 0;
+
+	pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
+	if (pvec != NULL) {
+		struct mm_struct *mm = obj->userptr.mm->mm;
+		unsigned int flags = 0;
+
+		if (!i915_gem_object_is_readonly(obj))
+			flags |= FOLL_WRITE;
+
+		ret = -EFAULT;
+		if (mmget_not_zero(mm)) {
+			down_read(&mm->mmap_sem);
+			while (pinned < npages) {
+				ret = get_user_pages_remote
+					(work->task, mm,
+					 obj->userptr.ptr + pinned * PAGE_SIZE,
+					 npages - pinned,
+					 flags,
+					 pvec + pinned, NULL, NULL);
+				if (ret < 0)
+					break;
+
+				pinned += ret;
+			}
+			up_read(&mm->mmap_sem);
+			mmput(mm);
+		}
+	}
+
+	mutex_lock(&obj->mm.lock);
+	if (obj->userptr.work == &work->work) {
+		struct sg_table *pages = ERR_PTR(ret);
+
+		if (pinned == npages) {
+			pages = __i915_gem_userptr_alloc_pages(obj, pvec,
+							       npages);
+			if (!IS_ERR(pages)) {
+				pinned = 0;
+				pages = NULL;
+			}
+		}
+
+		obj->userptr.work = ERR_CAST(pages);
+		if (IS_ERR(pages))
+			__i915_gem_userptr_set_active(obj, false);
+	}
+	mutex_unlock(&obj->mm.lock);
+
+	release_pages(pvec, pinned);
+	kvfree(pvec);
+
+	i915_gem_object_put(obj);
+	put_task_struct(work->task);
+	kfree(work);
+}
+
+static struct sg_table *
+__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
+{
+	struct get_pages_work *work;
+
+	/* Spawn a worker so that we can acquire the
+	 * user pages without holding our mutex. Access
+	 * to the user pages requires mmap_sem, and we have
+	 * a strict lock ordering of mmap_sem, struct_mutex -
+	 * we already hold struct_mutex here and so cannot
+	 * call gup without encountering a lock inversion.
+	 *
+	 * Userspace will keep on repeating the operation
+	 * (thanks to EAGAIN) until either we hit the fast
+	 * path or the worker completes. If the worker is
+	 * cancelled or superseded, the task is still run
+	 * but the results ignored. (This leads to
+	 * complications that we may have a stray object
+	 * refcount that we need to be wary of when
+	 * checking for existing objects during creation.)
+	 * If the worker encounters an error, it reports
+	 * that error back to this function through
+	 * obj->userptr.work = ERR_PTR.
+	 */
+	work = kmalloc(sizeof(*work), GFP_KERNEL);
+	if (work == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	obj->userptr.work = &work->work;
+
+	work->obj = i915_gem_object_get(obj);
+
+	work->task = current;
+	get_task_struct(work->task);
+
+	INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
+	queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work);
+
+	return ERR_PTR(-EAGAIN);
+}
+
+static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct mm_struct *mm = obj->userptr.mm->mm;
+	struct page **pvec;
+	struct sg_table *pages;
+	bool active;
+	int pinned;
+
+	/* If userspace should engineer that these pages are replaced in
+	 * the vma between us binding this page into the GTT and completion
+	 * of rendering... Their loss. If they change the mapping of their
+	 * pages they need to create a new bo to point to the new vma.
+	 *
+	 * However, that still leaves open the possibility of the vma
+	 * being copied upon fork. Which falls under the same userspace
+	 * synchronisation issue as a regular bo, except that this time
+	 * the process may not be expecting that a particular piece of
+	 * memory is tied to the GPU.
+	 *
+	 * Fortunately, we can hook into the mmu_notifier in order to
+	 * discard the page references prior to anything nasty happening
+	 * to the vma (discard or cloning) which should prevent the more
+	 * egregious cases from causing harm.
+	 */
+
+	if (obj->userptr.work) {
+		/* active flag should still be held for the pending work */
+		if (IS_ERR(obj->userptr.work))
+			return PTR_ERR(obj->userptr.work);
+		else
+			return -EAGAIN;
+	}
+
+	pvec = NULL;
+	pinned = 0;
+
+	if (mm == current->mm) {
+		pvec = kvmalloc_array(num_pages, sizeof(struct page *),
+				      GFP_KERNEL |
+				      __GFP_NORETRY |
+				      __GFP_NOWARN);
+		if (pvec) /* defer to worker if malloc fails */
+			pinned = __get_user_pages_fast(obj->userptr.ptr,
+						       num_pages,
+						       !i915_gem_object_is_readonly(obj),
+						       pvec);
+	}
+
+	active = false;
+	if (pinned < 0) {
+		pages = ERR_PTR(pinned);
+		pinned = 0;
+	} else if (pinned < num_pages) {
+		pages = __i915_gem_userptr_get_pages_schedule(obj);
+		active = pages == ERR_PTR(-EAGAIN);
+	} else {
+		pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages);
+		active = !IS_ERR(pages);
+	}
+	if (active)
+		__i915_gem_userptr_set_active(obj, true);
+
+	if (IS_ERR(pages))
+		release_pages(pvec, pinned);
+	kvfree(pvec);
+
+	return PTR_ERR_OR_ZERO(pages);
+}
+
+static void
+i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
+			   struct sg_table *pages)
+{
+	struct sgt_iter sgt_iter;
+	struct page *page;
+
+	/* Cancel any inflight work and force them to restart their gup */
+	obj->userptr.work = NULL;
+	__i915_gem_userptr_set_active(obj, false);
+	if (!pages)
+		return;
+
+	__i915_gem_object_release_shmem(obj, pages, true);
+	i915_gem_gtt_finish_pages(obj, pages);
+
+	for_each_sgt_page(page, sgt_iter, pages) {
+		if (obj->mm.dirty)
+			set_page_dirty(page);
+
+		mark_page_accessed(page);
+		put_page(page);
+	}
+	obj->mm.dirty = false;
+
+	sg_free_table(pages);
+	kfree(pages);
+}
+
+static void
+i915_gem_userptr_release(struct drm_i915_gem_object *obj)
+{
+	i915_gem_userptr_release__mmu_notifier(obj);
+	i915_gem_userptr_release__mm_struct(obj);
+}
+
+static int
+i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
+{
+	if (obj->userptr.mmu_object)
+		return 0;
+
+	return i915_gem_userptr_init__mmu_notifier(obj, 0);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
+	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
+		 I915_GEM_OBJECT_IS_SHRINKABLE |
+		 I915_GEM_OBJECT_ASYNC_CANCEL,
+	.get_pages = i915_gem_userptr_get_pages,
+	.put_pages = i915_gem_userptr_put_pages,
+	.dmabuf_export = i915_gem_userptr_dmabuf_export,
+	.release = i915_gem_userptr_release,
+};
+
+/*
+ * Creates a new mm object that wraps some normal memory from the process
+ * context - user memory.
+ *
+ * We impose several restrictions upon the memory being mapped
+ * into the GPU.
+ * 1. It must be page aligned (both start/end addresses, i.e ptr and size).
+ * 2. It must be normal system memory, not a pointer into another map of IO
+ *    space (e.g. it must not be a GTT mmapping of another object).
+ * 3. We only allow a bo as large as we could in theory map into the GTT,
+ *    that is we limit the size to the total size of the GTT.
+ * 4. The bo is marked as being snoopable. The backing pages are left
+ *    accessible directly by the CPU, but reads and writes by the GPU may
+ *    incur the cost of a snoop (unless you have an LLC architecture).
+ *
+ * Synchronisation between multiple users and the GPU is left to userspace
+ * through the normal set-domain-ioctl. The kernel will enforce that the
+ * GPU relinquishes the VMA before it is returned back to the system
+ * i.e. upon free(), munmap() or process termination. However, the userspace
+ * malloc() library may not immediately relinquish the VMA after free() and
+ * instead reuse it whilst the GPU is still reading and writing to the VMA.
+ * Caveat emptor.
+ *
+ * Also note, that the object created here is not currently a "first class"
+ * object, in that several ioctls are banned. These are the CPU access
+ * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
+ * direct access via your pointer rather than use those ioctls. Another
+ * restriction is that we do not allow userptr surfaces to be pinned to the
+ * hardware and so we reject any attempt to create a framebuffer out of a
+ * userptr.
+ *
+ * If you think this is a good interface to use to pass GPU memory between
+ * drivers, please use dma-buf instead. In fact, wherever possible use
+ * dma-buf instead.
+ */
+int
+i915_gem_userptr_ioctl(struct drm_device *dev,
+		       void *data,
+		       struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_gem_userptr *args = data;
+	struct drm_i915_gem_object *obj;
+	int ret;
+	u32 handle;
+
+	if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) {
+		/* We cannot support coherent userptr objects on hw without
+		 * LLC and broken snooping.
+		 */
+		return -ENODEV;
+	}
+
+	if (args->flags & ~(I915_USERPTR_READ_ONLY |
+			    I915_USERPTR_UNSYNCHRONIZED))
+		return -EINVAL;
+
+	if (!args->user_size)
+		return -EINVAL;
+
+	if (offset_in_page(args->user_ptr | args->user_size))
+		return -EINVAL;
+
+	if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size))
+		return -EFAULT;
+
+	if (args->flags & I915_USERPTR_READ_ONLY) {
+		struct i915_address_space *vm;
+
+		/*
+		 * On almost all of the older hw, we cannot tell the GPU that
+		 * a page is readonly.
+		 */
+		vm = dev_priv->kernel_context->vm;
+		if (!vm || !vm->has_read_only)
+			return -ENODEV;
+	}
+
+	obj = i915_gem_object_alloc();
+	if (obj == NULL)
+		return -ENOMEM;
+
+	drm_gem_private_object_init(dev, &obj->base, args->user_size);
+	i915_gem_object_init(obj, &i915_gem_userptr_ops);
+	obj->read_domains = I915_GEM_DOMAIN_CPU;
+	obj->write_domain = I915_GEM_DOMAIN_CPU;
+	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
+
+	obj->userptr.ptr = args->user_ptr;
+	if (args->flags & I915_USERPTR_READ_ONLY)
+		i915_gem_object_set_readonly(obj);
+
+	/* And keep a pointer to the current->mm for resolving the user pages
+	 * at binding. This means that we need to hook into the mmu_notifier
+	 * in order to detect if the mmu is destroyed.
+	 */
+	ret = i915_gem_userptr_init__mm_struct(obj);
+	if (ret == 0)
+		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
+	if (ret == 0)
+		ret = drm_gem_handle_create(file, &obj->base, &handle);
+
+	/* drop reference from allocate - handle holds it now */
+	i915_gem_object_put(obj);
+	if (ret)
+		return ret;
+
+	args->handle = handle;
+	return 0;
+}
+
+int i915_gem_init_userptr(struct drm_i915_private *dev_priv)
+{
+	mutex_init(&dev_priv->mm_lock);
+	hash_init(dev_priv->mm_structs);
+
+	dev_priv->mm.userptr_wq =
+		alloc_workqueue("i915-userptr-acquire",
+				WQ_HIGHPRI | WQ_UNBOUND,
+				0);
+	if (!dev_priv->mm.userptr_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv)
+{
+	destroy_workqueue(dev_priv->mm.userptr_wq);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
new file mode 100644
index 000000000000..26ec6579b7cd
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -0,0 +1,278 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include <linux/dma-fence-array.h>
+#include <linux/jiffies.h>
+
+#include "gt/intel_engine.h"
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+static long
+i915_gem_object_wait_fence(struct dma_fence *fence,
+			   unsigned int flags,
+			   long timeout)
+{
+	BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
+
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		return timeout;
+
+	if (dma_fence_is_i915(fence))
+		return i915_request_wait(to_request(fence), flags, timeout);
+
+	return dma_fence_wait_timeout(fence,
+				      flags & I915_WAIT_INTERRUPTIBLE,
+				      timeout);
+}
+
+static long
+i915_gem_object_wait_reservation(struct reservation_object *resv,
+				 unsigned int flags,
+				 long timeout)
+{
+	unsigned int seq = __read_seqcount_begin(&resv->seq);
+	struct dma_fence *excl;
+	bool prune_fences = false;
+
+	if (flags & I915_WAIT_ALL) {
+		struct dma_fence **shared;
+		unsigned int count, i;
+		int ret;
+
+		ret = reservation_object_get_fences_rcu(resv,
+							&excl, &count, &shared);
+		if (ret)
+			return ret;
+
+		for (i = 0; i < count; i++) {
+			timeout = i915_gem_object_wait_fence(shared[i],
+							     flags, timeout);
+			if (timeout < 0)
+				break;
+
+			dma_fence_put(shared[i]);
+		}
+
+		for (; i < count; i++)
+			dma_fence_put(shared[i]);
+		kfree(shared);
+
+		/*
+		 * If both shared fences and an exclusive fence exist,
+		 * then by construction the shared fences must be later
+		 * than the exclusive fence. If we successfully wait for
+		 * all the shared fences, we know that the exclusive fence
+		 * must all be signaled. If all the shared fences are
+		 * signaled, we can prune the array and recover the
+		 * floating references on the fences/requests.
+		 */
+		prune_fences = count && timeout >= 0;
+	} else {
+		excl = reservation_object_get_excl_rcu(resv);
+	}
+
+	if (excl && timeout >= 0)
+		timeout = i915_gem_object_wait_fence(excl, flags, timeout);
+
+	dma_fence_put(excl);
+
+	/*
+	 * Opportunistically prune the fences iff we know they have *all* been
+	 * signaled and that the reservation object has not been changed (i.e.
+	 * no new fences have been added).
+	 */
+	if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
+		if (reservation_object_trylock(resv)) {
+			if (!__read_seqcount_retry(&resv->seq, seq))
+				reservation_object_add_excl_fence(resv, NULL);
+			reservation_object_unlock(resv);
+		}
+	}
+
+	return timeout;
+}
+
+static void __fence_set_priority(struct dma_fence *fence,
+				 const struct i915_sched_attr *attr)
+{
+	struct i915_request *rq;
+	struct intel_engine_cs *engine;
+
+	if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
+		return;
+
+	rq = to_request(fence);
+	engine = rq->engine;
+
+	local_bh_disable();
+	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
+	if (engine->schedule)
+		engine->schedule(rq, attr);
+	rcu_read_unlock();
+	local_bh_enable(); /* kick the tasklets if queues were reprioritised */
+}
+
+static void fence_set_priority(struct dma_fence *fence,
+			       const struct i915_sched_attr *attr)
+{
+	/* Recurse once into a fence-array */
+	if (dma_fence_is_array(fence)) {
+		struct dma_fence_array *array = to_dma_fence_array(fence);
+		int i;
+
+		for (i = 0; i < array->num_fences; i++)
+			__fence_set_priority(array->fences[i], attr);
+	} else {
+		__fence_set_priority(fence, attr);
+	}
+}
+
+int
+i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
+			      unsigned int flags,
+			      const struct i915_sched_attr *attr)
+{
+	struct dma_fence *excl;
+
+	if (flags & I915_WAIT_ALL) {
+		struct dma_fence **shared;
+		unsigned int count, i;
+		int ret;
+
+		ret = reservation_object_get_fences_rcu(obj->base.resv,
+							&excl, &count, &shared);
+		if (ret)
+			return ret;
+
+		for (i = 0; i < count; i++) {
+			fence_set_priority(shared[i], attr);
+			dma_fence_put(shared[i]);
+		}
+
+		kfree(shared);
+	} else {
+		excl = reservation_object_get_excl_rcu(obj->base.resv);
+	}
+
+	if (excl) {
+		fence_set_priority(excl, attr);
+		dma_fence_put(excl);
+	}
+	return 0;
+}
+
+/**
+ * Waits for rendering to the object to be completed
+ * @obj: i915 gem object
+ * @flags: how to wait (under a lock, for all rendering or just for writes etc)
+ * @timeout: how long to wait
+ */
+int
+i915_gem_object_wait(struct drm_i915_gem_object *obj,
+		     unsigned int flags,
+		     long timeout)
+{
+	might_sleep();
+	GEM_BUG_ON(timeout < 0);
+
+	timeout = i915_gem_object_wait_reservation(obj->base.resv,
+						   flags, timeout);
+	return timeout < 0 ? timeout : 0;
+}
+
+static inline unsigned long nsecs_to_jiffies_timeout(const u64 n)
+{
+	/* nsecs_to_jiffies64() does not guard against overflow */
+	if (NSEC_PER_SEC % HZ &&
+	    div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ)
+		return MAX_JIFFY_OFFSET;
+
+	return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1);
+}
+
+static unsigned long to_wait_timeout(s64 timeout_ns)
+{
+	if (timeout_ns < 0)
+		return MAX_SCHEDULE_TIMEOUT;
+
+	if (timeout_ns == 0)
+		return 0;
+
+	return nsecs_to_jiffies_timeout(timeout_ns);
+}
+
+/**
+ * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
+ * @dev: drm device pointer
+ * @data: ioctl data blob
+ * @file: drm file pointer
+ *
+ * Returns 0 if successful, else an error is returned with the remaining time in
+ * the timeout parameter.
+ *  -ETIME: object is still busy after timeout
+ *  -ERESTARTSYS: signal interrupted the wait
+ *  -ENONENT: object doesn't exist
+ * Also possible, but rare:
+ *  -EAGAIN: incomplete, restart syscall
+ *  -ENOMEM: damn
+ *  -ENODEV: Internal IRQ fail
+ *  -E?: The add request failed
+ *
+ * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
+ * non-zero timeout parameter the wait ioctl will wait for the given number of
+ * nanoseconds on an object becoming unbusy. Since the wait itself does so
+ * without holding struct_mutex the object may become re-busied before this
+ * function completes. A similar but shorter * race condition exists in the busy
+ * ioctl
+ */
+int
+i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_gem_wait *args = data;
+	struct drm_i915_gem_object *obj;
+	ktime_t start;
+	long ret;
+
+	if (args->flags != 0)
+		return -EINVAL;
+
+	obj = i915_gem_object_lookup(file, args->bo_handle);
+	if (!obj)
+		return -ENOENT;
+
+	start = ktime_get();
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
+				   I915_WAIT_ALL,
+				   to_wait_timeout(args->timeout_ns));
+
+	if (args->timeout_ns > 0) {
+		args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
+		if (args->timeout_ns < 0)
+			args->timeout_ns = 0;
+
+		/*
+		 * Apparently ktime isn't accurate enough and occasionally has a
+		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
+		 * things up to make the test happy. We allow up to 1 jiffy.
+		 *
+		 * This is a regression from the timespec->ktime conversion.
+		 */
+		if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
+			args->timeout_ns = 0;
+
+		/* Asked to wait beyond the jiffie/scheduler precision? */
+		if (ret == -ETIME && args->timeout_ns)
+			ret = -EAGAIN;
+	}
+
+	i915_gem_object_put(obj);
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c
new file mode 100644
index 000000000000..099f3397aada
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c
@@ -0,0 +1,57 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017 Intel Corporation
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/pagemap.h>
+
+#include "i915_drv.h"
+#include "i915_gemfs.h"
+
+int i915_gemfs_init(struct drm_i915_private *i915)
+{
+	struct file_system_type *type;
+	struct vfsmount *gemfs;
+
+	type = get_fs_type("tmpfs");
+	if (!type)
+		return -ENODEV;
+
+	gemfs = kern_mount(type);
+	if (IS_ERR(gemfs))
+		return PTR_ERR(gemfs);
+
+	/*
+	 * Enable huge-pages for objects that are at least HPAGE_PMD_SIZE, most
+	 * likely 2M. Note that within_size may overallocate huge-pages, if say
+	 * we allocate an object of size 2M + 4K, we may get 2M + 2M, but under
+	 * memory pressure shmem should split any huge-pages which can be
+	 * shrunk.
+	 */
+
+	if (has_transparent_hugepage()) {
+		struct super_block *sb = gemfs->mnt_sb;
+		/* FIXME: Disabled until we get W/A for read BW issue. */
+		char options[] = "huge=never";
+		int flags = 0;
+		int err;
+
+		err = sb->s_op->remount_fs(sb, &flags, options);
+		if (err) {
+			kern_unmount(gemfs);
+			return err;
+		}
+	}
+
+	i915->mm.gemfs = gemfs;
+
+	return 0;
+}
+
+void i915_gemfs_fini(struct drm_i915_private *i915)
+{
+	kern_unmount(i915->mm.gemfs);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.h b/drivers/gpu/drm/i915/gem/i915_gemfs.h
new file mode 100644
index 000000000000..2a1e59af3e4a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.h
@@ -0,0 +1,16 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017 Intel Corporation
+ */
+
+#ifndef __I915_GEMFS_H__
+#define __I915_GEMFS_H__
+
+struct drm_i915_private;
+
+int i915_gemfs_init(struct drm_i915_private *i915);
+
+void i915_gemfs_fini(struct drm_i915_private *i915);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
new file mode 100644
index 000000000000..3c5d17b2b670
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
@@ -0,0 +1,123 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "i915_scatterlist.h"
+
+#include "huge_gem_object.h"
+
+static void huge_free_pages(struct drm_i915_gem_object *obj,
+			    struct sg_table *pages)
+{
+	unsigned long nreal = obj->scratch / PAGE_SIZE;
+	struct scatterlist *sg;
+
+	for (sg = pages->sgl; sg && nreal--; sg = __sg_next(sg))
+		__free_page(sg_page(sg));
+
+	sg_free_table(pages);
+	kfree(pages);
+}
+
+static int huge_get_pages(struct drm_i915_gem_object *obj)
+{
+#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY)
+	const unsigned long nreal = obj->scratch / PAGE_SIZE;
+	const unsigned long npages = obj->base.size / PAGE_SIZE;
+	struct scatterlist *sg, *src, *end;
+	struct sg_table *pages;
+	unsigned long n;
+
+	pages = kmalloc(sizeof(*pages), GFP);
+	if (!pages)
+		return -ENOMEM;
+
+	if (sg_alloc_table(pages, npages, GFP)) {
+		kfree(pages);
+		return -ENOMEM;
+	}
+
+	sg = pages->sgl;
+	for (n = 0; n < nreal; n++) {
+		struct page *page;
+
+		page = alloc_page(GFP | __GFP_HIGHMEM);
+		if (!page) {
+			sg_mark_end(sg);
+			goto err;
+		}
+
+		sg_set_page(sg, page, PAGE_SIZE, 0);
+		sg = __sg_next(sg);
+	}
+	if (nreal < npages) {
+		for (end = sg, src = pages->sgl; sg; sg = __sg_next(sg)) {
+			sg_set_page(sg, sg_page(src), PAGE_SIZE, 0);
+			src = __sg_next(src);
+			if (src == end)
+				src = pages->sgl;
+		}
+	}
+
+	if (i915_gem_gtt_prepare_pages(obj, pages))
+		goto err;
+
+	__i915_gem_object_set_pages(obj, pages, PAGE_SIZE);
+
+	return 0;
+
+err:
+	huge_free_pages(obj, pages);
+
+	return -ENOMEM;
+#undef GFP
+}
+
+static void huge_put_pages(struct drm_i915_gem_object *obj,
+			   struct sg_table *pages)
+{
+	i915_gem_gtt_finish_pages(obj, pages);
+	huge_free_pages(obj, pages);
+
+	obj->mm.dirty = false;
+}
+
+static const struct drm_i915_gem_object_ops huge_ops = {
+	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
+		 I915_GEM_OBJECT_IS_SHRINKABLE,
+	.get_pages = huge_get_pages,
+	.put_pages = huge_put_pages,
+};
+
+struct drm_i915_gem_object *
+huge_gem_object(struct drm_i915_private *i915,
+		phys_addr_t phys_size,
+		dma_addr_t dma_size)
+{
+	struct drm_i915_gem_object *obj;
+	unsigned int cache_level;
+
+	GEM_BUG_ON(!phys_size || phys_size > dma_size);
+	GEM_BUG_ON(!IS_ALIGNED(phys_size, PAGE_SIZE));
+	GEM_BUG_ON(!IS_ALIGNED(dma_size, I915_GTT_PAGE_SIZE));
+
+	if (overflows_type(dma_size, obj->base.size))
+		return ERR_PTR(-E2BIG);
+
+	obj = i915_gem_object_alloc();
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+
+	drm_gem_private_object_init(&i915->drm, &obj->base, dma_size);
+	i915_gem_object_init(obj, &huge_ops);
+
+	obj->read_domains = I915_GEM_DOMAIN_CPU;
+	obj->write_domain = I915_GEM_DOMAIN_CPU;
+	cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
+	i915_gem_object_set_cache_coherency(obj, cache_level);
+	obj->scratch = phys_size;
+
+	return obj;
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
new file mode 100644
index 000000000000..549c1394bcdc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
@@ -0,0 +1,27 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __HUGE_GEM_OBJECT_H
+#define __HUGE_GEM_OBJECT_H
+
+struct drm_i915_gem_object *
+huge_gem_object(struct drm_i915_private *i915,
+		phys_addr_t phys_size,
+		dma_addr_t dma_size);
+
+static inline phys_addr_t
+huge_gem_object_phys_size(struct drm_i915_gem_object *obj)
+{
+	return obj->scratch;
+}
+
+static inline dma_addr_t
+huge_gem_object_dma_size(struct drm_i915_gem_object *obj)
+{
+	return obj->base.size;
+}
+
+#endif /* !__HUGE_GEM_OBJECT_H */
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
new file mode 100644
index 000000000000..b74729b6f353
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -0,0 +1,1777 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017 Intel Corporation
+ */
+
+#include <linux/prime_numbers.h>
+
+#include "i915_selftest.h"
+
+#include "gem/i915_gem_pm.h"
+
+#include "igt_gem_utils.h"
+#include "mock_context.h"
+
+#include "selftests/mock_drm.h"
+#include "selftests/mock_gem_device.h"
+#include "selftests/i915_random.h"
+
+static const unsigned int page_sizes[] = {
+	I915_GTT_PAGE_SIZE_2M,
+	I915_GTT_PAGE_SIZE_64K,
+	I915_GTT_PAGE_SIZE_4K,
+};
+
+static unsigned int get_largest_page_size(struct drm_i915_private *i915,
+					  u64 rem)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) {
+		unsigned int page_size = page_sizes[i];
+
+		if (HAS_PAGE_SIZES(i915, page_size) && rem >= page_size)
+			return page_size;
+	}
+
+	return 0;
+}
+
+static void huge_pages_free_pages(struct sg_table *st)
+{
+	struct scatterlist *sg;
+
+	for (sg = st->sgl; sg; sg = __sg_next(sg)) {
+		if (sg_page(sg))
+			__free_pages(sg_page(sg), get_order(sg->length));
+	}
+
+	sg_free_table(st);
+	kfree(st);
+}
+
+static int get_huge_pages(struct drm_i915_gem_object *obj)
+{
+#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY)
+	unsigned int page_mask = obj->mm.page_mask;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	unsigned int sg_page_sizes;
+	u64 rem;
+
+	st = kmalloc(sizeof(*st), GFP);
+	if (!st)
+		return -ENOMEM;
+
+	if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) {
+		kfree(st);
+		return -ENOMEM;
+	}
+
+	rem = obj->base.size;
+	sg = st->sgl;
+	st->nents = 0;
+	sg_page_sizes = 0;
+
+	/*
+	 * Our goal here is simple, we want to greedily fill the object from
+	 * largest to smallest page-size, while ensuring that we use *every*
+	 * page-size as per the given page-mask.
+	 */
+	do {
+		unsigned int bit = ilog2(page_mask);
+		unsigned int page_size = BIT(bit);
+		int order = get_order(page_size);
+
+		do {
+			struct page *page;
+
+			GEM_BUG_ON(order >= MAX_ORDER);
+			page = alloc_pages(GFP | __GFP_ZERO, order);
+			if (!page)
+				goto err;
+
+			sg_set_page(sg, page, page_size, 0);
+			sg_page_sizes |= page_size;
+			st->nents++;
+
+			rem -= page_size;
+			if (!rem) {
+				sg_mark_end(sg);
+				break;
+			}
+
+			sg = __sg_next(sg);
+		} while ((rem - ((page_size-1) & page_mask)) >= page_size);
+
+		page_mask &= (page_size-1);
+	} while (page_mask);
+
+	if (i915_gem_gtt_prepare_pages(obj, st))
+		goto err;
+
+	obj->mm.madv = I915_MADV_DONTNEED;
+
+	GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask);
+	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
+
+	return 0;
+
+err:
+	sg_set_page(sg, NULL, 0, 0);
+	sg_mark_end(sg);
+	huge_pages_free_pages(st);
+
+	return -ENOMEM;
+}
+
+static void put_huge_pages(struct drm_i915_gem_object *obj,
+			   struct sg_table *pages)
+{
+	i915_gem_gtt_finish_pages(obj, pages);
+	huge_pages_free_pages(pages);
+
+	obj->mm.dirty = false;
+	obj->mm.madv = I915_MADV_WILLNEED;
+}
+
+static const struct drm_i915_gem_object_ops huge_page_ops = {
+	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
+		 I915_GEM_OBJECT_IS_SHRINKABLE,
+	.get_pages = get_huge_pages,
+	.put_pages = put_huge_pages,
+};
+
+static struct drm_i915_gem_object *
+huge_pages_object(struct drm_i915_private *i915,
+		  u64 size,
+		  unsigned int page_mask)
+{
+	struct drm_i915_gem_object *obj;
+
+	GEM_BUG_ON(!size);
+	GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask))));
+
+	if (size >> PAGE_SHIFT > INT_MAX)
+		return ERR_PTR(-E2BIG);
+
+	if (overflows_type(size, obj->base.size))
+		return ERR_PTR(-E2BIG);
+
+	obj = i915_gem_object_alloc();
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+
+	drm_gem_private_object_init(&i915->drm, &obj->base, size);
+	i915_gem_object_init(obj, &huge_page_ops);
+
+	obj->write_domain = I915_GEM_DOMAIN_CPU;
+	obj->read_domains = I915_GEM_DOMAIN_CPU;
+	obj->cache_level = I915_CACHE_NONE;
+
+	obj->mm.page_mask = page_mask;
+
+	return obj;
+}
+
+static int fake_get_huge_pages(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	const u64 max_len = rounddown_pow_of_two(UINT_MAX);
+	struct sg_table *st;
+	struct scatterlist *sg;
+	unsigned int sg_page_sizes;
+	u64 rem;
+
+	st = kmalloc(sizeof(*st), GFP);
+	if (!st)
+		return -ENOMEM;
+
+	if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) {
+		kfree(st);
+		return -ENOMEM;
+	}
+
+	/* Use optimal page sized chunks to fill in the sg table */
+	rem = obj->base.size;
+	sg = st->sgl;
+	st->nents = 0;
+	sg_page_sizes = 0;
+	do {
+		unsigned int page_size = get_largest_page_size(i915, rem);
+		unsigned int len = min(page_size * div_u64(rem, page_size),
+				       max_len);
+
+		GEM_BUG_ON(!page_size);
+
+		sg->offset = 0;
+		sg->length = len;
+		sg_dma_len(sg) = len;
+		sg_dma_address(sg) = page_size;
+
+		sg_page_sizes |= len;
+
+		st->nents++;
+
+		rem -= len;
+		if (!rem) {
+			sg_mark_end(sg);
+			break;
+		}
+
+		sg = sg_next(sg);
+	} while (1);
+
+	i915_sg_trim(st);
+
+	obj->mm.madv = I915_MADV_DONTNEED;
+
+	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
+
+	return 0;
+}
+
+static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct sg_table *st;
+	struct scatterlist *sg;
+	unsigned int page_size;
+
+	st = kmalloc(sizeof(*st), GFP);
+	if (!st)
+		return -ENOMEM;
+
+	if (sg_alloc_table(st, 1, GFP)) {
+		kfree(st);
+		return -ENOMEM;
+	}
+
+	sg = st->sgl;
+	st->nents = 1;
+
+	page_size = get_largest_page_size(i915, obj->base.size);
+	GEM_BUG_ON(!page_size);
+
+	sg->offset = 0;
+	sg->length = obj->base.size;
+	sg_dma_len(sg) = obj->base.size;
+	sg_dma_address(sg) = page_size;
+
+	obj->mm.madv = I915_MADV_DONTNEED;
+
+	__i915_gem_object_set_pages(obj, st, sg->length);
+
+	return 0;
+#undef GFP
+}
+
+static void fake_free_huge_pages(struct drm_i915_gem_object *obj,
+				 struct sg_table *pages)
+{
+	sg_free_table(pages);
+	kfree(pages);
+}
+
+static void fake_put_huge_pages(struct drm_i915_gem_object *obj,
+				struct sg_table *pages)
+{
+	fake_free_huge_pages(obj, pages);
+	obj->mm.dirty = false;
+	obj->mm.madv = I915_MADV_WILLNEED;
+}
+
+static const struct drm_i915_gem_object_ops fake_ops = {
+	.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
+	.get_pages = fake_get_huge_pages,
+	.put_pages = fake_put_huge_pages,
+};
+
+static const struct drm_i915_gem_object_ops fake_ops_single = {
+	.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
+	.get_pages = fake_get_huge_pages_single,
+	.put_pages = fake_put_huge_pages,
+};
+
+static struct drm_i915_gem_object *
+fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single)
+{
+	struct drm_i915_gem_object *obj;
+
+	GEM_BUG_ON(!size);
+	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
+
+	if (size >> PAGE_SHIFT > UINT_MAX)
+		return ERR_PTR(-E2BIG);
+
+	if (overflows_type(size, obj->base.size))
+		return ERR_PTR(-E2BIG);
+
+	obj = i915_gem_object_alloc();
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+
+	drm_gem_private_object_init(&i915->drm, &obj->base, size);
+
+	if (single)
+		i915_gem_object_init(obj, &fake_ops_single);
+	else
+		i915_gem_object_init(obj, &fake_ops);
+
+	obj->write_domain = I915_GEM_DOMAIN_CPU;
+	obj->read_domains = I915_GEM_DOMAIN_CPU;
+	obj->cache_level = I915_CACHE_NONE;
+
+	return obj;
+}
+
+static int igt_check_page_sizes(struct i915_vma *vma)
+{
+	struct drm_i915_private *i915 = vma->vm->i915;
+	unsigned int supported = INTEL_INFO(i915)->page_sizes;
+	struct drm_i915_gem_object *obj = vma->obj;
+	int err = 0;
+
+	if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) {
+		pr_err("unsupported page_sizes.sg=%u, supported=%u\n",
+		       vma->page_sizes.sg & ~supported, supported);
+		err = -EINVAL;
+	}
+
+	if (!HAS_PAGE_SIZES(i915, vma->page_sizes.gtt)) {
+		pr_err("unsupported page_sizes.gtt=%u, supported=%u\n",
+		       vma->page_sizes.gtt & ~supported, supported);
+		err = -EINVAL;
+	}
+
+	if (vma->page_sizes.phys != obj->mm.page_sizes.phys) {
+		pr_err("vma->page_sizes.phys(%u) != obj->mm.page_sizes.phys(%u)\n",
+		       vma->page_sizes.phys, obj->mm.page_sizes.phys);
+		err = -EINVAL;
+	}
+
+	if (vma->page_sizes.sg != obj->mm.page_sizes.sg) {
+		pr_err("vma->page_sizes.sg(%u) != obj->mm.page_sizes.sg(%u)\n",
+		       vma->page_sizes.sg, obj->mm.page_sizes.sg);
+		err = -EINVAL;
+	}
+
+	if (obj->mm.page_sizes.gtt) {
+		pr_err("obj->page_sizes.gtt(%u) should never be set\n",
+		       obj->mm.page_sizes.gtt);
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
+static int igt_mock_exhaust_device_supported_pages(void *arg)
+{
+	struct i915_ppgtt *ppgtt = arg;
+	struct drm_i915_private *i915 = ppgtt->vm.i915;
+	unsigned int saved_mask = INTEL_INFO(i915)->page_sizes;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int i, j, single;
+	int err;
+
+	/*
+	 * Sanity check creating objects with every valid page support
+	 * combination for our mock device.
+	 */
+
+	for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) {
+		unsigned int combination = 0;
+
+		for (j = 0; j < ARRAY_SIZE(page_sizes); j++) {
+			if (i & BIT(j))
+				combination |= page_sizes[j];
+		}
+
+		mkwrite_device_info(i915)->page_sizes = combination;
+
+		for (single = 0; single <= 1; ++single) {
+			obj = fake_huge_pages_object(i915, combination, !!single);
+			if (IS_ERR(obj)) {
+				err = PTR_ERR(obj);
+				goto out_device;
+			}
+
+			if (obj->base.size != combination) {
+				pr_err("obj->base.size=%zu, expected=%u\n",
+				       obj->base.size, combination);
+				err = -EINVAL;
+				goto out_put;
+			}
+
+			vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
+			if (IS_ERR(vma)) {
+				err = PTR_ERR(vma);
+				goto out_put;
+			}
+
+			err = i915_vma_pin(vma, 0, 0, PIN_USER);
+			if (err)
+				goto out_close;
+
+			err = igt_check_page_sizes(vma);
+
+			if (vma->page_sizes.sg != combination) {
+				pr_err("page_sizes.sg=%u, expected=%u\n",
+				       vma->page_sizes.sg, combination);
+				err = -EINVAL;
+			}
+
+			i915_vma_unpin(vma);
+			i915_vma_close(vma);
+
+			i915_gem_object_put(obj);
+
+			if (err)
+				goto out_device;
+		}
+	}
+
+	goto out_device;
+
+out_close:
+	i915_vma_close(vma);
+out_put:
+	i915_gem_object_put(obj);
+out_device:
+	mkwrite_device_info(i915)->page_sizes = saved_mask;
+
+	return err;
+}
+
+static int igt_mock_ppgtt_misaligned_dma(void *arg)
+{
+	struct i915_ppgtt *ppgtt = arg;
+	struct drm_i915_private *i915 = ppgtt->vm.i915;
+	unsigned long supported = INTEL_INFO(i915)->page_sizes;
+	struct drm_i915_gem_object *obj;
+	int bit;
+	int err;
+
+	/*
+	 * Sanity check dma misalignment for huge pages -- the dma addresses we
+	 * insert into the paging structures need to always respect the page
+	 * size alignment.
+	 */
+
+	bit = ilog2(I915_GTT_PAGE_SIZE_64K);
+
+	for_each_set_bit_from(bit, &supported,
+			      ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
+		IGT_TIMEOUT(end_time);
+		unsigned int page_size = BIT(bit);
+		unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
+		unsigned int offset;
+		unsigned int size =
+			round_up(page_size, I915_GTT_PAGE_SIZE_2M) << 1;
+		struct i915_vma *vma;
+
+		obj = fake_huge_pages_object(i915, size, true);
+		if (IS_ERR(obj))
+			return PTR_ERR(obj);
+
+		if (obj->base.size != size) {
+			pr_err("obj->base.size=%zu, expected=%u\n",
+			       obj->base.size, size);
+			err = -EINVAL;
+			goto out_put;
+		}
+
+		err = i915_gem_object_pin_pages(obj);
+		if (err)
+			goto out_put;
+
+		/* Force the page size for this object */
+		obj->mm.page_sizes.sg = page_size;
+
+		vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
+		if (IS_ERR(vma)) {
+			err = PTR_ERR(vma);
+			goto out_unpin;
+		}
+
+		err = i915_vma_pin(vma, 0, 0, flags);
+		if (err) {
+			i915_vma_close(vma);
+			goto out_unpin;
+		}
+
+
+		err = igt_check_page_sizes(vma);
+
+		if (vma->page_sizes.gtt != page_size) {
+			pr_err("page_sizes.gtt=%u, expected %u\n",
+			       vma->page_sizes.gtt, page_size);
+			err = -EINVAL;
+		}
+
+		i915_vma_unpin(vma);
+
+		if (err) {
+			i915_vma_close(vma);
+			goto out_unpin;
+		}
+
+		/*
+		 * Try all the other valid offsets until the next
+		 * boundary -- should always fall back to using 4K
+		 * pages.
+		 */
+		for (offset = 4096; offset < page_size; offset += 4096) {
+			err = i915_vma_unbind(vma);
+			if (err) {
+				i915_vma_close(vma);
+				goto out_unpin;
+			}
+
+			err = i915_vma_pin(vma, 0, 0, flags | offset);
+			if (err) {
+				i915_vma_close(vma);
+				goto out_unpin;
+			}
+
+			err = igt_check_page_sizes(vma);
+
+			if (vma->page_sizes.gtt != I915_GTT_PAGE_SIZE_4K) {
+				pr_err("page_sizes.gtt=%u, expected %llu\n",
+				       vma->page_sizes.gtt, I915_GTT_PAGE_SIZE_4K);
+				err = -EINVAL;
+			}
+
+			i915_vma_unpin(vma);
+
+			if (err) {
+				i915_vma_close(vma);
+				goto out_unpin;
+			}
+
+			if (igt_timeout(end_time,
+					"%s timed out at offset %x with page-size %x\n",
+					__func__, offset, page_size))
+				break;
+		}
+
+		i915_vma_close(vma);
+
+		i915_gem_object_unpin_pages(obj);
+		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+		i915_gem_object_put(obj);
+	}
+
+	return 0;
+
+out_unpin:
+	i915_gem_object_unpin_pages(obj);
+out_put:
+	i915_gem_object_put(obj);
+
+	return err;
+}
+
+static void close_object_list(struct list_head *objects,
+			      struct i915_ppgtt *ppgtt)
+{
+	struct drm_i915_gem_object *obj, *on;
+
+	list_for_each_entry_safe(obj, on, objects, st_link) {
+		struct i915_vma *vma;
+
+		vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
+		if (!IS_ERR(vma))
+			i915_vma_close(vma);
+
+		list_del(&obj->st_link);
+		i915_gem_object_unpin_pages(obj);
+		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+		i915_gem_object_put(obj);
+	}
+}
+
+static int igt_mock_ppgtt_huge_fill(void *arg)
+{
+	struct i915_ppgtt *ppgtt = arg;
+	struct drm_i915_private *i915 = ppgtt->vm.i915;
+	unsigned long max_pages = ppgtt->vm.total >> PAGE_SHIFT;
+	unsigned long page_num;
+	bool single = false;
+	LIST_HEAD(objects);
+	IGT_TIMEOUT(end_time);
+	int err = -ENODEV;
+
+	for_each_prime_number_from(page_num, 1, max_pages) {
+		struct drm_i915_gem_object *obj;
+		u64 size = page_num << PAGE_SHIFT;
+		struct i915_vma *vma;
+		unsigned int expected_gtt = 0;
+		int i;
+
+		obj = fake_huge_pages_object(i915, size, single);
+		if (IS_ERR(obj)) {
+			err = PTR_ERR(obj);
+			break;
+		}
+
+		if (obj->base.size != size) {
+			pr_err("obj->base.size=%zd, expected=%llu\n",
+			       obj->base.size, size);
+			i915_gem_object_put(obj);
+			err = -EINVAL;
+			break;
+		}
+
+		err = i915_gem_object_pin_pages(obj);
+		if (err) {
+			i915_gem_object_put(obj);
+			break;
+		}
+
+		list_add(&obj->st_link, &objects);
+
+		vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
+		if (IS_ERR(vma)) {
+			err = PTR_ERR(vma);
+			break;
+		}
+
+		err = i915_vma_pin(vma, 0, 0, PIN_USER);
+		if (err)
+			break;
+
+		err = igt_check_page_sizes(vma);
+		if (err) {
+			i915_vma_unpin(vma);
+			break;
+		}
+
+		/*
+		 * Figure out the expected gtt page size knowing that we go from
+		 * largest to smallest page size sg chunks, and that we align to
+		 * the largest page size.
+		 */
+		for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) {
+			unsigned int page_size = page_sizes[i];
+
+			if (HAS_PAGE_SIZES(i915, page_size) &&
+			    size >= page_size) {
+				expected_gtt |= page_size;
+				size &= page_size-1;
+			}
+		}
+
+		GEM_BUG_ON(!expected_gtt);
+		GEM_BUG_ON(size);
+
+		if (expected_gtt & I915_GTT_PAGE_SIZE_4K)
+			expected_gtt &= ~I915_GTT_PAGE_SIZE_64K;
+
+		i915_vma_unpin(vma);
+
+		if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
+			if (!IS_ALIGNED(vma->node.start,
+					I915_GTT_PAGE_SIZE_2M)) {
+				pr_err("node.start(%llx) not aligned to 2M\n",
+				       vma->node.start);
+				err = -EINVAL;
+				break;
+			}
+
+			if (!IS_ALIGNED(vma->node.size,
+					I915_GTT_PAGE_SIZE_2M)) {
+				pr_err("node.size(%llx) not aligned to 2M\n",
+				       vma->node.size);
+				err = -EINVAL;
+				break;
+			}
+		}
+
+		if (vma->page_sizes.gtt != expected_gtt) {
+			pr_err("gtt=%u, expected=%u, size=%zd, single=%s\n",
+			       vma->page_sizes.gtt, expected_gtt,
+			       obj->base.size, yesno(!!single));
+			err = -EINVAL;
+			break;
+		}
+
+		if (igt_timeout(end_time,
+				"%s timed out at size %zd\n",
+				__func__, obj->base.size))
+			break;
+
+		single = !single;
+	}
+
+	close_object_list(&objects, ppgtt);
+
+	if (err == -ENOMEM || err == -ENOSPC)
+		err = 0;
+
+	return err;
+}
+
+static int igt_mock_ppgtt_64K(void *arg)
+{
+	struct i915_ppgtt *ppgtt = arg;
+	struct drm_i915_private *i915 = ppgtt->vm.i915;
+	struct drm_i915_gem_object *obj;
+	const struct object_info {
+		unsigned int size;
+		unsigned int gtt;
+		unsigned int offset;
+	} objects[] = {
+		/* Cases with forced padding/alignment */
+		{
+			.size = SZ_64K,
+			.gtt = I915_GTT_PAGE_SIZE_64K,
+			.offset = 0,
+		},
+		{
+			.size = SZ_64K + SZ_4K,
+			.gtt = I915_GTT_PAGE_SIZE_4K,
+			.offset = 0,
+		},
+		{
+			.size = SZ_64K - SZ_4K,
+			.gtt = I915_GTT_PAGE_SIZE_4K,
+			.offset = 0,
+		},
+		{
+			.size = SZ_2M,
+			.gtt = I915_GTT_PAGE_SIZE_64K,
+			.offset = 0,
+		},
+		{
+			.size = SZ_2M - SZ_4K,
+			.gtt = I915_GTT_PAGE_SIZE_4K,
+			.offset = 0,
+		},
+		{
+			.size = SZ_2M + SZ_4K,
+			.gtt = I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_4K,
+			.offset = 0,
+		},
+		{
+			.size = SZ_2M + SZ_64K,
+			.gtt = I915_GTT_PAGE_SIZE_64K,
+			.offset = 0,
+		},
+		{
+			.size = SZ_2M - SZ_64K,
+			.gtt = I915_GTT_PAGE_SIZE_64K,
+			.offset = 0,
+		},
+		/* Try without any forced padding/alignment */
+		{
+			.size = SZ_64K,
+			.offset = SZ_2M,
+			.gtt = I915_GTT_PAGE_SIZE_4K,
+		},
+		{
+			.size = SZ_128K,
+			.offset = SZ_2M - SZ_64K,
+			.gtt = I915_GTT_PAGE_SIZE_4K,
+		},
+	};
+	struct i915_vma *vma;
+	int i, single;
+	int err;
+
+	/*
+	 * Sanity check some of the trickiness with 64K pages -- either we can
+	 * safely mark the whole page-table(2M block) as 64K, or we have to
+	 * always fallback to 4K.
+	 */
+
+	if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K))
+		return 0;
+
+	for (i = 0; i < ARRAY_SIZE(objects); ++i) {
+		unsigned int size = objects[i].size;
+		unsigned int expected_gtt = objects[i].gtt;
+		unsigned int offset = objects[i].offset;
+		unsigned int flags = PIN_USER;
+
+		for (single = 0; single <= 1; single++) {
+			obj = fake_huge_pages_object(i915, size, !!single);
+			if (IS_ERR(obj))
+				return PTR_ERR(obj);
+
+			err = i915_gem_object_pin_pages(obj);
+			if (err)
+				goto out_object_put;
+
+			/*
+			 * Disable 2M pages -- We only want to use 64K/4K pages
+			 * for this test.
+			 */
+			obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M;
+
+			vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
+			if (IS_ERR(vma)) {
+				err = PTR_ERR(vma);
+				goto out_object_unpin;
+			}
+
+			if (offset)
+				flags |= PIN_OFFSET_FIXED | offset;
+
+			err = i915_vma_pin(vma, 0, 0, flags);
+			if (err)
+				goto out_vma_close;
+
+			err = igt_check_page_sizes(vma);
+			if (err)
+				goto out_vma_unpin;
+
+			if (!offset && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
+				if (!IS_ALIGNED(vma->node.start,
+						I915_GTT_PAGE_SIZE_2M)) {
+					pr_err("node.start(%llx) not aligned to 2M\n",
+					       vma->node.start);
+					err = -EINVAL;
+					goto out_vma_unpin;
+				}
+
+				if (!IS_ALIGNED(vma->node.size,
+						I915_GTT_PAGE_SIZE_2M)) {
+					pr_err("node.size(%llx) not aligned to 2M\n",
+					       vma->node.size);
+					err = -EINVAL;
+					goto out_vma_unpin;
+				}
+			}
+
+			if (vma->page_sizes.gtt != expected_gtt) {
+				pr_err("gtt=%u, expected=%u, i=%d, single=%s\n",
+				       vma->page_sizes.gtt, expected_gtt, i,
+				       yesno(!!single));
+				err = -EINVAL;
+				goto out_vma_unpin;
+			}
+
+			i915_vma_unpin(vma);
+			i915_vma_close(vma);
+
+			i915_gem_object_unpin_pages(obj);
+			__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+			i915_gem_object_put(obj);
+		}
+	}
+
+	return 0;
+
+out_vma_unpin:
+	i915_vma_unpin(vma);
+out_vma_close:
+	i915_vma_close(vma);
+out_object_unpin:
+	i915_gem_object_unpin_pages(obj);
+out_object_put:
+	i915_gem_object_put(obj);
+
+	return err;
+}
+
+static struct i915_vma *
+gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val)
+{
+	struct drm_i915_private *i915 = vma->vm->i915;
+	const int gen = INTEL_GEN(i915);
+	unsigned int count = vma->size >> PAGE_SHIFT;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *batch;
+	unsigned int size;
+	u32 *cmd;
+	int n;
+	int err;
+
+	size = (1 + 4 * count) * sizeof(u32);
+	size = round_up(size, PAGE_SIZE);
+	obj = i915_gem_object_create_internal(i915, size);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(cmd)) {
+		err = PTR_ERR(cmd);
+		goto err;
+	}
+
+	offset += vma->node.start;
+
+	for (n = 0; n < count; n++) {
+		if (gen >= 8) {
+			*cmd++ = MI_STORE_DWORD_IMM_GEN4;
+			*cmd++ = lower_32_bits(offset);
+			*cmd++ = upper_32_bits(offset);
+			*cmd++ = val;
+		} else if (gen >= 4) {
+			*cmd++ = MI_STORE_DWORD_IMM_GEN4 |
+				(gen < 6 ? MI_USE_GGTT : 0);
+			*cmd++ = 0;
+			*cmd++ = offset;
+			*cmd++ = val;
+		} else {
+			*cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+			*cmd++ = offset;
+			*cmd++ = val;
+		}
+
+		offset += PAGE_SIZE;
+	}
+
+	*cmd = MI_BATCH_BUFFER_END;
+	i915_gem_chipset_flush(i915);
+
+	i915_gem_object_unpin_map(obj);
+
+	batch = i915_vma_instance(obj, vma->vm, NULL);
+	if (IS_ERR(batch)) {
+		err = PTR_ERR(batch);
+		goto err;
+	}
+
+	err = i915_vma_pin(batch, 0, 0, PIN_USER);
+	if (err)
+		goto err;
+
+	return batch;
+
+err:
+	i915_gem_object_put(obj);
+
+	return ERR_PTR(err);
+}
+
+static int gpu_write(struct i915_vma *vma,
+		     struct i915_gem_context *ctx,
+		     struct intel_engine_cs *engine,
+		     u32 dword,
+		     u32 value)
+{
+	struct i915_request *rq;
+	struct i915_vma *batch;
+	int err;
+
+	GEM_BUG_ON(!intel_engine_can_store_dword(engine));
+
+	batch = gpu_write_dw(vma, dword * sizeof(u32), value);
+	if (IS_ERR(batch))
+		return PTR_ERR(batch);
+
+	rq = igt_request_alloc(ctx, engine);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_batch;
+	}
+
+	i915_vma_lock(batch);
+	err = i915_vma_move_to_active(batch, rq, 0);
+	i915_vma_unlock(batch);
+	if (err)
+		goto err_request;
+
+	i915_vma_lock(vma);
+	err = i915_gem_object_set_to_gtt_domain(vma->obj, false);
+	if (err == 0)
+		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
+	if (err)
+		goto err_request;
+
+	err = engine->emit_bb_start(rq,
+				    batch->node.start, batch->node.size,
+				    0);
+err_request:
+	if (err)
+		i915_request_skip(rq, err);
+	i915_request_add(rq);
+err_batch:
+	i915_vma_unpin(batch);
+	i915_vma_close(batch);
+	i915_vma_put(batch);
+
+	return err;
+}
+
+static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
+{
+	unsigned int needs_flush;
+	unsigned long n;
+	int err;
+
+	err = i915_gem_object_prepare_read(obj, &needs_flush);
+	if (err)
+		return err;
+
+	for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
+		u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n));
+
+		if (needs_flush & CLFLUSH_BEFORE)
+			drm_clflush_virt_range(ptr, PAGE_SIZE);
+
+		if (ptr[dword] != val) {
+			pr_err("n=%lu ptr[%u]=%u, val=%u\n",
+			       n, dword, ptr[dword], val);
+			kunmap_atomic(ptr);
+			err = -EINVAL;
+			break;
+		}
+
+		kunmap_atomic(ptr);
+	}
+
+	i915_gem_object_finish_access(obj);
+
+	return err;
+}
+
+static int __igt_write_huge(struct i915_gem_context *ctx,
+			    struct intel_engine_cs *engine,
+			    struct drm_i915_gem_object *obj,
+			    u64 size, u64 offset,
+			    u32 dword, u32 val)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+	unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
+	struct i915_vma *vma;
+	int err;
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	err = i915_vma_unbind(vma);
+	if (err)
+		goto out_vma_close;
+
+	err = i915_vma_pin(vma, size, 0, flags | offset);
+	if (err) {
+		/*
+		 * The ggtt may have some pages reserved so
+		 * refrain from erroring out.
+		 */
+		if (err == -ENOSPC && i915_is_ggtt(vm))
+			err = 0;
+
+		goto out_vma_close;
+	}
+
+	err = igt_check_page_sizes(vma);
+	if (err)
+		goto out_vma_unpin;
+
+	err = gpu_write(vma, ctx, engine, dword, val);
+	if (err) {
+		pr_err("gpu-write failed at offset=%llx\n", offset);
+		goto out_vma_unpin;
+	}
+
+	err = cpu_check(obj, dword, val);
+	if (err) {
+		pr_err("cpu-check failed at offset=%llx\n", offset);
+		goto out_vma_unpin;
+	}
+
+out_vma_unpin:
+	i915_vma_unpin(vma);
+out_vma_close:
+	i915_vma_destroy(vma);
+
+	return err;
+}
+
+static int igt_write_huge(struct i915_gem_context *ctx,
+			  struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+	static struct intel_engine_cs *engines[I915_NUM_ENGINES];
+	struct intel_engine_cs *engine;
+	I915_RND_STATE(prng);
+	IGT_TIMEOUT(end_time);
+	unsigned int max_page_size;
+	unsigned int id;
+	u64 max;
+	u64 num;
+	u64 size;
+	int *order;
+	int i, n;
+	int err = 0;
+
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+	size = obj->base.size;
+	if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
+		size = round_up(size, I915_GTT_PAGE_SIZE_2M);
+
+	max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg);
+	max = div_u64((vm->total - size), max_page_size);
+
+	n = 0;
+	for_each_engine(engine, i915, id) {
+		if (!intel_engine_can_store_dword(engine)) {
+			pr_info("store-dword-imm not supported on engine=%u\n",
+				id);
+			continue;
+		}
+		engines[n++] = engine;
+	}
+
+	if (!n)
+		return 0;
+
+	/*
+	 * To keep things interesting when alternating between engines in our
+	 * randomized order, lets also make feeding to the same engine a few
+	 * times in succession a possibility by enlarging the permutation array.
+	 */
+	order = i915_random_order(n * I915_NUM_ENGINES, &prng);
+	if (!order)
+		return -ENOMEM;
+
+	/*
+	 * Try various offsets in an ascending/descending fashion until we
+	 * timeout -- we want to avoid issues hidden by effectively always using
+	 * offset = 0.
+	 */
+	i = 0;
+	for_each_prime_number_from(num, 0, max) {
+		u64 offset_low = num * max_page_size;
+		u64 offset_high = (max - num) * max_page_size;
+		u32 dword = offset_in_page(num) / 4;
+
+		engine = engines[order[i] % n];
+		i = (i + 1) % (n * I915_NUM_ENGINES);
+
+		/*
+		 * In order to utilize 64K pages we need to both pad the vma
+		 * size and ensure the vma offset is at the start of the pt
+		 * boundary, however to improve coverage we opt for testing both
+		 * aligned and unaligned offsets.
+		 */
+		if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
+			offset_low = round_down(offset_low,
+						I915_GTT_PAGE_SIZE_2M);
+
+		err = __igt_write_huge(ctx, engine, obj, size, offset_low,
+				       dword, num + 1);
+		if (err)
+			break;
+
+		err = __igt_write_huge(ctx, engine, obj, size, offset_high,
+				       dword, num + 1);
+		if (err)
+			break;
+
+		if (igt_timeout(end_time,
+				"%s timed out on engine=%u, offset_low=%llx offset_high=%llx, max_page_size=%x\n",
+				__func__, engine->id, offset_low, offset_high,
+				max_page_size))
+			break;
+	}
+
+	kfree(order);
+
+	return err;
+}
+
+static int igt_ppgtt_exhaust_huge(void *arg)
+{
+	struct i915_gem_context *ctx = arg;
+	struct drm_i915_private *i915 = ctx->i915;
+	unsigned long supported = INTEL_INFO(i915)->page_sizes;
+	static unsigned int pages[ARRAY_SIZE(page_sizes)];
+	struct drm_i915_gem_object *obj;
+	unsigned int size_mask;
+	unsigned int page_mask;
+	int n, i;
+	int err = -ENODEV;
+
+	if (supported == I915_GTT_PAGE_SIZE_4K)
+		return 0;
+
+	/*
+	 * Sanity check creating objects with a varying mix of page sizes --
+	 * ensuring that our writes lands in the right place.
+	 */
+
+	n = 0;
+	for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1)
+		pages[n++] = BIT(i);
+
+	for (size_mask = 2; size_mask < BIT(n); size_mask++) {
+		unsigned int size = 0;
+
+		for (i = 0; i < n; i++) {
+			if (size_mask & BIT(i))
+				size |= pages[i];
+		}
+
+		/*
+		 * For our page mask we want to enumerate all the page-size
+		 * combinations which will fit into our chosen object size.
+		 */
+		for (page_mask = 2; page_mask <= size_mask; page_mask++) {
+			unsigned int page_sizes = 0;
+
+			for (i = 0; i < n; i++) {
+				if (page_mask & BIT(i))
+					page_sizes |= pages[i];
+			}
+
+			/*
+			 * Ensure that we can actually fill the given object
+			 * with our chosen page mask.
+			 */
+			if (!IS_ALIGNED(size, BIT(__ffs(page_sizes))))
+				continue;
+
+			obj = huge_pages_object(i915, size, page_sizes);
+			if (IS_ERR(obj)) {
+				err = PTR_ERR(obj);
+				goto out_device;
+			}
+
+			err = i915_gem_object_pin_pages(obj);
+			if (err) {
+				i915_gem_object_put(obj);
+
+				if (err == -ENOMEM) {
+					pr_info("unable to get pages, size=%u, pages=%u\n",
+						size, page_sizes);
+					err = 0;
+					break;
+				}
+
+				pr_err("pin_pages failed, size=%u, pages=%u\n",
+				       size_mask, page_mask);
+
+				goto out_device;
+			}
+
+			/* Force the page-size for the gtt insertion */
+			obj->mm.page_sizes.sg = page_sizes;
+
+			err = igt_write_huge(ctx, obj);
+			if (err) {
+				pr_err("exhaust write-huge failed with size=%u\n",
+				       size);
+				goto out_unpin;
+			}
+
+			i915_gem_object_unpin_pages(obj);
+			__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+			i915_gem_object_put(obj);
+		}
+	}
+
+	goto out_device;
+
+out_unpin:
+	i915_gem_object_unpin_pages(obj);
+	i915_gem_object_put(obj);
+out_device:
+	mkwrite_device_info(i915)->page_sizes = supported;
+
+	return err;
+}
+
+static int igt_ppgtt_internal_huge(void *arg)
+{
+	struct i915_gem_context *ctx = arg;
+	struct drm_i915_private *i915 = ctx->i915;
+	struct drm_i915_gem_object *obj;
+	static const unsigned int sizes[] = {
+		SZ_64K,
+		SZ_128K,
+		SZ_256K,
+		SZ_512K,
+		SZ_1M,
+		SZ_2M,
+	};
+	int i;
+	int err;
+
+	/*
+	 * Sanity check that the HW uses huge pages correctly through internal
+	 * -- ensure that our writes land in the right place.
+	 */
+
+	for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
+		unsigned int size = sizes[i];
+
+		obj = i915_gem_object_create_internal(i915, size);
+		if (IS_ERR(obj))
+			return PTR_ERR(obj);
+
+		err = i915_gem_object_pin_pages(obj);
+		if (err)
+			goto out_put;
+
+		if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) {
+			pr_info("internal unable to allocate huge-page(s) with size=%u\n",
+				size);
+			goto out_unpin;
+		}
+
+		err = igt_write_huge(ctx, obj);
+		if (err) {
+			pr_err("internal write-huge failed with size=%u\n",
+			       size);
+			goto out_unpin;
+		}
+
+		i915_gem_object_unpin_pages(obj);
+		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+		i915_gem_object_put(obj);
+	}
+
+	return 0;
+
+out_unpin:
+	i915_gem_object_unpin_pages(obj);
+out_put:
+	i915_gem_object_put(obj);
+
+	return err;
+}
+
+static inline bool igt_can_allocate_thp(struct drm_i915_private *i915)
+{
+	return i915->mm.gemfs && has_transparent_hugepage();
+}
+
+static int igt_ppgtt_gemfs_huge(void *arg)
+{
+	struct i915_gem_context *ctx = arg;
+	struct drm_i915_private *i915 = ctx->i915;
+	struct drm_i915_gem_object *obj;
+	static const unsigned int sizes[] = {
+		SZ_2M,
+		SZ_4M,
+		SZ_8M,
+		SZ_16M,
+		SZ_32M,
+	};
+	int i;
+	int err;
+
+	/*
+	 * Sanity check that the HW uses huge pages correctly through gemfs --
+	 * ensure that our writes land in the right place.
+	 */
+
+	if (!igt_can_allocate_thp(i915)) {
+		pr_info("missing THP support, skipping\n");
+		return 0;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
+		unsigned int size = sizes[i];
+
+		obj = i915_gem_object_create_shmem(i915, size);
+		if (IS_ERR(obj))
+			return PTR_ERR(obj);
+
+		err = i915_gem_object_pin_pages(obj);
+		if (err)
+			goto out_put;
+
+		if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) {
+			pr_info("finishing test early, gemfs unable to allocate huge-page(s) with size=%u\n",
+				size);
+			goto out_unpin;
+		}
+
+		err = igt_write_huge(ctx, obj);
+		if (err) {
+			pr_err("gemfs write-huge failed with size=%u\n",
+			       size);
+			goto out_unpin;
+		}
+
+		i915_gem_object_unpin_pages(obj);
+		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+		i915_gem_object_put(obj);
+	}
+
+	return 0;
+
+out_unpin:
+	i915_gem_object_unpin_pages(obj);
+out_put:
+	i915_gem_object_put(obj);
+
+	return err;
+}
+
+static int igt_ppgtt_pin_update(void *arg)
+{
+	struct i915_gem_context *ctx = arg;
+	struct drm_i915_private *dev_priv = ctx->i915;
+	unsigned long supported = INTEL_INFO(dev_priv)->page_sizes;
+	struct i915_address_space *vm = ctx->vm;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
+	int first, last;
+	int err;
+
+	/*
+	 * Make sure there's no funny business when doing a PIN_UPDATE -- in the
+	 * past we had a subtle issue with being able to incorrectly do multiple
+	 * alloc va ranges on the same object when doing a PIN_UPDATE, which
+	 * resulted in some pretty nasty bugs, though only when using
+	 * huge-gtt-pages.
+	 */
+
+	if (!vm || !i915_vm_is_4lvl(vm)) {
+		pr_info("48b PPGTT not supported, skipping\n");
+		return 0;
+	}
+
+	first = ilog2(I915_GTT_PAGE_SIZE_64K);
+	last = ilog2(I915_GTT_PAGE_SIZE_2M);
+
+	for_each_set_bit_from(first, &supported, last + 1) {
+		unsigned int page_size = BIT(first);
+
+		obj = i915_gem_object_create_internal(dev_priv, page_size);
+		if (IS_ERR(obj))
+			return PTR_ERR(obj);
+
+		vma = i915_vma_instance(obj, vm, NULL);
+		if (IS_ERR(vma)) {
+			err = PTR_ERR(vma);
+			goto out_put;
+		}
+
+		err = i915_vma_pin(vma, SZ_2M, 0, flags);
+		if (err)
+			goto out_close;
+
+		if (vma->page_sizes.sg < page_size) {
+			pr_info("Unable to allocate page-size %x, finishing test early\n",
+				page_size);
+			goto out_unpin;
+		}
+
+		err = igt_check_page_sizes(vma);
+		if (err)
+			goto out_unpin;
+
+		if (vma->page_sizes.gtt != page_size) {
+			dma_addr_t addr = i915_gem_object_get_dma_address(obj, 0);
+
+			/*
+			 * The only valid reason for this to ever fail would be
+			 * if the dma-mapper screwed us over when we did the
+			 * dma_map_sg(), since it has the final say over the dma
+			 * address.
+			 */
+			if (IS_ALIGNED(addr, page_size)) {
+				pr_err("page_sizes.gtt=%u, expected=%u\n",
+				       vma->page_sizes.gtt, page_size);
+				err = -EINVAL;
+			} else {
+				pr_info("dma address misaligned, finishing test early\n");
+			}
+
+			goto out_unpin;
+		}
+
+		err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE);
+		if (err)
+			goto out_unpin;
+
+		i915_vma_unpin(vma);
+		i915_vma_close(vma);
+
+		i915_gem_object_put(obj);
+	}
+
+	obj = i915_gem_object_create_internal(dev_priv, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto out_put;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, flags);
+	if (err)
+		goto out_close;
+
+	/*
+	 * Make sure we don't end up with something like where the pde is still
+	 * pointing to the 2M page, and the pt we just filled-in is dangling --
+	 * we can check this by writing to the first page where it would then
+	 * land in the now stale 2M page.
+	 */
+
+	err = gpu_write(vma, ctx, dev_priv->engine[RCS0], 0, 0xdeadbeaf);
+	if (err)
+		goto out_unpin;
+
+	err = cpu_check(obj, 0, 0xdeadbeaf);
+
+out_unpin:
+	i915_vma_unpin(vma);
+out_close:
+	i915_vma_close(vma);
+out_put:
+	i915_gem_object_put(obj);
+
+	return err;
+}
+
+static int igt_tmpfs_fallback(void *arg)
+{
+	struct i915_gem_context *ctx = arg;
+	struct drm_i915_private *i915 = ctx->i915;
+	struct vfsmount *gemfs = i915->mm.gemfs;
+	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	u32 *vaddr;
+	int err = 0;
+
+	/*
+	 * Make sure that we don't burst into a ball of flames upon falling back
+	 * to tmpfs, which we rely on if on the off-chance we encouter a failure
+	 * when setting up gemfs.
+	 */
+
+	i915->mm.gemfs = NULL;
+
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		goto out_restore;
+	}
+
+	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(vaddr)) {
+		err = PTR_ERR(vaddr);
+		goto out_put;
+	}
+	*vaddr = 0xdeadbeaf;
+
+	__i915_gem_object_flush_map(obj, 0, 64);
+	i915_gem_object_unpin_map(obj);
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto out_put;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err)
+		goto out_close;
+
+	err = igt_check_page_sizes(vma);
+
+	i915_vma_unpin(vma);
+out_close:
+	i915_vma_close(vma);
+out_put:
+	i915_gem_object_put(obj);
+out_restore:
+	i915->mm.gemfs = gemfs;
+
+	return err;
+}
+
+static int igt_shrink_thp(void *arg)
+{
+	struct i915_gem_context *ctx = arg;
+	struct drm_i915_private *i915 = ctx->i915;
+	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	unsigned int flags = PIN_USER;
+	int err;
+
+	/*
+	 * Sanity check shrinking huge-paged object -- make sure nothing blows
+	 * up.
+	 */
+
+	if (!igt_can_allocate_thp(i915)) {
+		pr_info("missing THP support, skipping\n");
+		return 0;
+	}
+
+	obj = i915_gem_object_create_shmem(i915, SZ_2M);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto out_put;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, flags);
+	if (err)
+		goto out_close;
+
+	if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) {
+		pr_info("failed to allocate THP, finishing test early\n");
+		goto out_unpin;
+	}
+
+	err = igt_check_page_sizes(vma);
+	if (err)
+		goto out_unpin;
+
+	err = gpu_write(vma, ctx, i915->engine[RCS0], 0, 0xdeadbeaf);
+	if (err)
+		goto out_unpin;
+
+	i915_vma_unpin(vma);
+
+	/*
+	 * Now that the pages are *unpinned* shrink-all should invoke
+	 * shmem to truncate our pages.
+	 */
+	i915_gem_shrink_all(i915);
+	if (i915_gem_object_has_pages(obj)) {
+		pr_err("shrink-all didn't truncate the pages\n");
+		err = -EINVAL;
+		goto out_close;
+	}
+
+	if (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys) {
+		pr_err("residual page-size bits left\n");
+		err = -EINVAL;
+		goto out_close;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, flags);
+	if (err)
+		goto out_close;
+
+	err = cpu_check(obj, 0, 0xdeadbeaf);
+
+out_unpin:
+	i915_vma_unpin(vma);
+out_close:
+	i915_vma_close(vma);
+out_put:
+	i915_gem_object_put(obj);
+
+	return err;
+}
+
+int i915_gem_huge_page_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_mock_exhaust_device_supported_pages),
+		SUBTEST(igt_mock_ppgtt_misaligned_dma),
+		SUBTEST(igt_mock_ppgtt_huge_fill),
+		SUBTEST(igt_mock_ppgtt_64K),
+	};
+	struct drm_i915_private *dev_priv;
+	struct i915_ppgtt *ppgtt;
+	int err;
+
+	dev_priv = mock_gem_device();
+	if (!dev_priv)
+		return -ENOMEM;
+
+	/* Pretend to be a device which supports the 48b PPGTT */
+	mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
+	mkwrite_device_info(dev_priv)->ppgtt_size = 48;
+
+	mutex_lock(&dev_priv->drm.struct_mutex);
+	ppgtt = i915_ppgtt_create(dev_priv);
+	if (IS_ERR(ppgtt)) {
+		err = PTR_ERR(ppgtt);
+		goto out_unlock;
+	}
+
+	if (!i915_vm_is_4lvl(&ppgtt->vm)) {
+		pr_err("failed to create 48b PPGTT\n");
+		err = -EINVAL;
+		goto out_close;
+	}
+
+	/* If we were ever hit this then it's time to mock the 64K scratch */
+	if (!i915_vm_has_scratch_64K(&ppgtt->vm)) {
+		pr_err("PPGTT missing 64K scratch page\n");
+		err = -EINVAL;
+		goto out_close;
+	}
+
+	err = i915_subtests(tests, ppgtt);
+
+out_close:
+	i915_vm_put(&ppgtt->vm);
+
+out_unlock:
+	mutex_unlock(&dev_priv->drm.struct_mutex);
+	drm_dev_put(&dev_priv->drm);
+
+	return err;
+}
+
+int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_shrink_thp),
+		SUBTEST(igt_ppgtt_pin_update),
+		SUBTEST(igt_tmpfs_fallback),
+		SUBTEST(igt_ppgtt_exhaust_huge),
+		SUBTEST(igt_ppgtt_gemfs_huge),
+		SUBTEST(igt_ppgtt_internal_huge),
+	};
+	struct drm_file *file;
+	struct i915_gem_context *ctx;
+	intel_wakeref_t wakeref;
+	int err;
+
+	if (!HAS_PPGTT(dev_priv)) {
+		pr_info("PPGTT not supported, skipping live-selftests\n");
+		return 0;
+	}
+
+	if (i915_terminally_wedged(dev_priv))
+		return 0;
+
+	file = mock_file(dev_priv);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	mutex_lock(&dev_priv->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
+
+	ctx = live_context(dev_priv, file);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto out_unlock;
+	}
+
+	if (ctx->vm)
+		ctx->vm->scrub_64K = true;
+
+	err = i915_subtests(tests, ctx);
+
+out_unlock:
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
+
+	mock_file_free(dev_priv, file);
+
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
new file mode 100644
index 000000000000..f3a5eb807c1c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+
+#include "selftests/igt_flush_test.h"
+#include "selftests/mock_drm.h"
+#include "mock_context.h"
+
+static int igt_client_fill(void *arg)
+{
+	struct intel_context *ce = arg;
+	struct drm_i915_private *i915 = ce->gem_context->i915;
+	struct drm_i915_gem_object *obj;
+	struct rnd_state prng;
+	IGT_TIMEOUT(end);
+	u32 *vaddr;
+	int err = 0;
+
+	prandom_seed_state(&prng, i915_selftest.random_seed);
+
+	do {
+		u32 sz = prandom_u32_state(&prng) % SZ_32M;
+		u32 val = prandom_u32_state(&prng);
+		u32 i;
+
+		sz = round_up(sz, PAGE_SIZE);
+
+		pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val);
+
+		obj = i915_gem_object_create_internal(i915, sz);
+		if (IS_ERR(obj)) {
+			err = PTR_ERR(obj);
+			goto err_flush;
+		}
+
+		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+		if (IS_ERR(vaddr)) {
+			err = PTR_ERR(vaddr);
+			goto err_put;
+		}
+
+		/*
+		 * XXX: The goal is move this to get_pages, so try to dirty the
+		 * CPU cache first to check that we do the required clflush
+		 * before scheduling the blt for !llc platforms. This matches
+		 * some version of reality where at get_pages the pages
+		 * themselves may not yet be coherent with the GPU(swap-in). If
+		 * we are missing the flush then we should see the stale cache
+		 * values after we do the set_to_cpu_domain and pick it up as a
+		 * test failure.
+		 */
+		memset32(vaddr, val ^ 0xdeadbeaf, obj->base.size / sizeof(u32));
+
+		if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+			obj->cache_dirty = true;
+
+		err = i915_gem_schedule_fill_pages_blt(obj, ce, obj->mm.pages,
+						       &obj->mm.page_sizes,
+						       val);
+		if (err)
+			goto err_unpin;
+
+		/*
+		 * XXX: For now do the wait without the object resv lock to
+		 * ensure we don't deadlock.
+		 */
+		err = i915_gem_object_wait(obj,
+					   I915_WAIT_INTERRUPTIBLE |
+					   I915_WAIT_ALL,
+					   MAX_SCHEDULE_TIMEOUT);
+		if (err)
+			goto err_unpin;
+
+		i915_gem_object_lock(obj);
+		err = i915_gem_object_set_to_cpu_domain(obj, false);
+		i915_gem_object_unlock(obj);
+		if (err)
+			goto err_unpin;
+
+		for (i = 0; i < obj->base.size / sizeof(u32); ++i) {
+			if (vaddr[i] != val) {
+				pr_err("vaddr[%u]=%x, expected=%x\n", i,
+				       vaddr[i], val);
+				err = -EINVAL;
+				goto err_unpin;
+			}
+		}
+
+		i915_gem_object_unpin_map(obj);
+		i915_gem_object_put(obj);
+	} while (!time_after(jiffies, end));
+
+	goto err_flush;
+
+err_unpin:
+	i915_gem_object_unpin_map(obj);
+err_put:
+	i915_gem_object_put(obj);
+err_flush:
+	mutex_lock(&i915->drm.struct_mutex);
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	if (err == -ENOMEM)
+		err = 0;
+
+	return err;
+}
+
+int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_client_fill),
+	};
+
+	if (i915_terminally_wedged(i915))
+		return 0;
+
+	if (!HAS_ENGINE(i915, BCS0))
+		return 0;
+
+	return i915_subtests(tests, i915->engine[BCS0]->kernel_context);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
new file mode 100644
index 000000000000..8f22d3f18422
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -0,0 +1,391 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017 Intel Corporation
+ */
+
+#include <linux/prime_numbers.h>
+
+#include "i915_selftest.h"
+#include "selftests/i915_random.h"
+
+static int cpu_set(struct drm_i915_gem_object *obj,
+		   unsigned long offset,
+		   u32 v)
+{
+	unsigned int needs_clflush;
+	struct page *page;
+	void *map;
+	u32 *cpu;
+	int err;
+
+	err = i915_gem_object_prepare_write(obj, &needs_clflush);
+	if (err)
+		return err;
+
+	page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
+	map = kmap_atomic(page);
+	cpu = map + offset_in_page(offset);
+
+	if (needs_clflush & CLFLUSH_BEFORE)
+		drm_clflush_virt_range(cpu, sizeof(*cpu));
+
+	*cpu = v;
+
+	if (needs_clflush & CLFLUSH_AFTER)
+		drm_clflush_virt_range(cpu, sizeof(*cpu));
+
+	kunmap_atomic(map);
+	i915_gem_object_finish_access(obj);
+
+	return 0;
+}
+
+static int cpu_get(struct drm_i915_gem_object *obj,
+		   unsigned long offset,
+		   u32 *v)
+{
+	unsigned int needs_clflush;
+	struct page *page;
+	void *map;
+	u32 *cpu;
+	int err;
+
+	err = i915_gem_object_prepare_read(obj, &needs_clflush);
+	if (err)
+		return err;
+
+	page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
+	map = kmap_atomic(page);
+	cpu = map + offset_in_page(offset);
+
+	if (needs_clflush & CLFLUSH_BEFORE)
+		drm_clflush_virt_range(cpu, sizeof(*cpu));
+
+	*v = *cpu;
+
+	kunmap_atomic(map);
+	i915_gem_object_finish_access(obj);
+
+	return 0;
+}
+
+static int gtt_set(struct drm_i915_gem_object *obj,
+		   unsigned long offset,
+		   u32 v)
+{
+	struct i915_vma *vma;
+	u32 __iomem *map;
+	int err;
+
+	i915_gem_object_lock(obj);
+	err = i915_gem_object_set_to_gtt_domain(obj, true);
+	i915_gem_object_unlock(obj);
+	if (err)
+		return err;
+
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	map = i915_vma_pin_iomap(vma);
+	i915_vma_unpin(vma);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	iowrite32(v, &map[offset / sizeof(*map)]);
+	i915_vma_unpin_iomap(vma);
+
+	return 0;
+}
+
+static int gtt_get(struct drm_i915_gem_object *obj,
+		   unsigned long offset,
+		   u32 *v)
+{
+	struct i915_vma *vma;
+	u32 __iomem *map;
+	int err;
+
+	i915_gem_object_lock(obj);
+	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	i915_gem_object_unlock(obj);
+	if (err)
+		return err;
+
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	map = i915_vma_pin_iomap(vma);
+	i915_vma_unpin(vma);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	*v = ioread32(&map[offset / sizeof(*map)]);
+	i915_vma_unpin_iomap(vma);
+
+	return 0;
+}
+
+static int wc_set(struct drm_i915_gem_object *obj,
+		  unsigned long offset,
+		  u32 v)
+{
+	u32 *map;
+	int err;
+
+	i915_gem_object_lock(obj);
+	err = i915_gem_object_set_to_wc_domain(obj, true);
+	i915_gem_object_unlock(obj);
+	if (err)
+		return err;
+
+	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	map[offset / sizeof(*map)] = v;
+	i915_gem_object_unpin_map(obj);
+
+	return 0;
+}
+
+static int wc_get(struct drm_i915_gem_object *obj,
+		  unsigned long offset,
+		  u32 *v)
+{
+	u32 *map;
+	int err;
+
+	i915_gem_object_lock(obj);
+	err = i915_gem_object_set_to_wc_domain(obj, false);
+	i915_gem_object_unlock(obj);
+	if (err)
+		return err;
+
+	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	*v = map[offset / sizeof(*map)];
+	i915_gem_object_unpin_map(obj);
+
+	return 0;
+}
+
+static int gpu_set(struct drm_i915_gem_object *obj,
+		   unsigned long offset,
+		   u32 v)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_request *rq;
+	struct i915_vma *vma;
+	u32 *cs;
+	int err;
+
+	i915_gem_object_lock(obj);
+	err = i915_gem_object_set_to_gtt_domain(obj, true);
+	i915_gem_object_unlock(obj);
+	if (err)
+		return err;
+
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	rq = i915_request_create(i915->engine[RCS0]->kernel_context);
+	if (IS_ERR(rq)) {
+		i915_vma_unpin(vma);
+		return PTR_ERR(rq);
+	}
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs)) {
+		i915_request_add(rq);
+		i915_vma_unpin(vma);
+		return PTR_ERR(cs);
+	}
+
+	if (INTEL_GEN(i915) >= 8) {
+		*cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
+		*cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset);
+		*cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset);
+		*cs++ = v;
+	} else if (INTEL_GEN(i915) >= 4) {
+		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+		*cs++ = 0;
+		*cs++ = i915_ggtt_offset(vma) + offset;
+		*cs++ = v;
+	} else {
+		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+		*cs++ = i915_ggtt_offset(vma) + offset;
+		*cs++ = v;
+		*cs++ = MI_NOOP;
+	}
+	intel_ring_advance(rq, cs);
+
+	i915_vma_lock(vma);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
+	i915_vma_unpin(vma);
+
+	i915_request_add(rq);
+
+	return err;
+}
+
+static bool always_valid(struct drm_i915_private *i915)
+{
+	return true;
+}
+
+static bool needs_fence_registers(struct drm_i915_private *i915)
+{
+	return !i915_terminally_wedged(i915);
+}
+
+static bool needs_mi_store_dword(struct drm_i915_private *i915)
+{
+	if (i915_terminally_wedged(i915))
+		return false;
+
+	return intel_engine_can_store_dword(i915->engine[RCS0]);
+}
+
+static const struct igt_coherency_mode {
+	const char *name;
+	int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v);
+	int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v);
+	bool (*valid)(struct drm_i915_private *i915);
+} igt_coherency_mode[] = {
+	{ "cpu", cpu_set, cpu_get, always_valid },
+	{ "gtt", gtt_set, gtt_get, needs_fence_registers },
+	{ "wc", wc_set, wc_get, always_valid },
+	{ "gpu", gpu_set, NULL, needs_mi_store_dword },
+	{ },
+};
+
+static int igt_gem_coherency(void *arg)
+{
+	const unsigned int ncachelines = PAGE_SIZE/64;
+	I915_RND_STATE(prng);
+	struct drm_i915_private *i915 = arg;
+	const struct igt_coherency_mode *read, *write, *over;
+	struct drm_i915_gem_object *obj;
+	intel_wakeref_t wakeref;
+	unsigned long count, n;
+	u32 *offsets, *values;
+	int err = 0;
+
+	/* We repeatedly write, overwrite and read from a sequence of
+	 * cachelines in order to try and detect incoherency (unflushed writes
+	 * from either the CPU or GPU). Each setter/getter uses our cache
+	 * domain API which should prevent incoherency.
+	 */
+
+	offsets = kmalloc_array(ncachelines, 2*sizeof(u32), GFP_KERNEL);
+	if (!offsets)
+		return -ENOMEM;
+	for (count = 0; count < ncachelines; count++)
+		offsets[count] = count * 64 + 4 * (count % 16);
+
+	values = offsets + ncachelines;
+
+	mutex_lock(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+	for (over = igt_coherency_mode; over->name; over++) {
+		if (!over->set)
+			continue;
+
+		if (!over->valid(i915))
+			continue;
+
+		for (write = igt_coherency_mode; write->name; write++) {
+			if (!write->set)
+				continue;
+
+			if (!write->valid(i915))
+				continue;
+
+			for (read = igt_coherency_mode; read->name; read++) {
+				if (!read->get)
+					continue;
+
+				if (!read->valid(i915))
+					continue;
+
+				for_each_prime_number_from(count, 1, ncachelines) {
+					obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+					if (IS_ERR(obj)) {
+						err = PTR_ERR(obj);
+						goto unlock;
+					}
+
+					i915_random_reorder(offsets, ncachelines, &prng);
+					for (n = 0; n < count; n++)
+						values[n] = prandom_u32_state(&prng);
+
+					for (n = 0; n < count; n++) {
+						err = over->set(obj, offsets[n], ~values[n]);
+						if (err) {
+							pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n",
+							       n, count, over->name, err);
+							goto put_object;
+						}
+					}
+
+					for (n = 0; n < count; n++) {
+						err = write->set(obj, offsets[n], values[n]);
+						if (err) {
+							pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n",
+							       n, count, write->name, err);
+							goto put_object;
+						}
+					}
+
+					for (n = 0; n < count; n++) {
+						u32 found;
+
+						err = read->get(obj, offsets[n], &found);
+						if (err) {
+							pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n",
+							       n, count, read->name, err);
+							goto put_object;
+						}
+
+						if (found != values[n]) {
+							pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n",
+							       n, count, over->name,
+							       write->name, values[n],
+							       read->name, found,
+							       ~values[n], offsets[n]);
+							err = -EINVAL;
+							goto put_object;
+						}
+					}
+
+					i915_gem_object_put(obj);
+				}
+			}
+		}
+	}
+unlock:
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+	kfree(offsets);
+	return err;
+
+put_object:
+	i915_gem_object_put(obj);
+	goto unlock;
+}
+
+int i915_gem_coherency_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_gem_coherency),
+	};
+
+	return i915_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
new file mode 100644
index 000000000000..eaa2b16574c7
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -0,0 +1,1754 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017 Intel Corporation
+ */
+
+#include <linux/prime_numbers.h>
+
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_reset.h"
+#include "i915_selftest.h"
+
+#include "gem/selftests/igt_gem_utils.h"
+#include "selftests/i915_random.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/igt_live_test.h"
+#include "selftests/igt_reset.h"
+#include "selftests/igt_spinner.h"
+#include "selftests/mock_drm.h"
+#include "selftests/mock_gem_device.h"
+
+#include "huge_gem_object.h"
+#include "igt_gem_utils.h"
+
+#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
+
+static int live_nop_switch(void *arg)
+{
+	const unsigned int nctx = 1024;
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	struct i915_gem_context **ctx;
+	enum intel_engine_id id;
+	intel_wakeref_t wakeref;
+	struct igt_live_test t;
+	struct drm_file *file;
+	unsigned long n;
+	int err = -ENODEV;
+
+	/*
+	 * Create as many contexts as we can feasibly get away with
+	 * and check we can switch between them rapidly.
+	 *
+	 * Serves as very simple stress test for submission and HW switching
+	 * between contexts.
+	 */
+
+	if (!DRIVER_CAPS(i915)->has_logical_contexts)
+		return 0;
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx) {
+		err = -ENOMEM;
+		goto out_unlock;
+	}
+
+	for (n = 0; n < nctx; n++) {
+		ctx[n] = live_context(i915, file);
+		if (IS_ERR(ctx[n])) {
+			err = PTR_ERR(ctx[n]);
+			goto out_unlock;
+		}
+	}
+
+	for_each_engine(engine, i915, id) {
+		struct i915_request *rq;
+		unsigned long end_time, prime;
+		ktime_t times[2] = {};
+
+		times[0] = ktime_get_raw();
+		for (n = 0; n < nctx; n++) {
+			rq = igt_request_alloc(ctx[n], engine);
+			if (IS_ERR(rq)) {
+				err = PTR_ERR(rq);
+				goto out_unlock;
+			}
+			i915_request_add(rq);
+		}
+		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+			pr_err("Failed to populated %d contexts\n", nctx);
+			i915_gem_set_wedged(i915);
+			err = -EIO;
+			goto out_unlock;
+		}
+
+		times[1] = ktime_get_raw();
+
+		pr_info("Populated %d contexts on %s in %lluns\n",
+			nctx, engine->name, ktime_to_ns(times[1] - times[0]));
+
+		err = igt_live_test_begin(&t, i915, __func__, engine->name);
+		if (err)
+			goto out_unlock;
+
+		end_time = jiffies + i915_selftest.timeout_jiffies;
+		for_each_prime_number_from(prime, 2, 8192) {
+			times[1] = ktime_get_raw();
+
+			for (n = 0; n < prime; n++) {
+				rq = igt_request_alloc(ctx[n % nctx], engine);
+				if (IS_ERR(rq)) {
+					err = PTR_ERR(rq);
+					goto out_unlock;
+				}
+
+				/*
+				 * This space is left intentionally blank.
+				 *
+				 * We do not actually want to perform any
+				 * action with this request, we just want
+				 * to measure the latency in allocation
+				 * and submission of our breadcrumbs -
+				 * ensuring that the bare request is sufficient
+				 * for the system to work (i.e. proper HEAD
+				 * tracking of the rings, interrupt handling,
+				 * etc). It also gives us the lowest bounds
+				 * for latency.
+				 */
+
+				i915_request_add(rq);
+			}
+			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+				pr_err("Switching between %ld contexts timed out\n",
+				       prime);
+				i915_gem_set_wedged(i915);
+				break;
+			}
+
+			times[1] = ktime_sub(ktime_get_raw(), times[1]);
+			if (prime == 2)
+				times[0] = times[1];
+
+			if (__igt_timeout(end_time, NULL))
+				break;
+		}
+
+		err = igt_live_test_end(&t);
+		if (err)
+			goto out_unlock;
+
+		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
+			engine->name,
+			ktime_to_ns(times[0]),
+			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
+	}
+
+out_unlock:
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+	mock_file_free(i915, file);
+	return err;
+}
+
+static struct i915_vma *
+gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value)
+{
+	struct drm_i915_gem_object *obj;
+	const int gen = INTEL_GEN(vma->vm->i915);
+	unsigned long n, size;
+	u32 *cmd;
+	int err;
+
+	size = (4 * count + 1) * sizeof(u32);
+	size = round_up(size, PAGE_SIZE);
+	obj = i915_gem_object_create_internal(vma->vm->i915, size);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(cmd)) {
+		err = PTR_ERR(cmd);
+		goto err;
+	}
+
+	GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size);
+	offset += vma->node.start;
+
+	for (n = 0; n < count; n++) {
+		if (gen >= 8) {
+			*cmd++ = MI_STORE_DWORD_IMM_GEN4;
+			*cmd++ = lower_32_bits(offset);
+			*cmd++ = upper_32_bits(offset);
+			*cmd++ = value;
+		} else if (gen >= 4) {
+			*cmd++ = MI_STORE_DWORD_IMM_GEN4 |
+				(gen < 6 ? MI_USE_GGTT : 0);
+			*cmd++ = 0;
+			*cmd++ = offset;
+			*cmd++ = value;
+		} else {
+			*cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+			*cmd++ = offset;
+			*cmd++ = value;
+		}
+		offset += PAGE_SIZE;
+	}
+	*cmd = MI_BATCH_BUFFER_END;
+	i915_gem_object_flush_map(obj);
+	i915_gem_object_unpin_map(obj);
+
+	vma = i915_vma_instance(obj, vma->vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err)
+		goto err;
+
+	return vma;
+
+err:
+	i915_gem_object_put(obj);
+	return ERR_PTR(err);
+}
+
+static unsigned long real_page_count(struct drm_i915_gem_object *obj)
+{
+	return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
+}
+
+static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
+{
+	return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
+}
+
+static int gpu_fill(struct drm_i915_gem_object *obj,
+		    struct i915_gem_context *ctx,
+		    struct intel_engine_cs *engine,
+		    unsigned int dw)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+	struct i915_request *rq;
+	struct i915_vma *vma;
+	struct i915_vma *batch;
+	unsigned int flags;
+	int err;
+
+	GEM_BUG_ON(obj->base.size > vm->total);
+	GEM_BUG_ON(!intel_engine_can_store_dword(engine));
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	i915_gem_object_lock(obj);
+	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	i915_gem_object_unlock(obj);
+	if (err)
+		return err;
+
+	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
+	if (err)
+		return err;
+
+	/* Within the GTT the huge objects maps every page onto
+	 * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
+	 * We set the nth dword within the page using the nth
+	 * mapping via the GTT - this should exercise the GTT mapping
+	 * whilst checking that each context provides a unique view
+	 * into the object.
+	 */
+	batch = gpu_fill_dw(vma,
+			    (dw * real_page_count(obj)) << PAGE_SHIFT |
+			    (dw * sizeof(u32)),
+			    real_page_count(obj),
+			    dw);
+	if (IS_ERR(batch)) {
+		err = PTR_ERR(batch);
+		goto err_vma;
+	}
+
+	rq = igt_request_alloc(ctx, engine);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_batch;
+	}
+
+	flags = 0;
+	if (INTEL_GEN(vm->i915) <= 5)
+		flags |= I915_DISPATCH_SECURE;
+
+	err = engine->emit_bb_start(rq,
+				    batch->node.start, batch->node.size,
+				    flags);
+	if (err)
+		goto err_request;
+
+	i915_vma_lock(batch);
+	err = i915_vma_move_to_active(batch, rq, 0);
+	i915_vma_unlock(batch);
+	if (err)
+		goto skip_request;
+
+	i915_vma_lock(vma);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
+	if (err)
+		goto skip_request;
+
+	i915_request_add(rq);
+
+	i915_vma_unpin(batch);
+	i915_vma_close(batch);
+	i915_vma_put(batch);
+
+	i915_vma_unpin(vma);
+
+	return 0;
+
+skip_request:
+	i915_request_skip(rq, err);
+err_request:
+	i915_request_add(rq);
+err_batch:
+	i915_vma_unpin(batch);
+	i915_vma_put(batch);
+err_vma:
+	i915_vma_unpin(vma);
+	return err;
+}
+
+static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
+{
+	const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
+	unsigned int n, m, need_flush;
+	int err;
+
+	err = i915_gem_object_prepare_write(obj, &need_flush);
+	if (err)
+		return err;
+
+	for (n = 0; n < real_page_count(obj); n++) {
+		u32 *map;
+
+		map = kmap_atomic(i915_gem_object_get_page(obj, n));
+		for (m = 0; m < DW_PER_PAGE; m++)
+			map[m] = value;
+		if (!has_llc)
+			drm_clflush_virt_range(map, PAGE_SIZE);
+		kunmap_atomic(map);
+	}
+
+	i915_gem_object_finish_access(obj);
+	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
+	obj->write_domain = 0;
+	return 0;
+}
+
+static noinline int cpu_check(struct drm_i915_gem_object *obj,
+			      unsigned int idx, unsigned int max)
+{
+	unsigned int n, m, needs_flush;
+	int err;
+
+	err = i915_gem_object_prepare_read(obj, &needs_flush);
+	if (err)
+		return err;
+
+	for (n = 0; n < real_page_count(obj); n++) {
+		u32 *map;
+
+		map = kmap_atomic(i915_gem_object_get_page(obj, n));
+		if (needs_flush & CLFLUSH_BEFORE)
+			drm_clflush_virt_range(map, PAGE_SIZE);
+
+		for (m = 0; m < max; m++) {
+			if (map[m] != m) {
+				pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
+				       __builtin_return_address(0), idx,
+				       n, real_page_count(obj), m, max,
+				       map[m], m);
+				err = -EINVAL;
+				goto out_unmap;
+			}
+		}
+
+		for (; m < DW_PER_PAGE; m++) {
+			if (map[m] != STACK_MAGIC) {
+				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
+				       __builtin_return_address(0), idx, n, m,
+				       map[m], STACK_MAGIC);
+				err = -EINVAL;
+				goto out_unmap;
+			}
+		}
+
+out_unmap:
+		kunmap_atomic(map);
+		if (err)
+			break;
+	}
+
+	i915_gem_object_finish_access(obj);
+	return err;
+}
+
+static int file_add_object(struct drm_file *file,
+			    struct drm_i915_gem_object *obj)
+{
+	int err;
+
+	GEM_BUG_ON(obj->base.handle_count);
+
+	/* tie the object to the drm_file for easy reaping */
+	err = idr_alloc(&file->object_idr, &obj->base, 1, 0, GFP_KERNEL);
+	if (err < 0)
+		return  err;
+
+	i915_gem_object_get(obj);
+	obj->base.handle_count++;
+	return 0;
+}
+
+static struct drm_i915_gem_object *
+create_test_object(struct i915_gem_context *ctx,
+		   struct drm_file *file,
+		   struct list_head *objects)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_address_space *vm = ctx->vm ?: &ctx->i915->ggtt.vm;
+	u64 size;
+	int err;
+
+	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
+	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
+
+	obj = huge_gem_object(ctx->i915, DW_PER_PAGE * PAGE_SIZE, size);
+	if (IS_ERR(obj))
+		return obj;
+
+	err = file_add_object(file, obj);
+	i915_gem_object_put(obj);
+	if (err)
+		return ERR_PTR(err);
+
+	err = cpu_fill(obj, STACK_MAGIC);
+	if (err) {
+		pr_err("Failed to fill object with cpu, err=%d\n",
+		       err);
+		return ERR_PTR(err);
+	}
+
+	list_add_tail(&obj->st_link, objects);
+	return obj;
+}
+
+static unsigned long max_dwords(struct drm_i915_gem_object *obj)
+{
+	unsigned long npages = fake_page_count(obj);
+
+	GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
+	return npages / DW_PER_PAGE;
+}
+
+static int igt_ctx_exec(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = -ENODEV;
+
+	/*
+	 * Create a few different contexts (with different mm) and write
+	 * through each ctx/mm using the GPU making sure those writes end
+	 * up in the expected pages of our obj.
+	 */
+
+	if (!DRIVER_CAPS(i915)->has_logical_contexts)
+		return 0;
+
+	for_each_engine(engine, i915, id) {
+		struct drm_i915_gem_object *obj = NULL;
+		unsigned long ncontexts, ndwords, dw;
+		struct igt_live_test t;
+		struct drm_file *file;
+		IGT_TIMEOUT(end_time);
+		LIST_HEAD(objects);
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		if (!engine->context_size)
+			continue; /* No logical context support in HW */
+
+		file = mock_file(i915);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+
+		mutex_lock(&i915->drm.struct_mutex);
+
+		err = igt_live_test_begin(&t, i915, __func__, engine->name);
+		if (err)
+			goto out_unlock;
+
+		ncontexts = 0;
+		ndwords = 0;
+		dw = 0;
+		while (!time_after(jiffies, end_time)) {
+			struct i915_gem_context *ctx;
+			intel_wakeref_t wakeref;
+
+			ctx = live_context(i915, file);
+			if (IS_ERR(ctx)) {
+				err = PTR_ERR(ctx);
+				goto out_unlock;
+			}
+
+			if (!obj) {
+				obj = create_test_object(ctx, file, &objects);
+				if (IS_ERR(obj)) {
+					err = PTR_ERR(obj);
+					goto out_unlock;
+				}
+			}
+
+			with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+				err = gpu_fill(obj, ctx, engine, dw);
+			if (err) {
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				       ndwords, dw, max_dwords(obj),
+				       engine->name, ctx->hw_id,
+				       yesno(!!ctx->vm), err);
+				goto out_unlock;
+			}
+
+			if (++dw == max_dwords(obj)) {
+				obj = NULL;
+				dw = 0;
+			}
+
+			ndwords++;
+			ncontexts++;
+		}
+
+		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
+			ncontexts, engine->name, ndwords);
+
+		ncontexts = dw = 0;
+		list_for_each_entry(obj, &objects, st_link) {
+			unsigned int rem =
+				min_t(unsigned int, ndwords - dw, max_dwords(obj));
+
+			err = cpu_check(obj, ncontexts++, rem);
+			if (err)
+				break;
+
+			dw += rem;
+		}
+
+out_unlock:
+		if (igt_live_test_end(&t))
+			err = -EIO;
+		mutex_unlock(&i915->drm.struct_mutex);
+
+		mock_file_free(i915, file);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int igt_shared_ctx_exec(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct i915_gem_context *parent;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	struct igt_live_test t;
+	struct drm_file *file;
+	int err = 0;
+
+	/*
+	 * Create a few different contexts with the same mm and write
+	 * through each ctx using the GPU making sure those writes end
+	 * up in the expected pages of our obj.
+	 */
+	if (!DRIVER_CAPS(i915)->has_logical_contexts)
+		return 0;
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	parent = live_context(i915, file);
+	if (IS_ERR(parent)) {
+		err = PTR_ERR(parent);
+		goto out_unlock;
+	}
+
+	if (!parent->vm) { /* not full-ppgtt; nothing to share */
+		err = 0;
+		goto out_unlock;
+	}
+
+	err = igt_live_test_begin(&t, i915, __func__, "");
+	if (err)
+		goto out_unlock;
+
+	for_each_engine(engine, i915, id) {
+		unsigned long ncontexts, ndwords, dw;
+		struct drm_i915_gem_object *obj = NULL;
+		IGT_TIMEOUT(end_time);
+		LIST_HEAD(objects);
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		dw = 0;
+		ndwords = 0;
+		ncontexts = 0;
+		while (!time_after(jiffies, end_time)) {
+			struct i915_gem_context *ctx;
+			intel_wakeref_t wakeref;
+
+			ctx = kernel_context(i915);
+			if (IS_ERR(ctx)) {
+				err = PTR_ERR(ctx);
+				goto out_test;
+			}
+
+			__assign_ppgtt(ctx, parent->vm);
+
+			if (!obj) {
+				obj = create_test_object(parent, file, &objects);
+				if (IS_ERR(obj)) {
+					err = PTR_ERR(obj);
+					kernel_context_close(ctx);
+					goto out_test;
+				}
+			}
+
+			err = 0;
+			with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+				err = gpu_fill(obj, ctx, engine, dw);
+			if (err) {
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				       ndwords, dw, max_dwords(obj),
+				       engine->name, ctx->hw_id,
+				       yesno(!!ctx->vm), err);
+				kernel_context_close(ctx);
+				goto out_test;
+			}
+
+			if (++dw == max_dwords(obj)) {
+				obj = NULL;
+				dw = 0;
+			}
+
+			ndwords++;
+			ncontexts++;
+
+			kernel_context_close(ctx);
+		}
+		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
+			ncontexts, engine->name, ndwords);
+
+		ncontexts = dw = 0;
+		list_for_each_entry(obj, &objects, st_link) {
+			unsigned int rem =
+				min_t(unsigned int, ndwords - dw, max_dwords(obj));
+
+			err = cpu_check(obj, ncontexts++, rem);
+			if (err)
+				goto out_test;
+
+			dw += rem;
+		}
+	}
+out_test:
+	if (igt_live_test_end(&t))
+		err = -EIO;
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	mock_file_free(i915, file);
+	return err;
+}
+
+static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *obj;
+	u32 *cmd;
+	int err;
+
+	if (INTEL_GEN(vma->vm->i915) < 8)
+		return ERR_PTR(-EINVAL);
+
+	obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(cmd)) {
+		err = PTR_ERR(cmd);
+		goto err;
+	}
+
+	*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
+	*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
+	*cmd++ = lower_32_bits(vma->node.start);
+	*cmd++ = upper_32_bits(vma->node.start);
+	*cmd = MI_BATCH_BUFFER_END;
+
+	__i915_gem_object_flush_map(obj, 0, 64);
+	i915_gem_object_unpin_map(obj);
+
+	vma = i915_vma_instance(obj, vma->vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err)
+		goto err;
+
+	return vma;
+
+err:
+	i915_gem_object_put(obj);
+	return ERR_PTR(err);
+}
+
+static int
+emit_rpcs_query(struct drm_i915_gem_object *obj,
+		struct intel_context *ce,
+		struct i915_request **rq_out)
+{
+	struct i915_request *rq;
+	struct i915_vma *batch;
+	struct i915_vma *vma;
+	int err;
+
+	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
+
+	vma = i915_vma_instance(obj, ce->gem_context->vm, NULL);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	i915_gem_object_lock(obj);
+	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	i915_gem_object_unlock(obj);
+	if (err)
+		return err;
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err)
+		return err;
+
+	batch = rpcs_query_batch(vma);
+	if (IS_ERR(batch)) {
+		err = PTR_ERR(batch);
+		goto err_vma;
+	}
+
+	rq = i915_request_create(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_batch;
+	}
+
+	err = rq->engine->emit_bb_start(rq,
+					batch->node.start, batch->node.size,
+					0);
+	if (err)
+		goto err_request;
+
+	i915_vma_lock(batch);
+	err = i915_vma_move_to_active(batch, rq, 0);
+	i915_vma_unlock(batch);
+	if (err)
+		goto skip_request;
+
+	i915_vma_lock(vma);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
+	if (err)
+		goto skip_request;
+
+	i915_vma_unpin(batch);
+	i915_vma_close(batch);
+	i915_vma_put(batch);
+
+	i915_vma_unpin(vma);
+
+	*rq_out = i915_request_get(rq);
+
+	i915_request_add(rq);
+
+	return 0;
+
+skip_request:
+	i915_request_skip(rq, err);
+err_request:
+	i915_request_add(rq);
+err_batch:
+	i915_vma_unpin(batch);
+	i915_vma_put(batch);
+err_vma:
+	i915_vma_unpin(vma);
+
+	return err;
+}
+
+#define TEST_IDLE	BIT(0)
+#define TEST_BUSY	BIT(1)
+#define TEST_RESET	BIT(2)
+
+static int
+__sseu_prepare(struct drm_i915_private *i915,
+	       const char *name,
+	       unsigned int flags,
+	       struct intel_context *ce,
+	       struct igt_spinner **spin)
+{
+	struct i915_request *rq;
+	int ret;
+
+	*spin = NULL;
+	if (!(flags & (TEST_BUSY | TEST_RESET)))
+		return 0;
+
+	*spin = kzalloc(sizeof(**spin), GFP_KERNEL);
+	if (!*spin)
+		return -ENOMEM;
+
+	ret = igt_spinner_init(*spin, i915);
+	if (ret)
+		goto err_free;
+
+	rq = igt_spinner_create_request(*spin,
+					ce->gem_context,
+					ce->engine,
+					MI_NOOP);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+		goto err_fini;
+	}
+
+	i915_request_add(rq);
+
+	if (!igt_wait_for_spinner(*spin, rq)) {
+		pr_err("%s: Spinner failed to start!\n", name);
+		ret = -ETIMEDOUT;
+		goto err_end;
+	}
+
+	return 0;
+
+err_end:
+	igt_spinner_end(*spin);
+err_fini:
+	igt_spinner_fini(*spin);
+err_free:
+	kfree(fetch_and_zero(spin));
+	return ret;
+}
+
+static int
+__read_slice_count(struct drm_i915_private *i915,
+		   struct intel_context *ce,
+		   struct drm_i915_gem_object *obj,
+		   struct igt_spinner *spin,
+		   u32 *rpcs)
+{
+	struct i915_request *rq = NULL;
+	u32 s_mask, s_shift;
+	unsigned int cnt;
+	u32 *buf, val;
+	long ret;
+
+	ret = emit_rpcs_query(obj, ce, &rq);
+	if (ret)
+		return ret;
+
+	if (spin)
+		igt_spinner_end(spin);
+
+	ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
+	i915_request_put(rq);
+	if (ret < 0)
+		return ret;
+
+	buf = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(buf)) {
+		ret = PTR_ERR(buf);
+		return ret;
+	}
+
+	if (INTEL_GEN(i915) >= 11) {
+		s_mask = GEN11_RPCS_S_CNT_MASK;
+		s_shift = GEN11_RPCS_S_CNT_SHIFT;
+	} else {
+		s_mask = GEN8_RPCS_S_CNT_MASK;
+		s_shift = GEN8_RPCS_S_CNT_SHIFT;
+	}
+
+	val = *buf;
+	cnt = (val & s_mask) >> s_shift;
+	*rpcs = val;
+
+	i915_gem_object_unpin_map(obj);
+
+	return cnt;
+}
+
+static int
+__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
+	     const char *prefix, const char *suffix)
+{
+	if (slices == expected)
+		return 0;
+
+	if (slices < 0) {
+		pr_err("%s: %s read slice count failed with %d%s\n",
+		       name, prefix, slices, suffix);
+		return slices;
+	}
+
+	pr_err("%s: %s slice count %d is not %u%s\n",
+	       name, prefix, slices, expected, suffix);
+
+	pr_info("RPCS=0x%x; %u%sx%u%s\n",
+		rpcs, slices,
+		(rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
+		(rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
+		(rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");
+
+	return -EINVAL;
+}
+
+static int
+__sseu_finish(struct drm_i915_private *i915,
+	      const char *name,
+	      unsigned int flags,
+	      struct intel_context *ce,
+	      struct drm_i915_gem_object *obj,
+	      unsigned int expected,
+	      struct igt_spinner *spin)
+{
+	unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
+	u32 rpcs = 0;
+	int ret = 0;
+
+	if (flags & TEST_RESET) {
+		ret = i915_reset_engine(ce->engine, "sseu");
+		if (ret)
+			goto out;
+	}
+
+	ret = __read_slice_count(i915, ce, obj,
+				 flags & TEST_RESET ? NULL : spin, &rpcs);
+	ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
+	if (ret)
+		goto out;
+
+	ret = __read_slice_count(i915, ce->engine->kernel_context, obj,
+				 NULL, &rpcs);
+	ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");
+
+out:
+	if (spin)
+		igt_spinner_end(spin);
+
+	if ((flags & TEST_IDLE) && ret == 0) {
+		ret = i915_gem_wait_for_idle(i915, 0, MAX_SCHEDULE_TIMEOUT);
+		if (ret)
+			return ret;
+
+		ret = __read_slice_count(i915, ce, obj, NULL, &rpcs);
+		ret = __check_rpcs(name, rpcs, ret, expected,
+				   "Context", " after idle!");
+	}
+
+	return ret;
+}
+
+static int
+__sseu_test(struct drm_i915_private *i915,
+	    const char *name,
+	    unsigned int flags,
+	    struct intel_context *ce,
+	    struct drm_i915_gem_object *obj,
+	    struct intel_sseu sseu)
+{
+	struct igt_spinner *spin = NULL;
+	int ret;
+
+	ret = __sseu_prepare(i915, name, flags, ce, &spin);
+	if (ret)
+		return ret;
+
+	ret = __intel_context_reconfigure_sseu(ce, sseu);
+	if (ret)
+		goto out_spin;
+
+	ret = __sseu_finish(i915, name, flags, ce, obj,
+			    hweight32(sseu.slice_mask), spin);
+
+out_spin:
+	if (spin) {
+		igt_spinner_end(spin);
+		igt_spinner_fini(spin);
+		kfree(spin);
+	}
+	return ret;
+}
+
+static int
+__igt_ctx_sseu(struct drm_i915_private *i915,
+	       const char *name,
+	       unsigned int flags)
+{
+	struct intel_engine_cs *engine = i915->engine[RCS0];
+	struct intel_sseu default_sseu = engine->sseu;
+	struct drm_i915_gem_object *obj;
+	struct i915_gem_context *ctx;
+	struct intel_context *ce;
+	struct intel_sseu pg_sseu;
+	intel_wakeref_t wakeref;
+	struct drm_file *file;
+	int ret;
+
+	if (INTEL_GEN(i915) < 9)
+		return 0;
+
+	if (!RUNTIME_INFO(i915)->sseu.has_slice_pg)
+		return 0;
+
+	if (hweight32(default_sseu.slice_mask) < 2)
+		return 0;
+
+	/*
+	 * Gen11 VME friendly power-gated configuration with half enabled
+	 * sub-slices.
+	 */
+	pg_sseu = default_sseu;
+	pg_sseu.slice_mask = 1;
+	pg_sseu.subslice_mask =
+		~(~0 << (hweight32(default_sseu.subslice_mask) / 2));
+
+	pr_info("SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
+		name, flags, hweight32(default_sseu.slice_mask),
+		hweight32(pg_sseu.slice_mask));
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	if (flags & TEST_RESET)
+		igt_global_reset_lock(i915);
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	ctx = live_context(i915, file);
+	if (IS_ERR(ctx)) {
+		ret = PTR_ERR(ctx);
+		goto out_unlock;
+	}
+	i915_gem_context_clear_bannable(ctx); /* to reset and beyond! */
+
+	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(obj)) {
+		ret = PTR_ERR(obj);
+		goto out_unlock;
+	}
+
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+	ce = i915_gem_context_get_engine(ctx, RCS0);
+	if (IS_ERR(ce)) {
+		ret = PTR_ERR(ce);
+		goto out_rpm;
+	}
+
+	ret = intel_context_pin(ce);
+	if (ret)
+		goto out_context;
+
+	/* First set the default mask. */
+	ret = __sseu_test(i915, name, flags, ce, obj, default_sseu);
+	if (ret)
+		goto out_fail;
+
+	/* Then set a power-gated configuration. */
+	ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu);
+	if (ret)
+		goto out_fail;
+
+	/* Back to defaults. */
+	ret = __sseu_test(i915, name, flags, ce, obj, default_sseu);
+	if (ret)
+		goto out_fail;
+
+	/* One last power-gated configuration for the road. */
+	ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu);
+	if (ret)
+		goto out_fail;
+
+out_fail:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		ret = -EIO;
+
+	intel_context_unpin(ce);
+out_context:
+	intel_context_put(ce);
+out_rpm:
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+	i915_gem_object_put(obj);
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	if (flags & TEST_RESET)
+		igt_global_reset_unlock(i915);
+
+	mock_file_free(i915, file);
+
+	if (ret)
+		pr_err("%s: Failed with %d!\n", name, ret);
+
+	return ret;
+}
+
+static int igt_ctx_sseu(void *arg)
+{
+	struct {
+		const char *name;
+		unsigned int flags;
+	} *phase, phases[] = {
+		{ .name = "basic", .flags = 0 },
+		{ .name = "idle", .flags = TEST_IDLE },
+		{ .name = "busy", .flags = TEST_BUSY },
+		{ .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
+		{ .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
+		{ .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
+	};
+	unsigned int i;
+	int ret = 0;
+
+	for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
+	     i++, phase++)
+		ret = __igt_ctx_sseu(arg, phase->name, phase->flags);
+
+	return ret;
+}
+
+static int igt_ctx_readonly(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj = NULL;
+	struct i915_address_space *vm;
+	struct i915_gem_context *ctx;
+	unsigned long idx, ndwords, dw;
+	struct igt_live_test t;
+	struct drm_file *file;
+	I915_RND_STATE(prng);
+	IGT_TIMEOUT(end_time);
+	LIST_HEAD(objects);
+	int err = -ENODEV;
+
+	/*
+	 * Create a few read-only objects (with the occasional writable object)
+	 * and try to write into these object checking that the GPU discards
+	 * any write to a read-only object.
+	 */
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	err = igt_live_test_begin(&t, i915, __func__, "");
+	if (err)
+		goto out_unlock;
+
+	ctx = live_context(i915, file);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto out_unlock;
+	}
+
+	vm = ctx->vm ?: &i915->mm.aliasing_ppgtt->vm;
+	if (!vm || !vm->has_read_only) {
+		err = 0;
+		goto out_unlock;
+	}
+
+	ndwords = 0;
+	dw = 0;
+	while (!time_after(jiffies, end_time)) {
+		struct intel_engine_cs *engine;
+		unsigned int id;
+
+		for_each_engine(engine, i915, id) {
+			intel_wakeref_t wakeref;
+
+			if (!intel_engine_can_store_dword(engine))
+				continue;
+
+			if (!obj) {
+				obj = create_test_object(ctx, file, &objects);
+				if (IS_ERR(obj)) {
+					err = PTR_ERR(obj);
+					goto out_unlock;
+				}
+
+				if (prandom_u32_state(&prng) & 1)
+					i915_gem_object_set_readonly(obj);
+			}
+
+			err = 0;
+			with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+				err = gpu_fill(obj, ctx, engine, dw);
+			if (err) {
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				       ndwords, dw, max_dwords(obj),
+				       engine->name, ctx->hw_id,
+				       yesno(!!ctx->vm), err);
+				goto out_unlock;
+			}
+
+			if (++dw == max_dwords(obj)) {
+				obj = NULL;
+				dw = 0;
+			}
+			ndwords++;
+		}
+	}
+	pr_info("Submitted %lu dwords (across %u engines)\n",
+		ndwords, RUNTIME_INFO(i915)->num_engines);
+
+	dw = 0;
+	idx = 0;
+	list_for_each_entry(obj, &objects, st_link) {
+		unsigned int rem =
+			min_t(unsigned int, ndwords - dw, max_dwords(obj));
+		unsigned int num_writes;
+
+		num_writes = rem;
+		if (i915_gem_object_is_readonly(obj))
+			num_writes = 0;
+
+		err = cpu_check(obj, idx++, num_writes);
+		if (err)
+			break;
+
+		dw += rem;
+	}
+
+out_unlock:
+	if (igt_live_test_end(&t))
+		err = -EIO;
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	mock_file_free(i915, file);
+	return err;
+}
+
+static int check_scratch(struct i915_gem_context *ctx, u64 offset)
+{
+	struct drm_mm_node *node =
+		__drm_mm_interval_first(&ctx->vm->mm,
+					offset, offset + sizeof(u32) - 1);
+	if (!node || node->start > offset)
+		return 0;
+
+	GEM_BUG_ON(offset >= node->start + node->size);
+
+	pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
+	       upper_32_bits(offset), lower_32_bits(offset));
+	return -EINVAL;
+}
+
+static int write_to_scratch(struct i915_gem_context *ctx,
+			    struct intel_engine_cs *engine,
+			    u64 offset, u32 value)
+{
+	struct drm_i915_private *i915 = ctx->i915;
+	struct drm_i915_gem_object *obj;
+	struct i915_request *rq;
+	struct i915_vma *vma;
+	u32 *cmd;
+	int err;
+
+	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
+
+	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(cmd)) {
+		err = PTR_ERR(cmd);
+		goto err;
+	}
+
+	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
+	if (INTEL_GEN(i915) >= 8) {
+		*cmd++ = lower_32_bits(offset);
+		*cmd++ = upper_32_bits(offset);
+	} else {
+		*cmd++ = 0;
+		*cmd++ = offset;
+	}
+	*cmd++ = value;
+	*cmd = MI_BATCH_BUFFER_END;
+	__i915_gem_object_flush_map(obj, 0, 64);
+	i915_gem_object_unpin_map(obj);
+
+	vma = i915_vma_instance(obj, ctx->vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
+	if (err)
+		goto err;
+
+	err = check_scratch(ctx, offset);
+	if (err)
+		goto err_unpin;
+
+	rq = igt_request_alloc(ctx, engine);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_unpin;
+	}
+
+	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
+	if (err)
+		goto err_request;
+
+	i915_vma_lock(vma);
+	err = i915_vma_move_to_active(vma, rq, 0);
+	i915_vma_unlock(vma);
+	if (err)
+		goto skip_request;
+
+	i915_vma_unpin(vma);
+	i915_vma_close(vma);
+	i915_vma_put(vma);
+
+	i915_request_add(rq);
+
+	return 0;
+
+skip_request:
+	i915_request_skip(rq, err);
+err_request:
+	i915_request_add(rq);
+err_unpin:
+	i915_vma_unpin(vma);
+err:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+static int read_from_scratch(struct i915_gem_context *ctx,
+			     struct intel_engine_cs *engine,
+			     u64 offset, u32 *value)
+{
+	struct drm_i915_private *i915 = ctx->i915;
+	struct drm_i915_gem_object *obj;
+	const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! */
+	const u32 result = 0x100;
+	struct i915_request *rq;
+	struct i915_vma *vma;
+	u32 *cmd;
+	int err;
+
+	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
+
+	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(cmd)) {
+		err = PTR_ERR(cmd);
+		goto err;
+	}
+
+	memset(cmd, POISON_INUSE, PAGE_SIZE);
+	if (INTEL_GEN(i915) >= 8) {
+		*cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
+		*cmd++ = RCS_GPR0;
+		*cmd++ = lower_32_bits(offset);
+		*cmd++ = upper_32_bits(offset);
+		*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
+		*cmd++ = RCS_GPR0;
+		*cmd++ = result;
+		*cmd++ = 0;
+	} else {
+		*cmd++ = MI_LOAD_REGISTER_MEM;
+		*cmd++ = RCS_GPR0;
+		*cmd++ = offset;
+		*cmd++ = MI_STORE_REGISTER_MEM;
+		*cmd++ = RCS_GPR0;
+		*cmd++ = result;
+	}
+	*cmd = MI_BATCH_BUFFER_END;
+
+	i915_gem_object_flush_map(obj);
+	i915_gem_object_unpin_map(obj);
+
+	vma = i915_vma_instance(obj, ctx->vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
+	if (err)
+		goto err;
+
+	err = check_scratch(ctx, offset);
+	if (err)
+		goto err_unpin;
+
+	rq = igt_request_alloc(ctx, engine);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_unpin;
+	}
+
+	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
+	if (err)
+		goto err_request;
+
+	i915_vma_lock(vma);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
+	if (err)
+		goto skip_request;
+
+	i915_vma_unpin(vma);
+	i915_vma_close(vma);
+
+	i915_request_add(rq);
+
+	i915_gem_object_lock(obj);
+	err = i915_gem_object_set_to_cpu_domain(obj, false);
+	i915_gem_object_unlock(obj);
+	if (err)
+		goto err;
+
+	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(cmd)) {
+		err = PTR_ERR(cmd);
+		goto err;
+	}
+
+	*value = cmd[result / sizeof(*cmd)];
+	i915_gem_object_unpin_map(obj);
+	i915_gem_object_put(obj);
+
+	return 0;
+
+skip_request:
+	i915_request_skip(rq, err);
+err_request:
+	i915_request_add(rq);
+err_unpin:
+	i915_vma_unpin(vma);
+err:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+static int igt_vm_isolation(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct i915_gem_context *ctx_a, *ctx_b;
+	struct intel_engine_cs *engine;
+	intel_wakeref_t wakeref;
+	struct igt_live_test t;
+	struct drm_file *file;
+	I915_RND_STATE(prng);
+	unsigned long count;
+	unsigned int id;
+	u64 vm_total;
+	int err;
+
+	if (INTEL_GEN(i915) < 7)
+		return 0;
+
+	/*
+	 * The simple goal here is that a write into one context is not
+	 * observed in a second (separate page tables and scratch).
+	 */
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	err = igt_live_test_begin(&t, i915, __func__, "");
+	if (err)
+		goto out_unlock;
+
+	ctx_a = live_context(i915, file);
+	if (IS_ERR(ctx_a)) {
+		err = PTR_ERR(ctx_a);
+		goto out_unlock;
+	}
+
+	ctx_b = live_context(i915, file);
+	if (IS_ERR(ctx_b)) {
+		err = PTR_ERR(ctx_b);
+		goto out_unlock;
+	}
+
+	/* We can only test vm isolation, if the vm are distinct */
+	if (ctx_a->vm == ctx_b->vm)
+		goto out_unlock;
+
+	vm_total = ctx_a->vm->total;
+	GEM_BUG_ON(ctx_b->vm->total != vm_total);
+	vm_total -= I915_GTT_PAGE_SIZE;
+
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+	count = 0;
+	for_each_engine(engine, i915, id) {
+		IGT_TIMEOUT(end_time);
+		unsigned long this = 0;
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		while (!__igt_timeout(end_time, NULL)) {
+			u32 value = 0xc5c5c5c5;
+			u64 offset;
+
+			div64_u64_rem(i915_prandom_u64_state(&prng),
+				      vm_total, &offset);
+			offset &= -sizeof(u32);
+			offset += I915_GTT_PAGE_SIZE;
+
+			err = write_to_scratch(ctx_a, engine,
+					       offset, 0xdeadbeef);
+			if (err == 0)
+				err = read_from_scratch(ctx_b, engine,
+							offset, &value);
+			if (err)
+				goto out_rpm;
+
+			if (value) {
+				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
+				       engine->name, value,
+				       upper_32_bits(offset),
+				       lower_32_bits(offset),
+				       this);
+				err = -EINVAL;
+				goto out_rpm;
+			}
+
+			this++;
+		}
+		count += this;
+	}
+	pr_info("Checked %lu scratch offsets across %d engines\n",
+		count, RUNTIME_INFO(i915)->num_engines);
+
+out_rpm:
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+out_unlock:
+	if (igt_live_test_end(&t))
+		err = -EIO;
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	mock_file_free(i915, file);
+	return err;
+}
+
+static __maybe_unused const char *
+__engine_name(struct drm_i915_private *i915, intel_engine_mask_t engines)
+{
+	struct intel_engine_cs *engine;
+	intel_engine_mask_t tmp;
+
+	if (engines == ALL_ENGINES)
+		return "all";
+
+	for_each_engine_masked(engine, i915, engines, tmp)
+		return engine->name;
+
+	return "none";
+}
+
+static bool skip_unused_engines(struct intel_context *ce, void *data)
+{
+	return !ce->state;
+}
+
+static void mock_barrier_task(void *data)
+{
+	unsigned int *counter = data;
+
+	++*counter;
+}
+
+static int mock_context_barrier(void *arg)
+{
+#undef pr_fmt
+#define pr_fmt(x) "context_barrier_task():" # x
+	struct drm_i915_private *i915 = arg;
+	struct i915_gem_context *ctx;
+	struct i915_request *rq;
+	unsigned int counter;
+	int err;
+
+	/*
+	 * The context barrier provides us with a callback after it emits
+	 * a request; useful for retiring old state after loading new.
+	 */
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	ctx = mock_context(i915, "mock");
+	if (!ctx) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+
+	counter = 0;
+	err = context_barrier_task(ctx, 0,
+				   NULL, NULL, mock_barrier_task, &counter);
+	if (err) {
+		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
+		goto out;
+	}
+	if (counter == 0) {
+		pr_err("Did not retire immediately with 0 engines\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	counter = 0;
+	err = context_barrier_task(ctx, ALL_ENGINES,
+				   skip_unused_engines,
+				   NULL,
+				   mock_barrier_task,
+				   &counter);
+	if (err) {
+		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
+		goto out;
+	}
+	if (counter == 0) {
+		pr_err("Did not retire immediately for all unused engines\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	rq = igt_request_alloc(ctx, i915->engine[RCS0]);
+	if (IS_ERR(rq)) {
+		pr_err("Request allocation failed!\n");
+		goto out;
+	}
+	i915_request_add(rq);
+
+	counter = 0;
+	context_barrier_inject_fault = BIT(RCS0);
+	err = context_barrier_task(ctx, ALL_ENGINES,
+				   NULL, NULL, mock_barrier_task, &counter);
+	context_barrier_inject_fault = 0;
+	if (err == -ENXIO)
+		err = 0;
+	else
+		pr_err("Did not hit fault injection!\n");
+	if (counter != 0) {
+		pr_err("Invoked callback on error!\n");
+		err = -EIO;
+	}
+	if (err)
+		goto out;
+
+	counter = 0;
+	err = context_barrier_task(ctx, ALL_ENGINES,
+				   skip_unused_engines,
+				   NULL,
+				   mock_barrier_task,
+				   &counter);
+	if (err) {
+		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
+		goto out;
+	}
+	mock_device_flush(i915);
+	if (counter == 0) {
+		pr_err("Did not retire on each active engines\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+out:
+	mock_context_close(ctx);
+unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+#undef pr_fmt
+#define pr_fmt(x) x
+}
+
+int i915_gem_context_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(mock_context_barrier),
+	};
+	struct drm_i915_private *i915;
+	int err;
+
+	i915 = mock_gem_device();
+	if (!i915)
+		return -ENOMEM;
+
+	err = i915_subtests(tests, i915);
+
+	drm_dev_put(&i915->drm);
+	return err;
+}
+
+int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(live_nop_switch),
+		SUBTEST(igt_ctx_exec),
+		SUBTEST(igt_ctx_readonly),
+		SUBTEST(igt_ctx_sseu),
+		SUBTEST(igt_shared_ctx_exec),
+		SUBTEST(igt_vm_isolation),
+	};
+
+	if (i915_terminally_wedged(dev_priv))
+		return 0;
+
+	return i915_subtests(tests, dev_priv);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
new file mode 100644
index 000000000000..e3a64edef918
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -0,0 +1,387 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_selftest.h"
+
+#include "mock_dmabuf.h"
+#include "selftests/mock_gem_device.h"
+
+static int igt_dmabuf_export(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	struct dma_buf *dmabuf;
+
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0);
+	i915_gem_object_put(obj);
+	if (IS_ERR(dmabuf)) {
+		pr_err("i915_gem_prime_export failed with err=%d\n",
+		       (int)PTR_ERR(dmabuf));
+		return PTR_ERR(dmabuf);
+	}
+
+	dma_buf_put(dmabuf);
+	return 0;
+}
+
+static int igt_dmabuf_import_self(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	struct drm_gem_object *import;
+	struct dma_buf *dmabuf;
+	int err;
+
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0);
+	if (IS_ERR(dmabuf)) {
+		pr_err("i915_gem_prime_export failed with err=%d\n",
+		       (int)PTR_ERR(dmabuf));
+		err = PTR_ERR(dmabuf);
+		goto out;
+	}
+
+	import = i915_gem_prime_import(&i915->drm, dmabuf);
+	if (IS_ERR(import)) {
+		pr_err("i915_gem_prime_import failed with err=%d\n",
+		       (int)PTR_ERR(import));
+		err = PTR_ERR(import);
+		goto out_dmabuf;
+	}
+
+	if (import != &obj->base) {
+		pr_err("i915_gem_prime_import created a new object!\n");
+		err = -EINVAL;
+		goto out_import;
+	}
+
+	err = 0;
+out_import:
+	i915_gem_object_put(to_intel_bo(import));
+out_dmabuf:
+	dma_buf_put(dmabuf);
+out:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+static int igt_dmabuf_import(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	struct dma_buf *dmabuf;
+	void *obj_map, *dma_map;
+	u32 pattern[] = { 0, 0xaa, 0xcc, 0x55, 0xff };
+	int err, i;
+
+	dmabuf = mock_dmabuf(1);
+	if (IS_ERR(dmabuf))
+		return PTR_ERR(dmabuf);
+
+	obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf));
+	if (IS_ERR(obj)) {
+		pr_err("i915_gem_prime_import failed with err=%d\n",
+		       (int)PTR_ERR(obj));
+		err = PTR_ERR(obj);
+		goto out_dmabuf;
+	}
+
+	if (obj->base.dev != &i915->drm) {
+		pr_err("i915_gem_prime_import created a non-i915 object!\n");
+		err = -EINVAL;
+		goto out_obj;
+	}
+
+	if (obj->base.size != PAGE_SIZE) {
+		pr_err("i915_gem_prime_import is wrong size found %lld, expected %ld\n",
+		       (long long)obj->base.size, PAGE_SIZE);
+		err = -EINVAL;
+		goto out_obj;
+	}
+
+	dma_map = dma_buf_vmap(dmabuf);
+	if (!dma_map) {
+		pr_err("dma_buf_vmap failed\n");
+		err = -ENOMEM;
+		goto out_obj;
+	}
+
+	if (0) { /* Can not yet map dmabuf */
+		obj_map = i915_gem_object_pin_map(obj, I915_MAP_WB);
+		if (IS_ERR(obj_map)) {
+			err = PTR_ERR(obj_map);
+			pr_err("i915_gem_object_pin_map failed with err=%d\n", err);
+			goto out_dma_map;
+		}
+
+		for (i = 0; i < ARRAY_SIZE(pattern); i++) {
+			memset(dma_map, pattern[i], PAGE_SIZE);
+			if (memchr_inv(obj_map, pattern[i], PAGE_SIZE)) {
+				err = -EINVAL;
+				pr_err("imported vmap not all set to %x!\n", pattern[i]);
+				i915_gem_object_unpin_map(obj);
+				goto out_dma_map;
+			}
+		}
+
+		for (i = 0; i < ARRAY_SIZE(pattern); i++) {
+			memset(obj_map, pattern[i], PAGE_SIZE);
+			if (memchr_inv(dma_map, pattern[i], PAGE_SIZE)) {
+				err = -EINVAL;
+				pr_err("exported vmap not all set to %x!\n", pattern[i]);
+				i915_gem_object_unpin_map(obj);
+				goto out_dma_map;
+			}
+		}
+
+		i915_gem_object_unpin_map(obj);
+	}
+
+	err = 0;
+out_dma_map:
+	dma_buf_vunmap(dmabuf, dma_map);
+out_obj:
+	i915_gem_object_put(obj);
+out_dmabuf:
+	dma_buf_put(dmabuf);
+	return err;
+}
+
+static int igt_dmabuf_import_ownership(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	struct dma_buf *dmabuf;
+	void *ptr;
+	int err;
+
+	dmabuf = mock_dmabuf(1);
+	if (IS_ERR(dmabuf))
+		return PTR_ERR(dmabuf);
+
+	ptr = dma_buf_vmap(dmabuf);
+	if (!ptr) {
+		pr_err("dma_buf_vmap failed\n");
+		err = -ENOMEM;
+		goto err_dmabuf;
+	}
+
+	memset(ptr, 0xc5, PAGE_SIZE);
+	dma_buf_vunmap(dmabuf, ptr);
+
+	obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf));
+	if (IS_ERR(obj)) {
+		pr_err("i915_gem_prime_import failed with err=%d\n",
+		       (int)PTR_ERR(obj));
+		err = PTR_ERR(obj);
+		goto err_dmabuf;
+	}
+
+	dma_buf_put(dmabuf);
+
+	err = i915_gem_object_pin_pages(obj);
+	if (err) {
+		pr_err("i915_gem_object_pin_pages failed with err=%d\n", err);
+		goto out_obj;
+	}
+
+	err = 0;
+	i915_gem_object_unpin_pages(obj);
+out_obj:
+	i915_gem_object_put(obj);
+	return err;
+
+err_dmabuf:
+	dma_buf_put(dmabuf);
+	return err;
+}
+
+static int igt_dmabuf_export_vmap(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	struct dma_buf *dmabuf;
+	void *ptr;
+	int err;
+
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0);
+	if (IS_ERR(dmabuf)) {
+		pr_err("i915_gem_prime_export failed with err=%d\n",
+		       (int)PTR_ERR(dmabuf));
+		err = PTR_ERR(dmabuf);
+		goto err_obj;
+	}
+	i915_gem_object_put(obj);
+
+	ptr = dma_buf_vmap(dmabuf);
+	if (!ptr) {
+		pr_err("dma_buf_vmap failed\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if (memchr_inv(ptr, 0, dmabuf->size)) {
+		pr_err("Exported object not initialiased to zero!\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	memset(ptr, 0xc5, dmabuf->size);
+
+	err = 0;
+	dma_buf_vunmap(dmabuf, ptr);
+out:
+	dma_buf_put(dmabuf);
+	return err;
+
+err_obj:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+static int igt_dmabuf_export_kmap(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	struct dma_buf *dmabuf;
+	void *ptr;
+	int err;
+
+	obj = i915_gem_object_create_shmem(i915, 2 * PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0);
+	i915_gem_object_put(obj);
+	if (IS_ERR(dmabuf)) {
+		err = PTR_ERR(dmabuf);
+		pr_err("i915_gem_prime_export failed with err=%d\n", err);
+		return err;
+	}
+
+	ptr = dma_buf_kmap(dmabuf, 0);
+	if (!ptr) {
+		pr_err("dma_buf_kmap failed\n");
+		err = -ENOMEM;
+		goto err;
+	}
+
+	if (memchr_inv(ptr, 0, PAGE_SIZE)) {
+		dma_buf_kunmap(dmabuf, 0, ptr);
+		pr_err("Exported page[0] not initialiased to zero!\n");
+		err = -EINVAL;
+		goto err;
+	}
+
+	memset(ptr, 0xc5, PAGE_SIZE);
+	dma_buf_kunmap(dmabuf, 0, ptr);
+
+	ptr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(ptr)) {
+		err = PTR_ERR(ptr);
+		pr_err("i915_gem_object_pin_map failed with err=%d\n", err);
+		goto err;
+	}
+	memset(ptr + PAGE_SIZE, 0xaa, PAGE_SIZE);
+	i915_gem_object_flush_map(obj);
+	i915_gem_object_unpin_map(obj);
+
+	ptr = dma_buf_kmap(dmabuf, 1);
+	if (!ptr) {
+		pr_err("dma_buf_kmap failed\n");
+		err = -ENOMEM;
+		goto err;
+	}
+
+	if (memchr_inv(ptr, 0xaa, PAGE_SIZE)) {
+		dma_buf_kunmap(dmabuf, 1, ptr);
+		pr_err("Exported page[1] not set to 0xaa!\n");
+		err = -EINVAL;
+		goto err;
+	}
+
+	memset(ptr, 0xc5, PAGE_SIZE);
+	dma_buf_kunmap(dmabuf, 1, ptr);
+
+	ptr = dma_buf_kmap(dmabuf, 0);
+	if (!ptr) {
+		pr_err("dma_buf_kmap failed\n");
+		err = -ENOMEM;
+		goto err;
+	}
+	if (memchr_inv(ptr, 0xc5, PAGE_SIZE)) {
+		dma_buf_kunmap(dmabuf, 0, ptr);
+		pr_err("Exported page[0] did not retain 0xc5!\n");
+		err = -EINVAL;
+		goto err;
+	}
+	dma_buf_kunmap(dmabuf, 0, ptr);
+
+	ptr = dma_buf_kmap(dmabuf, 2);
+	if (ptr) {
+		pr_err("Erroneously kmapped beyond the end of the object!\n");
+		dma_buf_kunmap(dmabuf, 2, ptr);
+		err = -EINVAL;
+		goto err;
+	}
+
+	ptr = dma_buf_kmap(dmabuf, -1);
+	if (ptr) {
+		pr_err("Erroneously kmapped before the start of the object!\n");
+		dma_buf_kunmap(dmabuf, -1, ptr);
+		err = -EINVAL;
+		goto err;
+	}
+
+	err = 0;
+err:
+	dma_buf_put(dmabuf);
+	return err;
+}
+
+int i915_gem_dmabuf_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_dmabuf_export),
+		SUBTEST(igt_dmabuf_import_self),
+		SUBTEST(igt_dmabuf_import),
+		SUBTEST(igt_dmabuf_import_ownership),
+		SUBTEST(igt_dmabuf_export_vmap),
+		SUBTEST(igt_dmabuf_export_kmap),
+	};
+	struct drm_i915_private *i915;
+	int err;
+
+	i915 = mock_gem_device();
+	if (!i915)
+		return -ENOMEM;
+
+	err = i915_subtests(tests, i915);
+
+	drm_dev_put(&i915->drm);
+	return err;
+}
+
+int i915_gem_dmabuf_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_dmabuf_export),
+	};
+
+	return i915_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
new file mode 100644
index 000000000000..5c81f4b4813a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -0,0 +1,506 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include <linux/prime_numbers.h>
+
+#include "gt/intel_gt_pm.h"
+#include "huge_gem_object.h"
+#include "i915_selftest.h"
+#include "selftests/igt_flush_test.h"
+
+struct tile {
+	unsigned int width;
+	unsigned int height;
+	unsigned int stride;
+	unsigned int size;
+	unsigned int tiling;
+	unsigned int swizzle;
+};
+
+static u64 swizzle_bit(unsigned int bit, u64 offset)
+{
+	return (offset & BIT_ULL(bit)) >> (bit - 6);
+}
+
+static u64 tiled_offset(const struct tile *tile, u64 v)
+{
+	u64 x, y;
+
+	if (tile->tiling == I915_TILING_NONE)
+		return v;
+
+	y = div64_u64_rem(v, tile->stride, &x);
+	v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height;
+
+	if (tile->tiling == I915_TILING_X) {
+		v += y * tile->width;
+		v += div64_u64_rem(x, tile->width, &x) << tile->size;
+		v += x;
+	} else if (tile->width == 128) {
+		const unsigned int ytile_span = 16;
+		const unsigned int ytile_height = 512;
+
+		v += y * ytile_span;
+		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
+		v += x;
+	} else {
+		const unsigned int ytile_span = 32;
+		const unsigned int ytile_height = 256;
+
+		v += y * ytile_span;
+		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
+		v += x;
+	}
+
+	switch (tile->swizzle) {
+	case I915_BIT_6_SWIZZLE_9:
+		v ^= swizzle_bit(9, v);
+		break;
+	case I915_BIT_6_SWIZZLE_9_10:
+		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
+		break;
+	case I915_BIT_6_SWIZZLE_9_11:
+		v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
+		break;
+	case I915_BIT_6_SWIZZLE_9_10_11:
+		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
+		break;
+	}
+
+	return v;
+}
+
+static int check_partial_mapping(struct drm_i915_gem_object *obj,
+				 const struct tile *tile,
+				 unsigned long end_time)
+{
+	const unsigned int nreal = obj->scratch / PAGE_SIZE;
+	const unsigned long npages = obj->base.size / PAGE_SIZE;
+	struct i915_vma *vma;
+	unsigned long page;
+	int err;
+
+	if (igt_timeout(end_time,
+			"%s: timed out before tiling=%d stride=%d\n",
+			__func__, tile->tiling, tile->stride))
+		return -EINTR;
+
+	err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
+	if (err) {
+		pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n",
+		       tile->tiling, tile->stride, err);
+		return err;
+	}
+
+	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
+	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
+
+	i915_gem_object_lock(obj);
+	err = i915_gem_object_set_to_gtt_domain(obj, true);
+	i915_gem_object_unlock(obj);
+	if (err) {
+		pr_err("Failed to flush to GTT write domain; err=%d\n", err);
+		return err;
+	}
+
+	for_each_prime_number_from(page, 1, npages) {
+		struct i915_ggtt_view view =
+			compute_partial_view(obj, page, MIN_CHUNK_PAGES);
+		u32 __iomem *io;
+		struct page *p;
+		unsigned int n;
+		u64 offset;
+		u32 *cpu;
+
+		GEM_BUG_ON(view.partial.size > nreal);
+		cond_resched();
+
+		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
+		if (IS_ERR(vma)) {
+			pr_err("Failed to pin partial view: offset=%lu; err=%d\n",
+			       page, (int)PTR_ERR(vma));
+			return PTR_ERR(vma);
+		}
+
+		n = page - view.partial.offset;
+		GEM_BUG_ON(n >= view.partial.size);
+
+		io = i915_vma_pin_iomap(vma);
+		i915_vma_unpin(vma);
+		if (IS_ERR(io)) {
+			pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
+			       page, (int)PTR_ERR(io));
+			return PTR_ERR(io);
+		}
+
+		iowrite32(page, io + n * PAGE_SIZE / sizeof(*io));
+		i915_vma_unpin_iomap(vma);
+
+		offset = tiled_offset(tile, page << PAGE_SHIFT);
+		if (offset >= obj->base.size)
+			continue;
+
+		i915_gem_flush_ggtt_writes(to_i915(obj->base.dev));
+
+		p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
+		cpu = kmap(p) + offset_in_page(offset);
+		drm_clflush_virt_range(cpu, sizeof(*cpu));
+		if (*cpu != (u32)page) {
+			pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n",
+			       page, n,
+			       view.partial.offset,
+			       view.partial.size,
+			       vma->size >> PAGE_SHIFT,
+			       tile->tiling ? tile_row_pages(obj) : 0,
+			       vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride,
+			       offset >> PAGE_SHIFT,
+			       (unsigned int)offset_in_page(offset),
+			       offset,
+			       (u32)page, *cpu);
+			err = -EINVAL;
+		}
+		*cpu = 0;
+		drm_clflush_virt_range(cpu, sizeof(*cpu));
+		kunmap(p);
+		if (err)
+			return err;
+
+		i915_vma_destroy(vma);
+	}
+
+	return 0;
+}
+
+static int igt_partial_tiling(void *arg)
+{
+	const unsigned int nreal = 1 << 12; /* largest tile row x2 */
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	intel_wakeref_t wakeref;
+	int tiling;
+	int err;
+
+	/* We want to check the page mapping and fencing of a large object
+	 * mmapped through the GTT. The object we create is larger than can
+	 * possibly be mmaped as a whole, and so we must use partial GGTT vma.
+	 * We then check that a write through each partial GGTT vma ends up
+	 * in the right set of pages within the object, and with the expected
+	 * tiling, which we verify by manual swizzling.
+	 */
+
+	obj = huge_gem_object(i915,
+			      nreal << PAGE_SHIFT,
+			      (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	err = i915_gem_object_pin_pages(obj);
+	if (err) {
+		pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
+		       nreal, obj->base.size / PAGE_SIZE, err);
+		goto out;
+	}
+
+	mutex_lock(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+	if (1) {
+		IGT_TIMEOUT(end);
+		struct tile tile;
+
+		tile.height = 1;
+		tile.width = 1;
+		tile.size = 0;
+		tile.stride = 0;
+		tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
+		tile.tiling = I915_TILING_NONE;
+
+		err = check_partial_mapping(obj, &tile, end);
+		if (err && err != -EINTR)
+			goto out_unlock;
+	}
+
+	for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) {
+		IGT_TIMEOUT(end);
+		unsigned int max_pitch;
+		unsigned int pitch;
+		struct tile tile;
+
+		if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
+			/*
+			 * The swizzling pattern is actually unknown as it
+			 * varies based on physical address of each page.
+			 * See i915_gem_detect_bit_6_swizzle().
+			 */
+			break;
+
+		tile.tiling = tiling;
+		switch (tiling) {
+		case I915_TILING_X:
+			tile.swizzle = i915->mm.bit_6_swizzle_x;
+			break;
+		case I915_TILING_Y:
+			tile.swizzle = i915->mm.bit_6_swizzle_y;
+			break;
+		}
+
+		GEM_BUG_ON(tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN);
+		if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 ||
+		    tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
+			continue;
+
+		if (INTEL_GEN(i915) <= 2) {
+			tile.height = 16;
+			tile.width = 128;
+			tile.size = 11;
+		} else if (tile.tiling == I915_TILING_Y &&
+			   HAS_128_BYTE_Y_TILING(i915)) {
+			tile.height = 32;
+			tile.width = 128;
+			tile.size = 12;
+		} else {
+			tile.height = 8;
+			tile.width = 512;
+			tile.size = 12;
+		}
+
+		if (INTEL_GEN(i915) < 4)
+			max_pitch = 8192 / tile.width;
+		else if (INTEL_GEN(i915) < 7)
+			max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width;
+		else
+			max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width;
+
+		for (pitch = max_pitch; pitch; pitch >>= 1) {
+			tile.stride = tile.width * pitch;
+			err = check_partial_mapping(obj, &tile, end);
+			if (err == -EINTR)
+				goto next_tiling;
+			if (err)
+				goto out_unlock;
+
+			if (pitch > 2 && INTEL_GEN(i915) >= 4) {
+				tile.stride = tile.width * (pitch - 1);
+				err = check_partial_mapping(obj, &tile, end);
+				if (err == -EINTR)
+					goto next_tiling;
+				if (err)
+					goto out_unlock;
+			}
+
+			if (pitch < max_pitch && INTEL_GEN(i915) >= 4) {
+				tile.stride = tile.width * (pitch + 1);
+				err = check_partial_mapping(obj, &tile, end);
+				if (err == -EINTR)
+					goto next_tiling;
+				if (err)
+					goto out_unlock;
+			}
+		}
+
+		if (INTEL_GEN(i915) >= 4) {
+			for_each_prime_number(pitch, max_pitch) {
+				tile.stride = tile.width * pitch;
+				err = check_partial_mapping(obj, &tile, end);
+				if (err == -EINTR)
+					goto next_tiling;
+				if (err)
+					goto out_unlock;
+			}
+		}
+
+next_tiling: ;
+	}
+
+out_unlock:
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+	i915_gem_object_unpin_pages(obj);
+out:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+static int make_obj_busy(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_request *rq;
+	struct i915_vma *vma;
+	int err;
+
+	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err)
+		return err;
+
+	rq = i915_request_create(i915->engine[RCS0]->kernel_context);
+	if (IS_ERR(rq)) {
+		i915_vma_unpin(vma);
+		return PTR_ERR(rq);
+	}
+
+	i915_vma_lock(vma);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
+
+	i915_request_add(rq);
+
+	i915_vma_unpin(vma);
+	i915_gem_object_put(obj); /* leave it only alive via its active ref */
+
+	return err;
+}
+
+static bool assert_mmap_offset(struct drm_i915_private *i915,
+			       unsigned long size,
+			       int expected)
+{
+	struct drm_i915_gem_object *obj;
+	int err;
+
+	obj = i915_gem_object_create_internal(i915, size);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	err = create_mmap_offset(obj);
+	i915_gem_object_put(obj);
+
+	return err == expected;
+}
+
+static void disable_retire_worker(struct drm_i915_private *i915)
+{
+	i915_gem_shrinker_unregister(i915);
+
+	intel_gt_pm_get(i915);
+
+	cancel_delayed_work_sync(&i915->gem.retire_work);
+	flush_work(&i915->gem.idle_work);
+}
+
+static void restore_retire_worker(struct drm_i915_private *i915)
+{
+	intel_gt_pm_put(i915);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	igt_flush_test(i915, I915_WAIT_LOCKED);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	i915_gem_shrinker_register(i915);
+}
+
+static int igt_mmap_offset_exhaustion(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm;
+	struct drm_i915_gem_object *obj;
+	struct drm_mm_node resv, *hole;
+	u64 hole_start, hole_end;
+	int loop, err;
+
+	/* Disable background reaper */
+	disable_retire_worker(i915);
+	GEM_BUG_ON(!i915->gt.awake);
+
+	/* Trim the device mmap space to only a page */
+	memset(&resv, 0, sizeof(resv));
+	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
+		resv.start = hole_start;
+		resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */
+		err = drm_mm_reserve_node(mm, &resv);
+		if (err) {
+			pr_err("Failed to trim VMA manager, err=%d\n", err);
+			goto out_park;
+		}
+		break;
+	}
+
+	/* Just fits! */
+	if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) {
+		pr_err("Unable to insert object into single page hole\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Too large */
+	if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, -ENOSPC)) {
+		pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Fill the hole, further allocation attempts should then fail */
+	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		goto out;
+	}
+
+	err = create_mmap_offset(obj);
+	if (err) {
+		pr_err("Unable to insert object into reclaimed hole\n");
+		goto err_obj;
+	}
+
+	if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) {
+		pr_err("Unexpectedly succeeded in inserting object into no holes!\n");
+		err = -EINVAL;
+		goto err_obj;
+	}
+
+	i915_gem_object_put(obj);
+
+	/* Now fill with busy dead objects that we expect to reap */
+	for (loop = 0; loop < 3; loop++) {
+		if (i915_terminally_wedged(i915))
+			break;
+
+		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+		if (IS_ERR(obj)) {
+			err = PTR_ERR(obj);
+			goto out;
+		}
+
+		mutex_lock(&i915->drm.struct_mutex);
+		err = make_obj_busy(obj);
+		mutex_unlock(&i915->drm.struct_mutex);
+		if (err) {
+			pr_err("[loop %d] Failed to busy the object\n", loop);
+			goto err_obj;
+		}
+
+		/* NB we rely on the _active_ reference to access obj now */
+		GEM_BUG_ON(!i915_gem_object_is_active(obj));
+		err = create_mmap_offset(obj);
+		if (err) {
+			pr_err("[loop %d] create_mmap_offset failed with err=%d\n",
+			       loop, err);
+			goto out;
+		}
+	}
+
+out:
+	drm_mm_remove_node(&resv);
+out_park:
+	restore_retire_worker(i915);
+	return err;
+err_obj:
+	i915_gem_object_put(obj);
+	goto out;
+}
+
+int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_partial_tiling),
+		SUBTEST(igt_mmap_offset_exhaustion),
+	};
+
+	return i915_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
new file mode 100644
index 000000000000..2b6db6f799de
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
@@ -0,0 +1,99 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+
+#include "huge_gem_object.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/mock_gem_device.h"
+
+static int igt_gem_object(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	int err = -ENOMEM;
+
+	/* Basic test to ensure we can create an object */
+
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		pr_err("i915_gem_object_create failed, err=%d\n", err);
+		goto out;
+	}
+
+	err = 0;
+	i915_gem_object_put(obj);
+out:
+	return err;
+}
+
+static int igt_gem_huge(void *arg)
+{
+	const unsigned int nreal = 509; /* just to be awkward */
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	unsigned int n;
+	int err;
+
+	/* Basic sanitycheck of our huge fake object allocation */
+
+	obj = huge_gem_object(i915,
+			      nreal * PAGE_SIZE,
+			      i915->ggtt.vm.total + PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	err = i915_gem_object_pin_pages(obj);
+	if (err) {
+		pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
+		       nreal, obj->base.size / PAGE_SIZE, err);
+		goto out;
+	}
+
+	for (n = 0; n < obj->base.size / PAGE_SIZE; n++) {
+		if (i915_gem_object_get_page(obj, n) !=
+		    i915_gem_object_get_page(obj, n % nreal)) {
+			pr_err("Page lookup mismatch at index %u [%u]\n",
+			       n, n % nreal);
+			err = -EINVAL;
+			goto out_unpin;
+		}
+	}
+
+out_unpin:
+	i915_gem_object_unpin_pages(obj);
+out:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+int i915_gem_object_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_gem_object),
+	};
+	struct drm_i915_private *i915;
+	int err;
+
+	i915 = mock_gem_device();
+	if (!i915)
+		return -ENOMEM;
+
+	err = i915_subtests(tests, i915);
+
+	drm_dev_put(&i915->drm);
+	return err;
+}
+
+int i915_gem_object_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_gem_huge),
+	};
+
+	return i915_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
new file mode 100644
index 000000000000..e23d8c9e9298
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+
+#include "selftests/igt_flush_test.h"
+#include "selftests/mock_drm.h"
+#include "mock_context.h"
+
+static int igt_fill_blt(void *arg)
+{
+	struct intel_context *ce = arg;
+	struct drm_i915_private *i915 = ce->gem_context->i915;
+	struct drm_i915_gem_object *obj;
+	struct rnd_state prng;
+	IGT_TIMEOUT(end);
+	u32 *vaddr;
+	int err = 0;
+
+	prandom_seed_state(&prng, i915_selftest.random_seed);
+
+	do {
+		u32 sz = prandom_u32_state(&prng) % SZ_32M;
+		u32 val = prandom_u32_state(&prng);
+		u32 i;
+
+		sz = round_up(sz, PAGE_SIZE);
+
+		pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val);
+
+		obj = i915_gem_object_create_internal(i915, sz);
+		if (IS_ERR(obj)) {
+			err = PTR_ERR(obj);
+			goto err_flush;
+		}
+
+		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+		if (IS_ERR(vaddr)) {
+			err = PTR_ERR(vaddr);
+			goto err_put;
+		}
+
+		/*
+		 * Make sure the potentially async clflush does its job, if
+		 * required.
+		 */
+		memset32(vaddr, val ^ 0xdeadbeaf, obj->base.size / sizeof(u32));
+
+		if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+			obj->cache_dirty = true;
+
+		mutex_lock(&i915->drm.struct_mutex);
+		err = i915_gem_object_fill_blt(obj, ce, val);
+		mutex_unlock(&i915->drm.struct_mutex);
+		if (err)
+			goto err_unpin;
+
+		i915_gem_object_lock(obj);
+		err = i915_gem_object_set_to_cpu_domain(obj, false);
+		i915_gem_object_unlock(obj);
+		if (err)
+			goto err_unpin;
+
+		for (i = 0; i < obj->base.size / sizeof(u32); ++i) {
+			if (vaddr[i] != val) {
+				pr_err("vaddr[%u]=%x, expected=%x\n", i,
+				       vaddr[i], val);
+				err = -EINVAL;
+				goto err_unpin;
+			}
+		}
+
+		i915_gem_object_unpin_map(obj);
+		i915_gem_object_put(obj);
+	} while (!time_after(jiffies, end));
+
+	goto err_flush;
+
+err_unpin:
+	i915_gem_object_unpin_map(obj);
+err_put:
+	i915_gem_object_put(obj);
+err_flush:
+	mutex_lock(&i915->drm.struct_mutex);
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	if (err == -ENOMEM)
+		err = 0;
+
+	return err;
+}
+
+int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_fill_blt),
+	};
+
+	if (i915_terminally_wedged(i915))
+		return 0;
+
+	if (!HAS_ENGINE(i915, BCS0))
+		return 0;
+
+	return i915_subtests(tests, i915->engine[BCS0]->kernel_context);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
new file mode 100644
index 000000000000..94a15e3f6db8
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
@@ -0,0 +1,80 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+
+#include "selftests/mock_gem_device.h"
+
+static int mock_phys_object(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	int err;
+
+	/* Create an object and bind it to a contiguous set of physical pages,
+	 * i.e. exercise the i915_gem_object_phys API.
+	 */
+
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		pr_err("i915_gem_object_create failed, err=%d\n", err);
+		goto out;
+	}
+
+	mutex_lock(&i915->drm.struct_mutex);
+	err = i915_gem_object_attach_phys(obj, PAGE_SIZE);
+	mutex_unlock(&i915->drm.struct_mutex);
+	if (err) {
+		pr_err("i915_gem_object_attach_phys failed, err=%d\n", err);
+		goto out_obj;
+	}
+
+	if (obj->ops != &i915_gem_phys_ops) {
+		pr_err("i915_gem_object_attach_phys did not create a phys object\n");
+		err = -EINVAL;
+		goto out_obj;
+	}
+
+	if (!atomic_read(&obj->mm.pages_pin_count)) {
+		pr_err("i915_gem_object_attach_phys did not pin its phys pages\n");
+		err = -EINVAL;
+		goto out_obj;
+	}
+
+	/* Make the object dirty so that put_pages must do copy back the data */
+	i915_gem_object_lock(obj);
+	err = i915_gem_object_set_to_gtt_domain(obj, true);
+	i915_gem_object_unlock(obj);
+	if (err) {
+		pr_err("i915_gem_object_set_to_gtt_domain failed with err=%d\n",
+		       err);
+		goto out_obj;
+	}
+
+out_obj:
+	i915_gem_object_put(obj);
+out:
+	return err;
+}
+
+int i915_gem_phys_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(mock_phys_object),
+	};
+	struct drm_i915_private *i915;
+	int err;
+
+	i915 = mock_gem_device();
+	if (!i915)
+		return -ENOMEM;
+
+	err = i915_subtests(tests, i915);
+
+	drm_dev_put(&i915->drm);
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
new file mode 100644
index 000000000000..b232e6d2cd92
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
@@ -0,0 +1,34 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "igt_gem_utils.h"
+
+#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_context.h"
+
+#include "i915_request.h"
+
+struct i915_request *
+igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
+{
+	struct intel_context *ce;
+	struct i915_request *rq;
+
+	/*
+	 * Pinning the contexts may generate requests in order to acquire
+	 * GGTT space, so do this first before we reserve a seqno for
+	 * ourselves.
+	 */
+	ce = i915_gem_context_get_engine(ctx, engine->id);
+	if (IS_ERR(ce))
+		return ERR_CAST(ce);
+
+	rq = intel_context_create_request(ce);
+	intel_context_put(ce);
+
+	return rq;
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h
new file mode 100644
index 000000000000..0f17251cf75d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h
@@ -0,0 +1,17 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#ifndef __IGT_GEM_UTILS_H__
+#define __IGT_GEM_UTILS_H__
+
+struct i915_request;
+struct i915_gem_context;
+struct intel_engine_cs;
+
+struct i915_request *
+igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine);
+
+#endif /* __IGT_GEM_UTILS_H__ */
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
new file mode 100644
index 000000000000..be8974ccff24
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -0,0 +1,111 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "mock_context.h"
+#include "selftests/mock_gtt.h"
+
+struct i915_gem_context *
+mock_context(struct drm_i915_private *i915,
+	     const char *name)
+{
+	struct i915_gem_context *ctx;
+	struct i915_gem_engines *e;
+	int ret;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return NULL;
+
+	kref_init(&ctx->ref);
+	INIT_LIST_HEAD(&ctx->link);
+	ctx->i915 = i915;
+
+	mutex_init(&ctx->engines_mutex);
+	e = default_engines(ctx);
+	if (IS_ERR(e))
+		goto err_free;
+	RCU_INIT_POINTER(ctx->engines, e);
+
+	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
+	INIT_LIST_HEAD(&ctx->hw_id_link);
+	mutex_init(&ctx->mutex);
+
+	ret = i915_gem_context_pin_hw_id(ctx);
+	if (ret < 0)
+		goto err_engines;
+
+	if (name) {
+		struct i915_ppgtt *ppgtt;
+
+		ctx->name = kstrdup(name, GFP_KERNEL);
+		if (!ctx->name)
+			goto err_put;
+
+		ppgtt = mock_ppgtt(i915, name);
+		if (!ppgtt)
+			goto err_put;
+
+		__set_ppgtt(ctx, &ppgtt->vm);
+		i915_vm_put(&ppgtt->vm);
+	}
+
+	return ctx;
+
+err_engines:
+	free_engines(rcu_access_pointer(ctx->engines));
+err_free:
+	kfree(ctx);
+	return NULL;
+
+err_put:
+	i915_gem_context_set_closed(ctx);
+	i915_gem_context_put(ctx);
+	return NULL;
+}
+
+void mock_context_close(struct i915_gem_context *ctx)
+{
+	context_close(ctx);
+}
+
+void mock_init_contexts(struct drm_i915_private *i915)
+{
+	init_contexts(i915);
+}
+
+struct i915_gem_context *
+live_context(struct drm_i915_private *i915, struct drm_file *file)
+{
+	struct i915_gem_context *ctx;
+	int err;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	ctx = i915_gem_create_context(i915, 0);
+	if (IS_ERR(ctx))
+		return ctx;
+
+	err = gem_context_register(ctx, file->driver_priv);
+	if (err < 0)
+		goto err_ctx;
+
+	return ctx;
+
+err_ctx:
+	context_close(ctx);
+	return ERR_PTR(err);
+}
+
+struct i915_gem_context *
+kernel_context(struct drm_i915_private *i915)
+{
+	return i915_gem_context_create_kernel(i915, I915_PRIORITY_NORMAL);
+}
+
+void kernel_context_close(struct i915_gem_context *ctx)
+{
+	context_close(ctx);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.h b/drivers/gpu/drm/i915/gem/selftests/mock_context.h
new file mode 100644
index 000000000000..0b926653914f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.h
@@ -0,0 +1,24 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __MOCK_CONTEXT_H
+#define __MOCK_CONTEXT_H
+
+void mock_init_contexts(struct drm_i915_private *i915);
+
+struct i915_gem_context *
+mock_context(struct drm_i915_private *i915,
+	     const char *name);
+
+void mock_context_close(struct i915_gem_context *ctx);
+
+struct i915_gem_context *
+live_context(struct drm_i915_private *i915, struct drm_file *file);
+
+struct i915_gem_context *kernel_context(struct drm_i915_private *i915);
+void kernel_context_close(struct i915_gem_context *ctx);
+
+#endif /* !__MOCK_CONTEXT_H */
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
new file mode 100644
index 000000000000..b9e059d4328a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
@@ -0,0 +1,144 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "mock_dmabuf.h"
+
+static struct sg_table *mock_map_dma_buf(struct dma_buf_attachment *attachment,
+					 enum dma_data_direction dir)
+{
+	struct mock_dmabuf *mock = to_mock(attachment->dmabuf);
+	struct sg_table *st;
+	struct scatterlist *sg;
+	int i, err;
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return ERR_PTR(-ENOMEM);
+
+	err = sg_alloc_table(st, mock->npages, GFP_KERNEL);
+	if (err)
+		goto err_free;
+
+	sg = st->sgl;
+	for (i = 0; i < mock->npages; i++) {
+		sg_set_page(sg, mock->pages[i], PAGE_SIZE, 0);
+		sg = sg_next(sg);
+	}
+
+	if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) {
+		err = -ENOMEM;
+		goto err_st;
+	}
+
+	return st;
+
+err_st:
+	sg_free_table(st);
+err_free:
+	kfree(st);
+	return ERR_PTR(err);
+}
+
+static void mock_unmap_dma_buf(struct dma_buf_attachment *attachment,
+			       struct sg_table *st,
+			       enum dma_data_direction dir)
+{
+	dma_unmap_sg(attachment->dev, st->sgl, st->nents, dir);
+	sg_free_table(st);
+	kfree(st);
+}
+
+static void mock_dmabuf_release(struct dma_buf *dma_buf)
+{
+	struct mock_dmabuf *mock = to_mock(dma_buf);
+	int i;
+
+	for (i = 0; i < mock->npages; i++)
+		put_page(mock->pages[i]);
+
+	kfree(mock);
+}
+
+static void *mock_dmabuf_vmap(struct dma_buf *dma_buf)
+{
+	struct mock_dmabuf *mock = to_mock(dma_buf);
+
+	return vm_map_ram(mock->pages, mock->npages, 0, PAGE_KERNEL);
+}
+
+static void mock_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
+{
+	struct mock_dmabuf *mock = to_mock(dma_buf);
+
+	vm_unmap_ram(vaddr, mock->npages);
+}
+
+static void *mock_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num)
+{
+	struct mock_dmabuf *mock = to_mock(dma_buf);
+
+	return kmap(mock->pages[page_num]);
+}
+
+static void mock_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr)
+{
+	struct mock_dmabuf *mock = to_mock(dma_buf);
+
+	return kunmap(mock->pages[page_num]);
+}
+
+static int mock_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
+{
+	return -ENODEV;
+}
+
+static const struct dma_buf_ops mock_dmabuf_ops =  {
+	.map_dma_buf = mock_map_dma_buf,
+	.unmap_dma_buf = mock_unmap_dma_buf,
+	.release = mock_dmabuf_release,
+	.map = mock_dmabuf_kmap,
+	.unmap = mock_dmabuf_kunmap,
+	.mmap = mock_dmabuf_mmap,
+	.vmap = mock_dmabuf_vmap,
+	.vunmap = mock_dmabuf_vunmap,
+};
+
+static struct dma_buf *mock_dmabuf(int npages)
+{
+	struct mock_dmabuf *mock;
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+	struct dma_buf *dmabuf;
+	int i;
+
+	mock = kmalloc(sizeof(*mock) + npages * sizeof(struct page *),
+		       GFP_KERNEL);
+	if (!mock)
+		return ERR_PTR(-ENOMEM);
+
+	mock->npages = npages;
+	for (i = 0; i < npages; i++) {
+		mock->pages[i] = alloc_page(GFP_KERNEL);
+		if (!mock->pages[i])
+			goto err;
+	}
+
+	exp_info.ops = &mock_dmabuf_ops;
+	exp_info.size = npages * PAGE_SIZE;
+	exp_info.flags = O_CLOEXEC;
+	exp_info.priv = mock;
+
+	dmabuf = dma_buf_export(&exp_info);
+	if (IS_ERR(dmabuf))
+		goto err;
+
+	return dmabuf;
+
+err:
+	while (i--)
+		put_page(mock->pages[i]);
+	kfree(mock);
+	return ERR_PTR(-ENOMEM);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
new file mode 100644
index 000000000000..f0f8bbd82dfc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
@@ -0,0 +1,22 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __MOCK_DMABUF_H__
+#define __MOCK_DMABUF_H__
+
+#include <linux/dma-buf.h>
+
+struct mock_dmabuf {
+	int npages;
+	struct page *pages[];
+};
+
+static struct mock_dmabuf *to_mock(struct dma_buf *buf)
+{
+	return buf->priv;
+}
+
+#endif /* !__MOCK_DMABUF_H__ */
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
new file mode 100644
index 000000000000..370360b4a148
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
@@ -0,0 +1,14 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __MOCK_GEM_OBJECT_H__
+#define __MOCK_GEM_OBJECT_H__
+
+struct mock_object {
+	struct drm_i915_gem_object base;
+};
+
+#endif /* !__MOCK_GEM_OBJECT_H__ */